1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
5
6 This file is part of the m17n library.
7
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
12
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301 USA. */
22
23 /***en
24 @addtogroup m17nConv
25 @brief Coding system objects and API for them.
26
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
30
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
33
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
37
/***ja
    @addtogroup m17nConv
    @brief コード系オブジェクトとそれに関する API.

    m17n ライブラリは、符号化文字集合 (coded character set; CCS)
    の文字符号化方式 (character encoding scheme; CES) を @e コード系
    と呼ぶオブジェクトで表現する。
    アプリケーションプログラムは独自にコード系を追加することもできる。

    コードポイントから文字コードへの変換を @e エンコード
    と呼び、文字コードからコードポイントへの変換を @e デコード と呼ぶ。

    アプリケーションプログラムは、指定されたコード系でバイト列をデコードすることによって
    M-text を得ることができる。また逆に、指定されたコード系で M-text
    をエンコードすることによってバイト列を得ることができる。  */
53
54 /*=*/
55
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
58 @{ */
59
60 #include <config.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <ctype.h>
64 #include <string.h>
65 #include <sys/types.h>
66 #include <unistd.h>
67 #include <errno.h>
68
69 #include "m17n.h"
70 #include "m17n-misc.h"
71 #include "internal.h"
72 #include "plist.h"
73 #include "character.h"
74 #include "charset.h"
75 #include "coding.h"
76 #include "mtext.h"
77 #include "symbol.h"
78 #include "mlocale.h"
79
80 #define NUM_SUPPORTED_CHARSETS 32
81
/** Structure for coding system object.  */

typedef struct
{
  /** Name of the coding system.  */
  MSymbol name;

  /** Type of the coding system.  */
  MSymbol type;

  /** Number of supported charsets, i.e. how many leading elements of
      <charsets> are in use.  */
  int ncharsets;

  /** Array of supported charsets.  */
  MCharset *charsets[NUM_SUPPORTED_CHARSETS];

  /** If non-NULL, function to call at the time of creating and
      resetting a converter.  */
  int (*resetter) (MConverter *converter);

  /** Function that decodes the STR_BYTES bytes at STR into the M-text
      MT on behalf of CONVERTER.  */
  int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
                  MConverter *converter);

  /** Function that encodes the characters of MT between FROM and TO
      into the STR_BYTES bytes at STR on behalf of CONVERTER.  */
  int (*encoder) (MText *mt, int from, int to,
                  unsigned char *str, int str_bytes,
                  MConverter *converter);

  /** If non-zero, the coding system decode/encode ASCII characters as
      is.  */
  int ascii_compatible;

  /** Pointer to extra information given when the coding system is
      defined.  The meaning depends on <type>.  */
  void *extra_info;

  /** Pointer to information referred on conversion.  The meaning
      depends on <type>.  The value NULL means that the coding system
      is not yet setup.  */
  void *extra_spec;

  /** Set to 1 by the resetter functions once the type specific setup
      (which fills <extra_spec>) has succeeded, so that the setup is
      done only once.  */
  int ready;
} MCodingSystem;
124
/** Growable table of pointers to coding systems.
    NOTE(review): SIZE, INC and USED look like the allocated capacity,
    the growth increment and the number of slots in use -- confirm
    against the code that fills <codings>.  */

struct MCodingList
{
  int size, inc, used;
  MCodingSystem **codings;
};

/** The table of coding systems of this module.  */
static struct MCodingList coding_list;

/** Plist of coding system definitions.
    NOTE(review): presumably definitions that are not yet turned into
    MCodingSystem objects -- confirm against the code that uses it.  */
static MPlist *coding_definition_list;
134
/** Private state of a converter; MConverter::internal_info is cast to
    a pointer to this structure by the functions of this file.  */

typedef struct {
  /**en
      Pointer to a structure of a coding system.  */
  MCodingSystem *coding;

  /**en
      Buffer for carryover bytes generated while decoding.  */
  unsigned char carryover[256];

  /**en
      Number of carryover bytes.  */
  int carryover_bytes;

  /**en
      Beginning of the byte sequence bound to this converter.  <in>
      and <out> are the read-only and the writable view of the same
      pointer.  */
  union {
    const unsigned char *in;
    unsigned char *out;
  } buf;

  /**en
      Size of buf.  */
  int bufsize;

  /**en
      Number of bytes already consumed in buf.  */
  int used;

  /**en
      Stream bound to this converter.  */
  FILE *fp;

  /**en
      Which of the above two (the buffer <buf> or the stream <fp>) is
      in use.  */
  int binding;

  /**en
      Buffer for unget.  */
  MText *unread;

  /**en
      Working area.  */
  MText *work_mt;

  /**en
      NOTE(review): presumably non-zero when <fp> can be repositioned
      -- confirm against the code that sets it.  */
  int seekable;
} MConverterStatus;
201
202
203
204 /* Local macros and functions. */
205
/** Start reading a new character: fetch its first byte into C.

    At first, set SRC_BASE to SRC.  Then check if we have already
    produced AT_MOST chars.  If so, set SRC_END to SRC, and jump to
    the label source_end.  Otherwise, get one more byte C from SRC.
    When SRC has reached SRC_STOP without being at SRC_END, reading
    continues at SOURCE (SRC_BASE is moved there too and SRC_STOP
    becomes SRC_END).  If no byte is left (SRC == SRC_END), jump to
    the label source_end.

    Uses the variables src, src_base, src_stop, src_end, source,
    nchars and at_most, and the label source_end, of the enclosing
    function.  */

#define ONE_MORE_BASE_BYTE(c)           \
  do {                                  \
    src_base = src;                     \
    if (nchars == at_most)              \
      {                                 \
        src_end = src;                  \
        goto source_end;                \
      }                                 \
    if (src == src_stop)                \
      {                                 \
        if (src == src_end)             \
          goto source_end;              \
        src_base = src = source;        \
        if (src == src_end)             \
          goto source_end;              \
        src_stop = src_end;             \
      }                                 \
    (c) = *src++;                       \
  } while (0)
230
231
/** Get one more byte C from SRC without touching SRC_BASE.  When SRC
    has reached SRC_STOP without being at SRC_END, reading continues
    at SOURCE and SRC_STOP becomes SRC_END.  If no byte is left
    (SRC == SRC_END), jump to the label source_end.

    Uses the variables src, src_stop, src_end and source, and the
    label source_end, of the enclosing function.  */

#define ONE_MORE_BYTE(c)                \
  do {                                  \
    if (src == src_stop)                \
      {                                 \
        if (src == src_end)             \
          goto source_end;              \
        src = source;                   \
        if (src == src_end)             \
          goto source_end;              \
        src_stop = src_end;             \
      }                                 \
    (c) = *src++;                       \
  } while (0)
248
249
/** Move SRC back to SRC_BASE, i.e. to the first byte of the character
    being read.  If SRC_BASE does not point into the range from SOURCE
    up to (not including) SRC_END, SRC_STOP is reset to the end of the
    carryover bytes of INTERNAL (presumably SRC_BASE points into
    internal->carryover in that case).

    Uses the variables src, src_base, src_stop, src_end, source and
    internal of the enclosing function.  */

#define REWIND_SRC_TO_BASE()                                            \
  do {                                                                  \
    if (src_base < source || src_base >= src_end)                       \
      src_stop = internal->carryover + internal->carryover_bytes;       \
    src = src_base;                                                     \
  } while (0)
256
257
/** Push back byte C to SRC.  If SRC is beyond the head of SOURCE,
    simply step SRC back by one byte.  Otherwise store C as the only
    carryover byte of INTERNAL and make SRC and SRC_STOP delimit that
    single byte.

    Uses the variables src, src_stop, source and internal of the
    enclosing function.  The expansion has no trailing semicolon, so
    that `UNGET_ONE_BYTE (c);' is exactly one statement and can be
    used as the body of an unbraced if/else.  */

#define UNGET_ONE_BYTE(c)                       \
  do {                                          \
    if (src > source)                           \
      src--;                                    \
    else                                        \
      {                                         \
        internal->carryover[0] = (c);           \
        internal->carryover_bytes = 1;          \
        src = internal->carryover;              \
        src_stop = src + 1;                     \
      }                                         \
  } while (0)
272
273
/** Store multibyte representation of character C at DST and increment
    DST to the next of the produced bytes.  DST must be a pointer to
    data area of M-text MT.  If the produced bytes (plus one spare
    byte) are going to exceed DST_END, enlarge the data area of MT by
    the size of C and the number of bytes between SRC and SRC_STOP,
    and recompute DST and DST_END.  NCHARS is incremented.

    Uses the variables dst, dst_end, mt, src, src_stop and nchars of
    the enclosing function.  C is evaluated more than once.  */

#define EMIT_CHAR(c)                                            \
  do {                                                          \
    int bytes = CHAR_BYTES (c);                                 \
    int len;                                                    \
                                                                \
    if (dst + bytes + 1 > dst_end)                              \
      {                                                         \
        len = dst - mt->data;                                   \
        bytes = mt->allocated + bytes + (src_stop - src);       \
        mtext__enlarge (mt, bytes);                             \
        dst = mt->data + len;                                   \
        dst_end = mt->data + mt->allocated;                     \
      }                                                         \
    dst += CHAR_STRING (c, dst);                                \
    nchars++;                                                   \
  } while (0)
295
296
/** Check if there is enough room to produce LEN bytes at DST.  If not,
    go to the label insufficient_destination.

    Uses the variables dst and dst_end, and the label
    insufficient_destination, of the enclosing function.  */

#define CHECK_DST(len)                  \
  do {                                  \
    if (dst + (len) > dst_end)          \
      goto insufficient_destination;    \
  } while (0)
305
306
/** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
    (MT->data + MT->nbytes) into MT, and put charset property on
    them with CHARSET->name.  Nothing is done when NUM_CHARS is not
    positive, and no property is put when CHARSET is NULL.

    NUM_CHARS is evaluated once; MT and CHARSET may be evaluated more
    than once.  */

#define TAKEIN_CHARS(mt, num_chars, num_bytes, charset)                 \
  do {                                                                  \
    int chars = (num_chars);                                            \
                                                                        \
    if (chars > 0)                                                      \
      {                                                                 \
        mtext__takein ((mt), chars, (num_bytes));                       \
        if (charset)                                                    \
          mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars,     \
                          Mcharset, (void *) ((charset)->name));        \
      }                                                                 \
  } while (0)
323
324
/** Set SRC and SRC_END to the addresses, within the data area of
    M-text MT, of the characters at positions FROM and TO.  FORMAT is
    the format of MT and selects the unit size: bytes for formats up
    to MTEXT_FORMAT_UTF_8, `short' for formats up to
    MTEXT_FORMAT_UTF_16BE, and `int' otherwise.

    Uses the variables src and src_end of the enclosing function.
    All arguments are parenthesized in the expansion so that
    expressions such as `from + 1' can be passed safely.  */

#define SET_SRC(mt, format, from, to)                                   \
  do {                                                                  \
    if ((format) <= MTEXT_FORMAT_UTF_8)                                 \
      {                                                                 \
        src = (mt)->data + POS_CHAR_TO_BYTE ((mt), (from));             \
        src_end = (mt)->data + POS_CHAR_TO_BYTE ((mt), (to));           \
      }                                                                 \
    else if ((format) <= MTEXT_FORMAT_UTF_16BE)                         \
      {                                                                 \
        src                                                             \
          = ((mt)->data                                                 \
             + (sizeof (short)) * POS_CHAR_TO_BYTE ((mt), (from)));     \
        src_end                                                         \
          = ((mt)->data                                                 \
             + (sizeof (short)) * POS_CHAR_TO_BYTE ((mt), (to)));       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        src = (mt)->data + (sizeof (int)) * (from);                     \
        src_end = (mt)->data + (sizeof (int)) * (to);                   \
      }                                                                 \
  } while (0)
345
346
/** Get the next character of the M-text being encoded into C, and set
    BYTES to the size it occupies in the data area of the M-text.  If
    SRC == SRC_END, jump to the label finish.  FORMAT is the format of
    the M-text.

    For formats up to MTEXT_FORMAT_UTF_8 the character is read at SRC.
    For the other formats it is fetched by character position, and
    the variable `from' of the enclosing function is INCREMENTED as a
    side effect.  In no case is SRC advanced; the caller has to add
    BYTES to SRC itself.

    Uses the variables src, src_end, mt and from, and the label
    finish, of the enclosing function.  */

#define ONE_MORE_CHAR(c, bytes, format)                         \
  do {                                                          \
    if (src == src_end)                                         \
      goto finish;                                              \
    if (format <= MTEXT_FORMAT_UTF_8)                           \
      c = STRING_CHAR_AND_BYTES (src, bytes);                   \
    else if (format <= MTEXT_FORMAT_UTF_16BE)                   \
      {                                                         \
        c = mtext_ref_char (mt, from++);                        \
        bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c);        \
      }                                                         \
    else                                                        \
      {                                                         \
        c = ((unsigned *) (mt->data))[from++];                  \
        bytes = sizeof (int);                                   \
      }                                                         \
  } while (0)
364
365
366 static int
encode_unsupporeted_char(int c,unsigned char * dst,unsigned char * dst_end,MText * mt,int pos)367 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
368 MText *mt, int pos)
369 {
370 int len;
371 char *format;
372
373 len = c < 0x10000 ? 8 : 10;
374 if (dst + len > dst_end)
375 return 0;
376
377 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
378 format = (c < 0xD800 ? "<U+%04X>"
379 : c < 0xE000 ? "<M+%04X>"
380 : c < 0x10000 ? "<U+%04X>"
381 : c < 0x110000 ? "<U+%06X>"
382 : "<M+%06X>");
383 sprintf ((char *) dst, format, c);
384 return len;
385 }
386
387
388
/** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
    characters by CONVERTER into M-text MT.  SRC is a pointer to the
    not-yet processed bytes; it may point outside of SOURCE..SRC_END
    (the callers start reading from the carryover buffer of the
    converter).  ERROR is 1 iff an invalid byte was found.

    Unprocessed bytes are handled as follows:
    - none left: the carryover buffer is emptied;
    - ERROR is set, or this is the last block of a non-lenient
      converter: the result becomes MCONVERSION_RESULT_INVALID_BYTE;
    - not the last block: the bytes are saved in the carryover buffer
      and the result becomes MCONVERSION_RESULT_INSUFFICIENT_SRC;
    - last block of a lenient converter: each byte is appended to MT
      as one character with the charset property of mcharset__binary.

    CONVERTER->nchars and CONVERTER->nbytes are updated.  Return -1
    if the result is MCONVERSION_RESULT_INVALID_BYTE, otherwise 0.  */

static int
finish_decoding (MText *mt, MConverter *converter, int nchars,
                 const unsigned char *source, const unsigned char *src_end,
                 const unsigned char *src,
                 int error)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;

  if (src == src_end)
    internal->carryover_bytes = 0;
  else if (error
           || (converter->last_block
               && ! converter->lenient))
    converter->result = MCONVERSION_RESULT_INVALID_BYTE;
  else if (! converter->last_block)
    {
      unsigned char *dst = internal->carryover;

      /* SRC outside of SOURCE..SRC_END: keep the bytes already in the
         carryover buffer and append the whole of SOURCE to them.  */
      if (src < source || src > src_end)
        {
          dst += internal->carryover_bytes;
          src = source;
        }
      while (src < src_end)
        *dst++ = *src++;
      internal->carryover_bytes = dst - internal->carryover;
      converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
    }
  else
    {
      unsigned char *dst = mt->data + mt->nbytes;
      unsigned char *dst_end = mt->data + mt->allocated;
      const unsigned char *src_stop = src_end;
      int c;
      int last_nchars = nchars;

      if (src < source || src > src_end)
        src_stop = internal->carryover + internal->carryover_bytes;
      while (1)
        {
          if (converter->at_most && nchars == converter->at_most)
            break;
          /* Same switch from the carryover bytes to SOURCE as in
             ONE_MORE_BYTE, but leaving the loop instead of jumping.  */
          if (src == src_stop)
            {
              if (src == src_end)
                break;
              src = source;
              if (src == src_end)
                break;
              src_stop = src_end;
            }
          c = *src++;
          EMIT_CHAR (c);
        }
      TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
                    mcharset__binary);
      internal->carryover_bytes = 0;
    }

  converter->nchars += nchars;
  /* Only bytes consumed from SOURCE itself are counted.  */
  converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
  return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
}
457
458
459
/* Stuff for coding-systems of type MCODING_TYPE_CHARSET.  */
461
462 static int
setup_coding_charset(MCodingSystem * coding)463 setup_coding_charset (MCodingSystem *coding)
464 {
465 int ncharsets = coding->ncharsets;
466 unsigned *code_charset_table;
467
468 if (ncharsets > 1)
469 {
470 /* At first, reorder charset list by dimensions (a charset of
471 smaller dimension comes first). As the number of charsets is
472 usually very small (at most 32), we do a simple sort. */
473 MCharset **charsets;
474 int idx = 0;
475 int i, j;
476
477 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
478 memcpy (charsets, coding->charsets,
479 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
480 for (i = 0; i < 4; i++)
481 for (j = 0; j < ncharsets; j++)
482 if (charsets[j]->dimension == i)
483 coding->charsets[idx++] = charsets[j];
484 }
485
486 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
487 while (ncharsets--)
488 {
489 int dim = coding->charsets[ncharsets]->dimension;
490 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
491 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
492
493 if (coding->charsets[ncharsets]->ascii_compatible)
494 coding->ascii_compatible = 1;
495 while (from <= to)
496 code_charset_table[from++] |= 1 << ncharsets;
497 }
498
499 coding->extra_spec = (void *) code_charset_table;
500 return 0;
501 }
502
503 static int
reset_coding_charset(MConverter * converter)504 reset_coding_charset (MConverter *converter)
505 {
506 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
507 MCodingSystem *coding = internal->coding;
508
509 if (! coding->ready
510 && setup_coding_charset (coding) < 0)
511 return -1;
512 coding->ready = 1;
513 return 0;
514 }
515
/** Decoder for coding systems of type MCODING_TYPE_CHARSET.

    Decode the SRC_BYTES bytes at SOURCE, preceded by the carryover
    bytes kept in CONVERTER, and append the resulting characters to
    the M-text MT.  Runs of characters decoded by the same charset
    get a charset text property (see TAKEIN_CHARS).  A byte sequence
    that no supported charset can decode stops the decoding, or, if
    CONVERTER is lenient, its first byte is taken as one character of
    mcharset__binary.

    The final bookkeeping is done by finish_decoding, whose value is
    returned.  */

static int
decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
                       MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  MCodingSystem *coding = internal->coding;
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. it means "no limit".  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;

  unsigned *code_charset_table = (unsigned *) coding->extra_spec;
  MCharset **charsets = coding->charsets;
  MCharset *charset = mcharset__ascii;
  int error = 0;

  while (1)
    {
      MCharset *this_charset = NULL;
      int c;
      unsigned mask;

      ONE_MORE_BASE_BYTE (c);
      /* Bit N of MASK is set iff the Nth charset accepts this byte
         (see setup_coding_charset).  */
      mask = code_charset_table[c];
      if (mask)
        {
          int idx = 0;
          unsigned code = c;
          int nbytes = 1;
          int dim;

          /* Try the candidate charsets in order.  Bytes already read
             are kept in CODE, so only the missing ones are read for
             a charset of larger dimension.  */
          while (mask)
            {
              while (! (mask & 1)) mask >>= 1, idx++;
              this_charset = charsets[idx];
              dim = this_charset->dimension;
              while (nbytes < dim)
                {
                  ONE_MORE_BYTE (c);
                  code = (code << 8) | c;
                  nbytes++;
                }
              c = DECODE_CHAR (this_charset, code);
              if (c >= 0)
                goto emit_char;
              mask >>= 1, idx++;
            }
        }

      if (! converter->lenient)
        break;
      /* Lenient: take the first byte of the sequence as it is.  */
      REWIND_SRC_TO_BASE ();
      c = *src++;
      this_charset = mcharset__binary;

    emit_char:
      /* On a change of charset (ASCII does not count as a change),
         take in the characters produced so far.  */
      if (this_charset != mcharset__ascii
          && this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
596
597 static int
encode_coding_charset(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)598 encode_coding_charset (MText *mt, int from, int to,
599 unsigned char *destination, int dst_bytes,
600 MConverter *converter)
601 {
602 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
603 MCodingSystem *coding = internal->coding;
604 unsigned char *src, *src_end;
605 unsigned char *dst = destination;
606 unsigned char *dst_end = dst + dst_bytes;
607 int nchars = 0;
608 int ncharsets = coding->ncharsets;
609 MCharset **charsets = coding->charsets;
610 int ascii_compatible = coding->ascii_compatible;
611 enum MTextFormat format = mt->format;
612
613 SET_SRC (mt, format, from, to);
614 while (1)
615 {
616 int c, bytes;
617
618 ONE_MORE_CHAR (c, bytes, format);
619
620 if (c < 0x80 && ascii_compatible)
621 {
622 CHECK_DST (1);
623 *dst++ = c;
624 }
625 else
626 {
627 unsigned code;
628 MCharset *charset = NULL;
629 int i = 0;
630
631 while (1)
632 {
633 charset = charsets[i];
634 code = ENCODE_CHAR (charset, c);
635 if (code != MCHAR_INVALID_CODE)
636 break;
637 if (++i == ncharsets)
638 goto unsupported_char;
639 }
640
641 CHECK_DST (charset->dimension);
642 if (charset->dimension == 1)
643 {
644 *dst++ = code;
645 }
646 else if (charset->dimension == 2)
647 {
648 *dst++ = code >> 8;
649 *dst++ = code & 0xFF;
650 }
651 else if (charset->dimension == 3)
652 {
653 *dst++ = code >> 16;
654 *dst++ = (code >> 8) & 0xFF;
655 *dst++ = code & 0xFF;
656 }
657 else
658 {
659 *dst++ = code >> 24;
660 *dst++ = (code >> 16) & 0xFF;
661 *dst++ = (code >> 8) & 0xFF;
662 *dst++ = code & 0xFF;
663 }
664 }
665 src += bytes;
666 nchars++;
667 continue;
668
669 unsupported_char:
670 {
671 int len;
672
673 if (! converter->lenient)
674 break;
675 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
676 if (len == 0)
677 goto insufficient_destination;
678 dst += len;
679 src += bytes;
680 nchars++;
681 }
682 }
683 /* We reach here because of an unsupported char. */
684 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
685 goto finish;
686
687 insufficient_destination:
688 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
689
690 finish:
691 converter->nchars += nchars;
692 converter->nbytes += dst - destination;
693 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
694 }
695
696
/* Stuff for coding-systems of type MCODING_TYPE_UTF (8).  */
698
/** Return the charset to which the multibyte sequence at P belongs,
    judging from the leading byte and the positions of the following
    character heads (CHAR_HEAD_P):
    - mcharset__unicode for sequences of 1 to 3 bytes, and for
      sequences of 4 bytes whose value does not exceed 0x10FFFF;
    - mcharset__m17n for longer sequences and for sequences of 4
      bytes with a greater value;
    - mcharset__binary if a character head is found where a trailing
      byte is expected, or if the leading byte is 0xFE or 0xFF.

    For a sequence of 4 bytes (11110xxx 10yyzzzz ...), `xxx' and `yy'
    are the five most significant bits of the 21-bit value; they are
    combined with `|' and the value is within Unicode iff the result
    is not greater than 0x10.

    P is evaluated more than once.  */

#define UTF8_CHARSET(p)                                         \
  (! ((p)[0] & 0x80) ? (mcharset__unicode)                      \
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)                    \
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)                    \
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)              \
                            | (((p)[1] & 0x30) >> 4)) <= 0x10)  \
                          ? (mcharset__unicode)                 \
                          : (mcharset__m17n))                   \
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)                       \
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)                       \
   : (mcharset__binary))
715
716
/** Decoder for UTF-8.

    Decode the SRC_BYTES bytes at SOURCE, preceded by the carryover
    bytes kept in CONVERTER, and append the resulting characters to
    the M-text MT.  Sequences of up to 6 bytes are read.  Unless
    CONVERTER is lenient or the first charset of the coding system is
    mcharset__m17n, only values below 0xD800 or in the range
    0xE000..0x10FFFF are accepted.  An invalid sequence stops the
    decoding, or, if CONVERTER is lenient, its first byte is taken as
    one character of mcharset__binary.

    The final bookkeeping is done by finish_decoding, whose value is
    returned.  */

static int
decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
                     MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  MCodingSystem *coding = internal->coding;
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. it means "no limit".  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  int error = 0;
  /* Non-zero means that any decoded value is accepted.  */
  int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
  MCharset *charset = NULL;

  while (1)
    {
      int c, c1, bytes;
      MCharset *this_charset = NULL;

      ONE_MORE_BASE_BYTE (c);

      /* The leading byte tells the length of the sequence; keep only
         its payload bits in C.  */
      if (!(c & 0x80))
        bytes = 1;
      else if (!(c & 0x40))
        goto invalid_byte;
      else if (!(c & 0x20))
        bytes = 2, c &= 0x1F;
      else if (!(c & 0x10))
        bytes = 3, c &= 0x0F;
      else if (!(c & 0x08))
        bytes = 4, c &= 0x07;
      else if (!(c & 0x04))
        bytes = 5, c &= 0x03;
      else if (!(c & 0x02))
        bytes = 6, c &= 0x01;
      else
        goto invalid_byte;

      /* Each trailing byte must be 10xxxxxx and gives 6 bits.  */
      while (bytes-- > 1)
        {
          ONE_MORE_BYTE (c1);
          if ((c1 & 0xC0) != 0x80)
            goto invalid_byte;
          c = (c << 6) | (c1 & 0x3F);
        }

      if (full
          || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
        goto emit_char;

    invalid_byte:
      if (! converter->lenient)
        break;
      /* Lenient: take the first byte of the sequence as it is.  */
      REWIND_SRC_TO_BASE ();
      c = *src++;
      this_charset = mcharset__binary;

    emit_char:
      /* THIS_CHARSET is NULL for a decoded character, so the
         characters produced so far are taken in whenever we switch
         between decoded characters and raw bytes.  */
      if (this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
798
799 static int
encode_coding_utf_8(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)800 encode_coding_utf_8 (MText *mt, int from, int to,
801 unsigned char *destination, int dst_bytes,
802 MConverter *converter)
803 {
804 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
805 MCodingSystem *coding = internal->coding;
806 unsigned char *src, *src_end;
807 unsigned char *dst = destination;
808 unsigned char *dst_end = dst + dst_bytes;
809 int nchars = 0;
810 enum MTextFormat format = mt->format;
811
812 SET_SRC (mt, format, from, to);
813
814 if (format <= MTEXT_FORMAT_UTF_8
815 && (converter->lenient
816 || coding->charsets[0] == mcharset__m17n))
817 {
818 if (dst_bytes < src_end - src)
819 {
820 int byte_pos = (src + dst_bytes) - mt->data;
821
822 to = POS_BYTE_TO_CHAR (mt, byte_pos);
823 byte_pos = POS_CHAR_TO_BYTE (mt, to);
824 src_end = mt->data + byte_pos;
825 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
826 }
827 memcpy (destination, src, src_end - src);
828 nchars = to - from;
829 dst += src_end - src;
830 goto finish;
831 }
832
833 while (1)
834 {
835 int c, bytes;
836
837 ONE_MORE_CHAR (c, bytes, format);
838
839 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
840 break;
841 CHECK_DST (bytes);
842 dst += CHAR_STRING (c, dst);
843 src += bytes;
844 nchars++;
845 }
846 /* We reach here because of an unsupported char. */
847 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
848 goto finish;
849
850 insufficient_destination:
851 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
852
853 finish:
854 converter->nchars += nchars;
855 converter->nbytes += dst - destination;
856 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
857 }
858
859
/* Stuff for coding-systems of type MCODING_TYPE_UTF (16 & 32).  */
861
/** How a UTF-16/UTF-32 converter treats the byte order mark (BOM).
    The decoders in this file look for a BOM unless the value is
    UTF_BOM_NO; with UTF_BOM_MAYBE a missing BOM is accepted, with
    UTF_BOM_YES it is an error unless the converter is lenient.  The
    encoders produce a BOM unless the value is UTF_BOM_NO.  */

enum utf_bom
  {
    UTF_BOM_MAYBE,
    UTF_BOM_NO,
    UTF_BOM_YES,
    /* Not a real value; presumably the number of values above.  */
    UTF_BOM_MAX
  };
869
/** Byte order of the code units of UTF-16/UTF-32 byte sequences.  */

enum utf_endian
  {
    /* Most significant byte first.  */
    UTF_BIG_ENDIAN,
    /* Least significant byte first.  */
    UTF_LITTLE_ENDIAN,
    /* Not a real value; presumably the number of values above.  */
    UTF_ENDIAN_MAX
  };
876
/** Conversion state of a UTF-16/UTF-32 converter.  It is kept in the
    `status' member of MConverter, and initialized by
    reset_coding_utf.  */

struct utf_status
{
  /* Reset to 0 by reset_coding_utf.
     NOTE(review): not referred to by the decoders and the UTF-16
     encoder in this part of the file -- confirm the usage.  */
  int surrogate;
  /* BOM handling still to be done.  Changed to UTF_BOM_NO once the
     BOM has been looked for (decoding) or produced (encoding).  */
  enum utf_bom bom;
  /* Byte order in use.  The decoders change it according to the BOM
     they read.  */
  enum utf_endian endian;
};
883
884 static int
setup_coding_utf(MCodingSystem * coding)885 setup_coding_utf (MCodingSystem *coding)
886 {
887 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
888 MCodingInfoUTF *spec;
889
890 if (info->code_unit_bits == 8)
891 coding->ascii_compatible = 1;
892 else if (info->code_unit_bits == 16
893 || info->code_unit_bits == 32)
894 {
895 if (info->bom < 0 || info->bom > 2
896 || info->endian < 0 || info->endian > 1)
897 MERROR (MERROR_CODING, -1);
898 }
899 else
900 return -1;
901
902 MSTRUCT_CALLOC (spec, MERROR_CODING);
903 *spec = *info;
904 coding->extra_spec = (void *) (spec);
905 return 0;
906 }
907
908 static int
reset_coding_utf(MConverter * converter)909 reset_coding_utf (MConverter *converter)
910 {
911 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
912 MCodingSystem *coding = internal->coding;
913 struct utf_status *status = (struct utf_status *) &(converter->status);
914
915 if (! coding->ready
916 && setup_coding_utf (coding) < 0)
917 return -1;
918 coding->ready = 1;
919
920 status->surrogate = 0;
921 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
922 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
923 return 0;
924 }
925
/** Decoder for UTF-16.

    Decode the SRC_BYTES bytes at SOURCE, preceded by the carryover
    bytes kept in CONVERTER, and append the resulting characters to
    the M-text MT.

    Unless the BOM state of CONVERTER is UTF_BOM_NO, the first two
    bytes are examined as a BOM which decides the byte order.  If
    they are not a BOM, they are re-read as text in big endian when
    the BOM was optional or CONVERTER is lenient; otherwise it is an
    error.

    A unit in 0xD800..0xDBFF must be followed by a unit in
    0xDC00..0xDFFF; the pair makes one character.  Any other
    surrogate unit is invalid and stops the decoding, or, if
    CONVERTER is lenient, the first unit of the sequence is taken as
    one character of mcharset__binary.

    The final bookkeeping is done by finish_decoding, whose value is
    returned.  */

static int
decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
                      MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. it means "no limit".  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  struct utf_status *status = (struct utf_status *) &(converter->status);
  unsigned char b1, b2;
  MCharset *charset = NULL;
  int error = 0;

  if (status->bom != UTF_BOM_NO)
    {
      int c;

      ONE_MORE_BASE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      c = (b1 << 8) | b2;
      if (c == 0xFEFF)
        status->endian = UTF_BIG_ENDIAN;
      else if (c == 0xFFFE)
        status->endian = UTF_LITTLE_ENDIAN;
      else if (status->bom == UTF_BOM_MAYBE
               || converter->lenient)
        {
          /* No BOM: assume big endian and read the bytes again as
             text.  */
          status->endian = UTF_BIG_ENDIAN;
          REWIND_SRC_TO_BASE ();
        }
      else
        {
          error = 1;
          goto source_end;
        }
      status->bom = UTF_BOM_NO;
    }

  while (1)
    {
      int c, c1;
      MCharset *this_charset = NULL;

      ONE_MORE_BASE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      if (status->endian == UTF_BIG_ENDIAN)
        c = ((b1 << 8) | b2);
      else
        c = ((b2 << 8) | b1);
      if (c < 0xD800 || c >= 0xE000)
        goto emit_char;
      else if (c < 0xDC00)
        {
          /* High surrogate; a low surrogate must follow.  */
          ONE_MORE_BYTE (b1);
          ONE_MORE_BYTE (b2);
          if (status->endian == UTF_BIG_ENDIAN)
            c1 = ((b1 << 8) | b2);
          else
            c1 = ((b2 << 8) | b1);
          if (c1 < 0xDC00 || c1 >= 0xE000)
            goto invalid_byte;
          c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
          goto emit_char;
        }
      /* A low surrogate without a preceding high surrogate falls
         through to here.  */

    invalid_byte:
      if (! converter->lenient)
        break;
      /* Lenient: take the first 16-bit unit of the sequence as it
         is.  */
      REWIND_SRC_TO_BASE ();
      ONE_MORE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      if (status->endian == UTF_BIG_ENDIAN)
        c = ((b1 << 8) | b2);
      else
        c = ((b2 << 8) | b1);
      this_charset = mcharset__binary;

    emit_char:
      /* THIS_CHARSET is NULL for a decoded character, so the
         characters produced so far are taken in whenever we switch
         between decoded characters and raw units.  */
      if (this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
1028
1029
1030 static int
decode_coding_utf_32(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)1031 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1032 MConverter *converter)
1033 {
1034 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1035 const unsigned char *src = internal->carryover;
1036 const unsigned char *src_stop = src + internal->carryover_bytes;
1037 const unsigned char *src_end = source + src_bytes;
1038 const unsigned char *src_base;
1039 unsigned char *dst = mt->data + mt->nbytes;
1040 unsigned char *dst_end = mt->data + mt->allocated;
1041 int nchars = 0;
1042 int last_nchars = 0;
1043 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1044 struct utf_status *status = (struct utf_status *) &(converter->status);
1045 unsigned char b1, b2, b3, b4;
1046 MCharset *charset = NULL;
1047 int error = 0;
1048
1049 if (status->bom != UTF_BOM_NO)
1050 {
1051 unsigned c;
1052
1053 ONE_MORE_BASE_BYTE (b1);
1054 ONE_MORE_BYTE (b2);
1055 ONE_MORE_BYTE (b3);
1056 ONE_MORE_BYTE (b4);
1057 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1058 if (c == 0x0000FEFF)
1059 status->endian = UTF_BIG_ENDIAN;
1060 else if (c == 0xFFFE0000)
1061 status->endian = UTF_LITTLE_ENDIAN;
1062 else if (status->bom == UTF_BOM_MAYBE
1063 || converter->lenient)
1064 {
1065 status->endian = UTF_BIG_ENDIAN;
1066 REWIND_SRC_TO_BASE ();
1067 }
1068 else
1069 {
1070 error = 1;
1071 goto source_end;
1072 }
1073 status->bom = UTF_BOM_NO;
1074 }
1075
1076 while (1)
1077 {
1078 unsigned c;
1079 MCharset *this_charset = NULL;
1080
1081 ONE_MORE_BASE_BYTE (b1);
1082 ONE_MORE_BYTE (b2);
1083 ONE_MORE_BYTE (b3);
1084 ONE_MORE_BYTE (b4);
1085 if (status->endian == UTF_BIG_ENDIAN)
1086 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1087 else
1088 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1089 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1090 goto emit_char;
1091
1092 if (! converter->lenient)
1093 break;
1094 REWIND_SRC_TO_BASE ();
1095 ONE_MORE_BYTE (c);
1096 this_charset = mcharset__binary;
1097
1098 emit_char:
1099 if (this_charset != charset)
1100 {
1101 TAKEIN_CHARS (mt, nchars - last_nchars,
1102 dst - (mt->data + mt->nbytes), charset);
1103 charset = this_charset;
1104 last_nchars = nchars;
1105 }
1106 EMIT_CHAR (c);
1107 }
1108 /* We reach here because of an invalid byte. */
1109 error = 1;
1110
1111 source_end:
1112 TAKEIN_CHARS (mt, nchars - last_nchars,
1113 dst - (mt->data + mt->nbytes), charset);
1114 return finish_decoding (mt, converter, nchars,
1115 source, src_end, src_base, error);
1116 }
1117
1118
1119 static int
encode_coding_utf_16(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)1120 encode_coding_utf_16 (MText *mt, int from, int to,
1121 unsigned char *destination, int dst_bytes,
1122 MConverter *converter)
1123 {
1124 unsigned char *src, *src_end;
1125 unsigned char *dst = destination;
1126 unsigned char *dst_end = dst + dst_bytes;
1127 int nchars = 0;
1128 struct utf_status *status = (struct utf_status *) &(converter->status);
1129 int big_endian = status->endian == UTF_BIG_ENDIAN;
1130 enum MTextFormat format = mt->format;
1131
1132 SET_SRC (mt, format, from, to);
1133
1134 if (status->bom != UTF_BOM_NO)
1135 {
1136 CHECK_DST (2);
1137 if (big_endian)
1138 *dst++ = 0xFE, *dst++ = 0xFF;
1139 else
1140 *dst++ = 0xFF, *dst++ = 0xFE;
1141 status->bom = UTF_BOM_NO;
1142 }
1143
1144 while (1)
1145 {
1146 int c, bytes;
1147
1148 ONE_MORE_CHAR (c, bytes, format);
1149
1150 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1151 {
1152 CHECK_DST (2);
1153 if (big_endian)
1154 *dst++ = c >> 8, *dst++ = c & 0xFF;
1155 else
1156 *dst++ = c & 0xFF, *dst++ = c >> 8;
1157 }
1158 else if (c >= 0x10000 && c < 0x110000)
1159 {
1160 int c1, c2;
1161
1162 CHECK_DST (4);
1163 c -= 0x10000;
1164 c1 = (c >> 10) + 0xD800;
1165 c2 = (c & 0x3FF) + 0xDC00;
1166 if (big_endian)
1167 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1168 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1169 else
1170 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1171 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1172 }
1173 else
1174 {
1175 unsigned char buf[11];
1176 int len, i;
1177
1178 if (! converter->lenient)
1179 break;
1180 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1181 mt, from + nchars);
1182 if (len == 0)
1183 goto insufficient_destination;
1184 if (big_endian)
1185 for (i = 0; i < len; i++)
1186 *dst++ = 0, *dst++ = buf[i];
1187 else
1188 for (i = 0; i < len; i++)
1189 *dst++ = buf[i], *dst++ = 0;
1190 }
1191 src += bytes;
1192 nchars++;
1193 }
1194 /* We reach here because of an unsupported char. */
1195 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1196 goto finish;
1197
1198 insufficient_destination:
1199 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1200
1201 finish:
1202 converter->nchars += nchars;
1203 converter->nbytes += dst - destination;
1204 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1205 }
1206
1207 static int
encode_coding_utf_32(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)1208 encode_coding_utf_32 (MText *mt, int from, int to,
1209 unsigned char *destination, int dst_bytes,
1210 MConverter *converter)
1211 {
1212 unsigned char *src, *src_end;
1213 unsigned char *dst = destination;
1214 unsigned char *dst_end = dst + dst_bytes;
1215 int nchars = 0;
1216 struct utf_status *status = (struct utf_status *) &(converter->status);
1217 int big_endian = status->endian == UTF_BIG_ENDIAN;
1218 enum MTextFormat format = mt->format;
1219
1220 SET_SRC (mt, format, from, to);
1221
1222 if (status->bom != UTF_BOM_NO)
1223 {
1224 CHECK_DST (4);
1225 if (big_endian)
1226 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1227 else
1228 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1229 status->bom = UTF_BOM_NO;
1230 }
1231
1232 while (1)
1233 {
1234 int c, bytes;
1235
1236 ONE_MORE_CHAR (c, bytes, format);
1237
1238 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1239 {
1240 CHECK_DST (4);
1241 if (big_endian)
1242 *dst++ = 0x00, *dst++ = c >> 16,
1243 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1244 else
1245 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1246 *dst++ = c >> 16, *dst++ = 0x00;
1247 }
1248 else
1249 {
1250 unsigned char buf[11];
1251 int len, i;
1252
1253 if (! converter->lenient)
1254 break;
1255 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1256 mt, from + nchars);
1257 if (len == 0)
1258 goto insufficient_destination;
1259 if (big_endian)
1260 for (i = 0; i < len; i++)
1261 *dst++ = 0, *dst++ = buf[i];
1262 else
1263 for (i = 0; i < len; i++)
1264 *dst++ = buf[i], *dst++ = 0;
1265 }
1266 src += bytes;
1267 nchars++;
1268 }
1269 /* We reach here because of an unsupported char. */
1270 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1271 goto finish;
1272
1273 insufficient_destination:
1274 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1275
1276 finish:
1277 converter->nchars += nchars;
1278 converter->nbytes += dst - destination;
1279 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1280 }
1281
1282
/* Stuff for coding systems of type MCODING_TYPE_ISO_2022.  */
1284
/* Control codes that the ISO-2022 decoder and encoder treat
   specially.  */

/* Start of text.  */
#define ISO_CODE_STX 0x02
/* Shift-out.  */
#define ISO_CODE_SO 0x0E
/* Shift-in.  */
#define ISO_CODE_SI 0x0F
/* Single-shift-2 for 7-bit code.  */
#define ISO_CODE_SS2_7 0x19
/* Escape.  */
#define ISO_CODE_ESC 0x1B
/* Single-shift-2.  */
#define ISO_CODE_SS2 0x8E
/* Single-shift-3.  */
#define ISO_CODE_SS3 0x8F
1292
1293 /** Structure pointed by MCodingSystem.extra_spec. */
1294
1295 struct iso_2022_spec
1296 {
1297 unsigned flags;
1298
1299 /** Initial graphic registers (0..3) invoked to each graphic
1300 plane left and right. */
1301 int initial_invocation[2];
1302
1303 /** Initially designated charsets for each graphic register. */
1304 MCharset *initial_designation[4];
1305
1306 int n_designations;
1307 char *designations;
1308
1309 int use_esc;
1310 };
1311
1312 struct iso_2022_status
1313 {
1314 int invocation[2];
1315 MCharset *designation[4];
1316 unsigned single_shifting : 1;
1317 unsigned bol : 1;
1318 unsigned r2l : 1;
1319 unsigned utf8_shifting : 1;
1320 MCharset *non_standard_charset;
1321 int non_standard_charset_bytes;
1322 int non_standard_encoding;
1323 };
1324
/** Classification of a byte for the ISO-2022 decoder, and the table
    mapping each of the 256 byte values to its class.  */

enum iso_2022_code_class
  {
    /* Control codes in the range 0x00..0x1F and 0x7F, except for the
       following 4 codes.  */
    ISO_control_0,
    /* ISO_CODE_SO (0x0E) */
    ISO_shift_out,
    /* ISO_CODE_SI (0x0F) */
    ISO_shift_in,
    /* ISO_CODE_SS2_7 (0x19) */
    ISO_single_shift_2_7,
    /* ISO_CODE_ESC (0x1B) */
    ISO_escape,
    /* Control codes in the range 0x80..0x9F, except for the following
       3 codes.  */
    ISO_control_1,
    /* ISO_CODE_SS2 (0x8E) */
    ISO_single_shift_2,
    /* ISO_CODE_SS3 (0x8F) */
    ISO_single_shift_3,
    /* CSI (0x9B) */
    ISO_control_sequence_introducer,
    /* Codes of the values 0x20 or 0x7F.  */
    ISO_0x20_or_0x7F,
    /* Graphic codes in the range 0x21..0x7E.  */
    ISO_graphic_plane_0,
    /* Codes of the values 0xA0 or 0xFF.  */
    ISO_0xA0_or_0xFF,
    /* Graphic codes in the range 0xA1..0xFE.  */
    ISO_graphic_plane_1
  } iso_2022_code_class[256];
1344
1345
/* All flag bits that select a designation policy.  A coding system
   with none of them set has no designation table.  */
#define MCODING_ISO_DESIGNATION_MASK \
  (MCODING_ISO_DESIGNATION_G0 | MCODING_ISO_DESIGNATION_G1 \
   | MCODING_ISO_DESIGNATION_CTEXT | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1351
1352 static int
setup_coding_iso_2022(MCodingSystem * coding)1353 setup_coding_iso_2022 (MCodingSystem *coding)
1354 {
1355 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1356 int ncharsets = coding->ncharsets;
1357 struct iso_2022_spec *spec;
1358 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1359 int i;
1360
1361 coding->ascii_compatible = 0;
1362
1363 MSTRUCT_CALLOC (spec, MERROR_CODING);
1364
1365 spec->flags = info->flags;
1366 spec->initial_invocation[0] = info->initial_invocation[0];
1367 spec->initial_invocation[1] = info->initial_invocation[1];
1368 for (i = 0; i < 4; i++)
1369 spec->initial_designation[i] = NULL;
1370 if (designation_policy)
1371 {
1372 spec->n_designations = ncharsets;
1373 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1374 spec->n_designations += mcharset__iso_2022_table.used;
1375 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1376 for (i = 0; i < spec->n_designations; i++)
1377 spec->designations[i] = -1;
1378 }
1379 else
1380 {
1381 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1382 MERROR (MERROR_CODING, -1);
1383 spec->designations = NULL;
1384 }
1385
1386 for (i = 0; i < ncharsets; i++)
1387 {
1388 int reg = info->designations[i];
1389
1390 if (reg != -5
1391 && coding->charsets[i]->final_byte > 0
1392 && (reg < -4 || reg > 3))
1393 MERROR (MERROR_CODING, -1);
1394 if (reg >= 0)
1395 {
1396 if (spec->initial_designation[reg])
1397 MERROR (MERROR_CODING, -1);
1398 spec->initial_designation[reg] = coding->charsets[i];
1399 }
1400 else if (reg >= -4)
1401 {
1402 if (! designation_policy
1403 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1404 MERROR (MERROR_CODING, -1);
1405 reg += 4;
1406 }
1407
1408 if (designation_policy)
1409 spec->designations[i] = reg;
1410 if (coding->charsets[i] == mcharset__ascii)
1411 coding->ascii_compatible = 1;
1412 }
1413
1414 if (coding->ascii_compatible
1415 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1416 | MCODING_ISO_DESIGNATION_CTEXT
1417 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1418 | MCODING_ISO_LOCKING_SHIFT)))
1419 coding->ascii_compatible = 0;
1420
1421 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1422 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1423 {
1424 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1425
1426 spec->designations[ncharsets + i]
1427 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1428 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1429 ? (charset->code_range[0] == 32
1430 || charset->code_range[1] == 255)
1431 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1432 }
1433
1434 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1435 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1436 && (spec->initial_designation[2]
1437 || spec->initial_designation[3]))
1438 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1439 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1440 || (spec->flags & MCODING_ISO_ISO6429));
1441
1442 coding->extra_spec = (void *) spec;
1443
1444 return 0;
1445 }
1446
1447 static int
reset_coding_iso_2022(MConverter * converter)1448 reset_coding_iso_2022 (MConverter *converter)
1449 {
1450 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1451 MCodingSystem *coding = internal->coding;
1452 struct iso_2022_status *status
1453 = (struct iso_2022_status *) &(converter->status);
1454 struct iso_2022_spec *spec;
1455 int i;
1456
1457 if (! coding->ready
1458 && setup_coding_iso_2022 (coding) < 0)
1459 return -1;
1460 coding->ready = 1;
1461
1462 spec = (struct iso_2022_spec *) coding->extra_spec;
1463 status->invocation[0] = spec->initial_invocation[0];
1464 status->invocation[1] = spec->initial_invocation[1];
1465 for (i = 0; i < 4; i++)
1466 status->designation[i] = spec->initial_designation[i];
1467 status->single_shifting = 0;
1468 status->bol = 1;
1469 status->r2l = 0;
1470
1471 return 0;
1472 }
1473
/* Handle a designation escape sequence in decode_coding_iso_2022:
   designate to graphic register REG the charset of dimension DIM
   with CHARS (94 or 96) characters per dimension and final byte
   FINAL.  REV is the charset revision, or negative if none was
   given.

   Jump to `invalid_byte' if FINAL is out of range, if no charset of
   revision REV matches, or if the charset is not one of
   coding->charsets while MCODING_ISO_FULL_SUPPORT is off.

   The macro uses `spec', `coding' and `status' of the enclosing
   function.  Its own locals carry a trailing underscore so that they
   don't shadow variables of the caller.  FINAL, CHARS, DIM and REV
   are evaluated more than once.  */

#define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
  do { \
    MCharset *designated_; \
    int idx_; \
 \
    if ((final) < '0' || (final) >= 128) \
      goto invalid_byte; \
    if ((rev) < 0) \
      { \
        designated_ = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
        if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
          { \
            for (idx_ = 0; idx_ < coding->ncharsets; idx_++) \
              if (designated_ == coding->charsets[idx_]) \
                break; \
            if (idx_ == coding->ncharsets) \
              goto invalid_byte; \
          } \
      } \
    else \
      { \
        for (idx_ = 0; idx_ < mcharset__iso_2022_table.used; idx_++) \
          { \
            designated_ = mcharset__iso_2022_table.charsets[idx_]; \
            if (designated_->revision == (rev) \
                && designated_->dimension == (dim) \
                && designated_->final_byte == (final) \
                && (designated_->code_range[1] == (chars) \
                    || ((chars) == 96 \
                        && designated_->code_range[1] == 255))) \
              break; \
          } \
        if (idx_ == mcharset__iso_2022_table.used) \
          goto invalid_byte; \
      } \
    status->designation[(reg)] = designated_; \
  } while (0)
1513
1514
1515 static MCharset *
find_ctext_non_standard_charset(char * charset_name)1516 find_ctext_non_standard_charset (char *charset_name)
1517 {
1518 MCharset *charset;
1519
1520 if (! strcmp (charset_name, "koi8-r"))
1521 charset = MCHARSET (msymbol ("koi8-r"));
1522 else if (! strcmp (charset_name, "big5-0"))
1523 charset = MCHARSET (msymbol ("big5"));
1524 else
1525 charset = NULL;
1526 return charset;
1527 }
1528
1529 static int
decode_coding_iso_2022(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)1530 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1531 MConverter *converter)
1532 {
1533 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1534 MCodingSystem *coding = internal->coding;
1535 const unsigned char *src = internal->carryover;
1536 const unsigned char *src_stop = src + internal->carryover_bytes;
1537 const unsigned char *src_end = source + src_bytes;
1538 const unsigned char *src_base;
1539 unsigned char *dst = mt->data + mt->nbytes;
1540 unsigned char *dst_end = mt->data + mt->allocated;
1541 int nchars = 0;
1542 int last_nchars = 0;
1543 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1544 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1545 struct iso_2022_status *status
1546 = (struct iso_2022_status *) &(converter->status);
1547 MCharset *charset0, *charset1, *charset;
1548 int error = 0;
1549 MCharset *cns_charsets[15];
1550
1551 charset0 = (status->invocation[0] >= 0
1552 ? status->designation[status->invocation[0]] : NULL);
1553 charset1 = (status->invocation[1] >= 0
1554 ? status->designation[status->invocation[1]] : NULL);
1555 charset = mcharset__ascii;
1556
1557 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1558 {
1559 int i;
1560
1561 memset (cns_charsets, 0, sizeof (cns_charsets));
1562 for (i = 0; i < coding->ncharsets; i++)
1563 if (coding->charsets[i]->dimension == 2
1564 && coding->charsets[i]->code_range[1] == 126)
1565 {
1566 int final = coding->charsets[i]->final_byte;
1567
1568 if (final >= 'G' && final <= 'M')
1569 cns_charsets[final - 'G'] = coding->charsets[i];
1570 else if (final < 0)
1571 cns_charsets[14] = coding->charsets[i];
1572 }
1573 }
1574
1575 while (1)
1576 {
1577 MCharset *this_charset = NULL;
1578 int c1, c2, c3;
1579
1580 ONE_MORE_BASE_BYTE (c1);
1581
1582 if (status->utf8_shifting)
1583 {
1584 int buf[6];
1585 int bytes = CHAR_BYTES_BY_HEAD (c1);
1586 int i;
1587
1588 buf[0] = c1;
1589 for (i = 1; i < bytes; i++)
1590 {
1591 ONE_MORE_BYTE (c1);
1592 buf[i] = c1;
1593 }
1594 this_charset = UTF8_CHARSET (buf);
1595 c1 = STRING_CHAR_UTF8 (buf);
1596 goto emit_char;
1597 }
1598
1599 if (status->non_standard_encoding > 0)
1600 {
1601 int i;
1602
1603 this_charset = status->non_standard_charset;
1604 for (i = 1; i < status->non_standard_charset_bytes; i++)
1605 {
1606 ONE_MORE_BYTE (c2);
1607 c1 = (c1 << 8) | c2;
1608 }
1609 c1 = DECODE_CHAR (this_charset, c1);
1610 goto emit_char;
1611 }
1612
1613 switch (iso_2022_code_class[c1])
1614 {
1615 case ISO_graphic_plane_0:
1616 this_charset = charset0;
1617 break;
1618
1619 case ISO_0x20_or_0x7F:
1620 if (! charset0
1621 || (charset0->code_range[0] != 32
1622 && charset0->code_range[1] != 255))
1623 /* This is SPACE or DEL. */
1624 this_charset = mcharset__ascii;
1625 else
1626 /* This is a graphic character of plane 0. */
1627 this_charset = charset0;
1628 break;
1629
1630 case ISO_graphic_plane_1:
1631 if (!charset1)
1632 goto invalid_byte;
1633 this_charset = charset1;
1634 break;
1635
1636 case ISO_0xA0_or_0xFF:
1637 if (! charset1
1638 || charset1->code_range[0] == 33
1639 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1640 goto invalid_byte;
1641 /* This is a graphic character of plane 1. */
1642 if (! charset1)
1643 goto invalid_byte;
1644 this_charset = charset1;
1645 break;
1646
1647 case ISO_control_0:
1648 this_charset = mcharset__ascii;
1649 break;
1650
1651 case ISO_control_1:
1652 goto invalid_byte;
1653
1654 case ISO_shift_out:
1655 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1656 && status->designation[1])
1657 {
1658 status->invocation[0] = 1;
1659 charset0 = status->designation[1];
1660 continue;
1661 }
1662 this_charset = mcharset__ascii;
1663 break;
1664
1665 case ISO_shift_in:
1666 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1667 {
1668 status->invocation[0] = 0;
1669 charset0 = status->designation[0];
1670 continue;
1671 }
1672 this_charset = mcharset__ascii;
1673 break;
1674
1675 case ISO_single_shift_2_7:
1676 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1677 {
1678 this_charset = mcharset__ascii;
1679 break;
1680 }
1681 c1 = 'N';
1682 goto label_escape_sequence;
1683
1684 case ISO_single_shift_2:
1685 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1686 {
1687 ONE_MORE_BYTE (c1);
1688 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1689 || ! cns_charsets[c1 - 0xA1])
1690 goto invalid_byte;
1691 status->designation[2] = cns_charsets[c1 - 0xA1];
1692 }
1693 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1694 goto invalid_byte;
1695 /* SS2 is handled as an escape sequence of ESC 'N' */
1696 c1 = 'N';
1697 goto label_escape_sequence;
1698
1699 case ISO_single_shift_3:
1700 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1701 goto invalid_byte;
1702 /* SS2 is handled as an escape sequence of ESC 'O' */
1703 c1 = 'O';
1704 goto label_escape_sequence;
1705
1706 case ISO_control_sequence_introducer:
1707 /* CSI is handled as an escape sequence of ESC '[' ... */
1708 c1 = '[';
1709 goto label_escape_sequence;
1710
1711 case ISO_escape:
1712 if (! spec->use_esc)
1713 {
1714 this_charset = mcharset__ascii;
1715 break;
1716 }
1717 ONE_MORE_BYTE (c1);
1718 label_escape_sequence:
1719 /* Escape sequences handled here are invocation,
1720 designation, and direction specification. */
1721 switch (c1)
1722 {
1723 case '&': /* revision of following character set */
1724 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1725 goto unused_escape_sequence;
1726 ONE_MORE_BYTE (c1);
1727 if (c1 < '@' || c1 > '~')
1728 goto invalid_byte;
1729 ONE_MORE_BYTE (c1);
1730 if (c1 != ISO_CODE_ESC)
1731 goto invalid_byte;
1732 ONE_MORE_BYTE (c1);
1733 goto label_escape_sequence;
1734
1735 case '$': /* designation of 2-byte character set */
1736 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1737 goto unused_escape_sequence;
1738 ONE_MORE_BYTE (c1);
1739 if (c1 >= '@' && c1 <= 'B')
1740 { /* designation of JISX0208.1978, GB2312.1980, or
1741 JISX0208.1980 */
1742 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1743 }
1744 else if (c1 >= 0x28 && c1 <= 0x2B)
1745 { /* designation of (dimension 2, chars 94) character set */
1746 ONE_MORE_BYTE (c2);
1747 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1748 }
1749 else if (c1 >= 0x2C && c1 <= 0x2F)
1750 { /* designation of (dimension 2, chars 96) character set */
1751 ONE_MORE_BYTE (c2);
1752 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1753 }
1754 else
1755 goto invalid_byte;
1756 /* We must update these variables now. */
1757 if (status->invocation[0] >= 0)
1758 charset0 = status->designation[status->invocation[0]];
1759 if (status->invocation[1] >= 0)
1760 charset1 = status->designation[status->invocation[1]];
1761 continue;
1762
1763 case 'n': /* invocation of locking-shift-2 */
1764 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1765 || ! status->designation[2])
1766 goto invalid_byte;
1767 status->invocation[0] = 2;
1768 charset0 = status->designation[2];
1769 continue;
1770
1771 case 'o': /* invocation of locking-shift-3 */
1772 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1773 || ! status->designation[3])
1774 goto invalid_byte;
1775 status->invocation[0] = 3;
1776 charset0 = status->designation[3];
1777 continue;
1778
1779 case 'N': /* invocation of single-shift-2 */
1780 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1781 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1782 || ! status->designation[2])
1783 goto invalid_byte;
1784 this_charset = status->designation[2];
1785 ONE_MORE_BYTE (c1);
1786 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1787 goto invalid_byte;
1788 break;
1789
1790 case 'O': /* invocation of single-shift-3 */
1791 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1792 || ! status->designation[3])
1793 goto invalid_byte;
1794 this_charset = status->designation[3];
1795 ONE_MORE_BYTE (c1);
1796 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1797 goto invalid_byte;
1798 break;
1799
1800 case '[': /* specification of direction */
1801 if (! (spec->flags & MCODING_ISO_ISO6429))
1802 goto invalid_byte;
1803 /* For the moment, nested direction is not supported.
1804 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1805 left-to-right, and nonzero means right-to-left. */
1806 ONE_MORE_BYTE (c1);
1807 switch (c1)
1808 {
1809 case ']': /* end of the current direction */
1810 case '0': /* end of the current direction */
1811 status->r2l = 0;
1812 break;
1813
1814 case '1': /* start of left-to-right direction */
1815 ONE_MORE_BYTE (c1);
1816 if (c1 != ']')
1817 goto invalid_byte;
1818 status->r2l = 0;
1819 break;
1820
1821 case '2': /* start of right-to-left direction */
1822 ONE_MORE_BYTE (c1);
1823 if (c1 != ']')
1824 goto invalid_byte;
1825 status->r2l = 1;
1826 break;
1827
1828 default:
1829 goto invalid_byte;
1830 }
1831 continue;
1832
1833 case '%':
1834 {
1835 char charset_name[16];
1836 int bytes;
1837 int i;
1838
1839 if (! (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT))
1840 goto invalid_byte;
1841 /* Compound-text uses these escape sequences:
1842
1843 ESC % G -- utf-8 bytes -- ESC % @
1844 ESC % / 1 M L -- charset name -- STX -- bytes --
1845 ESC % / 2 M L -- charset name -- STX -- bytes --
1846 ESC % / 3 M L -- charset name -- STX -- bytes --
1847 ESC % / 4 M L -- charset name -- STX -- bytes --
1848
1849 It also uses this sequence but that is not yet
1850 supported here.
1851
1852 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1853
1854 ONE_MORE_BYTE (c1);
1855 if (c1 == 'G')
1856 {
1857 status->utf8_shifting = 1;
1858 continue;
1859 }
1860 if (c1 == '@')
1861 {
1862 if (! status->utf8_shifting)
1863 goto invalid_byte;
1864 status->utf8_shifting = 0;
1865 continue;
1866 }
1867 if (c1 != '/')
1868 goto invalid_byte;
1869 ONE_MORE_BYTE (c1);
1870 if (c1 < '1' || c1 > '4')
1871 goto invalid_byte;
1872 status->non_standard_charset_bytes = c1 - '0';
1873 ONE_MORE_BYTE (c1);
1874 ONE_MORE_BYTE (c2);
1875 if (c1 < 128 || c2 < 128)
1876 goto invalid_byte;
1877 bytes = (c1 - 128) * 128 + (c2 - 128);
1878 for (i = 0; i < 16; i++)
1879 {
1880 ONE_MORE_BYTE (c1);
1881 if (c1 == ISO_CODE_STX)
1882 break;
1883 charset_name[i] = TOLOWER (c1);
1884 }
1885 if (i == 16)
1886 goto invalid_byte;
1887 charset_name[i++] = '\0';
1888 this_charset = find_ctext_non_standard_charset (charset_name);
1889 if (! this_charset)
1890 goto invalid_byte;
1891 status->non_standard_charset = this_charset;
1892 status->non_standard_encoding = bytes - i;
1893 continue;
1894 }
1895
1896 default:
1897 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1898 goto unused_escape_sequence;
1899 if (c1 >= 0x28 && c1 <= 0x2B)
1900 { /* designation of (dimension 1, chars 94) charset */
1901 ONE_MORE_BYTE (c2);
1902 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1903 }
1904 else if (c1 >= 0x2C && c1 <= 0x2F)
1905 { /* designation of (dimension 1, chars 96) charset */
1906 ONE_MORE_BYTE (c2);
1907 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1908 }
1909 else
1910 goto invalid_byte;
1911 /* We must update these variables now. */
1912 if (status->invocation[0] >= 0)
1913 charset0 = status->designation[status->invocation[0]];
1914 if (status->invocation[1] >= 0)
1915 charset1 = status->designation[status->invocation[1]];
1916 continue;
1917
1918 unused_escape_sequence:
1919 UNGET_ONE_BYTE (c1);
1920 c1 = ISO_CODE_ESC;
1921 this_charset = mcharset__ascii;
1922 }
1923 }
1924
1925 if (this_charset->dimension == 1)
1926 {
1927 if (this_charset->code_range[1] <= 128)
1928 c1 &= 0x7F;
1929 }
1930 else if (this_charset->dimension == 2)
1931 {
1932 ONE_MORE_BYTE (c2);
1933 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1934 }
1935 else /* i.e. (dimension == 3) */
1936 {
1937 ONE_MORE_BYTE (c2);
1938 ONE_MORE_BYTE (c3);
1939 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1940 }
1941 c1 = DECODE_CHAR (this_charset, c1);
1942 goto emit_char;
1943
1944 invalid_byte:
1945 if (! converter->lenient)
1946 break;
1947 REWIND_SRC_TO_BASE ();
1948 c1 = *src++;
1949 this_charset = mcharset__binary;
1950
1951 emit_char:
1952 if (this_charset != mcharset__ascii
1953 && this_charset != charset)
1954 {
1955 TAKEIN_CHARS (mt, nchars - last_nchars,
1956 dst - (mt->data + mt->nbytes), charset);
1957 charset = this_charset;
1958 last_nchars = nchars;
1959 }
1960 EMIT_CHAR (c1);
1961 if (status->non_standard_encoding > 0)
1962 status->non_standard_encoding -= status->non_standard_charset_bytes;
1963 }
1964 /* We reach here because of an invalid byte. */
1965 error = 1;
1966
1967
1968
1969 source_end:
1970 TAKEIN_CHARS (mt, nchars - last_nchars,
1971 dst - (mt->data + mt->nbytes), charset);
1972 return finish_decoding (mt, converter, nchars,
1973 source, src_end, src_base, error);
1974
1975 }
1976
/* Produce the escape sequence designating CHARSET to graphic register
   REG at `dst', advance `dst', and record the designation in
   STATUS->designation.

   The sequence is ESC, then '$' for a multibyte charset, then an
   intermediate byte selecting the register, then the final byte of
   CHARSET.  The intermediate byte is omitted (short form) only for a
   multibyte 94-character set designated to register 0 whose final
   byte is '@', 'A' or 'B', and only when SPEC does not have
   MCODING_ISO_LONG_FORM.

   Jump to `memory_shortage' unless 4 bytes are available before
   `dst_end'.  The arguments are evaluated more than once.  */

#define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
  do { \
    int is_94_set_ = ((charset)->code_range[0] != 32 \
                      && (charset)->code_range[1] != 255); \
    char *intermediates_ = is_94_set_ ? "()*+" : ",-./"; \
 \
    if (dst + 4 > dst_end) \
      goto memory_shortage; \
    *dst++ = ISO_CODE_ESC; \
    if ((charset)->dimension != 1) \
      *dst++ = '$'; \
    if ((charset)->dimension == 1 \
        || ! is_94_set_ \
        || ((spec)->flags & MCODING_ISO_LONG_FORM) \
        || (reg) != 0 \
        || (charset)->final_byte < '@' \
        || (charset)->final_byte > 'B') \
      *dst++ = (unsigned char) (intermediates_[(reg)]); \
    *dst++ = (charset)->final_byte; \
 \
    (status)->designation[(reg)] = (charset); \
  } while (0)
2016
2017
/* The following two macros write the code for the ISO-2022
   single-shift functions (single-shift-2 and single-shift-3) at `dst'
   and set STATUS->single_shifting.  With MCODING_ISO_EIGHT_BIT the
   code is the single control byte SS2 or SS3; otherwise it is the
   escape sequence ESC 'N' or ESC 'O'.  Both jump to `memory_shortage'
   unless 2 bytes are available before `dst_end'.  */

#define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
  do { \
    if (dst + 2 > dst_end) \
      goto memory_shortage; \
    if ((spec)->flags & MCODING_ISO_EIGHT_BIT) \
      *dst++ = ISO_CODE_SS2; \
    else \
      { \
        *dst++ = ISO_CODE_ESC; \
        *dst++ = 'N'; \
      } \
    (status)->single_shifting = 1; \
  } while (0)


#define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
  do { \
    if (dst + 2 > dst_end) \
      goto memory_shortage; \
    if ((spec)->flags & MCODING_ISO_EIGHT_BIT) \
      *dst++ = ISO_CODE_SS3; \
    else \
      { \
        *dst++ = ISO_CODE_ESC; \
        *dst++ = 'O'; \
      } \
    (status)->single_shifting = 1; \
  } while (0)
2044
2045
/* The following four macros write the code for the ISO-2022
   locking-shift functions (shift-in, shift-out, locking-shift-2, and
   locking-shift-3) at `dst' and record in STATUS->invocation[0] the
   graphic register (0..3) now invoked.  Each jumps to
   `memory_shortage' if the code does not fit before `dst_end'.  */

#define ISO2022_ENCODE_SHIFT_IN(status) \
  do { \
    if (dst + 1 > dst_end) \
      goto memory_shortage; \
    *dst++ = ISO_CODE_SI; \
    (status)->invocation[0] = 0; \
  } while (0)


#define ISO2022_ENCODE_SHIFT_OUT(status) \
  do { \
    if (dst + 1 > dst_end) \
      goto memory_shortage; \
    *dst++ = ISO_CODE_SO; \
    (status)->invocation[0] = 1; \
  } while (0)


#define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
  do { \
    if (dst + 2 > dst_end) \
      goto memory_shortage; \
    *dst++ = ISO_CODE_ESC; \
    *dst++ = 'n'; \
    (status)->invocation[0] = 2; \
  } while (0)


#define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
  do { \
    if (dst + 2 > dst_end) \
      goto memory_shortage; \
    *dst++ = ISO_CODE_ESC; \
    *dst++ = 'o'; \
    (status)->invocation[0] = 3; \
  } while (0)
2084
/* Write `ESC % G', which starts a UTF-8 section, and set
   status->utf8_shifting.  CHECK_DST is asked for the 3 bytes of the
   sequence plus LEN more bytes.  `status' and `dst' are those of the
   enclosing function.  */

#define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
  do { \
    CHECK_DST (3 + (len)); \
    dst[0] = ISO_CODE_ESC; \
    dst[1] = '%'; \
    dst[2] = 'G'; \
    dst += 3; \
    status->utf8_shifting = 1; \
  } while (0)


/* Write `ESC % @', which ends a UTF-8 section, and clear
   status->utf8_shifting.  */

#define ISO2022_ENCODE_UTF8_SHIFT_END() \
  do { \
    CHECK_DST (3); \
    dst[0] = ISO_CODE_ESC; \
    dst[1] = '%'; \
    dst[2] = '@'; \
    dst += 3; \
    status->utf8_shifting = 0; \
  } while (0)


/* Write the header of a compound-text extended segment for the
   charset named NAME (LEN bytes long): `ESC % /', the digit for
   non_standard_charset_bytes, two length bytes that are left zero
   here to be filled in later, NAME, and STX.  The start of the
   segment is remembered in non_standard_begin, and
   non_standard_bytes is set to the LEN + 1 bytes written after the
   length field.  All those variables belong to the enclosing
   function.  */

#define ISO2022_ENCODE_NON_STANDARD(name, len) \
  do { \
    CHECK_DST (6 + (len) + 1 + non_standard_charset_bytes); \
    non_standard_begin = dst; \
    *dst++ = ISO_CODE_ESC; \
    *dst++ = '%'; \
    *dst++ = '/'; \
    *dst++ = '0' + non_standard_charset_bytes; \
    *dst++ = 0; /* filled later */ \
    *dst++ = 0; /* filled later */ \
    memcpy (dst, (name), (len)); \
    dst += (len); \
    *dst++ = ISO_CODE_STX; \
    non_standard_bytes = (len) + 1; \
  } while (0)
2119
2120
2121 static char *
find_ctext_non_standard_name(MCharset * charset,int * bytes)2122 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2123 {
2124 char *name = msymbol_name (charset->name);
2125
2126 if (! strcmp (name, "koi8-r"))
2127 *bytes = 1;
2128 else if (! strcmp (name, "big5"))
2129 name = "big5-0", *bytes = 2;
2130 else
2131 return NULL;
2132 return name;
2133 }
2134
2135 /* Designate CHARSET to a graphic register specified in
2136 SPEC->designation. If the register is not yet invoked to graphic
2137 left not right, invoke it to graphic left. DSTP points to a
2138 variable containing a memory address where the output must go.
2139 DST_END is the limit of that memory.
2140
2141 Return 0 if it succeeds. Return -1 otherwise, which means that the
2142 memory area is too short. By side effect, update the variable that
2143 DSTP points to. */
2144
2145 static int
iso_2022_designate_invoke_charset(MCodingSystem * coding,MCharset * charset,struct iso_2022_spec * spec,struct iso_2022_status * status,unsigned char ** dstp,unsigned char * dst_end)2146 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2147 MCharset *charset,
2148 struct iso_2022_spec *spec,
2149 struct iso_2022_status *status,
2150 unsigned char **dstp,
2151 unsigned char *dst_end)
2152 {
2153 int i;
2154 unsigned char *dst = *dstp;
2155
2156 for (i = 0; i < 4; i++)
2157 if (charset == status->designation[i])
2158 break;
2159
2160 if (i >= 4)
2161 {
2162 /* CHARSET is not yet designated to any graphic registers. */
2163 for (i = 0; i < coding->ncharsets; i++)
2164 if (charset == coding->charsets[i])
2165 break;
2166 if (i == coding->ncharsets)
2167 {
2168 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2169 if (charset == mcharset__iso_2022_table.charsets[i])
2170 break;
2171 i += coding->ncharsets;
2172 }
2173 i = spec->designations[i];
2174 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2175 }
2176
2177 if (status->invocation[0] != i
2178 && status->invocation[1] != i)
2179 {
2180 /* Graphic register I is not yet invoked. */
2181 switch (i)
2182 {
2183 case 0: /* graphic register 0 */
2184 ISO2022_ENCODE_SHIFT_IN (status);
2185 break;
2186
2187 case 1: /* graphic register 1 */
2188 ISO2022_ENCODE_SHIFT_OUT (status);
2189 break;
2190
2191 case 2: /* graphic register 2 */
2192 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2193 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2194 else
2195 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2196 break;
2197
2198 case 3: /* graphic register 3 */
2199 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2200 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2201 else
2202 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2203 break;
2204 }
2205 }
2206 *dstp = dst;
2207 return 0;
2208
2209 memory_shortage:
2210 *dstp = dst;
2211 return -1;
2212 }
2213
2214
2215 /* Reset the invocation/designation status to the initial one. SPEC
2216 and STATUS contain information about the current and initial
2217 invocation /designation status respectively. DSTP points to a
2218 variable containing a memory address where the output must go.
2219 DST_END is the limit of that memory.
2220
2221 Return 0 if it succeeds. Return -1 otherwise, which means that the
2222 memory area is too short. By side effect, update the variable that
2223 DSTP points to. */
2224
2225 static int
iso_2022_reset_invocation_designation(struct iso_2022_spec * spec,struct iso_2022_status * status,unsigned char ** dstp,unsigned char * dst_end)2226 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2227 struct iso_2022_status *status,
2228 unsigned char **dstp,
2229 unsigned char *dst_end)
2230 {
2231 unsigned char *dst = *dstp;
2232 int i;
2233
2234 /* Reset the invocation status of GL. We have not yet supported GR
2235 invocation. */
2236 if (status->invocation[0] != spec->initial_invocation[0]
2237 && spec->initial_invocation[0] >= 0)
2238 {
2239 if (spec->initial_invocation[0] == 0)
2240 ISO2022_ENCODE_SHIFT_IN (status);
2241 else if (spec->initial_invocation[0] == 1)
2242 ISO2022_ENCODE_SHIFT_OUT (status);
2243 else if (spec->initial_invocation[0] == 2)
2244 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2245 else /* i.e. spec->initial_invocation[0] == 3 */
2246 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2247 }
2248
2249 /* Reset the designation status of G0..G3. */
2250 for (i = 0; i < 4; i++)
2251 if (status->designation[i] != spec->initial_designation[i]
2252 && spec->initial_designation[i])
2253 {
2254 MCharset *charset = spec->initial_designation[i];
2255
2256 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2257 }
2258
2259 *dstp = dst;
2260 return 0;
2261
2262 memory_shortage:
2263 *dstp = dst;
2264 return -1;
2265 }
2266
2267
2268 static int
encode_coding_iso_2022(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)2269 encode_coding_iso_2022 (MText *mt, int from, int to,
2270 unsigned char *destination, int dst_bytes,
2271 MConverter *converter)
2272 {
2273 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2274 MCodingSystem *coding = internal->coding;
2275 unsigned char *src, *src_end;
2276 unsigned char *dst = destination;
2277 unsigned char *dst_end = dst + dst_bytes;
2278 int nchars = 0;
2279 unsigned char *dst_base;
2280 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2281 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2282 struct iso_2022_status *status
2283 = (struct iso_2022_status *) &(converter->status);
2284 MCharset *primary, *charset0, *charset1;
2285 int next_primary_change;
2286 int ncharsets = coding->ncharsets;
2287 MCharset **charsets = coding->charsets;
2288 MCharset *cns_charsets[15];
2289 int ascii_compatible = coding->ascii_compatible;
2290 MCharset *non_standard_charset = NULL;
2291 int non_standard_charset_bytes = 0;
2292 int non_standard_bytes = 0;
2293 unsigned char *non_standard_begin = NULL;
2294 enum MTextFormat format = mt->format;
2295
2296 SET_SRC (mt, format, from, to);
2297
2298 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2299 {
2300 int i;
2301
2302 memset (cns_charsets, 0, sizeof (cns_charsets));
2303 for (i = 0; i < ncharsets; i++)
2304 if (charsets[i]->dimension == 2)
2305 {
2306 int final = charsets[i]->final_byte;
2307
2308 if (final >= 'G' && final <= 'M')
2309 cns_charsets[final - 'G'] = charsets[i];
2310 else if (final < 0)
2311 cns_charsets[14] = charsets[i];
2312 }
2313 }
2314
2315 next_primary_change = from;
2316 primary = NULL;
2317 charset0 = status->designation[status->invocation[0]];
2318 charset1 = (status->invocation[1] < 0 ? NULL
2319 : status->designation[status->invocation[1]]);
2320
2321 while (1)
2322 {
2323 int bytes, c;
2324
2325 dst_base = dst;
2326 ONE_MORE_CHAR (c, bytes, format);
2327
2328 if (c < 128 && ascii_compatible)
2329 {
2330 if (status->utf8_shifting)
2331 ISO2022_ENCODE_UTF8_SHIFT_END ();
2332 CHECK_DST (1);
2333 *dst++ = c;
2334 }
2335 else if (c <= 32 || c == 127)
2336 {
2337 if (status->utf8_shifting)
2338 ISO2022_ENCODE_UTF8_SHIFT_END ();
2339 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2340 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2341 {
2342 if (iso_2022_reset_invocation_designation (spec, status,
2343 &dst, dst_end) < 0)
2344 goto insufficient_destination;
2345 charset0 = status->designation[status->invocation[0]];
2346 charset1 = (status->invocation[1] < 0 ? NULL
2347 : status->designation[status->invocation[1]]);
2348 }
2349 CHECK_DST (1);
2350 *dst++ = c;
2351 }
2352 else
2353 {
2354 unsigned code = MCHAR_INVALID_CODE;
2355 MCharset *charset = NULL;
2356 int gr_mask;
2357 int pos = from + nchars;
2358
2359 if (pos >= next_primary_change)
2360 {
2361 MSymbol primary_charset
2362 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2363 primary = MCHARSET (primary_charset);
2364 if (primary && primary != mcharset__binary)
2365 {
2366 if (primary->final_byte <= 0)
2367 primary = NULL;
2368 else if (! full_support)
2369 {
2370 int i;
2371
2372 for (i = 0; i < ncharsets; i++)
2373 if (primary == charsets[i])
2374 break;
2375 if (i == ncharsets)
2376 primary = NULL;
2377 }
2378 }
2379
2380 mtext_prop_range (mt, Mcharset, pos,
2381 NULL, &next_primary_change, 0);
2382 }
2383
2384 if (primary && primary != mcharset__binary)
2385 {
2386 code = ENCODE_CHAR (primary, c);
2387 if (code != MCHAR_INVALID_CODE)
2388 charset = primary;
2389 }
2390 if (! charset)
2391 {
2392 if (c <= 32 || c == 127)
2393 {
2394 code = c;
2395 charset = mcharset__ascii;
2396 }
2397 else
2398 {
2399 int i;
2400
2401 for (i = 0; i < ncharsets; i++)
2402 {
2403 charset = charsets[i];
2404 code = ENCODE_CHAR (charset, c);
2405 if (code != MCHAR_INVALID_CODE)
2406 break;
2407 }
2408 if (i == ncharsets)
2409 {
2410 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2411 {
2412 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2413 {
2414 charset = mcharset__iso_2022_table.charsets[i];
2415 code = ENCODE_CHAR (charset, c);
2416 if (code != MCHAR_INVALID_CODE)
2417 break;
2418 }
2419 if (i == mcharset__iso_2022_table.used)
2420 {
2421 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2422 goto unsupported_char;
2423 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2424 goto finish;
2425 }
2426 }
2427 else
2428 goto unsupported_char;
2429 }
2430 }
2431 }
2432
2433 if (charset
2434 && (charset->final_byte >= 0
2435 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2436 {
2437 if (code >= 0x80 && code < 0xA0)
2438 goto unsupported_char;
2439 code &= 0x7F7F7F7F;
2440 if (status->utf8_shifting)
2441 ISO2022_ENCODE_UTF8_SHIFT_END ();
2442 if (charset == charset0)
2443 gr_mask = 0;
2444 else if (charset == charset1)
2445 gr_mask = 0x80;
2446 else
2447 {
2448 unsigned char *p = NULL;
2449
2450 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2451 {
2452 int i;
2453
2454 if (cns_charsets[0] == charset)
2455 {
2456 CHECK_DST (2);
2457 }
2458 else
2459 {
2460 for (i = 1; i < 15; i++)
2461 if (cns_charsets[i] == charset)
2462 break;
2463 CHECK_DST (4);
2464 *dst++ = ISO_CODE_SS2;
2465 *dst++ = 0xA1 + i;
2466 }
2467 status->single_shifting = 1;
2468 p = dst;
2469 }
2470 else
2471 {
2472 if (iso_2022_designate_invoke_charset
2473 (coding, charset, spec, status, &dst, dst_end) < 0)
2474 goto insufficient_destination;
2475 charset0 = status->designation[status->invocation[0]];
2476 charset1 = (status->invocation[1] < 0 ? NULL
2477 : status->designation[status->invocation[1]]);
2478 }
2479 if (status->single_shifting)
2480 gr_mask
2481 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2482 else if (charset == charset0)
2483 gr_mask = 0;
2484 else
2485 gr_mask = 0x80;
2486 }
2487 if (charset->dimension == 1)
2488 {
2489 CHECK_DST (1);
2490 *dst++ = code | gr_mask;
2491 }
2492 else if (charset->dimension == 2)
2493 {
2494 CHECK_DST (2);
2495 *dst++ = (code >> 8) | gr_mask;
2496 *dst++ = (code & 0xFF) | gr_mask;
2497 }
2498 else
2499 {
2500 CHECK_DST (3);
2501 *dst++ = (code >> 16) | gr_mask;
2502 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2503 *dst++ = (code & 0xFF) | gr_mask;
2504 }
2505 status->single_shifting = 0;
2506 }
2507 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2508 {
2509 if (charset != non_standard_charset)
2510 {
2511 char *name = (find_ctext_non_standard_name
2512 (charset, &non_standard_charset_bytes));
2513
2514 if (name)
2515 {
2516 int len = strlen (name);
2517
2518 ISO2022_ENCODE_NON_STANDARD (name, len);
2519 non_standard_charset = charset;
2520 }
2521 else
2522 non_standard_charset = NULL;
2523 }
2524
2525 if (non_standard_charset)
2526 {
2527 if (dst + non_standard_charset_bytes > dst_end)
2528 goto insufficient_destination;
2529 non_standard_bytes += non_standard_charset_bytes;
2530 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2531 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2532 if (non_standard_charset_bytes == 1)
2533 *dst++ = code;
2534 else if (non_standard_charset_bytes == 2)
2535 *dst++ = code >> 8, *dst++ = code & 0xFF;
2536 else if (non_standard_charset_bytes == 3)
2537 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2538 *dst++ = code & 0xFF;
2539 else /* i.e non_standard_charset_bytes == 3 */
2540 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2541 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2542 }
2543 else
2544 {
2545 int len = CHAR_BYTES (c);
2546
2547 if (c >= 0x110000)
2548 goto unsupported_char;
2549 if (! status->utf8_shifting)
2550 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2551 else
2552 CHECK_DST (len);
2553 CHAR_STRING (c, dst);
2554 }
2555 }
2556 else
2557 goto unsupported_char;
2558 }
2559 src += bytes;
2560 nchars++;
2561 continue;
2562
2563 unsupported_char:
2564 {
2565 int len;
2566
2567 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2568 spec, status,
2569 &dst, dst_end) < 0)
2570 goto insufficient_destination;
2571 if (! converter->lenient)
2572 break;
2573 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2574 if (len == 0)
2575 goto insufficient_destination;
2576 dst += len;
2577 src += bytes;
2578 nchars++;
2579 }
2580 }
2581 /* We reach here because of an unsupported char. */
2582 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2583 goto finish;
2584
2585 insufficient_destination:
2586 dst = dst_base;
2587 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2588
2589 finish:
2590 if (converter->result == MCONVERSION_RESULT_SUCCESS
2591 && converter->last_block)
2592 {
2593 if (status->utf8_shifting)
2594 {
2595 ISO2022_ENCODE_UTF8_SHIFT_END ();
2596 dst_base = dst;
2597 }
2598 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2599 && charset0 != spec->initial_designation[0])
2600 {
2601 if (iso_2022_reset_invocation_designation (spec, status,
2602 &dst, dst_end) < 0)
2603 goto insufficient_destination;
2604 }
2605 }
2606 converter->nchars += nchars;
2607 converter->nbytes += dst - destination;
2608 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2609 }
2610
2611
/* Stuff for coding systems of type MCODING_TYPE_MISC.  */
2613
2614 /* For SJIS handling... */
2615
/* Convert a Shift-JIS double-byte code, given as lead byte S1 and
   trail byte S2, to the corresponding two-byte JIS code
   (first byte << 8 | second byte).

   Every argument is parenthesized so that expression arguments work,
   but note that each argument is still evaluated more than once:
   do not pass expressions with side effects.  */
#define SJIS_TO_JIS(s1, s2)                                     \
  ((s2) >= 0x9F                                                 \
   ? ((((s1) * 2 - ((s1) >= 0xE0 ? 0x160 : 0xE0)) << 8)         \
      | ((s2) - 0x7E))                                          \
   : ((((s1) * 2 - (((s1) >= 0xE0) ? 0x161 : 0xE1)) << 8)       \
      | ((s2) - (((s2) >= 0x7F) ? 0x20 : 0x1F))))

/* Convert a two-byte JIS code, given as first byte C1 and second
   byte C2, to the corresponding Shift-JIS double-byte code
   (lead byte << 8 | trail byte).  The same multiple-evaluation
   caveat as for SJIS_TO_JIS applies.  */
#define JIS_TO_SJIS(c1, c2)                                     \
  (((c1) & 1)                                                   \
   ? ((((c1) / 2 + (((c1) < 0x5F) ? 0x71 : 0xB1)) << 8)         \
      | ((c2) + (((c2) >= 0x60) ? 0x20 : 0x1F)))                \
   : ((((c1) / 2 + (((c1) < 0x5F) ? 0x70 : 0xB0)) << 8)         \
      | ((c2) + 0x7E)))
2629
2630
2631 static int
reset_coding_sjis(MConverter * converter)2632 reset_coding_sjis (MConverter *converter)
2633 {
2634 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2635 MCodingSystem *coding = internal->coding;
2636
2637 if (! coding->ready)
2638 {
2639 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2640 MCharset *kanji = MCHARSET (kanji_sym);
2641 MSymbol kana_sym = msymbol ("jisx0201-kana");
2642 MCharset *kana = MCHARSET (kana_sym);
2643
2644 if (! kanji || ! kana)
2645 return -1;
2646 coding->ncharsets = 3;
2647 coding->charsets[1] = kanji;
2648 coding->charsets[2] = kana;
2649 }
2650 coding->ready = 1;
2651 return 0;
2652 }
2653
2654 static int
decode_coding_sjis(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)2655 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2656 MConverter *converter)
2657 {
2658 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2659 MCodingSystem *coding = internal->coding;
2660 const unsigned char *src = internal->carryover;
2661 const unsigned char *src_stop = src + internal->carryover_bytes;
2662 const unsigned char *src_end = source + src_bytes;
2663 const unsigned char *src_base;
2664 unsigned char *dst = mt->data + mt->nbytes;
2665 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2666 int nchars = 0;
2667 int last_nchars = 0;
2668 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2669
2670 MCharset *charset_roman = coding->charsets[0];
2671 MCharset *charset_kanji = coding->charsets[1];
2672 MCharset *charset_kana = coding->charsets[2];
2673 MCharset *charset = mcharset__ascii;
2674 int error = 0;
2675
2676 while (1)
2677 {
2678 MCharset *this_charset;
2679 int c, c1, c2;
2680
2681 ONE_MORE_BASE_BYTE (c1);
2682
2683 c2 = -1;
2684 if (c1 < 0x80)
2685 {
2686 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2687 ? mcharset__ascii
2688 : charset_roman);
2689 }
2690 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
2691 {
2692 ONE_MORE_BYTE (c2);
2693 if ((c2 >= 0x40 && c2 <= 0x7F) || (c2 >= 80 && c2 <= 0xFC))
2694 {
2695 this_charset = charset_kanji;
2696 c1 = SJIS_TO_JIS (c1, c2);
2697 }
2698 else
2699 goto invalid_byte;
2700 }
2701 else if (c1 >= 0xA1 && c1 <= 0xDF)
2702 {
2703 this_charset = charset_kana;
2704 c1 &= 0x7F;
2705 }
2706 else
2707 goto invalid_byte;
2708
2709 c = DECODE_CHAR (this_charset, c1);
2710 if (c >= 0)
2711 goto emit_char;
2712
2713 invalid_byte:
2714 if (! converter->lenient)
2715 break;
2716 REWIND_SRC_TO_BASE ();
2717 c = *src++;
2718 this_charset = mcharset__binary;
2719
2720 emit_char:
2721 if (this_charset != mcharset__ascii
2722 && this_charset != charset)
2723 {
2724 TAKEIN_CHARS (mt, nchars - last_nchars,
2725 dst - (mt->data + mt->nbytes), charset);
2726 charset = this_charset;
2727 last_nchars = nchars;
2728 }
2729 EMIT_CHAR (c);
2730 }
2731 /* We reach here because of an invalid byte. */
2732 error = 1;
2733
2734 source_end:
2735 TAKEIN_CHARS (mt, nchars - last_nchars,
2736 dst - (mt->data + mt->nbytes), charset);
2737 return finish_decoding (mt, converter, nchars,
2738 source, src_end, src_base, error);
2739 }
2740
2741 static int
encode_coding_sjis(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)2742 encode_coding_sjis (MText *mt, int from, int to,
2743 unsigned char *destination, int dst_bytes,
2744 MConverter *converter)
2745 {
2746 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2747 MCodingSystem *coding = internal->coding;
2748 unsigned char *src, *src_end;
2749 unsigned char *dst = destination;
2750 unsigned char *dst_end = dst + dst_bytes;
2751 int nchars = 0;
2752 MCharset *charset_roman = coding->charsets[0];
2753 MCharset *charset_kanji = coding->charsets[1];
2754 MCharset *charset_kana = coding->charsets[2];
2755 enum MTextFormat format = mt->format;
2756
2757 SET_SRC (mt, format, from, to);
2758
2759 while (1)
2760 {
2761 int c, bytes, len;
2762 unsigned code;
2763
2764 ONE_MORE_CHAR (c, bytes, format);
2765
2766 if (c <= 0x20 || c == 0x7F)
2767 {
2768 CHECK_DST (1);
2769 *dst++ = c;
2770 }
2771 else
2772 {
2773 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2774 {
2775 CHECK_DST (1);
2776 *dst++ = c;
2777 }
2778 else if ((code = ENCODE_CHAR (charset_kanji, c))
2779 != MCHAR_INVALID_CODE)
2780 {
2781 int c1 = code >> 8, c2 = code & 0xFF;
2782 code = JIS_TO_SJIS (c1, c2);
2783 CHECK_DST (2);
2784 *dst++ = code >> 8;
2785 *dst++ = code & 0xFF;
2786 }
2787 else if ((code = ENCODE_CHAR (charset_kana, c))
2788 != MCHAR_INVALID_CODE)
2789 {
2790 CHECK_DST (1);
2791 *dst++ = code | 0x80;
2792 }
2793 else
2794 {
2795 if (! converter->lenient)
2796 break;
2797 len = encode_unsupporeted_char (c, dst, dst_end,
2798 mt, from + nchars);
2799 if (len == 0)
2800 goto insufficient_destination;
2801 dst += len;
2802 }
2803 }
2804 src += bytes;
2805 nchars++;
2806 }
2807 /* We reach here because of an unsupported char. */
2808 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2809 goto finish;
2810
2811 insufficient_destination:
2812 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2813
2814 finish:
2815 converter->nchars += nchars;
2816 converter->nbytes += dst - destination;
2817 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2818 }
2819
2820
2821 static MCodingSystem *
find_coding(MSymbol name)2822 find_coding (MSymbol name)
2823 {
2824 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2825
2826 if (! coding)
2827 {
2828 MPlist *plist, *pl;
2829 MSymbol sym = msymbol__canonicalize (name);
2830
2831 plist = mplist_find_by_key (coding_definition_list, sym);
2832 if (! plist)
2833 return NULL;
2834 pl = MPLIST_PLIST (plist);
2835 name = MPLIST_VAL (pl);
2836 mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2837 NULL, NULL, NULL, NULL);
2838 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2839 plist = mplist_pop (plist);
2840 M17N_OBJECT_UNREF (plist);
2841 }
2842 return coding;
2843 }
2844
/* Binding kinds: none, a buffer, or a stream.  */
#define BINDING_NONE    0
#define BINDING_BUFFER  1
#define BINDING_STREAM  2

/* Work area size in bytes (0x10000 = 64K).  */
#define CONVERT_WORKSIZE        0x10000
2850
2851
2852 /* Internal API */
2853
2854 int
mcoding__init(void)2855 mcoding__init (void)
2856 {
2857 int i;
2858 MPlist *param, *charsets, *pl;
2859
2860 MLIST_INIT1 (&coding_list, codings, 128);
2861 coding_definition_list = mplist ();
2862
2863 /* ISO-2022 specific initialize routine. */
2864 for (i = 0; i < 0x20; i++)
2865 iso_2022_code_class[i] = ISO_control_0;
2866 for (i = 0x21; i < 0x7F; i++)
2867 iso_2022_code_class[i] = ISO_graphic_plane_0;
2868 for (i = 0x80; i < 0xA0; i++)
2869 iso_2022_code_class[i] = ISO_control_1;
2870 for (i = 0xA1; i < 0xFF; i++)
2871 iso_2022_code_class[i] = ISO_graphic_plane_1;
2872 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2873 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
2874 iso_2022_code_class[0x0E] = ISO_shift_out;
2875 iso_2022_code_class[0x0F] = ISO_shift_in;
2876 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2877 iso_2022_code_class[0x1B] = ISO_escape;
2878 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2879 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2880 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
2881
2882 Mcoding = msymbol ("coding");
2883
2884 Mutf = msymbol ("utf");
2885 Miso_2022 = msymbol ("iso-2022");
2886
2887 Mreset_at_eol = msymbol ("reset-at-eol");
2888 Mreset_at_cntl = msymbol ("reset-at-cntl");
2889 Meight_bit = msymbol ("eight-bit");
2890 Mlong_form = msymbol ("long-form");
2891 Mdesignation_g0 = msymbol ("designation-g0");
2892 Mdesignation_g1 = msymbol ("designation-g1");
2893 Mdesignation_ctext = msymbol ("designation-ctext");
2894 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2895 Mlocking_shift = msymbol ("locking-shift");
2896 Msingle_shift = msymbol ("single-shift");
2897 Msingle_shift_7 = msymbol ("single-shift-7");
2898 Meuc_tw_shift = msymbol ("euc-tw-shift");
2899 Miso_6429 = msymbol ("iso-6429");
2900 Mrevision_number = msymbol ("revision-number");
2901 Mfull_support = msymbol ("full-support");
2902 Mmaybe = msymbol ("maybe");
2903
2904 Mtype = msymbol ("type");
2905 Mcharsets = msymbol_as_managing_key ("charsets");
2906 Mflags = msymbol_as_managing_key ("flags");
2907 Mdesignation = msymbol_as_managing_key ("designation");
2908 Minvocation = msymbol_as_managing_key ("invocation");
2909 Mcode_unit = msymbol ("code-unit");
2910 Mbom = msymbol ("bom");
2911 Mlittle_endian = msymbol ("little-endian");
2912
2913 param = mplist ();
2914 charsets = mplist ();
2915 pl = param;
2916 /* Setup predefined codings. */
2917 mplist_set (charsets, Msymbol, Mcharset_ascii);
2918 pl = mplist_add (pl, Mtype, Mcharset);
2919 pl = mplist_add (pl, Mcharsets, charsets);
2920 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2921 NULL, NULL, NULL, NULL);
2922
2923 {
2924 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2925 MCodingSystem *coding
2926 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2927
2928 msymbol_put (alias, Mcoding, coding);
2929 alias = msymbol__canonicalize (alias);
2930 msymbol_put (alias, Mcoding, coding);
2931 }
2932
2933 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2934 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2935 NULL, NULL, NULL, NULL);
2936
2937 mplist_set (charsets, Msymbol, Mcharset_m17n);
2938 mplist_put (param, Mtype, Mutf);
2939 mplist_put (param, Mcode_unit, (void *) 8);
2940 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2941 NULL, NULL, NULL, NULL);
2942
2943 mplist_set (charsets, Msymbol, Mcharset_unicode);
2944 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2945 NULL, NULL, NULL, NULL);
2946
2947 mplist_put (param, Mcode_unit, (void *) 16);
2948 mplist_put (param, Mbom, Mmaybe);
2949 #ifndef WORDS_BIGENDIAN
2950 mplist_put (param, Mlittle_endian, Mt);
2951 #endif
2952 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2953 NULL, NULL, NULL, NULL);
2954
2955 mplist_put (param, Mcode_unit, (void *) 32);
2956 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2957 NULL, NULL, NULL, NULL);
2958
2959 mplist_put (param, Mcode_unit, (void *) 16);
2960 mplist_put (param, Mbom, Mnil);
2961 mplist_put (param, Mlittle_endian, Mnil);
2962 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2963 NULL, NULL, NULL, NULL);
2964
2965 mplist_put (param, Mcode_unit, (void *) 32);
2966 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2967 NULL, NULL, NULL, NULL);
2968
2969 mplist_put (param, Mcode_unit, (void *) 16);
2970 mplist_put (param, Mlittle_endian, Mt);
2971 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2972 NULL, NULL, NULL, NULL);
2973
2974 mplist_put (param, Mcode_unit, (void *) 32);
2975 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2976 NULL, NULL, NULL, NULL);
2977
2978 mplist_put (param, Mtype, Mnil);
2979 pl = mplist ();
2980 mplist_add (pl, Msymbol, msymbol ("Shift_JIS"));
2981 mplist_put (param, Maliases, pl);
2982 mplist_set (charsets, Msymbol, Mcharset_ascii);
2983 Mcoding_sjis = mconv_define_coding ("sjis", param,
2984 reset_coding_sjis,
2985 decode_coding_sjis,
2986 encode_coding_sjis, NULL);
2987
2988 M17N_OBJECT_UNREF (charsets);
2989 M17N_OBJECT_UNREF (param);
2990 M17N_OBJECT_UNREF (pl);
2991
2992 return 0;
2993 }
2994
2995 void
mcoding__fini(void)2996 mcoding__fini (void)
2997 {
2998 int i;
2999 MPlist *plist;
3000
3001 for (i = 0; i < coding_list.used; i++)
3002 {
3003 MCodingSystem *coding = coding_list.codings[i];
3004
3005 if (coding->extra_info)
3006 free (coding->extra_info);
3007 if (coding->extra_spec)
3008 {
3009 if (coding->type == Miso_2022)
3010 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3011 free (coding->extra_spec);
3012 }
3013 free (coding);
3014 }
3015 MLIST_FREE1 (&coding_list, codings);
3016 MPLIST_DO (plist, coding_definition_list)
3017 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3018 M17N_OBJECT_UNREF (coding_definition_list);
3019 }
3020
3021 void
mconv__register_charset_coding(MSymbol sym)3022 mconv__register_charset_coding (MSymbol sym)
3023 {
3024 MSymbol name = msymbol__canonicalize (sym);
3025
3026 if (! mplist_find_by_key (coding_definition_list, name))
3027 {
3028 MPlist *param = mplist (), *charsets = mplist ();
3029
3030 mplist_set (charsets, Msymbol, sym);
3031 mplist_add (param, Msymbol, sym);
3032 mplist_add (param, Mtype, Mcharset);
3033 mplist_add (param, Mcharsets, charsets);
3034 mplist_put (coding_definition_list, name, param);
3035 M17N_OBJECT_UNREF (charsets);
3036 }
3037 }
3038
3039
3040 int
mcoding__load_from_database()3041 mcoding__load_from_database ()
3042 {
3043 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3044 MPlist *def_list, *plist;
3045 MPlist *definitions = coding_definition_list;
3046 int mdebug_flag = MDEBUG_CODING;
3047
3048 if (! mdb)
3049 return 0;
3050 MDEBUG_PUSH_TIME ();
3051 def_list = (MPlist *) mdatabase_load (mdb);
3052 MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to load the data."));
3053 MDEBUG_POP_TIME ();
3054 if (! def_list)
3055 return -1;
3056
3057 MDEBUG_PUSH_TIME ();
3058 MPLIST_DO (plist, def_list)
3059 {
3060 MPlist *pl, *aliases;
3061 MSymbol name, canonicalized;
3062
3063 if (! MPLIST_PLIST_P (plist))
3064 MERROR (MERROR_CHARSET, -1);
3065 pl = MPLIST_PLIST (plist);
3066 if (! MPLIST_SYMBOL_P (pl))
3067 MERROR (MERROR_CHARSET, -1);
3068 name = MPLIST_SYMBOL (pl);
3069 canonicalized = msymbol__canonicalize (name);
3070 pl = mplist__from_plist (MPLIST_NEXT (pl));
3071 mplist_push (pl, Msymbol, name);
3072 definitions = mplist_add (definitions, canonicalized, pl);
3073 aliases = mplist_get (pl, Maliases);
3074 if (aliases)
3075 MPLIST_DO (aliases, aliases)
3076 if (MPLIST_SYMBOL_P (aliases))
3077 {
3078 name = MPLIST_SYMBOL (aliases);
3079 canonicalized = msymbol__canonicalize (name);
3080 definitions = mplist_add (definitions, canonicalized, pl);
3081 M17N_OBJECT_REF (pl);
3082 }
3083 }
3084
3085 M17N_OBJECT_UNREF (def_list);
3086 MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to parse the loaded data."));
3087 MDEBUG_POP_TIME ();
3088 return 0;
3089 }
3090
3091 /*** @} */
3092 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3093
3094
3095 /* External API */
3096
3097 /*** @addtogroup m17nConv */
3098 /*** @{ */
3099 /*=*/
3100
3101 /***en @name Variables: Symbols representing coding systems */
3102 /***ja @name �ѿ�: ����Ѥߥ����ɷϤ���ꤹ�뤿��Υ���ܥ� */
3103 /*** @{ */
3104 /*=*/
3105
3106 /***en
3107 @brief Symbol for the coding system US-ASCII.
3108
3109 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3110 represents a coding system for the CES US-ASCII. */
3111
3112 /***ja
3113 @brief US-ASCII �����ɷϤΥ���ܥ�.
3114
3115 ����ܥ� #Mcoding_us_ascii �� <tt>"us-ascii"</tt> �Ȥ���̾���������
3116 CES US-ASCII �ѤΥ����ɷϤ���
3117 */
3118 MSymbol Mcoding_us_ascii;
3119 /*=*/
3120
3121 /***en
3122 @brief Symbol for the coding system ISO-8859-1.
3123
3124 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3125 represents a coding system for the CES ISO-8859-1. */
3126
3127 /***ja
3128 @brief ISO-8859-1 �����ɷϤΥ���ܥ�.
3129
3130 ����ܥ� #Mcoding_iso_8859_1 �� <tt>"iso-8859-1"</tt>
3131 �Ȥ���̾���������CES ISO-8859-1 �ѤΥ����ɷϤ��� */
3132
3133 MSymbol Mcoding_iso_8859_1;
3134 /*=*/
3135
3136 /***en
3137 @brief Symbol for the coding system UTF-8.
3138
3139 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3140 a coding system for the CES UTF-8. */
3141
3142 /***ja
3143 @brief UTF-8 �����ɷϤΥ���ܥ�.
3144
3145 ����ܥ� #Mcoding_utf_8 �� <tt>"utf-8"</tt> �Ȥ���̾���������CES
3146 UTF-8 �ѤΥ����ɷϤ���
3147 */
3148
3149 MSymbol Mcoding_utf_8;
3150 /*=*/
3151
3152 /***en
3153 @brief Symbol for the coding system UTF-8-FULL.
3154
3155 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
    represents a coding system that is an extension of UTF-8.  This
3157 coding system uses the same encoding algorithm as UTF-8 but is not
3158 limited to the Unicode characters. It can encode all characters
3159 supported by the m17n library. */
3160
3161 /***ja
3162 @brief UTF-8-FULL �����ɷϤΥ���ܥ�.
3163
3164 ����ܥ� #Mcoding_utf_8_full �� <tt>"utf-8-full"</tt>
3165 �Ȥ���̾���������<tt>"UTF-8"</tt> �γ�ĥ�Ǥ��륳���ɷϤ���
3166 ���Υ����ɷϤ� UTF-8 ��Ʊ�������ǥ����르�ꥺ����Ѥ��뤬���оݤ�
3167 Unicode ʸ���ˤϸ��ꤵ��ʤ���
3168 �ޤ�m17n �饤�֥�꤬�������Ƥ�ʸ�����ɤ��뤳�Ȥ��Ǥ��롣
3169 */
3170
3171 MSymbol Mcoding_utf_8_full;
3172 /*=*/
3173
3174 /***en
3175 @brief Symbol for the coding system UTF-16.
3176
3177 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
    represents a coding system for the CES UTF-16 (RFC 2781).  */
3179 /***ja
3180 @brief UTF-16 �����ɷϤΥ���ܥ�.
3181
3182 ����ܥ� #Mcoding_utf_16 �� <tt>"utf-16"</tt> �Ȥ���̾���������
3183 CES UTF-16 (RFC 2279) �ѤΥ����ɷϤ���
3184 */
3185
3186 MSymbol Mcoding_utf_16;
3187 /*=*/
3188
3189 /***en
3190 @brief Symbol for the coding system UTF-16BE.
3191
3192 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
    represents a coding system for the CES UTF-16BE (RFC 2781).  */
3194
3195 /***ja
3196 @brief UTF-16BE �����ɷϤΥ���ܥ�.
3197
3198 ����ܥ� #Mcoding_utf_16be �� <tt>"utf-16be"</tt> �Ȥ���̾���������
3199 CES UTF-16BE (RFC 2279) �ѤΥ����ɷϤ��� */
3200
3201 MSymbol Mcoding_utf_16be;
3202 /*=*/
3203
3204 /***en
3205 @brief Symbol for the coding system UTF-16LE.
3206
3207 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
    represents a coding system for the CES UTF-16LE (RFC 2781).  */
3209
3210 /***ja
3211 @brief UTF-16LE �����ɷϤΥ���ܥ�.
3212
3213 ����ܥ� #Mcoding_utf_16le �� <tt>"utf-16le"</tt> �Ȥ���̾���������
3214 CES UTF-16LE (RFC 2279) �ѤΥ����ɷϤ��� */
3215
3216 MSymbol Mcoding_utf_16le;
3217 /*=*/
3218
3219 /***en
3220 @brief Symbol for the coding system UTF-32.
3221
3222 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
    represents a coding system for the CES UTF-32 (see the Unicode Standard).  */
3224
3225 /***ja
3226 @brief UTF-32 �����ɷϤΥ���ܥ�.
3227
3228 ����ܥ� #Mcoding_utf_32 �� <tt>"utf-32"</tt> �Ȥ���̾���������
3229 CES UTF-32 (RFC 2279) �ѤΥ����ɷϤ��� */
3230
3231 MSymbol Mcoding_utf_32;
3232 /*=*/
3233
3234 /***en
3235 @brief Symbol for the coding system UTF-32BE.
3236
3237 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
    represents a coding system for the CES UTF-32BE (see the Unicode Standard).  */
3239 /***ja
3240 @brief UTF-32BE �����ɷϤΥ���ܥ�.
3241
3242 ����ܥ� #Mcoding_utf_32be �� <tt>"utf-32be"</tt> �Ȥ���̾���������
3243 CES UTF-32BE (RFC 2279) �ѤΥ����ɷϤ��� */
3244
3245 MSymbol Mcoding_utf_32be;
3246 /*=*/
3247
3248 /***en
3249 @brief Symbol for the coding system UTF-32LE.
3250
3251 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
    represents a coding system for the CES UTF-32LE (see the Unicode Standard).  */
3253 /***ja
3254 @brief UTF-32LE �����ɷϤΥ���ܥ�.
3255
3256 ����ܥ� #Mcoding_utf_32le �� <tt>"utf-32le"</tt> �Ȥ���̾���������
3257 CES UTF-32LE (RFC 2279) �ѤΥ����ɷϤ��� */
3258
3259 MSymbol Mcoding_utf_32le;
3260 /*=*/
3261
3262 /***en
3263 @brief Symbol for the coding system SJIS.
3264
3265 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3266 system for the CES Shift-JIS. */
3267 /***ja
3268 @brief SJIS �����ɷϤΥ���ܥ�.
3269
3270 ����ܥ� #Mcoding_sjis has �� <tt>"sjis"</tt> �Ȥ���̾���������
3271 CES Shift-JIS�ѤΥ����ɷϤ��� */
3272
3273 MSymbol Mcoding_sjis;
3274 /*** @} */
3275 /*=*/
3276
3277 /***en
3278 @name Variables: Parameter keys for mconv_define_coding (). */
3279 /***ja
3280 @name �ѿ�: mconv_define_coding () �ѥѥ������� */
3281 /*** @{ */
3282 /*=*/
3283
3284 /***en
3285 Parameter key for mconv_define_coding () (which see). */
3286 /***ja
3287 mconv_define_coding () �ѥѥ������� (�ܺ٤� mconv_define_coding ()����). */
3288 MSymbol Mtype;
3289 MSymbol Mcharsets;
3290 MSymbol Mflags;
3291 MSymbol Mdesignation;
3292 MSymbol Minvocation;
3293 MSymbol Mcode_unit;
3294 MSymbol Mbom;
3295 MSymbol Mlittle_endian;
3296 /*** @} */
3297 /*=*/
3298
3299 /***en
3300 @name Variables: Symbols representing coding system types. */
3301 /***ja
3302 @name �ѿ��� �����ɷϤΥ����פ�����ܥ�. */
3303 /*** @{ */
3304 /*=*/
3305
3306 /***en
3307 Symbol that can be a value of the #Mtype parameter of a coding
3308 system used in an argument to the mconv_define_coding () function
3309 (which see). */
3310 /***ja
3311 �ؿ� mconv_define_coding () �ΰ����Ȥ����Ѥ����륳���ɷϤΥѥ���
3312 #Mtype ���ͤȤʤ����륷��ܥ롣(�ܺ٤�
3313 mconv_define_coding ()����)�� */
3314
3315 MSymbol Mutf;
3316 /*=*/
3317 MSymbol Miso_2022;
3318 /*=*/
3319 /*** @} */
3320 /*=*/
3321
3322 /***en
3323 @name Variables: Symbols appearing in the value of Mflags parameter. */
3324 /***ja
3325 @name �ѿ��� �ѥ��� Mflags ���ͤȤʤ����륷��ܥ�. */
3326 /*** @{ */
3327 /***en
3328 Symbols that can be a value of the @b Mflags parameter of a coding
3329 system used in an argument to the mconv_define_coding () function
3330 (which see). */
3331 /***ja
3332 �ؿ� mconv_define_coding () �ΰ����Ȥ����Ѥ����륳���ɷϤΥѥ���
3333 @b Mflags ���ͤȤʤ����륷��ܥ롣(�ܺ٤�
3334 mconv_define_coding ()����)�� */
3335 MSymbol Mreset_at_eol;
3336 MSymbol Mreset_at_cntl;
3337 MSymbol Meight_bit;
3338 MSymbol Mlong_form;
3339 MSymbol Mdesignation_g0;
3340 MSymbol Mdesignation_g1;
3341 MSymbol Mdesignation_ctext;
3342 MSymbol Mdesignation_ctext_ext;
3343 MSymbol Mlocking_shift;
3344 MSymbol Msingle_shift;
3345 MSymbol Msingle_shift_7;
3346 MSymbol Meuc_tw_shift;
3347 MSymbol Miso_6429;
3348 MSymbol Mrevision_number;
3349 MSymbol Mfull_support;
3350 /*** @} */
3351 /*=*/
3352
3353 /***en
3354 @name Variables: Others
3355
3356 Remaining variables. */
3357 /***ja @name �ѿ�: ����¾
3358
3359 �ۤ����ѿ��� */
3360 /*** @{ */
3361 /*=*/
3362 /***en
3363 @brief Symbol whose name is "maybe".
3364
    The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>.  It is
    used as a value of the @b Mbom parameter of the function
    mconv_define_coding () (which see).  */
3368 /***ja
3369 @brief "maybe"�Ȥ���̾������ĥ���ܥ�.
3370
3371 �ѿ� #Mmaybe �� <tt>"maybe"</tt> �Ȥ���̾������ġ�����ϴؿ�
3372 mconv_define_coding () �ѥ��� @b Mbom ���ͤȤ����Ѥ����롣
3373 (�ܺ٤� mconv_define_coding () ����)�� */
3374
MSymbol Mmaybe;  /* As the Mbom value, makes mconv_define_coding () store 0 in the UTF info's bom member.  */
3376 /*=*/
3377
3378 /***en
3379 @brief The symbol @c Mcoding.
3380
3381 Any decoded M-text has a text property whose key is the predefined
3382 symbol @c Mcoding. The name of @c Mcoding is
3383 <tt>"coding"</tt>. */
3384
3385 /***ja
3386 @brief ����ܥ� @c Mcoding.
3387
3388 �ǥ����ɤ��줿 M-text �Ϥ��٤ơ�����������Ѥߥ���ܥ� @c Mcoding
3389 �Ǥ���褦�ʥƥ����ȥץ�ѥƥ�����ġ�����ܥ� @c Mcoding ��
3390 <tt>"coding"</tt> �Ȥ���̾������ġ� */
3391
MSymbol Mcoding;  /* Also the symbol-property key under which mconv_define_coding () stores the coding system object.  */
3393 /*=*/
3394 /*** @} */
3395
3396 /***en
3397 @brief Define a coding system.
3398
3399 The mconv_define_coding () function defines a new coding system
3400 and makes it accessible via a symbol whose name is $NAME. $PLIST
3401 specifies parameters of the coding system as below:
3402
3403 <ul>
3404
3405 <li> Key is @c Mtype, value is a symbol
3406
3407 The value specifies the type of the coding system. It must be
3408 @b Mcharset, @b Mutf, @b Miso_2022, or @b Mnil.
3409
3410 If the type is @b Mcharset, $EXTRA_INFO is ignored.
3411
3412 If the type is @b Mutf, $EXTRA_INFO must be a pointer to
3413 #MCodingInfoUTF.
3414
3415 If the type is @b Miso_2022, $EXTRA_INFO must be a pointer to
3416 #MCodingInfoISO2022.
3417
3418 If the type is #Mnil, the argument $RESETTER, $DECODER, and
3419 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3420 they can be @c NULL and the m17n library provides proper defaults.
3421
3422 <li> Key is @b Mcharsets, value is a plist
3423
    The value specifies a list of charsets supported by the coding
    system.  The keys of the plist must be #Msymbol, and the values
    must be symbols representing charsets.
3427
3428 <li> Key is @b Mflags, value is a plist
3429
    If the type is @b Miso_2022, the value specifies flags to control
    the ISO 2022 interpreter.  The keys of the plist must be #Msymbol,
    and the values must be one of the following.
3433
3434 <ul>
3435
3436 <li> @b Mreset_at_eol
3437
3438 If this flag exists, designation and invocation status is reset to
3439 the initial state at the end of line.
3440
3441 <li> @b Mreset_at_cntl
3442
3443 If this flag exists, designation and invocation status is reset to
3444 the initial state at a control character.
3445
3446 <li> @b Meight_bit
3447
3448 If this flag exists, the graphic plane right is used.
3449
3450 <li> @b Mlong_form
3451
3452 If this flag exists, the over-long escape sequences (ESC '$' '('
3453 \<final_byte\>) are used for designating the CCS JISX0208.1978,
3454 GB2312, and JISX0208.
3455
3456 <li> @b Mdesignation_g0
3457
3458 If this flag and @b Mfull_support exists, designates charsets not
3459 listed in the charset list to the graphic register G0.
3460
3461 <li> @b Mdesignation_g1
3462
3463 If this flag and @b Mfull_support exists, designates charsets not
3464 listed in the charset list to the graphic register G1.
3465
3466 <li> @b Mdesignation_ctext
3467
3468 If this flag and @b Mfull_support exists, designates charsets not
3469 listed in the charset list to a graphic register G0 or G1 based on
3470 the criteria of the Compound Text.
3471
3472 <li> @b Mdesignation_ctext_ext
3473
3474 If this flag and @b Mfull_support exists, designates charsets not
3475 listed in the charset list to a graphic register G0 or G1, or use
3476 extended segment for such charsets based on the criteria of the
3477 Compound Text.
3478
3479 <li> @b Mlocking_shift
3480
3481 If this flag exists, use locking shift.
3482
3483 <li> @b Msingle_shift
3484
3485 If this flag exists, use single shift.
3486
3487 <li> @b Msingle_shift_7
3488
3489 If this flag exists, use 7-bit single shift code (0x19).
3490
3491 <li> @b Meuc_tw_shift
3492
3493 If this flag exists, use a special shifting according to EUC-TW.
3494
3495 <li> @b Miso_6429
3496
3497 This flag is currently ignored.
3498
3499 <li> @b Mrevision_number
3500
3501 If this flag exists, use a revision number escape sequence to
3502 designate a charset that has a revision number.
3503
3504 <li> @b Mfull_support
3505
3506 If this flag exists, support all charsets registered in the
3507 International Registry.
3508
3509 </ul>
3510
3511 <li> Key is @b Mdesignation, value is a plist
3512
    If the type is @b Miso_2022, the value specifies how to designate
    each supported charset.  The keys of the plist must be
    #Minteger, and the values must be numbers indicating a graphic
    register.  The Nth element value is for the Nth charset of the
    charset list.  The value 0..3 means that it is assumed that a
    charset is already designated to the graphic register 0..3.  The
    negative value G (-4..-1) means that a charset is not designated
    to any register at first, and if necessary, is designated to the
    (G+4) graphic register.
3522
3523 <li> Key is @b Minvocation, value is a plist
3524
    If the type is @b Miso_2022, the value specifies how to invoke
    each graphic register.  The plist length must be one or two.  The
    keys of the plist must be #Minteger, and the values must be
    numbers indicating a graphic register.  The value of the first
    element specifies which graphic register is invoked to the
    graphic plane left.  If the length is one, no graphic register is
    invoked to the graphic plane right.  Otherwise, the value of the
    second element specifies which graphic register is invoked to
    the graphic plane right.
3534
3535 <li> Key is @b Mcode_unit, value is an integer
3536
3537 If the type is @b Mutf, the value specifies the bit length of a
3538 code-unit. It must be 8, 16, or 32.
3539
3540 <li> Key is @b Mbom, value is a symbol
3541
3542 If the type is @b Mutf and the code-unit bit length is 16 or 32,
3543 it specifies whether or not to use BOM (Byte Order Mark). If the
3544 value is #Mnil (default), BOM is not used, else if the value is
3545 #Mmaybe, the existence of BOM is detected at decoding time, else
3546 BOM is used.
3547
3548 <li> Key is @b Mlittle_endian, value is a symbol
3549
3550 If the type is @b Mutf and the code-unit bit length is 16 or 32,
3551 it specifies whether or not the encoding is little endian. If the
3552 value is #Mnil (default), it is big endian, else it is little
3553 endian.
3554
3555 </ul>
3556
3557 $RESETTER is a pointer to a function that resets a converter for
3558 the coding system to the initial status. The pointed function is
3559 called with one argument, a pointer to a converter object.
3560
3561 $DECODER is a pointer to a function that decodes a byte sequence
3562 according to the coding system. The pointed function is called
3563 with four arguments:
3564
3565 @li A pointer to the byte sequence to decode.
3566 @li The number of bytes to decode.
3567 @li A pointer to an M-text to which the decoded characters are appended.
3568 @li A pointer to a converter object.
3569
3570 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3571
3572 $ENCODER is a pointer to a function that encodes an M-text
3573 according to the coding system. The pointed function is called
3574 with six arguments:
3575
3576 @li A pointer to the M-text to encode.
3577 @li The starting position of the encoding.
3578 @li The ending position of the encoding.
3579 @li A pointer to a memory area where the produced bytes are stored.
3580 @li The size of the memory area.
3581 @li A pointer to a converter object.
3582
3583 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3584
    $EXTRA_INFO is a pointer to a data structure that contains extra
    information about the coding system.  The type of the data
    structure depends on the value of the @c Mtype parameter in $PLIST.
3588
3589 @return
3590
3591 If the operation was successful, mconv_define_coding () returns a
3592 symbol whose name is $NAME. If an error is detected, it returns
3593 #Mnil and assigns an error code to the external variable #merror_code. */
3594
3595 /***ja
3596 @brief �����ɷϤ��������.
3597
3598 �ؿ� mconv_define_coding () �ϡ������������ɷϤ�������������
3599 $NAME �Ȥ���̾���Υ���ܥ��ͳ�ǥ��������Ǥ���褦�ˤ��롣 $PLIST
3600 �Ǥ�������륳���ɷϤΥѥ�����ʲ��Τ褦�˻��ꤹ�롣
3601
3602 <ul>
3603
3604 <li> ������ @c Mtype ���ͤ�����ܥ�λ�
3605
3606 �ͤϥ����ɷϤΥ����פ�ɽ����@b Mcharset, @b Mutf, @b Miso_2022, #Mnil
3607 �Τ����줫�Ǥʤ��ƤϤʤ�ʤ���
3608
3609 �����פ� @b Mcharset �ʤ�� $EXTRA_INFO ��̵�뤵��롣
3610
3611 �����פ� @b Mutf �ʤ�� $EXTRA_INFO �� #MCodingInfoUTF
3612 �ؤΥݥ��Ǥʤ��ƤϤʤ�ʤ���
3613
3614 �����פ� @b Miso_2022�ʤ�� $EXTRA_INFO �� #MCodingInfoISO2022
3615 �ؤΥݥ��Ǥʤ��ƤϤʤ�ʤ���
3616
3617 �����פ� #Mnil �ʤ�С����� $RESETTER, $DECODER, $ENCODER
3618 ��Ϳ���ʤ��ƤϤʤ�ʤ���$EXTRA_INFO ��̵�뤵��롣
3619 ����ʳ��ξ��ˤϤ����� @c NULL �Ǥ褯��
3620 m17n �饤�֥�꤬Ŭ�ڤʥǥե�����ͤ�Ϳ���롣
3621
3622 <li> ������ @b Mcharsets ���ͤ� plist �λ�
3623
3624 �ͤϤ��Υ����ɷϤǥ��ݡ��Ȥ����ʸ�����åȤΥꥹ�ȤǤ��롣plist�Υ�����
3625 #Msymbol���ͤ�ʸ�����åȤ�����ܥ�Ǥʤ��ƤϤʤ�ʤ���
3626
3627 <li> ������ @b Mflags �ͤ� plist �λ�
3628
3629 �����פ� @b Miso_2022 �ʤ�С������ͤ�, ISO 2022
3630 ���ץ�Ѥ�����ե�å�����plist �Υ����� #Msymbol
3631 �Ǥ��ꡢ�ͤϰʲ��Τ����줫�Ǥ��롣
3632
3633 <ul>
3634
3635 <li> @b Mreset_at_eol
3636
3637 ���Υե饰������С���ʸ������λؼ���ƽФϹ����ǥꥻ�åȤ��������ξ��֤���롣
3638
3639 <li> @b Mreset_at_cntl
3640
3641 ���Υե饰������С���ʸ������λؼ���ƽФ�����ʸ���˽в�ä������ǥꥻ�åȤ��������ξ��֤���롣
3642
3643 <li> @b Meight_bit
3644
3645 ���Υե饰������С���ʸ������α�Ⱦ�̤��Ѥ����롣
3646
3647 <li> @b Mlong_form
3648
3649 ���Υե饰������С�ʸ������ JISX0208.1978, GB2312, JISX0208
3650 ��ؼ�����ݤ� over-long ���������ץ������� (ESC '$' '('
3651 \<final_byte\>) ���Ѥ����롣
3652
3653 <li> @b Mdesignation_g0
3654
3655 ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3656 G0 ����˻ؼ����롣
3657
3658 <li> @b Mdesignation_g1
3659
3660 ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3661 G1 ����˻ؼ����롣
3662
3663 <li> @b Mdesignation_ctext
3664
3665 ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3666 G0 ����ޤ��� G1 ����ˡ�����ѥ���ɥƥ����Ȥδ��ˤ��äƻؼ����롣
3667
3668 <li> @b Mdesignation_ctext_ext
3669
3670 ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3671 G0 ����ޤ��� G1 ����ˡ����뤤�ϳ�ĥ�������Ȥ˥���ѥ���ɥƥ����Ȥδ��ˤ��äƻؼ����롣
3672
3673 <li> @b Mlocking_shift
3674
3675 ���Υե饰������С���å����եȤ��Ѥ��롣
3676
3677 <li> @b Msingle_shift
3678
3679 ���Υե饰������С����륷�եȤ��Ѥ��롣
3680
3681 <li> @b Msingle_shift_7
3682
3683 ���Υե饰������С�7-bit ���륷�եȥ����� (0x19) ���Ѥ��롣
3684
3685 <li> @b Meuc_tw_shift
3686
3687 ���Υե饰������С�EUC-TW �˱�ä����̤ʥ��եȤ��Ѥ��롣
3688
3689 <li> @b Miso_6429
3690
3691 �������Ǥ��Ѥ����Ƥ��ʤ���
3692
3693 <li> @b Mrevision_number
3694
3695 ���Υե饰������С�revision number �����ʸ�����åȤ�ؼ�����ݤ�
3696 revision number ���������ץ����������Ѥ��롣
3697
3698 <li> @b Mfull_support
3699
3700 ���Υե饰������С�the International Registry
3701 ����Ͽ����Ƥ�����ʸ�����åȤݡ��Ȥ��롣
3702
3703 </ul>
3704
3705 <li> ������ @b Mdesignation ���ͤ� plist �λ�
3706
3707 �����פ� @b Miso_2022 �ʤ�С��ͤϳ�ʸ����ɤΤ褦�˻ؼ����뤫����
3708 plist �Υ����� #Minteger���ͤϽ����graphic register��
3709 �������Ǥ��롣N���ܤ����Ǥ��ͤϡ�ʸ�����åȥꥹ�Ȥ� N
3710 ���ܤ�ʸ�����åȤ��б����롣�ͤ� 0..3 �Ǥ���С�ʸ�����åȤ����Ǥ�
3711 G0..G3 �˻ؼ� ����Ƥ��롣
3712
3713 �ͤ���(-4..-1) �Ǥ���С�������֤Ǥ�ʸ�����åȤ��ɤ��ˤ�ؼ�����Ƥ��ʤ����ȡ�ɬ�פʺݤˤ�
3714 G0..G3 �Τ��줾��˻ؼ����뤳�Ȥ��̣���롣
3715
3716 <li> ������ @b Minvocation ���ͤ� plist �λ�
3717
3718 �����פ� @b Miso_2022 �ʤ�С��ͤϳƽ����ɤΤ褦�˸ƤӽФ�������
3719 plist ��Ĺ���� 1 �ʤ��� 2 �Ǥ��롣plist �Υ�����
3720 #Minteger���ͤϽ����graphic register)�������Ǥ��롣
3721 �ǽ�����Ǥ��ͤ���ʸ�����纸Ⱦ�̤˸ƤӽФ���뽸�����
3722 plist ��Ĺ���� 1 �ʤ�С���Ⱦ�̤ˤϲ���ƤӽФ���ʤ���
3723 �����Ǥ���С����Ĥ�����Ǥ��ͤ���ʸ�����籦Ⱦ�̤˸ƤӽФ���뽸�����
3724
3725 <li> ������ @b Mcode_unit ���ͤ������ͤλ�
3726
3727 �����פ� @b Mutf �ʤ�С��ͤϥ����ɥ�˥åȤΥӥå�Ĺ�Ǥ��ꡢ8, 16,
3728 32 �Τ����줫�Ǥ��롣
3729
3730 <li> ������ @b Mbom ���ͤ�����ܥ�λ�
3731
3732 �����פ� @b Mutf �ǥ����ɥ�˥åȤΥӥå�Ĺ�� 16 �� 32�ʤ�С��ͤ�
3733 BOM (Byte Order Mark) ����Ѥ��뤫�ɤ��������ͤ��ǥե�����ͤ�
3734 #Mnil �ʤ�С����Ѥ��ʤ����ͤ� #Mmaybe �ʤ�Хǥ����ɻ��� BOM
3735 �����뤫�ɤ�����Ĵ�٤롣����ʳ��ʤ�л��Ѥ��롣
3736
3737 <li> ������ @b Mlittle_endian ���ͤ�����ܥ�λ�
3738
3739 �����פ� @b Mutf �ǥ����ɥ�˥åȤΥӥå�Ĺ�� 16 �� 32
3740 �ʤ�С��ͤϥ����ɤ� little endian ���ɤ��������ͤ��ǥե�����ͤ�
3741 #Mnil �ʤ�� big endian �Ǥ��ꡢ�����Ǥʤ���� little endian �Ǥ��롣
3742
3743 </ul>
3744
3745 $RESETTER
3746 �Ϥ��Υ����ɷ��ѤΥ���С����������֤˥ꥻ�åȤ���ؿ��ؤΥݥ��Ǥ��롣
3747 ���δؿ��ϥ���С������֥������ȤؤΥݥ��Ȥ�����������Ȥ롣
3748
3749 $DECODER �ϥХ�����Υ����ɷϤ˽��äƥǥ����ɤ���ؿ��ؤΥݥ��Ǥ��롣
3750 ���δؿ��ϰʲ��Σ�������Ȥ롣
3751
3752 @li �ǥ����ɤ���Х�����ؤΥݥ���
3753 @li �ǥ����ɤ��٤��Х��ȿ�
3754 @li �ǥ����ɷ�̤�ʸ�����ղä��� M-text �ؤΥݥ���
3755 @li ����С������֥������ȤؤΥݥ���
3756
3757 $DECODER �����������Ȥ��ˤ� 0 ���Ԥ����Ȥ��ˤ� -1
3758 ���֤��ʤ��ƤϤʤ�ʤ���
3759
3760 $ENCODER �� M-text �Υ����ɷϤ˽��äƥ����ɤ���ؿ��ؤΥݥ��Ǥ��롣
3761 ���δؿ��ϰʲ��Σ�������Ȥ롣
3762
3763 @li �����ɤ���M-text �ؤΥݥ���
3764 @li M-text �Υ����ɳ��ϰ���
3765 @li M-text �Υ����ɽ�λ����
3766 @li ���������Х��Ȥ��ݻ���������ΰ�ؤΥݥ���
3767 @li �����ΰ�Υ�����
3768 @li ����С������֥������ȤؤΥݥ���
3769
3770 $ENCODER �����������Ȥ��ˤ� 0 ���Ԥ����Ȥ��ˤ� -1
3771 ���֤��ʤ��ƤϤʤ�ʤ���
3772
3773 $EXTRA_INFO �ϥ����ǥ��������ƥ�˴ؤ����ɲþ����ޤ�ǡ�����¤�ؤΥݥ��Ǥ��롣
3774 ���Υǡ�����¤�η� $TYPE �˰�¸���롣
3775
3776 @return
3777
3778 ��������������� mconv_define_coding () �� $NAME
3779 �Ȥ���̾���Υ���ܥ���֤��� ���顼�����Ф��줿���� #Mnil
3780 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
3781 */
3782
3783 /***
3784 @errors
3785 @c MERROR_CODING */
3786
3787 MSymbol
mconv_define_coding(const char * name,MPlist * plist,int (* resetter)(MConverter *),int (* decoder)(const unsigned char *,int,MText *,MConverter *),int (* encoder)(MText *,int,int,unsigned char *,int,MConverter *),void * extra_info)3788 mconv_define_coding (const char *name, MPlist *plist,
3789 int (*resetter) (MConverter *),
3790 int (*decoder) (const unsigned char *, int, MText *,
3791 MConverter *),
3792 int (*encoder) (MText *, int, int,
3793 unsigned char *, int,
3794 MConverter *),
3795 void *extra_info)
3796 {
3797 MSymbol sym = msymbol (name);
3798 int i;
3799 MCodingSystem *coding;
3800 MPlist *pl;
3801
3802 MSTRUCT_MALLOC (coding, MERROR_CODING);
3803 coding->name = sym;
3804 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3805 coding->type = Mcharset;
3806 pl = (MPlist *) mplist_get (plist, Mcharsets);
3807 if (! pl)
3808 MERROR (MERROR_CODING, Mnil);
3809 coding->ncharsets = mplist_length (pl);
3810 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3811 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3812 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3813 {
3814 MSymbol charset_name;
3815
3816 if (MPLIST_KEY (pl) != Msymbol)
3817 MERROR (MERROR_CODING, Mnil);
3818 charset_name = MPLIST_SYMBOL (pl);
3819 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3820 MERROR (MERROR_CODING, Mnil);
3821 }
3822
3823 coding->resetter = resetter;
3824 coding->decoder = decoder;
3825 coding->encoder = encoder;
3826 coding->ascii_compatible = 0;
3827 coding->extra_info = extra_info;
3828 coding->extra_spec = NULL;
3829 coding->ready = 0;
3830
3831 if (coding->type == Mcharset)
3832 {
3833 if (! coding->resetter)
3834 coding->resetter = reset_coding_charset;
3835 if (! coding->decoder)
3836 coding->decoder = decode_coding_charset;
3837 if (! coding->encoder)
3838 coding->encoder = encode_coding_charset;
3839 }
3840 else if (coding->type == Mutf)
3841 {
3842 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3843 MSymbol val;
3844
3845 if (! coding->resetter)
3846 coding->resetter = reset_coding_utf;
3847
3848 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3849 if (info->code_unit_bits == 8)
3850 {
3851 if (! coding->decoder)
3852 coding->decoder = decode_coding_utf_8;
3853 if (! coding->encoder)
3854 coding->encoder = encode_coding_utf_8;
3855 }
3856 else if (info->code_unit_bits == 16)
3857 {
3858 if (! coding->decoder)
3859 coding->decoder = decode_coding_utf_16;
3860 if (! coding->encoder)
3861 coding->encoder = encode_coding_utf_16;
3862 }
3863 else if (info->code_unit_bits == 32)
3864 {
3865 if (! coding->decoder)
3866 coding->decoder = decode_coding_utf_32;
3867 if (! coding->encoder)
3868 coding->encoder = encode_coding_utf_32;
3869 }
3870 else
3871 MERROR (MERROR_CODING, Mnil);
3872 val = (MSymbol) mplist_get (plist, Mbom);
3873 if (val == Mnil)
3874 info->bom = 1;
3875 else if (val == Mmaybe)
3876 info->bom = 0;
3877 else
3878 info->bom = 2;
3879
3880 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3881 coding->extra_info = info;
3882 }
3883 else if (coding->type == Miso_2022)
3884 {
3885 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3886
3887 if (! coding->resetter)
3888 coding->resetter = reset_coding_iso_2022;
3889 if (! coding->decoder)
3890 coding->decoder = decode_coding_iso_2022;
3891 if (! coding->encoder)
3892 coding->encoder = encode_coding_iso_2022;
3893
3894 info->initial_invocation[0] = 0;
3895 info->initial_invocation[1] = -1;
3896 pl = (MPlist *) mplist_get (plist, Minvocation);
3897 if (pl)
3898 {
3899 if (MPLIST_KEY (pl) != Minteger)
3900 MERROR (MERROR_CODING, Mnil);
3901 info->initial_invocation[0] = MPLIST_INTEGER (pl);
3902 if (! MPLIST_TAIL_P (pl))
3903 {
3904 pl = MPLIST_NEXT (pl);
3905 if (MPLIST_KEY (pl) != Minteger)
3906 MERROR (MERROR_CODING, Mnil);
3907 info->initial_invocation[1] = MPLIST_INTEGER (pl);
3908 }
3909 }
3910 memset (info->designations, 0, sizeof (info->designations));
3911 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3912 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3913 i++, pl = MPLIST_NEXT (pl))
3914 info->designations[i] = MPLIST_INTEGER (pl);
3915
3916 info->flags = 0;
3917 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3918 {
3919 MSymbol val;
3920
3921 if (MPLIST_KEY (pl) != Msymbol)
3922 MERROR (MERROR_CODING, Mnil);
3923 val = MPLIST_SYMBOL (pl);
3924 if (val == Mreset_at_eol)
3925 info->flags |= MCODING_ISO_RESET_AT_EOL;
3926 else if (val == Mreset_at_cntl)
3927 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3928 else if (val == Meight_bit)
3929 info->flags |= MCODING_ISO_EIGHT_BIT;
3930 else if (val == Mlong_form)
3931 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3932 else if (val == Mdesignation_g0)
3933 info->flags |= MCODING_ISO_DESIGNATION_G0;
3934 else if (val == Mdesignation_g1)
3935 info->flags |= MCODING_ISO_DESIGNATION_G1;
3936 else if (val == Mdesignation_ctext)
3937 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3938 else if (val == Mdesignation_ctext_ext)
3939 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3940 else if (val == Mlocking_shift)
3941 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3942 else if (val == Msingle_shift)
3943 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3944 else if (val == Msingle_shift_7)
3945 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3946 else if (val == Meuc_tw_shift)
3947 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3948 else if (val == Miso_6429)
3949 info->flags |= MCODING_ISO_ISO6429;
3950 else if (val == Mrevision_number)
3951 info->flags |= MCODING_ISO_REVISION_NUMBER;
3952 else if (val == Mfull_support)
3953 info->flags |= MCODING_ISO_FULL_SUPPORT;
3954 }
3955
3956 coding->extra_info = info;
3957 }
3958 else
3959 {
3960 if (! coding->decoder || ! coding->encoder)
3961 MERROR (MERROR_CODING, Mnil);
3962 if (! coding->resetter)
3963 coding->ready = 1;
3964 }
3965
3966 msymbol_put (sym, Mcoding, coding);
3967 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3968 plist = (MPlist *) mplist_get (plist, Maliases);
3969 if (plist)
3970 {
3971 MPLIST_DO (pl, plist)
3972 {
3973 MSymbol alias;
3974
3975 if (MPLIST_KEY (pl) != Msymbol)
3976 continue;
3977 alias = MPLIST_SYMBOL (pl);
3978 msymbol_put (alias, Mcoding, coding);
3979 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3980 }
3981 }
3982
3983 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3984
3985 return sym;
3986 }
3987
3988 /*=*/
3989
3990 /***en
3991 @brief Resolve coding system name.
3992
3993 The mconv_resolve_coding () function returns $SYMBOL if it
3994 represents a coding system. Otherwise, canonicalize $SYMBOL as to
3995 a coding system name, and if the canonicalized name represents a
3996 coding system, return it. Otherwise, return #Mnil. */
3997 /***ja
3998 @brief �����ɷϤ�̾�����褹��.
3999
4000 �ؿ� mconv_resolve_coding () �� $SYMBOL �������ɷϤ��Ƥ���Ф�����֤���
4001 �����Ǥʤ���Х����ɷϤ�̾���Ȥ��� $SYMBOL
4002 ���������������줬�����ɷϤ�ɽ���Ƥ�������������� $SYMBOL ���֤���
4003 �����Ǥʤ����#Mnil ���֤��� */
4004
4005
4006
4007 MSymbol
mconv_resolve_coding(MSymbol symbol)4008 mconv_resolve_coding (MSymbol symbol)
4009 {
4010 MCodingSystem *coding = find_coding (symbol);
4011
4012 if (! coding)
4013 {
4014 symbol = msymbol__canonicalize (symbol);
4015 coding = find_coding (symbol);
4016 }
4017 return (coding ? coding->name : Mnil);
4018 }
4019
4020 /*=*/
4021
4022
4023 /***en
4024 @brief List symbols representing coding systems.
4025
4026 The mconv_list_codings () function makes an array of symbols
4027 representing a coding system, stores the pointer to the array in a
4028 place pointed to by $SYMBOLS, and returns the length of the array. */
4029 /***ja
4030 @brief �����ɷϤ�ɽ�魯����ܥ�����.
4031
4032 �ؿ� mchar_list_codings () �ϡ������ɷϤ�����ܥ���¤٤�������ꡢ
4033 $SYMBOLS �ǥݥ���Ȥ��줿���ˤ�������ؤΥݥ����֤��������Ĺ�����֤��� */
4034
4035 int
mconv_list_codings(MSymbol ** symbols)4036 mconv_list_codings (MSymbol **symbols)
4037 {
4038 int i = coding_list.used + mplist_length (coding_definition_list);
4039 int j;
4040 MPlist *plist;
4041
4042 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
4043 i = 0;
4044 MPLIST_DO (plist, coding_definition_list)
4045 {
4046 MPlist *pl = MPLIST_VAL (plist);
4047 (*symbols)[i++] = MPLIST_SYMBOL (pl);
4048 }
4049 for (j = 0; j < coding_list.used; j++)
4050 if (! mplist_find_by_key (coding_definition_list,
4051 coding_list.codings[j]->name))
4052 (*symbols)[i++] = coding_list.codings[j]->name;
4053 return i;
4054 }
4055
4056 /*=*/
4057
4058 /***en
4059 @brief Create a code converter bound to a buffer.
4060
4061 The mconv_buffer_converter () function creates a pointer to a code
4062 converter for coding system $NAME. The code converter is bound
4063 to buffer area of $N bytes pointed to by $BUF. Subsequent
4064 decodings and encodings are done to/from this buffer area.
4065
4066 $NAME can be #Mnil. In this case, a coding system associated
4067 with the current locale (LC_CTYPE) is used.
4068
4069 @return
4070 If the operation was successful, mconv_buffer_converter () returns
4071 the created code converter. Otherwise it returns @c NULL and
4072 assigns an error code to the external variable #merror_code. */
4073
4074 /***ja
4075 @brief �Хåե��˷���դ���줿�����ɥ���С�������.
4076
4077 �ؿ� mconv_buffer_converter () �ϡ������ɷ� $NAME
4078 �ѤΥ����ɥ���С������롣���Υ����ɥ���С����ϡ�$BUF �Ǽ�������礭�� $N
4079 �Х��ȤΥХåե��ΰ�˷���դ����롣
4080 ����ʹߤΥǥ����ɤ���ӥ����ɤϡ����ΥХåե��ΰ���Ф��ƹԤʤ��롣
4081
4082 $NAME �� #Mnil �Ǥ��äƤ�褤�����ξ��ϸ��ߤΥ�����
4083 (LC_CTYPE) �˴�Ϣ�դ���줿�����ɷϤ��Ȥ��롣
4084
4085 @return
4086 �⤷��������������� mconv_buffer_converter () �� �������������ɥ���С������֤���
4087 �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4088 �˥��顼�����ɤ����ꤹ�롣
4089
4090 @latexonly \IPAlabel{mconverter} @endlatexonly */
4091
4092 /***
4093 @errors
4094 @c MERROR_SYMBOL, @c MERROR_CODING
4095
4096 @seealso
4097 mconv_stream_converter () */
4098
4099 MConverter *
mconv_buffer_converter(MSymbol name,const unsigned char * buf,int n)4100 mconv_buffer_converter (MSymbol name, const unsigned char *buf, int n)
4101 {
4102 MCodingSystem *coding;
4103 MConverter *converter;
4104 MConverterStatus *internal;
4105
4106 if (name == Mnil)
4107 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4108 coding = find_coding (name);
4109 if (! coding)
4110 MERROR (MERROR_CODING, NULL);
4111 MSTRUCT_CALLOC (converter, MERROR_CODING);
4112 MSTRUCT_CALLOC (internal, MERROR_CODING);
4113 converter->internal_info = internal;
4114 internal->coding = coding;
4115 if (coding->resetter
4116 && (*coding->resetter) (converter) < 0)
4117 {
4118 free (internal);
4119 free (converter);
4120 MERROR (MERROR_CODING, NULL);
4121 }
4122
4123 internal->unread = mtext ();
4124 internal->work_mt = mtext ();
4125 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4126 internal->buf.in = buf;
4127 internal->used = 0;
4128 internal->bufsize = n;
4129 internal->binding = BINDING_BUFFER;
4130
4131 return converter;
4132 }
4133
4134 /*=*/
4135
4136 /***en
4137 @brief Create a code converter bound to a stream.
4138
4139 The mconv_stream_converter () function creates a pointer to a code
4140 converter for coding system $NAME. The code converter is bound
4141 to stream $FP. Subsequent decodings and encodings are done
4142 to/from this stream.
4143
4144 $NAME can be #Mnil. In this case, a coding system associated
4145 with the current locale (LC_CTYPE) is used.
4146
4147 @return
4148 If the operation was successful, mconv_stream_converter ()
4149 returns the created code converter. Otherwise it returns @c NULL
4150 and assigns an error code to the external variable
4151 #merror_code. */
4152
4153 /***ja
4154 @brief ���ȥ��˷���դ���줿�����ɥ���С�������.
4155
4156 �ؿ� mconv_stream_converter () �ϡ������ɷ� $NAME
4157 �ѤΥ����ɥ���С������롣���Υ����ɥ���С����ϡ����ȥ�� $FP
4158 �˷���դ����롣
4159 ����ʹߤΥǥ����ɤ���ӥ����ɤϡ����Υ��ȥ����Ф��ƹԤʤ��롣
4160
4161 $NAME �� #Mnil �Ǥ��äƤ�褤�����ξ��ϸ��ߤΥ�����
4162 (LC_CTYPE) �˴�Ϣ�դ���줿�����ɷϤ��Ȥ��롣
4163
4164 @return
4165 �⤷��������������С�mconv_stream_converter ()
4166 �Ϻ������������ɥ���С������֤��������Ǥʤ���� @c NULL
4167 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
4168
4169 @latexonly \IPAlabel{mconverter} @endlatexonly */
4170
4171 /***
4172 @errors
4173 @c MERROR_SYMBOL, @c MERROR_CODING
4174
4175 @seealso
4176 mconv_buffer_converter () */
4177
4178 MConverter *
mconv_stream_converter(MSymbol name,FILE * fp)4179 mconv_stream_converter (MSymbol name, FILE *fp)
4180 {
4181 MCodingSystem *coding;
4182 MConverter *converter;
4183 MConverterStatus *internal;
4184
4185 if (name == Mnil)
4186 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4187 coding = find_coding (name);
4188 if (! coding)
4189 MERROR (MERROR_CODING, NULL);
4190 MSTRUCT_CALLOC (converter, MERROR_CODING);
4191 MSTRUCT_CALLOC (internal, MERROR_CODING);
4192 converter->internal_info = internal;
4193 internal->coding = coding;
4194 if (coding->resetter
4195 && (*coding->resetter) (converter) < 0)
4196 {
4197 free (internal);
4198 free (converter);
4199 MERROR (MERROR_CODING, NULL);
4200 }
4201
4202 if (fseek (fp, 0, SEEK_CUR) < 0)
4203 {
4204 if (errno == EBADF)
4205 {
4206 free (internal);
4207 free (converter);
4208 return NULL;
4209 }
4210 internal->seekable = 0;
4211 }
4212 else
4213 internal->seekable = 1;
4214 internal->unread = mtext ();
4215 internal->work_mt = mtext ();
4216 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4217 internal->fp = fp;
4218 internal->binding = BINDING_STREAM;
4219
4220 return converter;
4221 }
4222
4223 /*=*/
4224
4225 /***en
4226 @brief Reset a code converter.
4227
4228 The mconv_reset_converter () function resets code converter
4229 $CONVERTER to the initial state.
4230
4231 @return
    If $CONVERTER->coding has its own resetter function,
    mconv_reset_converter () returns the result of that function
    applied to $CONVERTER.  Otherwise it returns 0.  */
4235
4236 /***ja
4237 @brief �����ɥ���С�����ꥻ�åȤ���.
4238
4239 �ؿ� mconv_reset_converter () �ϥ����ɥ���С��� $CONVERTER
4240 �������֤��᤹��
4241
4242 @return
4243 �⤷ $CONVERTER->coding �˥ꥻ�å��Ѥδؿ����������Ƥ���ʤ�С�
4244 mconv_reset_converter () �Ϥ��δؿ��� $CONVERTER
4245 ��Ŭ�Ѥ�����̤��֤��������Ǥʤ����0���֤��� */
4246
4247 int
mconv_reset_converter(MConverter * converter)4248 mconv_reset_converter (MConverter *converter)
4249 {
4250 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4251
4252 converter->nchars = converter->nbytes = 0;
4253 converter->result = MCONVERSION_RESULT_SUCCESS;
4254 internal->carryover_bytes = 0;
4255 internal->used = 0;
4256 mtext_reset (internal->unread);
4257 if (internal->coding->resetter)
4258 return (*internal->coding->resetter) (converter);
4259 return 0;
4260 }
4261
4262 /*=*/
4263
4264 /***en
4265 @brief Free a code converter.
4266
4267 The mconv_free_converter () function frees the code converter
4268 $CONVERTER. */
4269
4270 /***ja
4271 @brief �����ɥ���С������������.
4272
4273 �ؿ� mconv_free_converter () �ϥ����ɥ���С��� $CONVERTER
4274 ��������롣 */
4275
4276 void
mconv_free_converter(MConverter * converter)4277 mconv_free_converter (MConverter *converter)
4278 {
4279 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4280
4281 M17N_OBJECT_UNREF (internal->work_mt);
4282 M17N_OBJECT_UNREF (internal->unread);
4283 free (internal);
4284 free (converter);
4285 }
4286
4287 /*=*/
4288
4289 /***en
4290 @brief Bind a buffer to a code converter.
4291
4292 The mconv_rebind_buffer () function binds buffer area of $N bytes
4293 pointed to by $BUF to code converter $CONVERTER. Subsequent
4294 decodings and encodings are done to/from this newly bound buffer
4295 area.
4296
4297 @return
4298 This function always returns $CONVERTER. */
4299
4300 /***ja
4301 @brief �����ɥ���С����˥Хåե��ΰ�����դ���.
4302
4303 �ؿ� mconv_rebind_buffer () �ϡ�$BUF �ˤ�äƻؤ��줿�礭�� $N
4304 �Х��ȤΥХåե��ΰ���ɥ���С��� $CONVERTER �˷���դ��롣
4305 ����ʹߤΥǥ����ɤ���ӥ����ɤϡ����ο����˷���դ���줿�Хåե��ΰ���Ф��ƹԤʤ���褦�ˤʤ롣
4306
4307 @return
4308 ���δؿ��Ͼ�� $CONVERTER ���֤���
4309
4310 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4311
4312 /***
4313 @seealso
4314 mconv_rebind_stream () */
4315
4316 MConverter *
mconv_rebind_buffer(MConverter * converter,const unsigned char * buf,int n)4317 mconv_rebind_buffer (MConverter *converter, const unsigned char *buf, int n)
4318 {
4319 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4320
4321 internal->buf.in = buf;
4322 internal->used = 0;
4323 internal->bufsize = n;
4324 internal->binding = BINDING_BUFFER;
4325 return converter;
4326 }
4327
4328 /*=*/
4329
4330 /***en
4331 @brief Bind a stream to a code converter.
4332
4333 The mconv_rebind_stream () function binds stream $FP to code
4334 converter $CONVERTER. Following decodings and encodings are done
4335 to/from this newly bound stream.
4336
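    @return
    If $FP is a usable stream, this function returns $CONVERTER.  If
    checking $FP fails with @c EBADF, it returns @c NULL and leaves
    the binding of $CONVERTER unchanged.  */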
4339
4340 /***ja
4341 @brief �����ɥ���С����˥��ȥ������դ���.
4342
4343 �ؿ� mconv_rebind_stream () �ϡ����ȥ�� $FP ���ɥ���С���
4344 $CONVERTER �˷���դ��롣
4345 ����ʹߤΥǥ����ɤ���ӥ����ɤϡ����ο����˷���դ���줿���ȥ����Ф��ƹԤʤ���褦�ˤʤ롣
4346
4347 @return
4348 ���δؿ��Ͼ�� $CONVERTER ���֤���
4349
4350 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4351
4352 /***
4353 @seealso
4354 mconv_rebind_buffer () */
4355
4356 MConverter *
mconv_rebind_stream(MConverter * converter,FILE * fp)4357 mconv_rebind_stream (MConverter *converter, FILE *fp)
4358 {
4359 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4360
4361 if (fseek (fp, 0, SEEK_CUR) < 0)
4362 {
4363 if (errno == EBADF)
4364 return NULL;
4365 internal->seekable = 0;
4366 }
4367 else
4368 internal->seekable = 1;
4369 internal->fp = fp;
4370 internal->binding = BINDING_STREAM;
4371 return converter;
4372 }
4373
4374 /*=*/
4375
4376 /***en
4377 @brief Decode a byte sequence into an M-text.
4378
4379 The mconv_decode () function decodes a byte sequence and appends
4380 the result at the end of M-text $MT. The source byte sequence is
4381 taken from either the buffer area or the stream that is currently
4382 bound to $CONVERTER.
4383
4384 @return
4385 If the operation was successful, mconv_decode () returns updated
4386 $MT. Otherwise it returns @c NULL and assigns an error code to
4387 the external variable #merror_code. */
4388
4389 /***ja
4390 @brief �Х������ M-text �˥ǥ����ɤ���.
4391
4392 �ؿ� mconv_decode () �ϡ��Х������ǥ����ɤ��Ƥ��η�̤� M-text
4393 $MT ���������ɲä��롣�ǥ����ɸ��ΥХ�����ϡ�$CONVERTER
4394 �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ�फ�����롣
4395
4396 @return
4397 �⤷��������������С�mconv_decode () �Ϲ������줿 $MT ���֤���
4398 �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4399 �˥��顼�����ɤ����ꤹ�롣 */
4400
4401 /***
4402 @errors
4403 @c MERROR_IO, @c MERROR_CODING
4404
4405 @seealso
4406 mconv_rebind_buffer (), mconv_rebind_stream (),
4407 mconv_encode (), mconv_encode_range (),
4408 mconv_decode_buffer (), mconv_decode_stream () */
4409
/* Decode bytes from the buffer or stream bound to CONVERTER and append
   the decoded characters to MT.

   Characters previously pushed back with mconv_ungetc () are delivered
   first.  A positive converter->at_most limits the number of
   characters; converter->nchars, converter->nbytes and
   converter->result are reset at the start of the call.

   Returns MT when converter->result ends up as
   MCONVERSION_RESULT_SUCCESS or MCONVERSION_RESULT_INSUFFICIENT_SRC,
   otherwise NULL.  */
MText *
mconv_decode (MConverter *converter, MText *mt)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  /* Caller's character limit; any non-positive value is normalized to
     -1, meaning "no limit".  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  int n;

  M_CHECK_READONLY (mt, NULL);

  /* The result is appended in UTF-8 form, so convert MT first if it is
     stored in another format.  */
  if (mt->format != MTEXT_FORMAT_UTF_8)
    mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);

  /* Make sure MT has a data area before anything is written to it.  */
  if (! mt->data)
    mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);

  converter->nchars = converter->nbytes = 0;
  converter->result = MCONVERSION_RESULT_SUCCESS;

  /* Deliver pushed-back characters first.  mconv_ungetc () appends to
     internal->unread, so reading from the tail backwards yields the
     most recently pushed character first.  */
  n = mtext_nchars (internal->unread);
  if (n > 0)
    {
      int limit = n;
      int i;

      if (at_most > 0 && at_most < limit)
        limit = at_most;

      /* N walks down from the last index; converter->nchars counts the
         characters handed back.  */
      for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
        mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
      /* Remove the characters just consumed from the tail.  */
      mtext_del (internal->unread, n + 1, internal->unread->nchars);
      if (at_most > 0)
        {
          /* The limit is already satisfied by pushed-back characters;
             no bytes need to be read.  */
          if (at_most == limit)
            return mt;
          /* Temporarily lower the limit by what was already delivered;
             the original value is stored back before the final
             return.  */
          converter->at_most -= converter->nchars;
        }
    }

  if (internal->binding == BINDING_BUFFER)
    {
      /* Decode from the unread part of the bound buffer and remember
         how far we got.  */
      (*internal->coding->decoder) (internal->buf.in + internal->used,
                                    internal->bufsize - internal->used,
                                    mt, converter);
      internal->used += converter->nbytes;
    }
  else if (internal->binding == BINDING_STREAM)
    {
      unsigned char work[CONVERT_WORKSIZE];
      int last_block = converter->last_block;
      /* Block reads are used only when there is no character limit and
         the stream is seekable, because surplus bytes are returned with
         fseek ().  Otherwise bytes are read one at a time so that at
         most one byte ever has to be pushed back with ungetc ().  */
      int use_fread = at_most < 0 && internal->seekable;

      /* While more input may follow, the decoder is called with
         last_block cleared; the caller's value is put back for the
         zero-byte call at end of input and after the loop.  */
      converter->last_block = 0;
      while (1)
        {
          int nbytes, prev_nbytes;

          if (feof (internal->fp))
            nbytes = 0;
          else if (use_fread)
            nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
                            internal->fp);
          else
            {
              int c = getc (internal->fp);

              if (c != EOF)
                work[0] = c, nbytes = 1;
              else
                nbytes = 0;
            }

          if (ferror (internal->fp))
            {
              converter->result = MCONVERSION_RESULT_IO_ERROR;
              break;
            }

          /* End of input: let the decoder see the caller's last_block
             setting for this final, empty call.  */
          if (nbytes == 0)
            converter->last_block = last_block;
          prev_nbytes = converter->nbytes;
          (*internal->coding->decoder) (work, nbytes, mt, converter);
          /* The decoder consumed fewer bytes than it was given: return
             the unconsumed bytes to the stream and stop.  */
          if (converter->nbytes - prev_nbytes < nbytes)
            {
              if (use_fread)
                fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
                       SEEK_CUR);
              else
                ungetc (work[0], internal->fp);
              break;
            }
          /* NOTE(review): converter->nchars already includes any
             pushed-back characters, while converter->at_most was
             reduced by that same count above -- confirm that this
             comparison yields the intended total.  */
          if (nbytes == 0
              || (converter->at_most > 0
                  && converter->nchars == converter->at_most))
            break;
        }
      converter->last_block = last_block;
    }
  else /* internal->binding == BINDING_NONE */
    /* NOTE(review): MERROR presumably records the error code and
       returns NULL from here; in that case converter->at_most is not
       restored below -- confirm against the macro definition.  */
    MERROR (MERROR_CODING, NULL);

  /* Store the limit back (a non-positive original value comes back as
     -1).  */
  converter->at_most = at_most;
  return ((converter->result == MCONVERSION_RESULT_SUCCESS
           || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
          ? mt : NULL);
}
4515
4516 /*=*/
4517
4518 /***en
4519 @brief Decode a buffer area based on a coding system.
4520
4521 The mconv_decode_buffer () function decodes $N bytes of the buffer
4522 area pointed to by $BUF based on the coding system $NAME. A
4523 temporary code converter for decoding is automatically created
4524 and freed.
4525
4526 @return
4527 If the operation was successful, mconv_decode_buffer ()
4528 returns the resulting M-text. Otherwise it returns @c NULL and
4529 assigns an error code to the external variable #merror_code. */
4530
4531 /***ja
4532 @brief �����ɷϤ˴�Ť��ƥХåե��ΰ��ǥ����ɤ���.
4533
4534 �ؿ� mconv_decode_buffer () �ϡ�$BUF �ˤ�äƻؤ��줿 $N
4535 �Х��ȤΥХåե��ΰ�����ɷ� $NAME �˴�Ť��ƥǥ����ɤ��롣
4536 �ǥ����ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4537
4538 @return
4539 �⤷��������������С�mconv_decode_buffer () ������줿 M-text ���֤���
4540 �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4541 �˥��顼�����ɤ����ꤹ�롣 */
4542
4543 /***
4544 @errors
4545 @c MERROR_IO, @c MERROR_CODING
4546
4547 @seealso
4548 mconv_decode (), mconv_decode_stream () */
4549
4550 MText *
mconv_decode_buffer(MSymbol name,const unsigned char * buf,int n)4551 mconv_decode_buffer (MSymbol name, const unsigned char *buf, int n)
4552 {
4553 MConverter *converter = mconv_buffer_converter (name, buf, n);
4554 MText *mt;
4555
4556 if (! converter)
4557 return NULL;
4558 mt = mtext ();
4559 if (! mconv_decode (converter, mt))
4560 {
4561 M17N_OBJECT_UNREF (mt);
4562 mt = NULL;
4563 }
4564 mconv_free_converter (converter);
4565 return mt;
4566 }
4567
4568 /*=*/
4569
4570 /***en
4571 @brief Decode a stream input based on a coding system.
4572
4573 The mconv_decode_stream () function decodes the entire byte
4574 sequence read in from stream $FP based on the coding system $NAME.
4575 A code converter for decoding is automatically created and freed.
4576
4577 @return
4578 If the operation was successful, mconv_decode_stream () returns
4579 the resulting M-text. Otherwise it returns @c NULL and assigns an
4580 error code to the external variable #merror_code. */
4581
4582 /***ja
4583 @brief �����ɷϤ˴�Ť��ƥ��ȥ�����Ϥ�ǥ����ɤ���.
4584
4585 �ؿ� mconv_decode_stream () �ϡ����ȥ�� $FP
4586 �����ɤ߹��ޤ��Х��������Τ����ɷ� $NAME
4587 �˴�Ť��ƥǥ����ɤ��롣�ǥ����ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4588
4589 @return
4590 �⤷��������������С�mconv_decode_stream () ������줿 M-text
4591 ���֤��������Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4592 �˥��顼�����ɤ����ꤹ�롣 */
4593
4594 /***
4595 @errors
4596 @c MERROR_IO, @c MERROR_CODING
4597
4598 @seealso
4599 mconv_decode (), mconv_decode_buffer () */
4600
4601 MText *
mconv_decode_stream(MSymbol name,FILE * fp)4602 mconv_decode_stream (MSymbol name, FILE *fp)
4603 {
4604 MConverter *converter = mconv_stream_converter (name, fp);
4605 MText *mt;
4606
4607 if (! converter)
4608 return NULL;
4609 mt = mtext ();
4610 if (! mconv_decode (converter, mt))
4611 {
4612 M17N_OBJECT_UNREF (mt);
4613 mt = NULL;
4614 }
4615 mconv_free_converter (converter);
4616 return mt;
4617 }
4618
4619 /*=*/
4620
/***en
    @brief Encode an M-text into a byte sequence.
4622
4623 The mconv_encode () function encodes M-text $MT and writes the
4624 resulting byte sequence into the buffer area or the stream that is
4625 currently bound to code converter $CONVERTER.
4626
4627 @return
4628 If the operation was successful, mconv_encode () returns the
4629 number of written bytes. Otherwise it returns -1 and assigns an
4630 error code to the external variable #merror_code. */
4631
4632 /***ja
4633 @brief M-text ��Х�����˥����ɤ���.
4634
4635 �ؿ� mconv_encode () �ϡ�M-text $MT ���ɤ��ơ������ɥ���С���
4636 $CONVERTER �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ�������줿�Х��������ࡣ
4637
4638 @return
4639 �⤷��������������С�mconv_encode () �Ͻ��ޤ줿�Х��ȿ����֤���
4640 �����Ǥʤ���� -1 ���֤��������ѿ� #merror_code
4641 �˥��顼�����ɤ����ꤹ�롣 */
4642
4643 /***
4644 @errors
4645 @c MERROR_IO, @c MERROR_CODING
4646
4647 @seealso
4648 mconv_rebind_buffer (), mconv_rebind_stream(),
4649 mconv_decode (), mconv_encode_range () */
4650
4651 int
mconv_encode(MConverter * converter,MText * mt)4652 mconv_encode (MConverter *converter, MText *mt)
4653 {
4654 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4655 }
4656
4657 /*=*/
4658
4659 /***en
4660 @brief Encode a part of an M-text.
4661
4662 The mconv_encode_range () function encodes the text between $FROM
4663 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4664 resulting byte sequence into the buffer area or the stream that is
4665 currently bound to code converter $CONVERTER.
4666
4667 @return
4668 If the operation was successful, mconv_encode_range () returns the
4669 number of written bytes. Otherwise it returns -1 and assigns an
4670 error code to the external variable #merror_code. */
4671
4672 /***ja
4673 @brief M-text �ΰ�����Х�����˥����ɤ���.
4674
4675 �ؿ� mconv_encode_range () �ϡ�M-text $MT �� $FROM
4676 ��$FROM ���Τ�ޤ�ˤ��� $TO ��$TO���Τϴޤޤʤ���
4677 �ޤǤ��ϰϤΥƥ����Ȥ��ɤ��ơ������ɥ���С���
4678 $CONVERTER �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ�������줿�Х��������ࡣ
4679
4680 @return
4681 �⤷��������������С�mconv_encode_range ()
4682 �Ͻ��ޤ줿�Х��ȿ����֤��������Ǥʤ���� -1
4683 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
4684
4685 /***
4686 @errors
4687 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4688
4689 @seealso
4690 mconv_rebind_buffer (), mconv_rebind_stream(),
4691 mconv_decode (), mconv_encode () */
4692
4693 int
mconv_encode_range(MConverter * converter,MText * mt,int from,int to)4694 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
4695 {
4696 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4697
4698 M_CHECK_POS_X (mt, from, -1);
4699 M_CHECK_POS_X (mt, to, -1);
4700 if (to < from)
4701 to = from;
4702
4703 if (converter->at_most > 0 && from + converter->at_most < to)
4704 to = from + converter->at_most;
4705
4706 converter->nchars = converter->nbytes = 0;
4707 converter->result = MCONVERSION_RESULT_SUCCESS;
4708
4709 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
4710 if (internal->binding == BINDING_BUFFER)
4711 {
4712 (*internal->coding->encoder) (mt, from, to,
4713 internal->buf.out + internal->used,
4714 internal->bufsize - internal->used,
4715 converter);
4716 internal->used += converter->nbytes;
4717 }
4718 else if (internal->binding == BINDING_STREAM)
4719 {
4720 unsigned char work[CONVERT_WORKSIZE];
4721
4722 while (from < to)
4723 {
4724 int written = 0;
4725 int prev_nbytes = converter->nbytes;
4726 int this_nbytes;
4727
4728 (*internal->coding->encoder) (mt, from, to, work,
4729 CONVERT_WORKSIZE, converter);
4730 this_nbytes = converter->nbytes - prev_nbytes;
4731 while (written < this_nbytes)
4732 {
4733 int wrtn = fwrite (work + written, sizeof (unsigned char),
4734 this_nbytes - written, internal->fp);
4735
4736 if (ferror (internal->fp))
4737 break;
4738 written += wrtn;
4739 }
4740 if (written < this_nbytes)
4741 {
4742 converter->result = MCONVERSION_RESULT_IO_ERROR;
4743 break;
4744 }
4745 from += converter->nchars;
4746 }
4747 }
4748 else /* fail safe */
4749 MERROR (MERROR_CODING, -1);
4750
4751 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4752 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4753 ? converter->nbytes : -1);
4754 }
4755
4756 /*=*/
4757
4758 /***en
4759 @brief Encode an M-text into a buffer area.
4760
4761 The mconv_encode_buffer () function encodes M-text $MT based on
4762 coding system $NAME and writes the resulting byte sequence into the
4763 buffer area pointed to by $BUF. At most $N bytes are written. A
4764 temporary code converter for encoding is automatically created
4765 and freed.
4766
4767 @return
4768 If the operation was successful, mconv_encode_buffer () returns
4769 the number of written bytes. Otherwise it returns -1 and assigns
4770 an error code to the external variable #merror_code. */
4771
4772 /***ja
4773 @brief M-text ���ɤ��ƥХåե��ΰ�˽���.
4774
4775 �ؿ� mconv_encode_buffer () ��M-text $MT ���ɷ� $NAME
4776 �˴�Ť��ƥ����ɤ�������줿�Х������ $BUF �λؤ��Хåե��ΰ�˽��ࡣ
4777 $N �Ͻ������Х��ȿ��Ǥ��롣
4778 �����ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4779
4780 @return
4781 �⤷��������������С�mconv_encode_buffer () �Ͻ��ޤ줿�Х��ȿ����֤���
4782 �����Ǥʤ����-1���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
4783
4784 /***
4785 @errors
4786 @c MERROR_IO, @c MERROR_CODING
4787
4788 @seealso
4789 mconv_encode (), mconv_encode_stream () */
4790
4791 int
mconv_encode_buffer(MSymbol name,MText * mt,unsigned char * buf,int n)4792 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4793 {
4794 MConverter *converter = mconv_buffer_converter (name, buf, n);
4795 int ret;
4796
4797 if (! converter)
4798 return -1;
4799 ret = mconv_encode (converter, mt);
4800 mconv_free_converter (converter);
4801 return ret;
4802 }
4803
4804 /*=*/
4805
4806 /***en
4807 @brief Encode an M-text to write to a stream.
4808
4809 The mconv_encode_stream () function encodes M-text $MT based on
4810 coding system $NAME and writes the resulting byte sequence to
4811 stream $FP. A temporary code converter for encoding is
4812 automatically created and freed.
4813
4814 @return
4815 If the operation was successful, mconv_encode_stream () returns
4816 the number of written bytes. Otherwise it returns -1 and assigns
4817 an error code to the external variable #merror_code. */
4818
4819 /***ja
4820 @brief M-text ���ɤ��ƥ��ȥ��˽���.
4821
4822 �ؿ� mconv_encode_stream () ��M-text $MT ���ɷ� $NAME
4823 �˴�Ť��ƥ����ɤ�������줿�Х�����ȥ�� $FP
4824 �˽Ф��������ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4825
4826 @return
4827 �⤷��������������С�mconv_encode_stream ()
4828 �Ͻ��ޤ줿�Х��ȿ����֤��������Ǥʤ���� -1
4829 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
4830
4831 /***
4832 @errors
4833 @c MERROR_IO, @c MERROR_CODING
4834
4835 @seealso
4836 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
4837
4838 int
mconv_encode_stream(MSymbol name,MText * mt,FILE * fp)4839 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4840 {
4841 MConverter *converter = mconv_stream_converter (name, fp);
4842 int ret;
4843
4844 if (! converter)
4845 return -1;
4846 ret = mconv_encode (converter, mt);
4847 mconv_free_converter (converter);
4848 return ret;
4849 }
4850
4851 /*=*/
4852
4853 /***en
4854 @brief Read a character via a code converter.
4855
4856 The mconv_getc () function reads one character from the buffer
4857 area or the stream that is currently bound to code converter
4858 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4859 sequence. The internal status of $CONVERTER is updated
4860 appropriately.
4861
4862 @return
4863 If the operation was successful, mconv_getc () returns the
4864 character read in. If the input source reaches EOF, it returns @c
4865 EOF without changing the external variable #merror_code. If an
4866 error is detected, it returns @c EOF and assigns an error code to
4867 #merror_code. */
4868
4869 /***ja
4870 @brief �����ɥ���С�����ͳ�ǰ�ʸ�����ɤߤ���.
4871
4872 �ؿ� mconv_getc () �ϡ������ɥ���С��� $CONVERTER
4873 �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ�फ��ʸ�������ɤ߹��ࡣ
4874 �Х�����Υǥ����ɤˤ� $CONVERTER �Υǥ��������Ѥ����롣
4875 $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4876
4877 @return
4878 ��������������С�mconv_getc () ���ɤ߹��ޤ줿ʸ�����֤������ϸ���
4879 EOF ��ã�������ϡ������ѿ� #merror_code ���Ѥ����� @c EOF
4880 ���֤������顼�����Ф��줿���� @c EOF ���֤���#merror_code
4881 �˥��顼�����ɤ����ꤹ�롣 */
4882
4883 /***
4884 @errors
4885 @c MERROR_CODING
4886
4887 @seealso
4888 mconv_ungetc (), mconv_putc (), mconv_gets () */
4889
4890 int
mconv_getc(MConverter * converter)4891 mconv_getc (MConverter *converter)
4892 {
4893 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4894 int at_most = converter->at_most;
4895
4896 mtext_reset (internal->work_mt);
4897 converter->at_most = 1;
4898 mconv_decode (converter, internal->work_mt);
4899 converter->at_most = at_most;
4900 return (converter->nchars == 1
4901 ? STRING_CHAR (internal->work_mt->data)
4902 : EOF);
4903 }
4904
4905 /*=*/
4906
4907 /***en
4908 @brief Push a character back to a code converter.
4909
    The mconv_ungetc () function pushes character $C back to code
    converter $CONVERTER.  Any number of characters can be pushed
    back.  The character pushed back last is the first one read by a
    subsequent mconv_getc () call.  The characters pushed back are
    registered only in $CONVERTER; they are not written to the input
    source.  The internal status of $CONVERTER is updated
    appropriately.
4917
4918 @return
4919 If the operation was successful, mconv_ungetc () returns $C.
4920 Otherwise it returns @c EOF and assigns an error code to the
4921 external variable #merror_code. */
4922
4923 /***ja
4924 @brief �����ɥ���С����˰�ʸ���᤹.
4925
4926 �ؿ� mconv_ungetc () �ϡ������ɥ���С��� $CONVERTER ��ʸ�� $C
4927 ���᤹���ᤵ���ʸ���������¤Ϥʤ������θ�� mconv_getc ()
4928 ��ƤӽФ����ݤˤϡ��Ǹ���ᤵ�줿ʸ�����ǽ���ɤޤ�롣�ᤵ�줿ʸ����
4929 $CONVERTER ���������ߤ���������Ǥ��ꡢ�ºݤ����ϸ��˽��ޤ��櫓�ǤϤʤ���
4930 $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4931
4932 @return
4933 ��������������С�mconv_ungetc () �� $C ���֤��������Ǥʤ���� @c
4934 EOF ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
4935
4936 /***
4937 @errors
4938 @c MERROR_CODING, @c MERROR_CHAR
4939
4940 @seealso
4941 mconv_getc (), mconv_putc (), mconv_gets () */
4942
4943 int
mconv_ungetc(MConverter * converter,int c)4944 mconv_ungetc (MConverter *converter, int c)
4945 {
4946 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4947
4948 M_CHECK_CHAR (c, EOF);
4949
4950 converter->result = MCONVERSION_RESULT_SUCCESS;
4951 mtext_cat_char (internal->unread, c);
4952 return c;
4953 }
4954
4955 /*=*/
4956
4957 /***en
4958 @brief Write a character via a code converter.
4959
4960 The mconv_putc () function writes character $C to the buffer area
4961 or the stream that is currently bound to code converter
4962 $CONVERTER. The encoder of $CONVERTER is used to encode the
4963 character. The number of bytes actually written is set to the @c
4964 nbytes member of $CONVERTER. The internal status of $CONVERTER
4965 is updated appropriately.
4966
4967 @return
4968 If the operation was successful, mconv_putc () returns $C.
4969 If an error is detected, it returns @c EOF and assigns
4970 an error code to the external variable #merror_code. */
4971
4972 /***ja
4973 @brief �����ɥ���С������ͳ���ư�ʸ���Ф�.
4974
4975 �ؿ� mconv_putc () �ϡ������ɥ���С��� $CONVERTER
4976 �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ���ʸ�� $C
4977 ��Ф���ʸ���Υ����ɤˤ� $CONVERTER
4978 �Υ��������Ѥ����롣�ºݤ˽Ф��줿�Х��ȿ��ϡ�$CONVERTER �Υ��С�
4979 @c nbytes �˥��åȤ���롣$CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4980
4981 @return
4982 ��������������С�mconv_putc () �� $C ���֤������顼�����Ф��줿����
4983 @c EOF ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
4984
4985 /***
4986 @errors
4987 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4988
4989 @seealso
4990 mconv_getc (), mconv_ungetc (), mconv_gets () */
4991
4992 int
mconv_putc(MConverter * converter,int c)4993 mconv_putc (MConverter *converter, int c)
4994 {
4995 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4996
4997 M_CHECK_CHAR (c, EOF);
4998 mtext_reset (internal->work_mt);
4999 mtext_cat_char (internal->work_mt, c);
5000 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
5001 return EOF;
5002 return c;
5003 }
5004
5005 /*=*/
5006
5007 /***en
5008 @brief Read a line using a code converter.
5009
5010 The mconv_gets () function reads one line from the buffer area or
5011 the stream that is currently bound to code converter $CONVERTER.
5012 The decoder of $CONVERTER is used for decoding. The decoded
5013 character sequence is appended at the end of M-text $MT. The
5014 final newline character in the original byte sequence is not
5015 appended. The internal status of $CONVERTER is updated
5016 appropriately.
5017
5018 @return
5019 If the operation was successful, mconv_gets () returns the
5020 modified $MT. If it encounters EOF without reading a single
5021 character, it returns $MT without changing it. If an error is
5022 detected, it returns @c NULL and assigns an error code to
5023 #merror_code. */
5024
5025 /***ja
5026 @brief �����ɥ���С�����Ȥäư���ɤ߹���.
5027
5028 �ؿ� mconv_gets () �ϡ������ɥ���С��� $CONVERTER
5029 �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ�फ�� 1 �Ԥ��ɤ߹��ࡣ
5030 �Х�����Υǥ����ɤˤ� $CONVERTER
5031 �Υǥ��������Ѥ����롣�ǥ����ɤ��줿ʸ����� M-text $MT
5032 ���������ɲä���롣���ΥХ�����ν�ü����ʸ�����ɲä���ʤ���
5033 $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
5034
5035 @return
5036 ��������������С�mconv_gets () ���ѹ����줿 $MT
5037 ���֤����⤷1ʸ�����ɤޤ��� EOF �������������ϡ�$MT
5038 ���ѹ������ˤ��Τޤ��֤������顼�����Ф��줿���� @c NULL ���֤���
5039 #merror_code �˥��顼�����ɤ����ꤹ�롣 */
5040
5041 /***
5042 @errors
5043 @c MERROR_CODING
5044
5045 @seealso
5046 mconv_getc (), mconv_ungetc (), mconv_putc () */
5047
5048 MText *
mconv_gets(MConverter * converter,MText * mt)5049 mconv_gets (MConverter *converter, MText *mt)
5050 {
5051 int c;
5052
5053 M_CHECK_READONLY (mt, NULL);
5054 if (mt->format != MTEXT_FORMAT_UTF_8)
5055 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5056
5057 while (1)
5058 {
5059 c = mconv_getc (converter);
5060 if (c == EOF || c == '\n')
5061 break;
5062 mtext_cat_char (mt, c);
5063 }
5064 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5065 /* mconv_getc () sets #merror_code */
5066 return NULL;
5067 return mt;
5068 }
5069
5070 /*=*/
5071
5072 /*** @} */
5073
5074 /*
5075 Local Variables:
5076 coding: euc-japan
5077 End:
5078 */
5079