1 /* coding.c -- code conversion module.
2    Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012
3      National Institute of Advanced Industrial Science and Technology (AIST)
4      Registration Number H15PRO112
5 
6    This file is part of the m17n library.
7 
8    The m17n library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public License
10    as published by the Free Software Foundation; either version 2.1 of
11    the License, or (at your option) any later version.
12 
13    The m17n library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17 
18    You should have received a copy of the GNU Lesser General Public
19    License along with the m17n library; if not, write to the Free
20    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21    Boston, MA 02110-1301 USA.  */
22 
23 /***en
24     @addtogroup m17nConv
25     @brief Coding system objects and API for them.
26 
27     The m17n library represents a character encoding scheme (CES) of
28     coded character sets (CCS) as an object called @e coding @e
29     system.  Application programs can add original coding systems.
30 
31     To @e encode means converting code-points to character codes and
32     to @e decode means converting character codes back to code-points.
33 
34     Application programs can decode a byte sequence with a specified
35     coding system into an M-text, and inversely, can encode an M-text
36     into a byte sequence.  */
37 
/***ja
    @addtogroup m17nConv
    @brief コード系オブジェクトとそれに関する API.

    m17n ライブラリは、符号化文字集合 (coded character set; CCS)
    の文字符号化方式 (character encoding scheme; CES) を @e コード系
    と呼ぶオブジェクトで表現する。
    アプリケーションプログラムは独自にコード系を追加することもできる。

    コードポイントから文字コードへの変換を @e エンコード
    と呼び、文字コードからコードポイントへの変換を @e デコード と呼ぶ。

    アプリケーションプログラムは、指定されたコード系でバイト列をデコードすることによって
    M-text を得ることができる。また逆に、指定されたコード系で M-text
    をエンコードすることによってバイト列を得ることができる。  */
53 
54 /*=*/
55 
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
58      @{ */
59 
60 #include <config.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <ctype.h>
64 #include <string.h>
65 #include <sys/types.h>
66 #include <unistd.h>
67 #include <errno.h>
68 
69 #include "m17n.h"
70 #include "m17n-misc.h"
71 #include "internal.h"
72 #include "plist.h"
73 #include "character.h"
74 #include "charset.h"
75 #include "coding.h"
76 #include "mtext.h"
77 #include "symbol.h"
78 #include "mlocale.h"
79 
80 #define NUM_SUPPORTED_CHARSETS 32
81 
/** Structure for coding system object.  */

typedef struct
{
  /** Name of the coding system.  */
  MSymbol name;

  /** Type of the coding system.  */
  MSymbol type;

  /** Number of supported charsets, i.e. of the leading elements of
      <charsets> that are in use.  */
  int ncharsets;

  /** Array of supported charsets.  */
  MCharset *charsets[NUM_SUPPORTED_CHARSETS];

  /** If non-NULL, function to call at the time of creating and
      resetting a converter.  */
  int (*resetter) (MConverter *converter);

  /** Function to decode the STR_BYTES bytes at STR into the M-text MT
      on behalf of CONVERTER.  */
  int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
                  MConverter *converter);

  /** Function to encode the characters of MT from FROM to TO into the
      STR_BYTES bytes buffer STR on behalf of CONVERTER.  */
  int (*encoder) (MText *mt, int from, int to,
                  unsigned char *str, int str_bytes,
                  MConverter *converter);

  /** If non-zero, the coding system decode/encode ASCII characters as
      is.  */
  int ascii_compatible;

  /** Pointer to extra information given when the coding system is
      defined.  The meaning depends on <type>.  */
  void *extra_info;

  /** Pointer to information referred on conversion.  The meaning
      depends on <type>.  The value NULL means that the coding system
      is not yet setup.  */
  void *extra_spec;

  /** Non-zero once the coding system has been set up.  The resetters
      run the type specific setup function only while this is zero and
      set it to 1 afterwards.  */
  int ready;
} MCodingSystem;
124 
/** List of pointers to coding systems.
    NOTE(review): size, inc and used look like the allocated capacity,
    the growth step and the number of stored elements of <codings>;
    the code that maintains them is not visible here -- confirm.  */

struct MCodingList
{
  int size, inc, used;
  MCodingSystem **codings;
};
130 
/** File-local list of coding systems (see struct MCodingList).  */
static struct MCodingList coding_list;

/** NOTE(review): presumably a property list of coding system
    definitions; its users are outside the visible part of this file
    -- confirm.  */
static MPlist *coding_definition_list;
134 
/** Internal state of a converter.  The decoders and encoders reach it
    through the internal_info member of MConverter.  */

typedef struct {
  /**en
     Pointer to a structure of a coding system.  */
  MCodingSystem *coding;

  /**en
     Buffer for carryover bytes generated while decoding, i.e. bytes
     of an incomplete sequence kept until the next block of source
     bytes arrives.  */
  unsigned char carryover[256];

  /**en
     Number of carryover bytes. */
  int carryover_bytes;

  /**en
     Beginning of the byte sequence bound to this converter.
     NOTE(review): presumably <in> is used for decoding and <out> for
     encoding -- confirm against the code that binds a buffer.  */
  union {
    const unsigned char *in;
    unsigned char *out;
  } buf;

  /**en
     Size of buf. */
  int bufsize;

  /**en
     Number of bytes already consumed in buf. */
  int used;

  /**en
     Stream bound to this converter. */
  FILE *fp;

  /**en
     Which of the above two (buf or fp) is in use. */
  int binding;

  /**en
     Buffer for unget. */
  MText *unread;

  /**en
    Working area. */
  MText *work_mt;

  /** NOTE(review): presumably non-zero if the bound stream supports
      seeking; it is not referenced in the visible code -- confirm.  */
  int seekable;
} MConverterStatus;
201 
202 
203 
204 /* Local macros and functions.  */
205 
/** Fetch the first byte of a new character into C.

    The macro works on the local variables of the decoders: SRC (read
    pointer), SRC_BASE, SRC_STOP (end of the area SRC is currently
    walking), SOURCE and SRC_END (start and end of the new source
    bytes), NCHARS and AT_MOST, and on the label source_end.

    At first, set SRC_BASE to SRC.  Then check if we have already
    produced AT_MOST chars.  If so, set SRC_END to SRC, and jump to
    source_end.  Otherwise, get one more byte C from SRC.  When SRC
    has reached SRC_STOP without being at SRC_END (the decoders start
    reading in the carryover buffer), reading continues at SOURCE and
    SRC_BASE is moved there too.  If no byte is left (SRC == SRC_END),
    jump to the label source_end.  */

#define ONE_MORE_BASE_BYTE(c)           \
  do {                                  \
    src_base = src;                     \
    if (nchars == at_most)              \
      {                                 \
        src_end = src;                  \
        goto source_end;                \
      }                                 \
    if (src == src_stop)                \
      {                                 \
        if (src == src_end)             \
          goto source_end;              \
        src_base = src = source;        \
        if (src == src_end)             \
          goto source_end;              \
        src_stop = src_end;             \
      }                                 \
    (c) = *src++;                       \
  } while (0)
230 
231 
/** Get one more byte C from SRC.  When SRC has reached SRC_STOP but
    not SRC_END, continue reading at SOURCE (and make SRC_END the new
    SRC_STOP).  If no byte is left (SRC == SRC_END), jump to the label
    source_end.  Unlike ONE_MORE_BASE_BYTE, SRC_BASE is left untouched
    so that it keeps pointing to the first byte of the character being
    decoded.  */

#define ONE_MORE_BYTE(c)        \
  do {                          \
    if (src == src_stop)        \
      {                         \
        if (src == src_end)     \
          goto source_end;      \
        src = source;           \
        if (src == src_end)     \
          goto source_end;      \
        src_stop = src_end;     \
      }                         \
    (c) = *src++;               \
  } while (0)
248 
249 
/** Move SRC back to SRC_BASE, the first byte of the character being
    decoded.  If SRC_BASE does not point into the new source bytes
    (SOURCE ... SRC_END), it is taken to point into the carryover
    buffer, and SRC_STOP is set back to the end of the carryover
    bytes.  */

#define REWIND_SRC_TO_BASE()                                            \
  do {                                                                  \
    if (src_base < source || src_base >= src_end)                       \
      src_stop = internal->carryover + internal->carryover_bytes;       \
    src = src_base;                                                     \
  } while (0)
256 
257 
/** Push back byte C to SRC.  If SRC is beyond SOURCE, simply step SRC
    back by one byte.  Otherwise store C as the only byte of the
    carryover buffer and make SRC and SRC_STOP delimit that single
    byte.

    The expansion deliberately has no trailing semicolon so that
    "if (cond) UNGET_ONE_BYTE (c); else ..." is valid, and C is
    parenthesized so that any expression may be given.  */

#define UNGET_ONE_BYTE(c)               \
  do {                                  \
    if (src > source)                   \
      src--;                            \
    else                                \
      {                                 \
        internal->carryover[0] = (c);   \
        internal->carryover_bytes = 1;  \
        src = internal->carryover;      \
        src_stop = src + 1;             \
      }                                 \
  } while (0)
272 
273 
/** Append the multibyte form of character C at DST, advance DST past
    the bytes just written, and count the character in NCHARS.  DST
    and DST_END must delimit the free part of the data area of M-text
    MT.  When C plus one spare byte does not fit, the data area is
    first enlarged by the size of C and of the bytes still to be read
    before SRC_STOP, and DST / DST_END are recomputed.  */

#define EMIT_CHAR(c)                                                    \
  do {                                                                  \
    int need_ = CHAR_BYTES (c);                                         \
                                                                        \
    if (dst + need_ + 1 > dst_end)                                      \
      {                                                                 \
        int offset_ = dst - mt->data;                                   \
        int want_ = mt->allocated + need_ + (src_stop - src);           \
                                                                        \
        mtext__enlarge (mt, want_);                                     \
        dst = mt->data + offset_;                                       \
        dst_end = mt->data + mt->allocated;                             \
      }                                                                 \
    dst += CHAR_STRING (c, dst);                                        \
    nchars++;                                                           \
  } while (0)
295 
296 
/* Make sure that LEN more bytes can be stored at DST without passing
   DST_END.  When they cannot, control is transferred to the label
   insufficient_destination.  */

#define CHECK_DST(len)                          \
  do {                                          \
    if (! (dst + (len) <= dst_end))             \
      goto insufficient_destination;            \
  } while (0)
305 
306 
/** Make M-text MT adopt the NUM_CHARS characters (NUM_BYTES bytes)
    that were already written just after its current contents, at
    (MT->data + MT->nbytes).  If CHARSET is non-NULL, also attach the
    Mcharset text property with value CHARSET->name to the adopted
    characters.  Nothing is done unless NUM_CHARS is positive.  */

#define TAKEIN_CHARS(mt, num_chars, num_bytes, charset)                 \
  do {                                                                  \
    int taken_ = (num_chars);                                           \
                                                                        \
    if (taken_ <= 0)                                                    \
      break;                                                            \
    mtext__takein ((mt), taken_, (num_bytes));                          \
    if ((charset) != NULL)                                              \
      mtext_put_prop ((mt), (mt)->nchars - taken_, (mt)->nchars,        \
                      Mcharset, (void *) ((charset)->name));            \
  } while (0)
323 
324 
/** Set the local variables SRC and SRC_END to the addresses, inside
    the data area of M-text MT, of the characters at positions FROM
    and TO.  FORMAT is the format of MT: for formats up to
    MTEXT_FORMAT_UTF_8 the offset is POS_CHAR_TO_BYTE in bytes, for
    formats up to MTEXT_FORMAT_UTF_16BE it is POS_CHAR_TO_BYTE in
    units of sizeof (short), and for the remaining formats each
    character occupies sizeof (int) bytes.

    Every macro parameter is parenthesized so that arbitrary
    expressions (e.g. "pos + 1") can be given.  */

#define SET_SRC(mt, format, from, to)                                   \
  do {                                                                  \
    if ((format) <= MTEXT_FORMAT_UTF_8)                                 \
      {                                                                 \
        src = (mt)->data + POS_CHAR_TO_BYTE ((mt), (from));             \
        src_end = (mt)->data + POS_CHAR_TO_BYTE ((mt), (to));           \
      }                                                                 \
    else if ((format) <= MTEXT_FORMAT_UTF_16BE)                         \
      {                                                                 \
        src = ((mt)->data                                               \
               + (sizeof (short)) * POS_CHAR_TO_BYTE ((mt), (from)));   \
        src_end = ((mt)->data                                           \
                   + (sizeof (short)) * POS_CHAR_TO_BYTE ((mt), (to))); \
      }                                                                 \
    else                                                                \
      {                                                                 \
        src = (mt)->data + (sizeof (int)) * (from);                     \
        src_end = (mt)->data + (sizeof (int)) * (to);                   \
      }                                                                 \
  } while (0)
345 
346 
/** Fetch the next character to encode into C and set BYTES to the
    number of source bytes it occupies.  Jump to the label finish when
    SRC has reached SRC_END.  SRC itself is not advanced; the caller
    is expected to add BYTES to it.

    FORMAT selects how the character is read:
    - up to MTEXT_FORMAT_UTF_8: decoded from SRC by
      STRING_CHAR_AND_BYTES;
    - up to MTEXT_FORMAT_UTF_16BE: read with mtext_ref_char;
    - otherwise: read from the data area of the M-text as an array of
      unsigned.

    Note that the last two cases do not use SRC for reading: they use
    the variables "mt" and "from" of the expanding function and
    increment "from" as a side effect.  */

#define ONE_MORE_CHAR(c, bytes, format)                         \
  do {                                                          \
    if (src == src_end)                                         \
      goto finish;                                              \
    if (format <= MTEXT_FORMAT_UTF_8)                           \
      c = STRING_CHAR_AND_BYTES (src, bytes);                   \
    else if (format <= MTEXT_FORMAT_UTF_16BE)                   \
      {                                                         \
        c = mtext_ref_char (mt, from++);                        \
        bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c);        \
      }                                                         \
    else                                                        \
      {                                                         \
        c = ((unsigned *) (mt->data))[from++];                  \
        bytes = sizeof (int);                                   \
      }                                                         \
  } while (0)
364 
365 
366 static int
encode_unsupporeted_char(int c,unsigned char * dst,unsigned char * dst_end,MText * mt,int pos)367 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
368 			  MText *mt, int pos)
369 {
370   int len;
371   char *format;
372 
373   len = c < 0x10000 ? 8 : 10;
374   if (dst + len > dst_end)
375     return 0;
376 
377   mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
378   format = (c < 0xD800 ? "<U+%04X>"
379 	    : c < 0xE000 ? "<M+%04X>"
380 	    : c < 0x10000 ? "<U+%04X>"
381 	    : c < 0x110000 ? "<U+%06X>"
382 	    : "<M+%06X>");
383   sprintf ((char *) dst, format, c);
384   return len;
385 }
386 
387 
388 
/** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
    characters by CONVERTER into M-text MT.  SRC is a pointer to the
    not-yet processed bytes; it may point outside of SOURCE
    ... SRC_END, in which case it is taken to point into the carryover
    buffer of CONVERTER.  ERROR is 1 iff an invalid byte was found.

    What happens to the unprocessed bytes:
    - none left: the carryover buffer is emptied;
    - ERROR, or last block of a non-lenient converter: the result is
      set to MCONVERSION_RESULT_INVALID_BYTE;
    - not the last block: they are saved in the carryover buffer and
      the result is set to MCONVERSION_RESULT_INSUFFICIENT_SRC;
    - last block of a lenient converter: each byte is emitted as one
      character with the charset property of mcharset__binary.

    Finally add NCHARS to CONVERTER->nchars and the number of consumed
    bytes of SOURCE to CONVERTER->nbytes.  Return -1 if the result is
    MCONVERSION_RESULT_INVALID_BYTE, 0 otherwise.  */

static int
finish_decoding (MText *mt, MConverter *converter, int nchars,
                 const unsigned char *source, const unsigned char *src_end,
                 const unsigned char *src,
                 int error)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;

  if (src == src_end)
    /* Everything was consumed.  */
    internal->carryover_bytes = 0;
  else if (error
           || (converter->last_block
               && ! converter->lenient))
    converter->result = MCONVERSION_RESULT_INVALID_BYTE;
  else if (! converter->last_block)
    {
      /* More source will follow; keep the incomplete sequence.  */
      unsigned char *dst = internal->carryover;

      if (src < source || src > src_end)
        {
          /* SRC is in the carryover buffer: keep the bytes already
             there and append the whole of SOURCE after them.  */
          dst += internal->carryover_bytes;
          src = source;
        }
      /* NOTE(review): no check against the size of the carryover
         buffer is made here.  */
      while (src < src_end)
        *dst++ = *src++;
      internal->carryover_bytes = dst - internal->carryover;
      converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
    }
  else
    {
      /* Last block of a lenient converter: emit the remaining bytes
         one by one as characters.  The variable names below are those
         expected by EMIT_CHAR.  */
      unsigned char *dst = mt->data + mt->nbytes;
      unsigned char *dst_end = mt->data + mt->allocated;
      const unsigned char *src_stop = src_end;
      int c;
      int last_nchars = nchars;

      if (src < source || src > src_end)
        src_stop = internal->carryover + internal->carryover_bytes;
      while (1)
        {
          if (converter->at_most && nchars == converter->at_most)
            break;
          if (src == src_stop)
            {
              /* End of the current area: either everything is done or
                 we switch from the carryover buffer to SOURCE.  */
              if (src == src_end)
                break;
              src = source;
              if (src == src_end)
                break;
              src_stop = src_end;
            }
          c = *src++;
          EMIT_CHAR (c);
        }
      TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
                    mcharset__binary);
      internal->carryover_bytes = 0;
    }

  converter->nchars += nchars;
  /* Count only bytes of SOURCE; a SRC outside of it (i.e. still in
     the carryover buffer) means nothing of SOURCE was consumed.  */
  converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
  return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
}
457 
458 
459 
/* Stuff for coding-systems of type MCODING_TYPE_CHARSET.  */
461 
462 static int
setup_coding_charset(MCodingSystem * coding)463 setup_coding_charset (MCodingSystem *coding)
464 {
465   int ncharsets = coding->ncharsets;
466   unsigned *code_charset_table;
467 
468   if (ncharsets > 1)
469     {
470       /* At first, reorder charset list by dimensions (a charset of
471 	 smaller dimension comes first).  As the number of charsets is
472 	 usually very small (at most 32), we do a simple sort.  */
473       MCharset **charsets;
474       int idx = 0;
475       int i, j;
476 
477       MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
478       memcpy (charsets, coding->charsets,
479 	      sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
480       for (i = 0; i < 4; i++)
481 	for (j = 0; j < ncharsets; j++)
482 	  if (charsets[j]->dimension == i)
483 	    coding->charsets[idx++] = charsets[j];
484     }
485 
486   MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
487   while (ncharsets--)
488     {
489       int dim = coding->charsets[ncharsets]->dimension;
490       int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
491       int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
492 
493       if (coding->charsets[ncharsets]->ascii_compatible)
494 	coding->ascii_compatible = 1;
495       while (from <= to)
496 	code_charset_table[from++] |= 1 << ncharsets;
497     }
498 
499   coding->extra_spec = (void *) code_charset_table;
500   return 0;
501 }
502 
503 static int
reset_coding_charset(MConverter * converter)504 reset_coding_charset (MConverter *converter)
505 {
506   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
507   MCodingSystem *coding = internal->coding;
508 
509   if (! coding->ready
510       && setup_coding_charset (coding) < 0)
511     return -1;
512   coding->ready = 1;
513   return 0;
514 }
515 
/** Decoder for coding systems of type MCODING_TYPE_CHARSET.  Decode
    the carryover bytes of CONVERTER followed by the SRC_BYTES bytes
    at SOURCE, and append the resulting characters to M-text MT.  Runs
    of characters are given the charset they were decoded with as a
    text property (see TAKEIN_CHARS).  The return value is that of
    finish_decoding.  */

static int
decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
                       MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  MCodingSystem *coding = internal->coding;
  /* Reading starts in the carryover buffer; ONE_MORE_BASE_BYTE and
     ONE_MORE_BYTE switch over to SOURCE when it is exhausted.  */
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. no limit.  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;

  /* Table built by setup_coding_charset: for each byte value, a bit
     mask of the indices of the charsets to try.  */
  unsigned *code_charset_table = (unsigned *) coding->extra_spec;
  MCharset **charsets = coding->charsets;
  MCharset *charset = mcharset__ascii;
  int error = 0;

  while (1)
    {
      MCharset *this_charset = NULL;
      int c;
      unsigned mask;

      ONE_MORE_BASE_BYTE (c);
      mask = code_charset_table[c];
      if (mask)
        {
          int idx = 0;
          unsigned code = c;
          int nbytes = 1;
          int dim;

          /* Try the candidate charsets in the order of their index,
             reading more bytes when a charset needs a longer code.  */
          while (mask)
            {
              while (! (mask & 1)) mask >>= 1, idx++;
              this_charset = charsets[idx];
              dim = this_charset->dimension;
              while (nbytes < dim)
                {
                  ONE_MORE_BYTE (c);
                  code = (code << 8) | c;
                  nbytes++;
                }
              c = DECODE_CHAR (this_charset, code);
              if (c >= 0)
                goto emit_char;
              mask >>= 1, idx++;
            }
        }

      /* No charset could decode the bytes.  */
      if (! converter->lenient)
        break;
      /* Lenient: emit the first byte alone as a binary character.  */
      REWIND_SRC_TO_BASE ();
      c = *src++;
      this_charset = mcharset__binary;

    emit_char:
      /* On a change of charset (ASCII never counts as a change), take
         the characters produced so far into MT with the previous
         charset.  */
      if (this_charset != mcharset__ascii
          && this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
596 
597 static int
encode_coding_charset(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)598 encode_coding_charset (MText *mt, int from, int to,
599 		       unsigned char *destination, int dst_bytes,
600 		       MConverter *converter)
601 {
602   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
603   MCodingSystem *coding = internal->coding;
604   unsigned char *src, *src_end;
605   unsigned char *dst = destination;
606   unsigned char *dst_end = dst + dst_bytes;
607   int nchars = 0;
608   int ncharsets = coding->ncharsets;
609   MCharset **charsets = coding->charsets;
610   int ascii_compatible = coding->ascii_compatible;
611   enum MTextFormat format = mt->format;
612 
613   SET_SRC (mt, format, from, to);
614   while (1)
615     {
616       int c, bytes;
617 
618       ONE_MORE_CHAR (c, bytes, format);
619 
620       if (c < 0x80 && ascii_compatible)
621 	{
622 	  CHECK_DST (1);
623 	  *dst++ = c;
624 	}
625       else
626 	{
627 	  unsigned code;
628 	  MCharset *charset = NULL;
629 	  int i = 0;
630 
631 	  while (1)
632 	    {
633 	      charset = charsets[i];
634 	      code = ENCODE_CHAR (charset, c);
635 	      if (code != MCHAR_INVALID_CODE)
636 		break;
637 	      if (++i == ncharsets)
638 		goto unsupported_char;
639 	    }
640 
641 	  CHECK_DST (charset->dimension);
642 	  if (charset->dimension == 1)
643 	    {
644 	      *dst++ = code;
645 	    }
646 	  else if (charset->dimension == 2)
647 	    {
648 	      *dst++ = code >> 8;
649 	      *dst++ = code & 0xFF;
650 	    }
651 	  else if (charset->dimension == 3)
652 	    {
653 	      *dst++ = code >> 16;
654 	      *dst++ = (code >> 8) & 0xFF;
655 	      *dst++ = code & 0xFF;
656 	    }
657 	  else
658 	    {
659 	      *dst++ = code >> 24;
660 	      *dst++ = (code >> 16) & 0xFF;
661 	      *dst++ = (code >> 8) & 0xFF;
662 	      *dst++ = code & 0xFF;
663 	    }
664 	}
665       src += bytes;
666       nchars++;
667       continue;
668 
669     unsupported_char:
670       {
671 	int len;
672 
673 	if (! converter->lenient)
674 	  break;
675 	len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
676 	if (len == 0)
677 	  goto insufficient_destination;
678 	dst += len;
679 	src += bytes;
680 	nchars++;
681       }
682     }
683   /* We reach here because of an unsupported char.  */
684   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
685   goto finish;
686 
687  insufficient_destination:
688   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
689 
690  finish:
691   converter->nchars += nchars;
692   converter->nbytes += dst - destination;
693   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
694 }
695 
696 
/* Stuff for coding-systems of type MCODING_TYPE_UTF (8).  */
698 
/** Return the charset to which the UTF-8 like byte sequence starting
    at P belongs: mcharset__unicode for a sequence of one to three
    bytes and for a four-byte sequence whose value is below 0x110000,
    mcharset__m17n for a longer or larger one, and mcharset__binary
    when one of the bytes that should continue the sequence is a
    character head (CHAR_HEAD_P) or the first byte cannot start a
    sequence of up to six bytes.

    For a four-byte sequence 11110xxx 10yyzzzz ..., the five bits
    xxxyy are the value divided by 0x10000; they must be combined with
    a bitwise OR and compared against 0x10.  */

#define UTF8_CHARSET(p)                                         \
  (! ((p)[0] & 0x80) ? (mcharset__unicode)                      \
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)                    \
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)                    \
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)              \
                            | (((p)[1] & 0x30) >> 4)) <= 0x10)  \
                          ? (mcharset__unicode)                 \
                          : (mcharset__m17n))                   \
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)                       \
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)                 \
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)                       \
   : (mcharset__binary))
715 
716 
/** Decoder for UTF-8.  Decode the carryover bytes of CONVERTER
    followed by the SRC_BYTES bytes at SOURCE, and append the
    resulting characters to M-text MT.  Sequences of up to six bytes
    are accepted.  Surrogate code values and values of 0x110000 and
    above are accepted only if CONVERTER is lenient or the first
    charset of the coding system is mcharset__m17n.  In lenient mode,
    a byte that cannot be decoded is emitted as a character with the
    charset property of mcharset__binary; otherwise it stops the
    decoding with an error.  The return value is that of
    finish_decoding.  */

static int
decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
                     MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  MCodingSystem *coding = internal->coding;
  /* Reading starts in the carryover buffer; ONE_MORE_BASE_BYTE and
     ONE_MORE_BYTE switch over to SOURCE when it is exhausted.  */
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. no limit.  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  int error = 0;
  /* Non-zero if every decoded value is acceptable, not only Unicode
     scalar values.  */
  int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
  MCharset *charset = NULL;

  while (1)
    {
      int c, c1, bytes;
      MCharset *this_charset = NULL;

      ONE_MORE_BASE_BYTE (c);

      /* The leading byte tells the length of the sequence and holds
         the most significant bits of the value.  */
      if (!(c & 0x80))
        bytes = 1;
      else if (!(c & 0x40))
        goto invalid_byte;
      else if (!(c & 0x20))
        bytes = 2, c &= 0x1F;
      else if (!(c & 0x10))
        bytes = 3, c &= 0x0F;
      else if (!(c & 0x08))
        bytes = 4, c &= 0x07;
      else if (!(c & 0x04))
        bytes = 5, c &= 0x03;
      else if (!(c & 0x02))
        bytes = 6, c &= 0x01;
      else
        goto invalid_byte;

      /* Each following byte must look like 10xxxxxx and contributes
         six bits.  */
      while (bytes-- > 1)
        {
          ONE_MORE_BYTE (c1);
          if ((c1 & 0xC0) != 0x80)
            goto invalid_byte;
          c = (c << 6) | (c1 & 0x3F);
        }

      if (full
          || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
        goto emit_char;

    invalid_byte:
      if (! converter->lenient)
        break;
      /* Lenient: emit the first byte alone as a binary character.  */
      REWIND_SRC_TO_BASE ();
      c = *src++;
      this_charset = mcharset__binary;

    emit_char:
      /* THIS_CHARSET is NULL for a decoded character.  On a change,
         take the characters produced so far into MT with the previous
         value.  */
      if (this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
798 
799 static int
encode_coding_utf_8(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)800 encode_coding_utf_8 (MText *mt, int from, int to,
801 		     unsigned char *destination, int dst_bytes,
802 		     MConverter *converter)
803 {
804   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
805   MCodingSystem *coding = internal->coding;
806   unsigned char *src, *src_end;
807   unsigned char *dst = destination;
808   unsigned char *dst_end = dst + dst_bytes;
809   int nchars = 0;
810   enum MTextFormat format = mt->format;
811 
812   SET_SRC (mt, format, from, to);
813 
814   if (format <= MTEXT_FORMAT_UTF_8
815       && (converter->lenient
816 	  || coding->charsets[0] == mcharset__m17n))
817     {
818       if (dst_bytes < src_end - src)
819 	{
820 	  int byte_pos = (src + dst_bytes) - mt->data;
821 
822 	  to = POS_BYTE_TO_CHAR (mt, byte_pos);
823 	  byte_pos = POS_CHAR_TO_BYTE (mt, to);
824 	  src_end = mt->data + byte_pos;
825 	  converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
826 	}
827       memcpy (destination, src, src_end - src);
828       nchars = to - from;
829       dst += src_end - src;
830       goto finish;
831     }
832 
833   while (1)
834     {
835       int c, bytes;
836 
837       ONE_MORE_CHAR (c, bytes, format);
838 
839       if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
840 	break;
841       CHECK_DST (bytes);
842       dst += CHAR_STRING (c, dst);
843       src += bytes;
844       nchars++;
845     }
846   /* We reach here because of an unsupported char.  */
847   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
848   goto finish;
849 
850  insufficient_destination:
851   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
852 
853  finish:
854   converter->nchars += nchars;
855   converter->nbytes += dst - destination;
856   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
857 }
858 
859 
/* Stuff for coding-systems of type MCODING_TYPE_UTF (16 & 32).  */
861 
/** How a byte order mark (BOM) is handled.  The UTF-16 decoder looks
    for a BOM at the beginning of the source unless the value is
    UTF_BOM_NO.  When none is found, UTF_BOM_MAYBE makes it go on
    assuming big endian, while UTF_BOM_YES makes that an error for a
    converter that is not lenient.  */

enum utf_bom
  {
    UTF_BOM_MAYBE,
    UTF_BOM_NO,
    UTF_BOM_YES,
    /* Number of the values above.  */
    UTF_BOM_MAX
  };
869 
/** Byte order of the code units of UTF-16 and UTF-32.  */

enum utf_endian
  {
    UTF_BIG_ENDIAN,
    UTF_LITTLE_ENDIAN,
    /* Number of the values above.  */
    UTF_ENDIAN_MAX
  };
876 
/** Conversion status of the UTF-16 and UTF-32 coding systems.  It is
    kept in the status member of MConverter and initialized by
    reset_coding_utf.  */

struct utf_status
{
  /* Reset to 0 by reset_coding_utf.
     NOTE(review): presumably state about a pending surrogate; it is
     not used otherwise in the visible code -- confirm.  */
  int surrogate;
  /* How a BOM is still to be handled; the decoder changes it to
     UTF_BOM_NO once the beginning of the source has been examined.  */
  enum utf_bom bom;
  /* Byte order in effect; it may be changed by a BOM.  */
  enum utf_endian endian;
};
883 
884 static int
setup_coding_utf(MCodingSystem * coding)885 setup_coding_utf (MCodingSystem *coding)
886 {
887   MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
888   MCodingInfoUTF *spec;
889 
890   if (info->code_unit_bits == 8)
891     coding->ascii_compatible = 1;
892   else if (info->code_unit_bits == 16
893 	   || info->code_unit_bits == 32)
894     {
895       if (info->bom < 0 || info->bom > 2
896 	  || info->endian < 0 || info->endian > 1)
897 	MERROR (MERROR_CODING, -1);
898     }
899   else
900     return -1;
901 
902   MSTRUCT_CALLOC (spec, MERROR_CODING);
903   *spec = *info;
904   coding->extra_spec = (void *) (spec);
905   return 0;
906 }
907 
908 static int
reset_coding_utf(MConverter * converter)909 reset_coding_utf (MConverter *converter)
910 {
911   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
912   MCodingSystem *coding = internal->coding;
913   struct utf_status *status = (struct utf_status *) &(converter->status);
914 
915   if (! coding->ready
916       && setup_coding_utf (coding) < 0)
917     return -1;
918   coding->ready = 1;
919 
920   status->surrogate = 0;
921   status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
922   status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
923   return 0;
924 }
925 
/** Decoder for UTF-16.  Decode the carryover bytes of CONVERTER
    followed by the SRC_BYTES bytes at SOURCE, and append the
    resulting characters to M-text MT.

    Unless the status of CONVERTER says UTF_BOM_NO, the first two
    bytes are examined for a byte order mark, which selects the byte
    order.  Without a mark, big endian is assumed and the two bytes
    are decoded as data if the status says UTF_BOM_MAYBE or CONVERTER
    is lenient; otherwise it is an error.

    A high surrogate followed by a low surrogate is combined into one
    character.  Any other surrogate stops the decoding with an error
    unless CONVERTER is lenient, in which case the 16-bit unit is
    emitted as a character with the charset property of
    mcharset__binary.  The return value is that of
    finish_decoding.  */

static int
decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
                      MConverter *converter)
{
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  /* Reading starts in the carryover buffer; ONE_MORE_BASE_BYTE and
     ONE_MORE_BYTE switch over to SOURCE when it is exhausted.  */
  const unsigned char *src = internal->carryover;
  const unsigned char *src_stop = src + internal->carryover_bytes;
  const unsigned char *src_end = source + src_bytes;
  const unsigned char *src_base;
  unsigned char *dst = mt->data + mt->nbytes;
  unsigned char *dst_end = mt->data + mt->allocated;
  int nchars = 0;
  int last_nchars = 0;
  /* -1 never equals NCHARS, i.e. no limit.  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  struct utf_status *status = (struct utf_status *) &(converter->status);
  unsigned char b1, b2;
  MCharset *charset = NULL;
  int error = 0;

  if (status->bom != UTF_BOM_NO)
    {
      int c;

      ONE_MORE_BASE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      c = (b1 << 8) | b2;
      if (c == 0xFEFF)
        status->endian = UTF_BIG_ENDIAN;
      else if (c == 0xFFFE)
        status->endian = UTF_LITTLE_ENDIAN;
      else if (status->bom == UTF_BOM_MAYBE
               || converter->lenient)
        {
          /* No BOM: the two bytes are data.  */
          status->endian = UTF_BIG_ENDIAN;
          REWIND_SRC_TO_BASE ();
        }
      else
        {
          error = 1;
          goto source_end;
        }
      /* The beginning has been examined; do not look for a BOM
         again.  */
      status->bom = UTF_BOM_NO;
    }

  while (1)
    {
      int c, c1;
      MCharset *this_charset = NULL;

      ONE_MORE_BASE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      if (status->endian == UTF_BIG_ENDIAN)
        c = ((b1 << 8) | b2);
      else
        c = ((b2 << 8) | b1);
      if (c < 0xD800 || c >= 0xE000)
        goto emit_char;
      else if (c < 0xDC00)
        {
          /* High surrogate: a low surrogate must follow.  */
          ONE_MORE_BYTE (b1);
          ONE_MORE_BYTE (b2);
          if (status->endian == UTF_BIG_ENDIAN)
            c1 = ((b1 << 8) | b2);
          else
            c1 = ((b2 << 8) | b1);
          if (c1 < 0xDC00 || c1 >= 0xE000)
            goto invalid_byte;
          c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
          goto emit_char;
        }
      /* A low surrogate without a preceding high one falls through
         to invalid_byte.  */

    invalid_byte:
      if (! converter->lenient)
        break;
      /* Lenient: read the first 16-bit unit again and emit it alone
         as a binary character.  */
      REWIND_SRC_TO_BASE ();
      ONE_MORE_BYTE (b1);
      ONE_MORE_BYTE (b2);
      if (status->endian == UTF_BIG_ENDIAN)
        c = ((b1 << 8) | b2);
      else
        c = ((b2 << 8) | b1);
      this_charset = mcharset__binary;

    emit_char:
      /* THIS_CHARSET is NULL for a decoded character.  On a change,
         take the characters produced so far into MT with the previous
         value.  */
      if (this_charset != charset)
        {
          TAKEIN_CHARS (mt, nchars - last_nchars,
                        dst - (mt->data + mt->nbytes), charset);
          charset = this_charset;
          last_nchars = nchars;
        }
      EMIT_CHAR (c);
    }
  /* We reach here because of an invalid byte.  */
  error = 1;

 source_end:
  TAKEIN_CHARS (mt, nchars - last_nchars,
                dst - (mt->data + mt->nbytes), charset);
  return finish_decoding (mt, converter, nchars,
                          source, src_end, src_base, error);
}
1028 
1029 
1030 static int
decode_coding_utf_32(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)1031 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1032 		      MConverter *converter)
1033 {
1034   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1035   const unsigned char *src = internal->carryover;
1036   const unsigned char *src_stop = src + internal->carryover_bytes;
1037   const unsigned char *src_end = source + src_bytes;
1038   const unsigned char *src_base;
1039   unsigned char *dst = mt->data + mt->nbytes;
1040   unsigned char *dst_end = mt->data + mt->allocated;
1041   int nchars = 0;
1042   int last_nchars = 0;
1043   int at_most = converter->at_most > 0 ? converter->at_most : -1;
1044   struct utf_status *status = (struct utf_status *) &(converter->status);
1045   unsigned char b1, b2, b3, b4;
1046   MCharset *charset = NULL;
1047   int error = 0;
1048 
1049   if (status->bom != UTF_BOM_NO)
1050     {
1051       unsigned c;
1052 
1053       ONE_MORE_BASE_BYTE (b1);
1054       ONE_MORE_BYTE (b2);
1055       ONE_MORE_BYTE (b3);
1056       ONE_MORE_BYTE (b4);
1057       c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1058       if (c == 0x0000FEFF)
1059 	status->endian = UTF_BIG_ENDIAN;
1060       else if (c == 0xFFFE0000)
1061 	status->endian = UTF_LITTLE_ENDIAN;
1062       else if (status->bom == UTF_BOM_MAYBE
1063 	       || converter->lenient)
1064 	{
1065 	  status->endian = UTF_BIG_ENDIAN;
1066 	  REWIND_SRC_TO_BASE ();
1067 	}
1068       else
1069 	{
1070 	  error = 1;
1071 	  goto source_end;
1072 	}
1073       status->bom = UTF_BOM_NO;
1074     }
1075 
1076   while (1)
1077     {
1078       unsigned c;
1079       MCharset *this_charset = NULL;
1080 
1081       ONE_MORE_BASE_BYTE (b1);
1082       ONE_MORE_BYTE (b2);
1083       ONE_MORE_BYTE (b3);
1084       ONE_MORE_BYTE (b4);
1085       if (status->endian == UTF_BIG_ENDIAN)
1086 	c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1087       else
1088 	c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1089       if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1090 	goto emit_char;
1091 
1092       if (! converter->lenient)
1093 	break;
1094       REWIND_SRC_TO_BASE ();
1095       ONE_MORE_BYTE (c);
1096       this_charset = mcharset__binary;
1097 
1098     emit_char:
1099       if (this_charset != charset)
1100 	{
1101 	  TAKEIN_CHARS (mt, nchars - last_nchars,
1102 			dst - (mt->data + mt->nbytes), charset);
1103 	  charset = this_charset;
1104 	  last_nchars = nchars;
1105 	}
1106       EMIT_CHAR (c);
1107     }
1108   /* We reach here because of an invalid byte.  */
1109   error = 1;
1110 
1111  source_end:
1112   TAKEIN_CHARS (mt, nchars - last_nchars,
1113 		dst - (mt->data + mt->nbytes), charset);
1114   return finish_decoding (mt, converter, nchars,
1115 			  source, src_end, src_base, error);
1116 }
1117 
1118 
1119 static int
encode_coding_utf_16(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)1120 encode_coding_utf_16 (MText *mt, int from, int to,
1121 		      unsigned char *destination, int dst_bytes,
1122 		      MConverter *converter)
1123 {
1124   unsigned char *src, *src_end;
1125   unsigned char *dst = destination;
1126   unsigned char *dst_end = dst + dst_bytes;
1127   int nchars = 0;
1128   struct utf_status *status = (struct utf_status *) &(converter->status);
1129   int big_endian = status->endian == UTF_BIG_ENDIAN;
1130   enum MTextFormat format = mt->format;
1131 
1132   SET_SRC (mt, format, from, to);
1133 
1134   if (status->bom != UTF_BOM_NO)
1135     {
1136       CHECK_DST (2);
1137       if (big_endian)
1138 	*dst++ = 0xFE, *dst++ = 0xFF;
1139       else
1140 	*dst++ = 0xFF, *dst++ = 0xFE;
1141       status->bom = UTF_BOM_NO;
1142     }
1143 
1144   while (1)
1145     {
1146       int c, bytes;
1147 
1148       ONE_MORE_CHAR (c, bytes, format);
1149 
1150       if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1151 	{
1152 	  CHECK_DST (2);
1153 	  if (big_endian)
1154 	    *dst++ = c >> 8, *dst++ = c & 0xFF;
1155 	  else
1156 	    *dst++ = c & 0xFF, *dst++ = c >> 8;
1157 	}
1158       else if (c >= 0x10000 && c < 0x110000)
1159 	{
1160 	  int c1, c2;
1161 
1162 	  CHECK_DST (4);
1163 	  c -= 0x10000;
1164 	  c1 = (c >> 10) + 0xD800;
1165 	  c2 = (c & 0x3FF) + 0xDC00;
1166 	  if (big_endian)
1167 	    *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1168 	      *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1169 	  else
1170 	    *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1171 	      *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1172 	}
1173       else
1174 	{
1175 	  unsigned char buf[11];
1176 	  int len, i;
1177 
1178 	  if (! converter->lenient)
1179 	    break;
1180 	  len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1181 					  mt, from + nchars);
1182 	  if (len == 0)
1183 	    goto insufficient_destination;
1184 	  if (big_endian)
1185 	    for (i = 0; i < len; i++)
1186 	      *dst++ = 0, *dst++ = buf[i];
1187 	  else
1188 	    for (i = 0; i < len; i++)
1189 	      *dst++ = buf[i], *dst++ = 0;
1190 	}
1191       src += bytes;
1192       nchars++;
1193     }
1194   /* We reach here because of an unsupported char.  */
1195   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1196   goto finish;
1197 
1198  insufficient_destination:
1199   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1200 
1201  finish:
1202   converter->nchars += nchars;
1203   converter->nbytes += dst - destination;
1204   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1205 }
1206 
1207 static int
encode_coding_utf_32(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)1208 encode_coding_utf_32 (MText *mt, int from, int to,
1209 		      unsigned char *destination, int dst_bytes,
1210 		      MConverter *converter)
1211 {
1212   unsigned char *src, *src_end;
1213   unsigned char *dst = destination;
1214   unsigned char *dst_end = dst + dst_bytes;
1215   int nchars = 0;
1216   struct utf_status *status = (struct utf_status *) &(converter->status);
1217   int big_endian = status->endian == UTF_BIG_ENDIAN;
1218   enum MTextFormat format = mt->format;
1219 
1220   SET_SRC (mt, format, from, to);
1221 
1222   if (status->bom != UTF_BOM_NO)
1223     {
1224       CHECK_DST (4);
1225       if (big_endian)
1226 	*dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1227       else
1228 	*dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1229       status->bom = UTF_BOM_NO;
1230     }
1231 
1232   while (1)
1233     {
1234       int c, bytes;
1235 
1236       ONE_MORE_CHAR (c, bytes, format);
1237 
1238       if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1239 	{
1240 	  CHECK_DST (4);
1241 	  if (big_endian)
1242 	    *dst++ = 0x00, *dst++ = c >> 16,
1243 	      *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1244 	  else
1245 	    *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1246 	      *dst++ = c >> 16, *dst++ = 0x00;
1247 	}
1248       else
1249 	{
1250 	  unsigned char buf[11];
1251 	  int len, i;
1252 
1253 	  if (! converter->lenient)
1254 	    break;
1255 	  len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1256 					  mt, from + nchars);
1257 	  if (len == 0)
1258 	    goto insufficient_destination;
1259 	  if (big_endian)
1260 	    for (i = 0; i < len; i++)
1261 	      *dst++ = 0, *dst++ = buf[i];
1262 	  else
1263 	    for (i = 0; i < len; i++)
1264 	      *dst++ = buf[i], *dst++ = 0;
1265 	}
1266       src += bytes;
1267       nchars++;
1268     }
1269   /* We reach here because of an unsupported char.  */
1270   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1271   goto finish;
1272 
1273  insufficient_destination:
1274   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1275 
1276  finish:
1277   converter->nchars += nchars;
1278   converter->nbytes += dst - destination;
1279   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1280 }
1281 
1282 
/* Stuff for coding-systems of type MCODING_TYPE_ISO_2022.  */

/* Control codes that the ISO-2022 decoder and encoder treat
   specially.  */
#define ISO_CODE_STX	0x02		/* start text */
#define ISO_CODE_SO	0x0E		/* shift-out */
#define ISO_CODE_SI	0x0F		/* shift-in */
#define ISO_CODE_SS2_7	0x19		/* single-shift-2 for 7-bit code */
#define ISO_CODE_ESC	0x1B		/* escape */
#define ISO_CODE_SS2	0x8E		/* single-shift-2 */
#define ISO_CODE_SS3	0x8F		/* single-shift-3 */
1292 
/** Structure pointed by MCodingSystem.extra_spec.  It is built once
    per coding system by setup_coding_iso_2022.  */

struct iso_2022_spec
{
  /** Copy of the flags of the coding system's MCodingInfoISO2022
      (MCODING_ISO_* bits).  */
  unsigned flags;

  /** Initial graphic registers (0..3) invoked to each graphic
      plane left and right. */
  int initial_invocation[2];

  /** Initially designated charsets for each graphic register.  */
  MCharset *initial_designation[4];

  /** Number of elements of DESIGNATIONS: the number of charsets of
      the coding system, plus the size of mcharset__iso_2022_table
      when MCODING_ISO_FULL_SUPPORT is set.  Left zero when no
      designation policy flag is set.  */
  int n_designations;

  /** Per-charset graphic register information, or NULL when no
      designation policy flag is set.  Elements start out as -1.  */
  char *designations;

  /** Nonzero if the ESC byte introduces escape sequences in this
      coding system; when zero the decoder treats ESC as a plain ASCII
      control character.  */
  int use_esc;
};
1311 
/* Conversion state of an ISO-2022 converter; it overlays
   MConverter.status and is initialized by reset_coding_iso_2022.  */
struct iso_2022_status
{
  /* Graphic register currently invoked to graphic plane 0 and 1.  The
     decoder treats a negative value as "no register invoked".  */
  int invocation[2];
  /* Charset currently designated to each graphic register (G0..G3);
     NULL if none.  */
  MCharset *designation[4];
  /* Set to 1 by the single-shift encoding macros.  */
  unsigned single_shifting : 1;
  /* Set to 1 on reset.  NOTE(review): presumably "at beginning of
     line"; confirm against the encoder.  */
  unsigned bol : 1;
  /* 1 while the text direction selected by an ISO 6429 direction
     sequence is right-to-left.  */
  unsigned r2l : 1;
  /* 1 while decoding the UTF-8 section that compound text introduces
     with ESC % G.  */
  unsigned utf8_shifting : 1;
  /* Charset named by the most recent compound-text extended segment
     (ESC % / n M L name STX).  */
  MCharset *non_standard_charset;
  /* Number of bytes per character in that extended segment (1..4).  */
  int non_standard_charset_bytes;
  /* Number of bytes of the extended segment still to be decoded; the
     decoder lowers it by NON_STANDARD_CHARSET_BYTES per character and
     leaves extended-segment decoding once it is no longer
     positive.  */
  int non_standard_encoding;
};
1324 
/* Classification of a byte value for the ISO-2022 decoder.  The
   array declared together with the enum is indexed by byte value.  */
enum iso_2022_code_class {
  ISO_control_0,		/* Control codes in the range
				   0x00..0x1F and 0x7F, except for the
				   following 4 codes.  */
  ISO_shift_out,		/* ISO_CODE_SO (0x0E) */
  ISO_shift_in,			/* ISO_CODE_SI (0x0F) */
  ISO_single_shift_2_7,		/* ISO_CODE_SS2_7 (0x19) */
  ISO_escape,			/* ISO_CODE_ESC (0x1B) */
  ISO_control_1,		/* Control codes in the range
				   0x80..0x9F, except for the
				   following 3 codes.  */
  ISO_single_shift_2,		/* ISO_CODE_SS2 (0x8E) */
  ISO_single_shift_3,		/* ISO_CODE_SS3 (0x8F) */
  ISO_control_sequence_introducer, /* CSI (0x9B) */
  ISO_0x20_or_0x7F,	      /* Codes of the values 0x20 or 0x7F.  */
  ISO_graphic_plane_0,	 /* Graphic codes in the range 0x21..0x7E.  */
  ISO_0xA0_or_0xFF,	      /* Codes of the values 0xA0 or 0xFF.  */
  ISO_graphic_plane_1	 /* Graphic codes in the range 0xA1..0xFE.  */
} iso_2022_code_class[256];
1344 
1345 
/* All the MCODING_ISO_* flag bits that select a designation policy.
   A coding system with none of them set performs no designation by
   escape sequence.  */
#define MCODING_ISO_DESIGNATION_MASK	\
  (MCODING_ISO_DESIGNATION_G0		\
   | MCODING_ISO_DESIGNATION_G1		\
   | MCODING_ISO_DESIGNATION_CTEXT	\
   | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1351 
1352 static int
setup_coding_iso_2022(MCodingSystem * coding)1353 setup_coding_iso_2022 (MCodingSystem *coding)
1354 {
1355   MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1356   int ncharsets = coding->ncharsets;
1357   struct iso_2022_spec *spec;
1358   int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1359   int i;
1360 
1361   coding->ascii_compatible = 0;
1362 
1363   MSTRUCT_CALLOC (spec, MERROR_CODING);
1364 
1365   spec->flags = info->flags;
1366   spec->initial_invocation[0] = info->initial_invocation[0];
1367   spec->initial_invocation[1] = info->initial_invocation[1];
1368   for (i = 0; i < 4; i++)
1369     spec->initial_designation[i] = NULL;
1370   if (designation_policy)
1371     {
1372       spec->n_designations = ncharsets;
1373       if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1374 	spec->n_designations += mcharset__iso_2022_table.used;
1375       MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1376       for (i = 0; i < spec->n_designations; i++)
1377 	spec->designations[i] = -1;
1378     }
1379   else
1380     {
1381       if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1382 	MERROR (MERROR_CODING, -1);
1383       spec->designations = NULL;
1384     }
1385 
1386   for (i = 0; i < ncharsets; i++)
1387     {
1388       int reg = info->designations[i];
1389 
1390       if (reg != -5
1391 	  && coding->charsets[i]->final_byte > 0
1392 	  && (reg < -4 || reg > 3))
1393 	MERROR (MERROR_CODING, -1);
1394       if (reg >= 0)
1395 	{
1396 	  if (spec->initial_designation[reg])
1397 	    MERROR (MERROR_CODING, -1);
1398 	  spec->initial_designation[reg] = coding->charsets[i];
1399 	}
1400       else if (reg >= -4)
1401 	{
1402 	  if (! designation_policy
1403 	      && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1404 	    MERROR (MERROR_CODING, -1);
1405 	  reg += 4;
1406 	}
1407 
1408       if (designation_policy)
1409 	spec->designations[i] = reg;
1410       if (coding->charsets[i] == mcharset__ascii)
1411 	coding->ascii_compatible = 1;
1412     }
1413 
1414   if (coding->ascii_compatible
1415       && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1416 			 | MCODING_ISO_DESIGNATION_CTEXT
1417 			 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1418 			 | MCODING_ISO_LOCKING_SHIFT)))
1419     coding->ascii_compatible = 0;
1420 
1421   if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1422     for (i = 0; i < mcharset__iso_2022_table.used; i++)
1423       {
1424 	MCharset *charset = mcharset__iso_2022_table.charsets[i];
1425 
1426 	spec->designations[ncharsets + i]
1427 	  = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1428 	      || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1429 	     ? (charset->code_range[0] == 32
1430 		|| charset->code_range[1] == 255)
1431 	     : designation_policy == MCODING_ISO_DESIGNATION_G1);
1432       }
1433 
1434   spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1435 		   || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1436 		       && (spec->initial_designation[2]
1437 			   || spec->initial_designation[3]))
1438 		   || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1439 		       && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1440 		   || (spec->flags & MCODING_ISO_ISO6429));
1441 
1442   coding->extra_spec = (void *) spec;
1443 
1444   return 0;
1445 }
1446 
1447 static int
reset_coding_iso_2022(MConverter * converter)1448 reset_coding_iso_2022 (MConverter *converter)
1449 {
1450   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1451   MCodingSystem *coding = internal->coding;
1452   struct iso_2022_status *status
1453     = (struct iso_2022_status *) &(converter->status);
1454   struct iso_2022_spec *spec;
1455   int i;
1456 
1457   if (! coding->ready
1458       && setup_coding_iso_2022 (coding) < 0)
1459     return -1;
1460   coding->ready = 1;
1461 
1462   spec = (struct iso_2022_spec *) coding->extra_spec;
1463   status->invocation[0] = spec->initial_invocation[0];
1464   status->invocation[1] = spec->initial_invocation[1];
1465   for (i = 0; i < 4; i++)
1466     status->designation[i] = spec->initial_designation[i];
1467   status->single_shifting = 0;
1468   status->bol = 1;
1469   status->r2l = 0;
1470 
1471   return 0;
1472 }
1473 
/* Decode a designation sequence: find the charset of dimension DIM
   with CHARS (94 or 96) characters per dimension and final byte FINAL,
   and designate it to graphic register REG by storing it in
   status->designation[REG].

   If REV is negative, the charset is looked up with MCHARSET_ISO_2022
   and, unless the coding system has MCODING_ISO_FULL_SUPPORT, it must
   be one of the charsets of `coding'.  Otherwise the charset is
   searched for in mcharset__iso_2022_table by revision REV.

   Jump to `invalid_byte' if FINAL is not a valid final byte or no
   acceptable charset is found.  The macro uses the variables `spec',
   `coding' and `status' of the calling function.  */

#define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev)            \
  do {                                                                     \
    MCharset *dd_charset;                                                  \
    int dd_i;                                                              \
                                                                           \
    if ((final) < '0' || (final) >= 128)                                   \
      goto invalid_byte;                                                   \
    if ((rev) < 0)                                                         \
      {                                                                    \
        dd_charset = MCHARSET_ISO_2022 ((dim), (chars), (final));          \
        /* Never designate a missing charset: the decoder dereferences     \
           the designated charset without checking it again.  */          \
        if (! dd_charset)                                                  \
          goto invalid_byte;                                               \
        if (! (spec->flags & MCODING_ISO_FULL_SUPPORT))                    \
          {                                                                \
            for (dd_i = 0; dd_i < coding->ncharsets; dd_i++)               \
              if (dd_charset == coding->charsets[dd_i])                    \
                break;                                                     \
            if (dd_i == coding->ncharsets)                                 \
              goto invalid_byte;                                           \
          }                                                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        dd_charset = NULL;                                                 \
        for (dd_i = 0; dd_i < mcharset__iso_2022_table.used; dd_i++)       \
          {                                                                \
            dd_charset = mcharset__iso_2022_table.charsets[dd_i];          \
            if (dd_charset->revision == (rev)                              \
                && dd_charset->dimension == (dim)                          \
                && dd_charset->final_byte == (final)                       \
                && (dd_charset->code_range[1] == (chars)                   \
                    || ((chars) == 96                                      \
                        && dd_charset->code_range[1] == 255)))             \
              break;                                                       \
          }                                                                \
        if (dd_i == mcharset__iso_2022_table.used)                         \
          goto invalid_byte;                                               \
      }                                                                    \
    status->designation[(reg)] = dd_charset;                               \
  } while (0)
1513 
1514 
1515 static MCharset *
find_ctext_non_standard_charset(char * charset_name)1516 find_ctext_non_standard_charset (char *charset_name)
1517 {
1518   MCharset *charset;
1519 
1520   if (! strcmp (charset_name, "koi8-r"))
1521     charset = MCHARSET (msymbol ("koi8-r"));
1522   else if  (! strcmp (charset_name, "big5-0"))
1523     charset = MCHARSET (msymbol ("big5"));
1524   else
1525     charset = NULL;
1526   return charset;
1527 }
1528 
1529 static int
decode_coding_iso_2022(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)1530 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1531 		       MConverter *converter)
1532 {
1533   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1534   MCodingSystem *coding = internal->coding;
1535   const unsigned char *src = internal->carryover;
1536   const unsigned char *src_stop = src + internal->carryover_bytes;
1537   const unsigned char *src_end = source + src_bytes;
1538   const unsigned char *src_base;
1539   unsigned char *dst = mt->data + mt->nbytes;
1540   unsigned char *dst_end = mt->data + mt->allocated;
1541   int nchars = 0;
1542   int last_nchars = 0;
1543   int at_most = converter->at_most > 0 ? converter->at_most : -1;
1544   struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1545   struct iso_2022_status *status
1546     = (struct iso_2022_status *) &(converter->status);
1547   MCharset *charset0, *charset1, *charset;
1548   int error = 0;
1549   MCharset *cns_charsets[15];
1550 
1551   charset0 = (status->invocation[0] >= 0
1552 	      ? status->designation[status->invocation[0]] : NULL);
1553   charset1 = (status->invocation[1] >= 0
1554 	      ? status->designation[status->invocation[1]] : NULL);
1555   charset = mcharset__ascii;
1556 
1557   if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1558     {
1559       int i;
1560 
1561       memset (cns_charsets, 0, sizeof (cns_charsets));
1562       for (i = 0; i < coding->ncharsets; i++)
1563 	if (coding->charsets[i]->dimension == 2
1564 	    && coding->charsets[i]->code_range[1] == 126)
1565 	  {
1566 	    int final = coding->charsets[i]->final_byte;
1567 
1568 	    if (final >= 'G' && final <= 'M')
1569 	      cns_charsets[final - 'G'] = coding->charsets[i];
1570 	    else if (final < 0)
1571 	      cns_charsets[14] = coding->charsets[i];
1572 	  }
1573     }
1574 
1575   while (1)
1576     {
1577       MCharset *this_charset = NULL;
1578       int c1, c2, c3;
1579 
1580       ONE_MORE_BASE_BYTE (c1);
1581 
1582       if (status->utf8_shifting)
1583 	{
1584 	  int buf[6];
1585 	  int bytes = CHAR_BYTES_BY_HEAD (c1);
1586 	  int i;
1587 
1588 	  buf[0] = c1;
1589 	  for (i = 1; i < bytes; i++)
1590 	    {
1591 	      ONE_MORE_BYTE (c1);
1592 	      buf[i] = c1;
1593 	    }
1594 	  this_charset = UTF8_CHARSET (buf);
1595 	  c1 = STRING_CHAR_UTF8 (buf);
1596 	  goto emit_char;
1597 	}
1598 
1599       if (status->non_standard_encoding > 0)
1600 	{
1601 	  int i;
1602 
1603 	  this_charset = status->non_standard_charset;
1604 	  for (i = 1; i < status->non_standard_charset_bytes; i++)
1605 	    {
1606 	      ONE_MORE_BYTE (c2);
1607 	      c1 = (c1 << 8) | c2;
1608 	    }
1609 	  c1 = DECODE_CHAR (this_charset, c1);
1610 	  goto emit_char;
1611 	}
1612 
1613       switch (iso_2022_code_class[c1])
1614 	{
1615 	case ISO_graphic_plane_0:
1616 	  this_charset = charset0;
1617 	  break;
1618 
1619 	case ISO_0x20_or_0x7F:
1620 	  if (! charset0
1621 	      || (charset0->code_range[0] != 32
1622 		  && charset0->code_range[1] != 255))
1623 	    /* This is SPACE or DEL.  */
1624 	    this_charset = mcharset__ascii;
1625 	  else
1626 	    /* This is a graphic character of plane 0.  */
1627 	    this_charset = charset0;
1628 	  break;
1629 
1630 	case ISO_graphic_plane_1:
1631 	  if (!charset1)
1632 	    goto invalid_byte;
1633 	  this_charset = charset1;
1634 	  break;
1635 
1636 	case ISO_0xA0_or_0xFF:
1637 	  if (! charset1
1638 	      || charset1->code_range[0] == 33
1639 	      || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1640 	    goto invalid_byte;
1641 	  /* This is a graphic character of plane 1. */
1642 	  if (! charset1)
1643 	    goto invalid_byte;
1644 	  this_charset = charset1;
1645 	  break;
1646 
1647 	case ISO_control_0:
1648 	  this_charset = mcharset__ascii;
1649 	  break;
1650 
1651 	case ISO_control_1:
1652 	  goto invalid_byte;
1653 
1654 	case ISO_shift_out:
1655 	  if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1656 	      &&  status->designation[1])
1657 	    {
1658 	      status->invocation[0] = 1;
1659 	      charset0 = status->designation[1];
1660 	      continue;
1661 	    }
1662 	  this_charset = mcharset__ascii;
1663 	  break;
1664 
1665 	case ISO_shift_in:
1666 	  if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1667 	    {
1668 	      status->invocation[0] = 0;
1669 	      charset0 = status->designation[0];
1670 	      continue;
1671 	    }
1672 	  this_charset = mcharset__ascii;
1673 	  break;
1674 
1675 	case ISO_single_shift_2_7:
1676 	  if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1677 	    {
1678 	      this_charset = mcharset__ascii;
1679 	      break;
1680 	    }
1681 	  c1 = 'N';
1682 	  goto label_escape_sequence;
1683 
1684 	case ISO_single_shift_2:
1685 	  if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1686 	    {
1687 	      ONE_MORE_BYTE (c1);
1688 	      if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1689 		  || ! cns_charsets[c1 - 0xA1])
1690 		goto invalid_byte;
1691 	      status->designation[2] = cns_charsets[c1 - 0xA1];
1692 	    }
1693 	  else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1694 	    goto invalid_byte;
1695 	  /* SS2 is handled as an escape sequence of ESC 'N' */
1696 	  c1 = 'N';
1697 	  goto label_escape_sequence;
1698 
1699 	case ISO_single_shift_3:
1700 	  if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1701 	    goto invalid_byte;
1702 	  /* SS2 is handled as an escape sequence of ESC 'O' */
1703 	  c1 = 'O';
1704 	  goto label_escape_sequence;
1705 
1706 	case ISO_control_sequence_introducer:
1707 	  /* CSI is handled as an escape sequence of ESC '[' ...  */
1708 	  c1 = '[';
1709 	  goto label_escape_sequence;
1710 
1711 	case ISO_escape:
1712 	  if (! spec->use_esc)
1713 	    {
1714 	      this_charset = mcharset__ascii;
1715 	      break;
1716 	    }
1717 	  ONE_MORE_BYTE (c1);
1718 	label_escape_sequence:
1719 	  /* Escape sequences handled here are invocation,
1720 	     designation, and direction specification.  */
1721 	  switch (c1)
1722 	    {
1723 	    case '&':	     /* revision of following character set */
1724 	      if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1725 		goto unused_escape_sequence;
1726 	      ONE_MORE_BYTE (c1);
1727 	      if (c1 < '@' || c1 > '~')
1728 		goto invalid_byte;
1729 	      ONE_MORE_BYTE (c1);
1730 	      if (c1 != ISO_CODE_ESC)
1731 		goto invalid_byte;
1732 	      ONE_MORE_BYTE (c1);
1733 	      goto label_escape_sequence;
1734 
1735 	    case '$':	     /* designation of 2-byte character set */
1736 	      if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1737 		goto unused_escape_sequence;
1738 	      ONE_MORE_BYTE (c1);
1739 	      if (c1 >= '@' && c1 <= 'B')
1740 		{ /* designation of JISX0208.1978, GB2312.1980, or
1741 		     JISX0208.1980 */
1742 		  ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1743 		}
1744 	      else if (c1 >= 0x28 && c1 <= 0x2B)
1745 		{ /* designation of (dimension 2, chars 94) character set */
1746 		  ONE_MORE_BYTE (c2);
1747 		  ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1748 		}
1749 	      else if (c1 >= 0x2C && c1 <= 0x2F)
1750 		{ /* designation of (dimension 2, chars 96) character set */
1751 		  ONE_MORE_BYTE (c2);
1752 		  ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1753 		}
1754 	      else
1755 		goto invalid_byte;
1756 	      /* We must update these variables now.  */
1757 	      if (status->invocation[0] >= 0)
1758 		charset0 = status->designation[status->invocation[0]];
1759 	      if (status->invocation[1] >= 0)
1760 		charset1 = status->designation[status->invocation[1]];
1761 	      continue;
1762 
1763 	    case 'n':		/* invocation of locking-shift-2 */
1764 	      if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1765 		  || ! status->designation[2])
1766 		goto invalid_byte;
1767 	      status->invocation[0] = 2;
1768 	      charset0 = status->designation[2];
1769 	      continue;
1770 
1771 	    case 'o':		/* invocation of locking-shift-3 */
1772 	      if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1773 		  || ! status->designation[3])
1774 		goto invalid_byte;
1775 	      status->invocation[0] = 3;
1776 	      charset0 = status->designation[3];
1777 	      continue;
1778 
1779 	    case 'N':		/* invocation of single-shift-2 */
1780 	      if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1781 		     || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1782 		  || ! status->designation[2])
1783 		goto invalid_byte;
1784 	      this_charset = status->designation[2];
1785 	      ONE_MORE_BYTE (c1);
1786 	      if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1787 		goto invalid_byte;
1788 	      break;
1789 
1790 	    case 'O':		/* invocation of single-shift-3 */
1791 	      if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1792 		  || ! status->designation[3])
1793 		goto invalid_byte;
1794 	      this_charset = status->designation[3];
1795 	      ONE_MORE_BYTE (c1);
1796 	      if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1797 		goto invalid_byte;
1798 	      break;
1799 
1800 	    case '[':		/* specification of direction */
1801 	      if (! (spec->flags & MCODING_ISO_ISO6429))
1802 		goto invalid_byte;
1803 	      /* For the moment, nested direction is not supported.
1804 		 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1805 		 left-to-right, and nonzero means right-to-left.  */
1806 	      ONE_MORE_BYTE (c1);
1807 	      switch (c1)
1808 		{
1809 		case ']':	/* end of the current direction */
1810 		case '0':	/* end of the current direction */
1811 		  status->r2l = 0;
1812 		  break;
1813 
1814 		case '1':	/* start of left-to-right direction */
1815 		  ONE_MORE_BYTE (c1);
1816 		  if (c1 != ']')
1817 		    goto invalid_byte;
1818 		  status->r2l = 0;
1819 		  break;
1820 
1821 		case '2':	/* start of right-to-left direction */
1822 		  ONE_MORE_BYTE (c1);
1823 		  if (c1 != ']')
1824 		    goto invalid_byte;
1825 		  status->r2l = 1;
1826 		  break;
1827 
1828 		default:
1829 		  goto invalid_byte;
1830 		}
1831 	      continue;
1832 
1833 	    case '%':
1834 	      {
1835 		char charset_name[16];
1836 		int bytes;
1837 		int i;
1838 
1839 		if (! (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT))
1840 		  goto invalid_byte;
1841 		/* Compound-text uses these escape sequences:
1842 
1843 		ESC % G  -- utf-8 bytes -- ESC % @
1844 		ESC % / 1 M L -- charset name -- STX -- bytes --
1845 		ESC % / 2 M L -- charset name -- STX -- bytes --
1846 		ESC % / 3 M L -- charset name -- STX -- bytes --
1847 		ESC % / 4 M L -- charset name -- STX -- bytes --
1848 
1849 		It also uses this sequence but that is not yet
1850 		supported here.
1851 
1852 		ESC % / 0 M L -- charset name -- STX -- bytes -- */
1853 
1854 		ONE_MORE_BYTE (c1);
1855 		if (c1 == 'G')
1856 		  {
1857 		    status->utf8_shifting = 1;
1858 		    continue;
1859 		  }
1860 		if (c1 == '@')
1861 		  {
1862 		    if (! status->utf8_shifting)
1863 		      goto invalid_byte;
1864 		    status->utf8_shifting = 0;
1865 		    continue;
1866 		  }
1867 		if (c1 != '/')
1868 		  goto invalid_byte;
1869 		ONE_MORE_BYTE (c1);
1870 		if (c1 < '1' || c1 > '4')
1871 		  goto invalid_byte;
1872 		status->non_standard_charset_bytes = c1 - '0';
1873 		ONE_MORE_BYTE (c1);
1874 		ONE_MORE_BYTE (c2);
1875 		if (c1 < 128 || c2 < 128)
1876 		  goto invalid_byte;
1877 		bytes = (c1 - 128) * 128 + (c2 - 128);
1878 		for (i = 0; i < 16; i++)
1879 		  {
1880 		    ONE_MORE_BYTE (c1);
1881 		    if (c1 == ISO_CODE_STX)
1882 		      break;
1883 		    charset_name[i] = TOLOWER (c1);
1884 		  }
1885 		if (i == 16)
1886 		  goto invalid_byte;
1887 		charset_name[i++] = '\0';
1888 		this_charset = find_ctext_non_standard_charset (charset_name);
1889 		if (! this_charset)
1890 		  goto invalid_byte;
1891 		status->non_standard_charset = this_charset;
1892 		status->non_standard_encoding = bytes - i;
1893 		continue;
1894 	      }
1895 
1896 	    default:
1897 	      if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1898 		goto unused_escape_sequence;
1899 	      if (c1 >= 0x28 && c1 <= 0x2B)
1900 		{ /* designation of (dimension 1, chars 94) charset */
1901 		  ONE_MORE_BYTE (c2);
1902 		  ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1903 		}
1904 	      else if (c1 >= 0x2C && c1 <= 0x2F)
1905 		{ /* designation of (dimension 1, chars 96) charset */
1906 		  ONE_MORE_BYTE (c2);
1907 		  ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1908 		}
1909 	      else
1910 		goto invalid_byte;
1911 	      /* We must update these variables now.  */
1912 	      if (status->invocation[0] >= 0)
1913 		charset0 = status->designation[status->invocation[0]];
1914 	      if (status->invocation[1] >= 0)
1915 		charset1 = status->designation[status->invocation[1]];
1916 	      continue;
1917 
1918 	    unused_escape_sequence:
1919 	      UNGET_ONE_BYTE (c1);
1920 	      c1 = ISO_CODE_ESC;
1921 	      this_charset = mcharset__ascii;
1922 	    }
1923 	}
1924 
1925       if (this_charset->dimension == 1)
1926 	{
1927 	  if (this_charset->code_range[1] <= 128)
1928 	    c1 &= 0x7F;
1929 	}
1930       else if (this_charset->dimension == 2)
1931 	{
1932 	  ONE_MORE_BYTE (c2);
1933 	  c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1934 	}
1935       else			/* i.e.  (dimension == 3) */
1936 	{
1937 	  ONE_MORE_BYTE (c2);
1938 	  ONE_MORE_BYTE (c3);
1939 	  c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1940 	}
1941       c1 = DECODE_CHAR (this_charset, c1);
1942       goto emit_char;
1943 
1944     invalid_byte:
1945       if (! converter->lenient)
1946 	break;
1947       REWIND_SRC_TO_BASE ();
1948       c1 = *src++;
1949       this_charset = mcharset__binary;
1950 
1951     emit_char:
1952       if (this_charset != mcharset__ascii
1953 	  && this_charset != charset)
1954 	{
1955 	  TAKEIN_CHARS (mt, nchars - last_nchars,
1956 			dst - (mt->data + mt->nbytes), charset);
1957 	  charset = this_charset;
1958 	  last_nchars = nchars;
1959 	}
1960       EMIT_CHAR (c1);
1961       if (status->non_standard_encoding > 0)
1962 	status->non_standard_encoding -= status->non_standard_charset_bytes;
1963     }
1964   /* We reach here because of an invalid byte.  */
1965   error = 1;
1966 
1967 
1968 
1969  source_end:
1970   TAKEIN_CHARS (mt, nchars - last_nchars,
1971 		dst - (mt->data + mt->nbytes), charset);
1972   return finish_decoding (mt, converter, nchars,
1973 			  source, src_end, src_base, error);
1974 
1975 }
1976 
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  A charset whose code range
   starts at 32 or ends at 255 is designated as a 96-charset, any
   other as a 94-charset.  For a multibyte 94-charset the short form
   (no intermediate byte after '$') is produced only when REG is 0,
   CHARSET->final_byte is '@', 'A', or 'B', and MCODING_ISO_LONG_FORM
   is not set in SPEC->flags.  Update STATUS->designation.

   Jump to `memory_shortage' unless at least 4 bytes are available
   between DST and DST_END, which the macro takes from the calling
   function.  The arguments are evaluated more than once.  */

#define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status)             \
  do {                                                                     \
    const char *intermediate_char_94 = "()*+";                             \
    const char *intermediate_char_96 = ",-./";                             \
                                                                           \
    if (dst + 4 > dst_end)                                                 \
      goto memory_shortage;                                                \
    *dst++ = ISO_CODE_ESC;                                                 \
    if ((charset)->dimension == 1)                                         \
      {                                                                    \
        if ((charset)->code_range[0] != 32                                 \
            && (charset)->code_range[1] != 255)                            \
          *dst++ = (unsigned char) (intermediate_char_94[(reg)]);          \
        else                                                               \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);          \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        *dst++ = '$';                                                      \
        if ((charset)->code_range[0] != 32                                 \
            && (charset)->code_range[1] != 255)                            \
          {                                                                \
            if (((spec)->flags & MCODING_ISO_LONG_FORM)                    \
                || (reg) != 0                                              \
                || (charset)->final_byte < '@'                             \
                || (charset)->final_byte > 'B')                            \
              *dst++ = (unsigned char) (intermediate_char_94[(reg)]);      \
          }                                                                \
        else                                                               \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);          \
      }                                                                    \
    *dst++ = (charset)->final_byte;                                        \
                                                                           \
    (status)->designation[(reg)] = (charset);                              \
  } while (0)
2016 
2017 
/* Emit the ISO-2022 single-shift-2 (resp. single-shift-3) function at
   DST.  When SPEC->flags has MCODING_ISO_EIGHT_BIT the one-byte
   control ISO_CODE_SS2 (resp. ISO_CODE_SS3) is written; otherwise the
   two-byte escape sequence ESC 'N' (resp. ESC 'O') is written.

   Both macros use `dst' and `dst_end' from the expansion site, demand
   room for two bytes in either case, and jump to `memory_shortage'
   when it is missing.  On success STATUS->single_shifting is set.  */

#define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status)	\
  do {							\
    if (dst + 2 > dst_end)				\
      goto memory_shortage;				\
    if (spec->flags & MCODING_ISO_EIGHT_BIT)		\
      *dst++ = ISO_CODE_SS2;				\
    else						\
      {							\
	*dst++ = ISO_CODE_ESC;				\
	*dst++ = 'N';					\
      }							\
    status->single_shifting = 1;			\
  } while (0)


#define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status)	\
  do {							\
    if (dst + 2 > dst_end)				\
      goto memory_shortage;				\
    if (spec->flags & MCODING_ISO_EIGHT_BIT)		\
      *dst++ = ISO_CODE_SS3;				\
    else						\
      {							\
	*dst++ = ISO_CODE_ESC;				\
	*dst++ = 'O';					\
      }							\
    status->single_shifting = 1;			\
  } while (0)
2044 
2045 
/* Emit one of the ISO-2022 locking-shift functions at DST and record
   the newly invoked graphic register in STATUS->invocation[0]:

     ISO2022_ENCODE_SHIFT_IN          ISO_CODE_SI   register 0
     ISO2022_ENCODE_SHIFT_OUT         ISO_CODE_SO   register 1
     ISO2022_ENCODE_LOCKING_SHIFT_2   ESC 'n'       register 2
     ISO2022_ENCODE_LOCKING_SHIFT_3   ESC 'o'       register 3

   All four use `dst' and `dst_end' from the expansion site and jump
   to `memory_shortage' when the output does not fit.  */

#define ISO2022_ENCODE_SHIFT_IN(status)		\
  do {						\
    if (dst + 1 > dst_end)			\
      goto memory_shortage;			\
    *dst = ISO_CODE_SI;				\
    dst++;					\
    status->invocation[0] = 0;			\
  } while (0)


#define ISO2022_ENCODE_SHIFT_OUT(status)	\
  do {						\
    if (dst + 1 > dst_end)			\
      goto memory_shortage;			\
    *dst = ISO_CODE_SO;				\
    dst++;					\
    status->invocation[0] = 1;			\
  } while (0)


#define ISO2022_ENCODE_LOCKING_SHIFT_2(status)	\
  do {						\
    if (dst + 2 > dst_end)			\
      goto memory_shortage;			\
    *dst++ = ISO_CODE_ESC;			\
    *dst++ = 'n';				\
    status->invocation[0] = 2;			\
  } while (0)


#define ISO2022_ENCODE_LOCKING_SHIFT_3(status)	\
  do {						\
    if (dst + 2 > dst_end)			\
      goto memory_shortage;			\
    *dst++ = ISO_CODE_ESC;			\
    *dst++ = 'o';				\
    status->invocation[0] = 3;			\
  } while (0)
2084 
/* Emit ESC '%' 'G' at DST and set STATUS->utf8_shifting.  CHECK_DST
   is asked for the three bytes of the sequence plus LEN more bytes.
   `dst' and `status' are taken from the expansion site.  */

#define ISO2022_ENCODE_UTF8_SHIFT_START(len)	\
  do {						\
    CHECK_DST (3 + len);			\
    dst[0] = ISO_CODE_ESC;			\
    dst[1] = '%';				\
    dst[2] = 'G';				\
    dst += 3;					\
    status->utf8_shifting = 1;			\
  } while (0)


/* Emit ESC '%' '@' at DST and clear STATUS->utf8_shifting.  `dst' and
   `status' are taken from the expansion site.  */

#define ISO2022_ENCODE_UTF8_SHIFT_END()	\
  do {					\
    CHECK_DST (3);			\
    dst[0] = ISO_CODE_ESC;		\
    dst[1] = '%';			\
    dst[2] = '@';			\
    dst += 3;				\
    status->utf8_shifting = 0;		\
  } while (0)
2103 
2104 
/* Start a segment for a non-standard charset at DST.  The output is
   ESC '%' '/' followed by the digit '0' + non_standard_charset_bytes,
   two zero bytes (placeholders that the caller overwrites later),
   the LEN bytes of NAME, and ISO_CODE_STX.

   CHECK_DST is asked for this header plus one character of
   non_standard_charset_bytes bytes.  The macro records the start of
   the header in non_standard_begin and sets non_standard_bytes to
   LEN + 1.  All of `dst', `non_standard_begin', `non_standard_bytes'
   and `non_standard_charset_bytes' come from the expansion site.  */

#define ISO2022_ENCODE_NON_STANDARD(name, len)			\
  do {								\
    CHECK_DST (6 + len + 1 + non_standard_charset_bytes);	\
    non_standard_begin = dst;					\
    dst[0] = ISO_CODE_ESC;					\
    dst[1] = '%';						\
    dst[2] = '/';						\
    dst[3] = '0' + non_standard_charset_bytes;			\
    dst[4] = 0;			/* filled later */		\
    dst[5] = 0;			/* filled later */		\
    dst += 6;							\
    memcpy (dst, name, len);					\
    dst += len;							\
    *dst++ = ISO_CODE_STX;					\
    non_standard_bytes = len + 1;				\
  } while (0)
2119 
2120 
2121 static char *
find_ctext_non_standard_name(MCharset * charset,int * bytes)2122 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2123 {
2124   char *name = msymbol_name (charset->name);
2125 
2126   if (! strcmp (name, "koi8-r"))
2127     *bytes = 1;
2128   else if (! strcmp (name, "big5"))
2129     name = "big5-0", *bytes = 2;
2130   else
2131     return NULL;
2132   return name;
2133 }
2134 
2135 /* Designate CHARSET to a graphic register specified in
2136    SPEC->designation.  If the register is not yet invoked to graphic
2137    left not right, invoke it to graphic left.  DSTP points to a
2138    variable containing a memory address where the output must go.
2139    DST_END is the limit of that memory.
2140 
2141    Return 0 if it succeeds.  Return -1 otherwise, which means that the
2142    memory area is too short.  By side effect, update the variable that
2143    DSTP points to.  */
2144 
2145 static int
iso_2022_designate_invoke_charset(MCodingSystem * coding,MCharset * charset,struct iso_2022_spec * spec,struct iso_2022_status * status,unsigned char ** dstp,unsigned char * dst_end)2146 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2147 				   MCharset *charset,
2148 				   struct iso_2022_spec *spec,
2149 				   struct iso_2022_status *status,
2150 				   unsigned char **dstp,
2151 				   unsigned char *dst_end)
2152 {
2153   int i;
2154   unsigned char *dst = *dstp;
2155 
2156   for (i = 0; i < 4; i++)
2157     if (charset == status->designation[i])
2158       break;
2159 
2160   if (i >= 4)
2161     {
2162       /* CHARSET is not yet designated to any graphic registers.  */
2163       for (i = 0; i < coding->ncharsets; i++)
2164 	if (charset == coding->charsets[i])
2165 	  break;
2166       if (i == coding->ncharsets)
2167 	{
2168 	  for (i = 0; i < mcharset__iso_2022_table.used; i++)
2169 	    if (charset == mcharset__iso_2022_table.charsets[i])
2170 	      break;
2171 	  i += coding->ncharsets;
2172 	}
2173       i = spec->designations[i];
2174       ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2175     }
2176 
2177   if (status->invocation[0] != i
2178       && status->invocation[1] != i)
2179     {
2180       /* Graphic register I is not yet invoked.  */
2181       switch (i)
2182 	{
2183 	case 0:			/* graphic register 0 */
2184 	  ISO2022_ENCODE_SHIFT_IN (status);
2185 	  break;
2186 
2187 	case 1:			/* graphic register 1 */
2188 	  ISO2022_ENCODE_SHIFT_OUT (status);
2189 	  break;
2190 
2191 	case 2:			/* graphic register 2 */
2192 	  if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2193 	    ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2194 	  else
2195 	    ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2196 	  break;
2197 
2198 	case 3:			/* graphic register 3 */
2199 	  if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2200 	    ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2201 	  else
2202 	    ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2203 	  break;
2204 	}
2205     }
2206   *dstp = dst;
2207   return 0;
2208 
2209  memory_shortage:
2210   *dstp = dst;
2211   return -1;
2212 }
2213 
2214 
2215 /* Reset the invocation/designation status to the initial one.  SPEC
2216    and STATUS contain information about the current and initial
2217    invocation /designation status respectively.  DSTP points to a
2218    variable containing a memory address where the output must go.
2219    DST_END is the limit of that memory.
2220 
2221    Return 0 if it succeeds.  Return -1 otherwise, which means that the
2222    memory area is too short.  By side effect, update the variable that
2223    DSTP points to.  */
2224 
2225 static int
iso_2022_reset_invocation_designation(struct iso_2022_spec * spec,struct iso_2022_status * status,unsigned char ** dstp,unsigned char * dst_end)2226 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2227 				       struct iso_2022_status *status,
2228 				       unsigned char **dstp,
2229 				       unsigned char *dst_end)
2230 {
2231   unsigned char *dst = *dstp;
2232   int i;
2233 
2234   /* Reset the invocation status of GL.  We have not yet supported GR
2235      invocation.  */
2236   if (status->invocation[0] != spec->initial_invocation[0]
2237 	&& spec->initial_invocation[0] >= 0)
2238     {
2239       if (spec->initial_invocation[0] == 0)
2240 	ISO2022_ENCODE_SHIFT_IN (status);
2241       else if (spec->initial_invocation[0] == 1)
2242 	ISO2022_ENCODE_SHIFT_OUT (status);
2243       else if (spec->initial_invocation[0] == 2)
2244 	ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2245       else			/* i.e. spec->initial_invocation[0] == 3 */
2246 	ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2247     }
2248 
2249   /* Reset the designation status of G0..G3.  */
2250   for (i = 0; i < 4; i++)
2251     if (status->designation[i] != spec->initial_designation[i]
2252 	&& spec->initial_designation[i])
2253       {
2254 	MCharset *charset = spec->initial_designation[i];
2255 
2256 	ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2257       }
2258 
2259   *dstp = dst;
2260   return 0;
2261 
2262  memory_shortage:
2263   *dstp = dst;
2264   return -1;
2265 }
2266 
2267 
2268 static int
encode_coding_iso_2022(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)2269 encode_coding_iso_2022 (MText *mt, int from, int to,
2270 		       unsigned char *destination, int dst_bytes,
2271 		       MConverter *converter)
2272 {
2273   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2274   MCodingSystem *coding = internal->coding;
2275   unsigned char *src, *src_end;
2276   unsigned char *dst = destination;
2277   unsigned char *dst_end = dst + dst_bytes;
2278   int nchars = 0;
2279   unsigned char *dst_base;
2280   struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2281   int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2282   struct iso_2022_status *status
2283     = (struct iso_2022_status *) &(converter->status);
2284   MCharset *primary, *charset0, *charset1;
2285   int next_primary_change;
2286   int ncharsets = coding->ncharsets;
2287   MCharset **charsets = coding->charsets;
2288   MCharset *cns_charsets[15];
2289   int ascii_compatible = coding->ascii_compatible;
2290   MCharset *non_standard_charset = NULL;
2291   int non_standard_charset_bytes = 0;
2292   int non_standard_bytes = 0;
2293   unsigned char *non_standard_begin = NULL;
2294   enum MTextFormat format = mt->format;
2295 
2296   SET_SRC (mt, format, from, to);
2297 
2298   if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2299     {
2300       int i;
2301 
2302       memset (cns_charsets, 0, sizeof (cns_charsets));
2303       for (i = 0; i < ncharsets; i++)
2304 	if (charsets[i]->dimension == 2)
2305 	  {
2306 	    int final = charsets[i]->final_byte;
2307 
2308 	    if (final >= 'G' && final <= 'M')
2309 	      cns_charsets[final - 'G'] = charsets[i];
2310 	    else if (final < 0)
2311 	      cns_charsets[14] = charsets[i];
2312 	  }
2313     }
2314 
2315   next_primary_change = from;
2316   primary = NULL;
2317   charset0 = status->designation[status->invocation[0]];
2318   charset1 = (status->invocation[1] < 0 ? NULL
2319 	      : status->designation[status->invocation[1]]);
2320 
2321   while (1)
2322     {
2323       int bytes, c;
2324 
2325       dst_base = dst;
2326       ONE_MORE_CHAR (c, bytes, format);
2327 
2328       if (c < 128 && ascii_compatible)
2329 	{
2330 	  if (status->utf8_shifting)
2331 	    ISO2022_ENCODE_UTF8_SHIFT_END ();
2332 	  CHECK_DST (1);
2333 	  *dst++ = c;
2334 	}
2335       else if (c <= 32 || c == 127)
2336 	{
2337 	  if (status->utf8_shifting)
2338 	    ISO2022_ENCODE_UTF8_SHIFT_END ();
2339 	  if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2340 	      || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2341 	    {
2342 	      if (iso_2022_reset_invocation_designation (spec, status,
2343 							 &dst, dst_end) < 0)
2344 		goto insufficient_destination;
2345 	      charset0 = status->designation[status->invocation[0]];
2346 	      charset1 = (status->invocation[1] < 0 ? NULL
2347 			  : status->designation[status->invocation[1]]);
2348 	    }
2349 	  CHECK_DST (1);
2350 	  *dst++ = c;
2351 	}
2352       else
2353 	{
2354 	  unsigned code = MCHAR_INVALID_CODE;
2355 	  MCharset *charset = NULL;
2356 	  int gr_mask;
2357 	  int pos = from + nchars;
2358 
2359 	  if (pos >= next_primary_change)
2360 	    {
2361 	      MSymbol primary_charset
2362 		= (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2363 	      primary = MCHARSET (primary_charset);
2364 	      if (primary && primary != mcharset__binary)
2365 		{
2366 		  if (primary->final_byte <= 0)
2367 		    primary = NULL;
2368 		  else if (! full_support)
2369 		    {
2370 		      int i;
2371 
2372 		      for (i = 0; i < ncharsets; i++)
2373 			if (primary == charsets[i])
2374 			  break;
2375 		      if (i == ncharsets)
2376 			primary = NULL;
2377 		    }
2378 		}
2379 
2380 	      mtext_prop_range (mt, Mcharset, pos,
2381 				NULL, &next_primary_change, 0);
2382 	    }
2383 
2384 	  if (primary && primary != mcharset__binary)
2385 	    {
2386 	      code = ENCODE_CHAR (primary, c);
2387 	      if (code != MCHAR_INVALID_CODE)
2388 		charset = primary;
2389 	    }
2390 	  if (! charset)
2391 	    {
2392 	      if (c <= 32 || c == 127)
2393 		{
2394 		  code = c;
2395 		  charset = mcharset__ascii;
2396 		}
2397 	      else
2398 		{
2399 		  int i;
2400 
2401 		  for (i = 0; i < ncharsets; i++)
2402 		    {
2403 		      charset = charsets[i];
2404 		      code = ENCODE_CHAR (charset, c);
2405 		      if (code != MCHAR_INVALID_CODE)
2406 			break;
2407 		    }
2408 		  if (i == ncharsets)
2409 		    {
2410 		      if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2411 			{
2412 			  for (i = 0; i < mcharset__iso_2022_table.used; i++)
2413 			    {
2414 			      charset = mcharset__iso_2022_table.charsets[i];
2415 			      code = ENCODE_CHAR (charset, c);
2416 			      if (code != MCHAR_INVALID_CODE)
2417 				break;
2418 			    }
2419 			  if (i == mcharset__iso_2022_table.used)
2420 			    {
2421 			      if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2422 				goto unsupported_char;
2423 			      converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2424 			      goto finish;
2425 			    }
2426 			}
2427 		      else
2428 			goto unsupported_char;
2429 		    }
2430 		}
2431 	    }
2432 
2433 	  if (charset
2434 	      && (charset->final_byte >= 0
2435 		  || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2436 	    {
2437 	      if (code >= 0x80 && code < 0xA0)
2438 		goto unsupported_char;
2439 	      code &= 0x7F7F7F7F;
2440 	      if (status->utf8_shifting)
2441 		ISO2022_ENCODE_UTF8_SHIFT_END ();
2442 	      if (charset == charset0)
2443 		gr_mask = 0;
2444 	      else if (charset == charset1)
2445 		gr_mask = 0x80;
2446 	      else
2447 		{
2448 		  unsigned char *p = NULL;
2449 
2450 		  if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2451 		    {
2452 		      int i;
2453 
2454 		      if (cns_charsets[0] == charset)
2455 			{
2456 			  CHECK_DST (2);
2457 			}
2458 		      else
2459 			{
2460 			  for (i = 1; i < 15; i++)
2461 			    if (cns_charsets[i] == charset)
2462 			      break;
2463 			  CHECK_DST (4);
2464 			  *dst++ = ISO_CODE_SS2;
2465 			  *dst++ = 0xA1 + i;
2466 			}
2467 		      status->single_shifting = 1;
2468 		      p = dst;
2469 		    }
2470 		  else
2471 		    {
2472 		      if (iso_2022_designate_invoke_charset
2473 			  (coding, charset, spec, status, &dst, dst_end) < 0)
2474 			goto insufficient_destination;
2475 		      charset0 = status->designation[status->invocation[0]];
2476 		      charset1 = (status->invocation[1] < 0 ? NULL
2477 				  : status->designation[status->invocation[1]]);
2478 		    }
2479 		  if (status->single_shifting)
2480 		    gr_mask
2481 		      = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2482 		  else if (charset == charset0)
2483 		    gr_mask = 0;
2484 		  else
2485 		    gr_mask = 0x80;
2486 		}
2487 	      if (charset->dimension == 1)
2488 		{
2489 		  CHECK_DST (1);
2490 		  *dst++ = code | gr_mask;
2491 		}
2492 	      else if (charset->dimension == 2)
2493 		{
2494 		  CHECK_DST (2);
2495 		  *dst++ = (code >> 8) | gr_mask;
2496 		  *dst++ = (code & 0xFF) | gr_mask;
2497 		}
2498 	      else
2499 		{
2500 		  CHECK_DST (3);
2501 		  *dst++ = (code >> 16) | gr_mask;
2502 		  *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2503 		  *dst++ = (code & 0xFF) | gr_mask;
2504 		}
2505 	      status->single_shifting = 0;
2506 	    }
2507 	  else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2508 	    {
2509 	      if (charset != non_standard_charset)
2510 		{
2511 		  char *name = (find_ctext_non_standard_name
2512 				(charset, &non_standard_charset_bytes));
2513 
2514 		  if (name)
2515 		    {
2516 		      int len = strlen (name);
2517 
2518 		      ISO2022_ENCODE_NON_STANDARD (name, len);
2519 		      non_standard_charset = charset;
2520 		    }
2521 		  else
2522 		    non_standard_charset = NULL;
2523 		}
2524 
2525 	      if (non_standard_charset)
2526 		{
2527 		  if (dst + non_standard_charset_bytes > dst_end)
2528 		    goto insufficient_destination;
2529 		  non_standard_bytes += non_standard_charset_bytes;
2530 		  non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2531 		  non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2532 		  if (non_standard_charset_bytes == 1)
2533 		    *dst++ = code;
2534 		  else if (non_standard_charset_bytes == 2)
2535 		    *dst++ = code >> 8, *dst++ = code & 0xFF;
2536 		  else if (non_standard_charset_bytes == 3)
2537 		    *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2538 		      *dst++ = code & 0xFF;
2539 		  else		/* i.e non_standard_charset_bytes == 3 */
2540 		    *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2541 		      *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2542 		}
2543 	      else
2544 		{
2545 		  int len = CHAR_BYTES (c);
2546 
2547 		  if (c >= 0x110000)
2548 		    goto unsupported_char;
2549 		  if (! status->utf8_shifting)
2550 		    ISO2022_ENCODE_UTF8_SHIFT_START (len);
2551 		  else
2552 		    CHECK_DST (len);
2553 		  CHAR_STRING (c, dst);
2554 		}
2555 	    }
2556 	  else
2557 	    goto unsupported_char;
2558 	}
2559       src += bytes;
2560       nchars++;
2561       continue;
2562 
2563     unsupported_char:
2564       {
2565 	int len;
2566 
2567 	if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2568 					       spec, status,
2569 					       &dst, dst_end) < 0)
2570 	  goto insufficient_destination;
2571 	if (! converter->lenient)
2572 	  break;
2573 	len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2574 	if (len == 0)
2575 	  goto insufficient_destination;
2576 	dst += len;
2577 	src += bytes;
2578 	nchars++;
2579       }
2580     }
2581   /* We reach here because of an unsupported char.  */
2582   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2583   goto finish;
2584 
2585  insufficient_destination:
2586   dst = dst_base;
2587   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2588 
2589  finish:
2590   if (converter->result == MCONVERSION_RESULT_SUCCESS
2591       && converter->last_block)
2592     {
2593       if (status->utf8_shifting)
2594 	{
2595 	  ISO2022_ENCODE_UTF8_SHIFT_END ();
2596 	  dst_base = dst;
2597 	}
2598       if (spec->flags & MCODING_ISO_RESET_AT_EOL
2599 	  && charset0 != spec->initial_designation[0])
2600 	{
2601 	  if (iso_2022_reset_invocation_designation (spec, status,
2602 						     &dst, dst_end) < 0)
2603 	    goto insufficient_destination;
2604 	}
2605     }
2606   converter->nchars += nchars;
2607   converter->nbytes += dst - destination;
2608   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2609 }
2610 
2611 
/* Stuff for coding-systems of type MCODING_TYPE_MISC.  */
2613 
2614 /* For SJIS handling... */
2615 
/* Convert the Shift_JIS byte pair S1 (lead) and S2 (trail) to a
   two-byte JIS code: the first JIS byte in bits 8..15, the second in
   bits 0..7.  Arguments are evaluated more than once, so they must
   not have side effects.  */

#define SJIS_TO_JIS(s1, s2)						\
  ((s2) >= 0x9F								\
   ? ((((s1) * 2 - ((s1) >= 0xE0 ? 0x160 : 0xE0)) << 8)		\
      | ((s2) - 0x7E))							\
   : ((((s1) * 2 - ((s1) >= 0xE0 ? 0x161 : 0xE1)) << 8)		\
      | ((s2) - ((s2) >= 0x7F ? 0x20 : 0x1F))))

/* Convert the JIS bytes C1 and C2 to a Shift_JIS code: the lead byte
   in bits 8..15, the trail byte in bits 0..7.  This is the inverse of
   SJIS_TO_JIS.  Arguments are evaluated more than once, so they must
   not have side effects.  */

#define JIS_TO_SJIS(c1, c2)						\
  (((c1) & 1)								\
   ? ((((c1) / 2 + ((c1) < 0x5F ? 0x71 : 0xB1)) << 8)			\
      | ((c2) + ((c2) >= 0x60 ? 0x20 : 0x1F)))				\
   : ((((c1) / 2 + ((c1) < 0x5F ? 0x70 : 0xB0)) << 8)			\
      | ((c2) + 0x7E)))
2629 
2630 
2631 static int
reset_coding_sjis(MConverter * converter)2632 reset_coding_sjis (MConverter *converter)
2633 {
2634   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2635   MCodingSystem *coding = internal->coding;
2636 
2637   if (! coding->ready)
2638     {
2639       MSymbol kanji_sym = msymbol ("jisx0208.1983");
2640       MCharset *kanji = MCHARSET (kanji_sym);
2641       MSymbol kana_sym = msymbol ("jisx0201-kana");
2642       MCharset *kana = MCHARSET (kana_sym);
2643 
2644       if (! kanji || ! kana)
2645 	return -1;
2646       coding->ncharsets = 3;
2647       coding->charsets[1] = kanji;
2648       coding->charsets[2] = kana;
2649     }
2650   coding->ready = 1;
2651   return 0;
2652 }
2653 
2654 static int
decode_coding_sjis(const unsigned char * source,int src_bytes,MText * mt,MConverter * converter)2655 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2656 		    MConverter *converter)
2657 {
2658   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2659   MCodingSystem *coding = internal->coding;
2660   const unsigned char *src = internal->carryover;
2661   const unsigned char *src_stop = src + internal->carryover_bytes;
2662   const unsigned char *src_end = source + src_bytes;
2663   const unsigned char *src_base;
2664   unsigned char *dst = mt->data + mt->nbytes;
2665   unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2666   int nchars = 0;
2667   int last_nchars = 0;
2668   int at_most = converter->at_most > 0 ? converter->at_most : -1;
2669 
2670   MCharset *charset_roman = coding->charsets[0];
2671   MCharset *charset_kanji = coding->charsets[1];
2672   MCharset *charset_kana = coding->charsets[2];
2673   MCharset *charset = mcharset__ascii;
2674   int error = 0;
2675 
2676   while (1)
2677     {
2678       MCharset *this_charset;
2679       int c, c1, c2;
2680 
2681       ONE_MORE_BASE_BYTE (c1);
2682 
2683       c2 = -1;
2684       if (c1 < 0x80)
2685 	{
2686 	  this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2687 			  ? mcharset__ascii
2688 			  : charset_roman);
2689 	}
2690       else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
2691 	{
2692 	  ONE_MORE_BYTE (c2);
2693 	  if ((c2 >= 0x40 && c2 <= 0x7F) || (c2 >= 80 && c2 <= 0xFC))
2694 	    {
2695 	      this_charset = charset_kanji;
2696 	      c1 = SJIS_TO_JIS (c1, c2);
2697 	    }
2698 	  else
2699 	    goto invalid_byte;
2700 	}
2701       else if (c1 >= 0xA1 && c1 <= 0xDF)
2702 	{
2703 	  this_charset = charset_kana;
2704 	  c1 &= 0x7F;
2705 	}
2706       else
2707 	goto invalid_byte;
2708 
2709       c = DECODE_CHAR (this_charset, c1);
2710       if (c >= 0)
2711 	goto emit_char;
2712 
2713     invalid_byte:
2714       if (! converter->lenient)
2715 	break;
2716       REWIND_SRC_TO_BASE ();
2717       c = *src++;
2718       this_charset = mcharset__binary;
2719 
2720     emit_char:
2721       if (this_charset != mcharset__ascii
2722 	  && this_charset != charset)
2723 	{
2724 	  TAKEIN_CHARS (mt, nchars - last_nchars,
2725 			dst - (mt->data + mt->nbytes), charset);
2726 	  charset = this_charset;
2727 	  last_nchars = nchars;
2728 	}
2729       EMIT_CHAR (c);
2730     }
2731   /* We reach here because of an invalid byte.  */
2732   error = 1;
2733 
2734  source_end:
2735   TAKEIN_CHARS (mt, nchars - last_nchars,
2736 		dst - (mt->data + mt->nbytes), charset);
2737   return finish_decoding (mt, converter, nchars,
2738 			  source, src_end, src_base, error);
2739 }
2740 
2741 static int
encode_coding_sjis(MText * mt,int from,int to,unsigned char * destination,int dst_bytes,MConverter * converter)2742 encode_coding_sjis (MText *mt, int from, int to,
2743 		    unsigned char *destination, int dst_bytes,
2744 		    MConverter *converter)
2745 {
2746   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2747   MCodingSystem *coding = internal->coding;
2748   unsigned char *src, *src_end;
2749   unsigned char *dst = destination;
2750   unsigned char *dst_end = dst + dst_bytes;
2751   int nchars = 0;
2752   MCharset *charset_roman = coding->charsets[0];
2753   MCharset *charset_kanji = coding->charsets[1];
2754   MCharset *charset_kana = coding->charsets[2];
2755   enum MTextFormat format = mt->format;
2756 
2757   SET_SRC (mt, format, from, to);
2758 
2759   while (1)
2760     {
2761       int c, bytes, len;
2762       unsigned code;
2763 
2764       ONE_MORE_CHAR (c, bytes, format);
2765 
2766       if (c <= 0x20 || c == 0x7F)
2767 	{
2768 	  CHECK_DST (1);
2769 	  *dst++ = c;
2770 	}
2771       else
2772 	{
2773 	  if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2774 	    {
2775 	      CHECK_DST (1);
2776 	      *dst++ = c;
2777 	    }
2778 	  else if ((code = ENCODE_CHAR (charset_kanji, c))
2779 		   != MCHAR_INVALID_CODE)
2780 	    {
2781 	      int c1 = code >> 8, c2 = code & 0xFF;
2782 	      code = JIS_TO_SJIS (c1, c2);
2783 	      CHECK_DST (2);
2784 	      *dst++ = code >> 8;
2785 	      *dst++ = code & 0xFF;
2786 	    }
2787 	  else if ((code = ENCODE_CHAR (charset_kana, c))
2788 		   != MCHAR_INVALID_CODE)
2789 	    {
2790 	      CHECK_DST (1);
2791 	      *dst++ = code | 0x80;
2792 	    }
2793 	  else
2794 	    {
2795 	      if (! converter->lenient)
2796 		break;
2797 	      len = encode_unsupporeted_char (c, dst, dst_end,
2798 					      mt, from + nchars);
2799 	      if (len == 0)
2800 		goto insufficient_destination;
2801 	      dst += len;
2802 	    }
2803 	}
2804       src += bytes;
2805       nchars++;
2806     }
2807   /* We reach here because of an unsupported char.  */
2808   converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2809   goto finish;
2810 
2811  insufficient_destination:
2812   converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2813 
2814  finish:
2815   converter->nchars += nchars;
2816   converter->nbytes += dst - destination;
2817   return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2818 }
2819 
2820 
2821 static MCodingSystem *
find_coding(MSymbol name)2822 find_coding (MSymbol name)
2823 {
2824   MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2825 
2826   if (! coding)
2827     {
2828       MPlist *plist, *pl;
2829       MSymbol sym = msymbol__canonicalize (name);
2830 
2831       plist = mplist_find_by_key (coding_definition_list, sym);
2832       if (! plist)
2833 	return NULL;
2834       pl = MPLIST_PLIST (plist);
2835       name = MPLIST_VAL (pl);
2836       mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2837 			   NULL, NULL, NULL, NULL);
2838       coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2839       plist = mplist_pop (plist);
2840       M17N_OBJECT_UNREF (plist);
2841     }
2842   return coding;
2843 }
2844 
/* Binding kinds.  NOTE(review): the users of these constants are
   outside this section; presumably they record whether a converter is
   bound to nothing, to a memory buffer, or to a stream -- confirm.  */
#define BINDING_NONE 0
#define BINDING_BUFFER 1
#define BINDING_STREAM 2

/* NOTE(review): presumably the size in bytes of a conversion work
   area (64KiB); the user is outside this section -- confirm.  */
#define CONVERT_WORKSIZE 0x10000
2850 
2851 
2852 /* Internal API */
2853 
/* Initialize the coding module: allocate the coding-system list and
   the list of pending definitions, fill the ISO-2022 byte
   classification table, intern the symbols used by this module, and
   define the predefined coding systems (us-ascii, iso-8859-1, the
   utf-* family and sjis).  Always return 0.  */

int
mcoding__init (void)
{
  int i;
  MPlist *param, *charsets, *pl;

  MLIST_INIT1 (&coding_list, codings, 128);
  coding_definition_list = mplist ();

  /* ISO-2022 specific initialize routine.  */
  /* Classify every byte value: first the four broad ranges, then the
     range boundaries, then the individual control codes that have a
     class of their own.  */
  for (i = 0; i < 0x20; i++)
    iso_2022_code_class[i] = ISO_control_0;
  for (i = 0x21; i < 0x7F; i++)
    iso_2022_code_class[i] = ISO_graphic_plane_0;
  for (i = 0x80; i < 0xA0; i++)
    iso_2022_code_class[i] = ISO_control_1;
  for (i = 0xA1; i < 0xFF; i++)
    iso_2022_code_class[i] = ISO_graphic_plane_1;
  iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
  iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
  iso_2022_code_class[0x0E] = ISO_shift_out;
  iso_2022_code_class[0x0F] = ISO_shift_in;
  iso_2022_code_class[0x19] = ISO_single_shift_2_7;
  iso_2022_code_class[0x1B] = ISO_escape;
  iso_2022_code_class[0x8E] = ISO_single_shift_2;
  iso_2022_code_class[0x8F] = ISO_single_shift_3;
  iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;

  Mcoding = msymbol ("coding");

  Mutf = msymbol ("utf");
  Miso_2022 = msymbol ("iso-2022");

  Mreset_at_eol = msymbol ("reset-at-eol");
  Mreset_at_cntl = msymbol ("reset-at-cntl");
  Meight_bit = msymbol ("eight-bit");
  Mlong_form = msymbol ("long-form");
  Mdesignation_g0 = msymbol ("designation-g0");
  Mdesignation_g1 = msymbol ("designation-g1");
  Mdesignation_ctext = msymbol ("designation-ctext");
  Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
  Mlocking_shift = msymbol ("locking-shift");
  Msingle_shift = msymbol ("single-shift");
  Msingle_shift_7 = msymbol ("single-shift-7");
  Meuc_tw_shift = msymbol ("euc-tw-shift");
  Miso_6429 = msymbol ("iso-6429");
  Mrevision_number = msymbol ("revision-number");
  Mfull_support = msymbol ("full-support");
  Mmaybe = msymbol ("maybe");

  Mtype = msymbol ("type");
  Mcharsets = msymbol_as_managing_key ("charsets");
  Mflags = msymbol_as_managing_key ("flags");
  Mdesignation = msymbol_as_managing_key ("designation");
  Minvocation = msymbol_as_managing_key ("invocation");
  Mcode_unit = msymbol ("code-unit");
  Mbom = msymbol ("bom");
  Mlittle_endian = msymbol ("little-endian");

  /* A single parameter list PARAM and a single charset list CHARSETS
     are reused for all the definitions below; each definition sees
     the values left by the previous ones unless they are overwritten
     first, so the order of the statements matters.  */
  param = mplist ();
  charsets = mplist ();
  pl = param;
  /* Setup predefined codings.  */
  mplist_set (charsets, Msymbol, Mcharset_ascii);
  pl = mplist_add (pl, Mtype, Mcharset);
  pl = mplist_add (pl, Mcharsets, charsets);
  Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
					  NULL, NULL, NULL, NULL);

  {
    /* Make "ANSI_X3.4-1968", and its canonicalized form, refer to the
       us-ascii coding system too.  */
    MSymbol alias = msymbol ("ANSI_X3.4-1968");
    MCodingSystem *coding
      = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);

    msymbol_put (alias, Mcoding, coding);
    alias = msymbol__canonicalize (alias);
    msymbol_put (alias, Mcoding, coding);
  }

  mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
  Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
					    NULL, NULL, NULL, NULL);

  /* From here on the type is `utf'; only the charset, code unit, BOM
     and endianness parameters change between definitions.  */
  mplist_set (charsets, Msymbol, Mcharset_m17n);
  mplist_put (param, Mtype, Mutf);
  mplist_put (param, Mcode_unit, (void *) 8);
  Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
					    NULL, NULL, NULL, NULL);

  mplist_set (charsets, Msymbol, Mcharset_unicode);
  Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
				       NULL, NULL, NULL, NULL);

  mplist_put (param, Mcode_unit, (void *) 16);
  mplist_put (param, Mbom, Mmaybe);
  /* Where WORDS_BIGENDIAN is not defined, utf-16 and utf-32 are
     defined with the little-endian parameter set.  */
#ifndef WORDS_BIGENDIAN
  mplist_put (param, Mlittle_endian, Mt);
#endif
  Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
					NULL, NULL, NULL, NULL);

  mplist_put (param, Mcode_unit, (void *) 32);
  Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
					NULL, NULL, NULL, NULL);

  /* The explicit-endian variants have the BOM parameter set to
     Mnil.  */
  mplist_put (param, Mcode_unit, (void *) 16);
  mplist_put (param, Mbom, Mnil);
  mplist_put (param, Mlittle_endian, Mnil);
  Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
					  NULL, NULL, NULL, NULL);

  mplist_put (param, Mcode_unit, (void *) 32);
  Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
					  NULL, NULL, NULL, NULL);

  mplist_put (param, Mcode_unit, (void *) 16);
  mplist_put (param, Mlittle_endian, Mt);
  Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
					  NULL, NULL, NULL, NULL);

  mplist_put (param, Mcode_unit, (void *) 32);
  Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
					  NULL, NULL, NULL, NULL);

  /* sjis has type Mnil and supplies its own reset, decode and encode
     functions.  Only ASCII is listed here; reset_coding_sjis installs
     the other two charsets.  */
  mplist_put (param, Mtype, Mnil);
  pl = mplist ();
  mplist_add (pl, Msymbol, msymbol ("Shift_JIS"));
  mplist_put (param, Maliases, pl);
  mplist_set (charsets, Msymbol, Mcharset_ascii);
  Mcoding_sjis = mconv_define_coding ("sjis", param,
				      reset_coding_sjis,
				      decode_coding_sjis,
				      encode_coding_sjis, NULL);

  M17N_OBJECT_UNREF (charsets);
  M17N_OBJECT_UNREF (param);
  M17N_OBJECT_UNREF (pl);

  return 0;
}
2994 
2995 void
mcoding__fini(void)2996 mcoding__fini (void)
2997 {
2998   int i;
2999   MPlist *plist;
3000 
3001   for (i = 0; i < coding_list.used; i++)
3002     {
3003       MCodingSystem *coding = coding_list.codings[i];
3004 
3005       if (coding->extra_info)
3006 	free (coding->extra_info);
3007       if (coding->extra_spec)
3008 	{
3009 	  if (coding->type == Miso_2022)
3010 	    free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3011 	  free (coding->extra_spec);
3012 	}
3013       free (coding);
3014     }
3015   MLIST_FREE1 (&coding_list, codings);
3016   MPLIST_DO (plist, coding_definition_list)
3017     M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3018   M17N_OBJECT_UNREF (coding_definition_list);
3019 }
3020 
3021 void
mconv__register_charset_coding(MSymbol sym)3022 mconv__register_charset_coding (MSymbol sym)
3023 {
3024   MSymbol name = msymbol__canonicalize (sym);
3025 
3026   if (! mplist_find_by_key (coding_definition_list, name))
3027     {
3028       MPlist *param = mplist (), *charsets = mplist ();
3029 
3030       mplist_set (charsets, Msymbol, sym);
3031       mplist_add (param, Msymbol, sym);
3032       mplist_add (param, Mtype, Mcharset);
3033       mplist_add (param, Mcharsets, charsets);
3034       mplist_put (coding_definition_list, name, param);
3035       M17N_OBJECT_UNREF (charsets);
3036     }
3037 }
3038 
3039 
3040 int
mcoding__load_from_database()3041 mcoding__load_from_database ()
3042 {
3043   MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3044   MPlist *def_list, *plist;
3045   MPlist *definitions = coding_definition_list;
3046   int mdebug_flag = MDEBUG_CODING;
3047 
3048   if (! mdb)
3049     return 0;
3050   MDEBUG_PUSH_TIME ();
3051   def_list = (MPlist *) mdatabase_load (mdb);
3052   MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to load the data."));
3053   MDEBUG_POP_TIME ();
3054   if (! def_list)
3055     return -1;
3056 
3057   MDEBUG_PUSH_TIME ();
3058   MPLIST_DO (plist, def_list)
3059     {
3060       MPlist *pl, *aliases;
3061       MSymbol name, canonicalized;
3062 
3063       if (! MPLIST_PLIST_P (plist))
3064 	MERROR (MERROR_CHARSET, -1);
3065       pl = MPLIST_PLIST (plist);
3066       if (! MPLIST_SYMBOL_P (pl))
3067 	MERROR (MERROR_CHARSET, -1);
3068       name = MPLIST_SYMBOL (pl);
3069       canonicalized = msymbol__canonicalize (name);
3070       pl = mplist__from_plist (MPLIST_NEXT (pl));
3071       mplist_push (pl, Msymbol, name);
3072       definitions = mplist_add (definitions, canonicalized, pl);
3073       aliases = mplist_get (pl, Maliases);
3074       if (aliases)
3075 	MPLIST_DO (aliases, aliases)
3076 	  if (MPLIST_SYMBOL_P (aliases))
3077 	    {
3078 	      name = MPLIST_SYMBOL (aliases);
3079 	      canonicalized = msymbol__canonicalize (name);
3080 	      definitions = mplist_add (definitions, canonicalized, pl);
3081 	      M17N_OBJECT_REF (pl);
3082 	    }
3083     }
3084 
3085   M17N_OBJECT_UNREF (def_list);
3086   MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to parse the loaded data."));
3087   MDEBUG_POP_TIME ();
3088   return 0;
3089 }
3090 
3091 /*** @} */
3092 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3093 
3094 
3095 /* External API */
3096 
3097 /*** @addtogroup m17nConv */
3098 /*** @{ */
3099 /*=*/
3100 
3101 /***en @name Variables: Symbols representing coding systems */
3102 /***ja @name �ѿ�: ����Ѥߥ����ɷϤ���ꤹ�뤿��Υ���ܥ� */
3103 /*** @{ */
3104 /*=*/
3105 
3106 /***en
3107     @brief Symbol for the coding system US-ASCII.
3108 
3109     The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3110     represents a coding system for the CES US-ASCII.  */
3111 
3112 /***ja
3113     @brief US-ASCII �����ɷϤΥ���ܥ�.
3114 
3115     ����ܥ� #Mcoding_us_ascii �� <tt>"us-ascii"</tt> �Ȥ���̾���������
3116     CES US-ASCII �ѤΥ����ɷϤ�����
3117     */
3118 MSymbol Mcoding_us_ascii;
3119 /*=*/
3120 
3121 /***en
3122     @brief Symbol for the coding system ISO-8859-1.
3123 
3124     The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3125     represents a coding system for the CES ISO-8859-1.  */
3126 
3127 /***ja
3128     @brief ISO-8859-1 �����ɷϤΥ���ܥ�.
3129 
3130     ����ܥ� #Mcoding_iso_8859_1 �� <tt>"iso-8859-1"</tt>
3131     �Ȥ���̾���������CES ISO-8859-1 �ѤΥ����ɷϤ�����  */
3132 
3133 MSymbol Mcoding_iso_8859_1;
3134 /*=*/
3135 
3136 /***en
3137     @brief Symbol for the coding system UTF-8.
3138 
3139     The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3140     a coding system for the CES UTF-8.  */
3141 
3142 /***ja
3143     @brief UTF-8 �����ɷϤΥ���ܥ�.
3144 
3145     ����ܥ� #Mcoding_utf_8 �� <tt>"utf-8"</tt> �Ȥ���̾���������CES
3146     UTF-8 �ѤΥ����ɷϤ�����
3147      */
3148 
3149 MSymbol Mcoding_utf_8;
3150 /*=*/
3151 
3152 /***en
3153     @brief Symbol for the coding system UTF-8-FULL.
3154 
3155     The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3156     represents a coding system that is an extension of UTF-8.  This
3157     coding system uses the same encoding algorithm as UTF-8 but is not
3158     limited to the Unicode characters.  It can encode all characters
3159     supported by the m17n library.  */
3160 
3161 /***ja
3162     @brief UTF-8-FULL �����ɷϤΥ���ܥ�.
3163 
3164     ����ܥ� #Mcoding_utf_8_full �� <tt>"utf-8-full"</tt>
3165     �Ȥ���̾���������<tt>"UTF-8"</tt> �γ�ĥ�Ǥ��륳���ɷϤ�����
3166     ���Υ����ɷϤ� UTF-8 ��Ʊ���������ǥ������르�ꥺ����Ѥ��뤬���оݤ�
3167     Unicode ʸ���ˤϸ��ꤵ��ʤ���
3168     �ޤ�m17n �饤�֥�꤬�������Ƥ�ʸ���������ɤ��뤳�Ȥ��Ǥ��롣
3169     */
3170 
3171 MSymbol Mcoding_utf_8_full;
3172 /*=*/
3173 
3174 /***en
3175     @brief Symbol for the coding system UTF-16.
3176 
3177     The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3178     represents a coding system for the CES UTF-16 (RFC 2781).  */
3179 /***ja
3180     @brief UTF-16 �����ɷϤΥ���ܥ�.
3181 
3182     ����ܥ� #Mcoding_utf_16 �� <tt>"utf-16"</tt> �Ȥ���̾���������
3183     CES UTF-16 (RFC 2279) �ѤΥ����ɷϤ�����
3184      */
3185 
3186 MSymbol Mcoding_utf_16;
3187 /*=*/
3188 
3189 /***en
3190     @brief Symbol for the coding system UTF-16BE.
3191 
3192     The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3193     represents a coding system for the CES UTF-16BE (RFC 2781).  */
3194 
3195 /***ja
3196     @brief UTF-16BE �����ɷϤΥ���ܥ�.
3197 
3198     ����ܥ� #Mcoding_utf_16be �� <tt>"utf-16be"</tt> �Ȥ���̾���������
3199     CES UTF-16BE (RFC 2279) �ѤΥ����ɷϤ�����     */
3200 
3201 MSymbol Mcoding_utf_16be;
3202 /*=*/
3203 
3204 /***en
3205     @brief Symbol for the coding system UTF-16LE.
3206 
3207     The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3208     represents a coding system for the CES UTF-16LE (RFC 2781).  */
3209 
3210 /***ja
3211     @brief UTF-16LE �����ɷϤΥ���ܥ�.
3212 
3213     ����ܥ� #Mcoding_utf_16le �� <tt>"utf-16le"</tt> �Ȥ���̾���������
3214     CES UTF-16LE (RFC 2279) �ѤΥ����ɷϤ�����     */
3215 
3216 MSymbol Mcoding_utf_16le;
3217 /*=*/
3218 
3219 /***en
3220     @brief Symbol for the coding system UTF-32.
3221 
3222     The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3223     represents a coding system for the CES UTF-32 (Unicode Standard).  */
3224 
3225 /***ja
3226     @brief UTF-32 �����ɷϤΥ���ܥ�.
3227 
3228     ����ܥ� #Mcoding_utf_32 �� <tt>"utf-32"</tt> �Ȥ���̾���������
3229     CES UTF-32 (RFC 2279) �ѤΥ����ɷϤ�����     */
3230 
3231 MSymbol Mcoding_utf_32;
3232 /*=*/
3233 
3234 /***en
3235     @brief Symbol for the coding system UTF-32BE.
3236 
3237     The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3238     represents a coding system for the CES UTF-32BE (Unicode Standard).  */
3239 /***ja
3240     @brief UTF-32BE �����ɷϤΥ���ܥ�.
3241 
3242     ����ܥ� #Mcoding_utf_32be �� <tt>"utf-32be"</tt> �Ȥ���̾���������
3243     CES UTF-32BE (RFC 2279) �ѤΥ����ɷϤ�����     */
3244 
3245 MSymbol Mcoding_utf_32be;
3246 /*=*/
3247 
3248 /***en
3249     @brief Symbol for the coding system UTF-32LE.
3250 
3251     The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3252     represents a coding system for the CES UTF-32LE (Unicode Standard).  */
3253 /***ja
3254     @brief UTF-32LE �����ɷϤΥ���ܥ�.
3255 
3256     ����ܥ� #Mcoding_utf_32le �� <tt>"utf-32le"</tt> �Ȥ���̾���������
3257     CES UTF-32LE (RFC 2279) �ѤΥ����ɷϤ�����     */
3258 
3259 MSymbol Mcoding_utf_32le;
3260 /*=*/
3261 
3262 /***en
3263     @brief Symbol for the coding system SJIS.
3264 
3265     The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3266     system for the CES Shift-JIS.  */
3267 /***ja
3268     @brief SJIS �����ɷϤΥ���ܥ�.
3269 
3270     ����ܥ� #Mcoding_sjis has �� <tt>"sjis"</tt> �Ȥ���̾���������
3271     CES Shift-JIS�ѤΥ����ɷϤ�����  */
3272 
3273 MSymbol Mcoding_sjis;
3274 /*** @} */
3275 /*=*/
3276 
3277 /***en
3278     @name Variables: Parameter keys for mconv_define_coding ().  */
3279 /***ja
3280     @name �ѿ�:  mconv_define_coding () �ѥѥ�᡼������  */
3281 /*** @{ */
3282 /*=*/
3283 
3284 /***en
3285     Parameter key for mconv_define_coding () (which see). */
3286 /***ja
3287     mconv_define_coding () �ѥѥ�᡼������ (�ܺ٤� mconv_define_coding ()����). */
3288 MSymbol Mtype;		/* Value: symbol giving the type of the coding system.  */
3289 MSymbol Mcharsets;	/* Value: plist (keys #Msymbol) of the supported charsets.  */
3290 MSymbol Mflags;		/* Value: plist (keys #Msymbol) of ISO 2022 control flags.  */
3291 MSymbol Mdesignation;	/* Value: plist (keys #Minteger) of graphic registers, one per charset.  */
3292 MSymbol Minvocation;	/* Value: plist (keys #Minteger) of the registers for the left/right planes.  */
3293 MSymbol Mcode_unit;	/* Value: integer bit length of a code-unit (8, 16, or 32).  */
3294 MSymbol Mbom;		/* Value: symbol; #Mnil = no BOM, #Mmaybe = detect on decoding, else use BOM.  */
3295 MSymbol Mlittle_endian;	/* Value: symbol; #Mnil = big endian, anything else = little endian.  */
3296 /*** @} */
3297 /*=*/
3298 
3299 /***en
3300     @name Variables: Symbols representing coding system types.  */
3301 /***ja
3302     @name �ѿ��� �����ɷϤΥ����פ�������ܥ�.  */
3303 /*** @{ */
3304 /*=*/
3305 
3306 /***en
3307     Symbol that can be a value of the #Mtype parameter of a coding
3308     system used in an argument to the mconv_define_coding () function
3309     (which see).  */
3310 /***ja
3311     �ؿ� mconv_define_coding () �ΰ����Ȥ����Ѥ����륳���ɷϤΥѥ�᡼��
3312     #Mtype ���ͤȤʤ����륷��ܥ롣(�ܺ٤�
3313     mconv_define_coding ()����)��  */
3314 
3315 MSymbol Mutf;
3316 /*=*/
3317 MSymbol Miso_2022;
3318 /*=*/
3319 /*** @} */
3320 /*=*/
3321 
3322 /***en
3323     @name Variables: Symbols appearing in the value of Mflags parameter.  */
3324 /***ja
3325     @name �ѿ��� �ѥ�᡼�� Mflags ���ͤȤʤ����륷��ܥ�.  */
3326 /*** @{ */
3327 /***en
3328     Symbols that can be a value of the @b Mflags parameter of a coding
3329     system used in an argument to the mconv_define_coding () function
3330     (which see).  */
3331 /***ja
3332     �ؿ� mconv_define_coding () �ΰ����Ȥ����Ѥ����륳���ɷϤΥѥ�᡼��
3333     @b Mflags ���ͤȤʤ����륷��ܥ롣(�ܺ٤�
3334     mconv_define_coding ()����)��  */
3335 MSymbol Mreset_at_eol;	/* Reset designation and invocation at end of line.  */
3336 MSymbol Mreset_at_cntl;	/* Reset designation and invocation at a control character.  */
3337 MSymbol Meight_bit;	/* Use the graphic plane right.  */
3338 MSymbol Mlong_form;	/* Use the over-long escape sequences (ESC '$' '(' <final_byte>).  */
3339 MSymbol Mdesignation_g0;	/* With Mfull_support: designate unlisted charsets to G0.  */
3340 MSymbol Mdesignation_g1;	/* With Mfull_support: designate unlisted charsets to G1.  */
3341 MSymbol Mdesignation_ctext;	/* With Mfull_support: designate to G0 or G1 as in Compound Text.  */
3342 MSymbol Mdesignation_ctext_ext;	/* As Mdesignation_ctext, or use an extended segment.  */
3343 MSymbol Mlocking_shift;	/* Use locking shift.  */
3344 MSymbol Msingle_shift;	/* Use single shift.  */
3345 MSymbol Msingle_shift_7;	/* Use the 7-bit single shift code (0x19).  */
3346 MSymbol Meuc_tw_shift;	/* Use the special shifting of EUC-TW.  */
3347 MSymbol Miso_6429;	/* Documented as currently ignored.  */
3348 MSymbol Mrevision_number;	/* Use a revision number escape sequence for charsets that have one.  */
3349 MSymbol Mfull_support;	/* Support all charsets registered in the International Registry.  */
3350 /*** @} */
3351 /*=*/
3352 
3353 /***en
3354     @name Variables: Others
3355 
3356     Remaining variables.  */
3357 /***ja @name �ѿ�: ����¾
3358 
3359     �ۤ����ѿ��� */
3360 /*** @{ */
3361 /*=*/
3362 /***en
3363     @brief Symbol whose name is "maybe".
3364 
3365     The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>.  It is
3366     used as a value of the @b Mbom parameter of the function
3367     mconv_define_coding () (which see).  */
3368 /***ja
3369     @brief "maybe"�Ȥ���̾������ĥ���ܥ�.
3370 
3371     �ѿ� #Mmaybe �� <tt>"maybe"</tt> �Ȥ���̾������ġ�����ϴؿ�
3372     mconv_define_coding () �ѥ�᡼�� @b Mbom ���ͤȤ����Ѥ����롣
3373     (�ܺ٤� mconv_define_coding () ����)�� */
3374 
3375 MSymbol Mmaybe;
3376 /*=*/
3377 
3378 /***en
3379     @brief The symbol @c Mcoding.
3380 
3381     Any decoded M-text has a text property whose key is the predefined
3382     symbol @c Mcoding.  The name of @c Mcoding is
3383     <tt>"coding"</tt>.  */
3384 
3385 /***ja
3386     @brief ����ܥ� @c Mcoding.
3387 
3388     �ǥ����ɤ��줿 M-text �Ϥ��٤ơ�����������Ѥߥ���ܥ� @c Mcoding
3389     �Ǥ���褦�ʥƥ����ȥץ�ѥƥ�����ġ�����ܥ� @c Mcoding ��
3390     <tt>"coding"</tt> �Ȥ���̾������ġ�  */
3391 
3392 MSymbol Mcoding;
3393 /*=*/
3394 /*** @} */
3395 
3396 /***en
3397     @brief Define a coding system.
3398 
3399     The mconv_define_coding () function defines a new coding system
3400     and makes it accessible via a symbol whose name is $NAME.  $PLIST
3401     specifies parameters of the coding system as below:
3402 
3403     <ul>
3404 
3405     <li> Key is @c Mtype, value is a symbol
3406 
3407     The value specifies the type of the coding system.  It must be
3408     @b Mcharset, @b Mutf, @b Miso_2022, or @b Mnil.
3409 
3410     If the type is @b Mcharset, $EXTRA_INFO is ignored.
3411 
3412     If the type is @b Mutf, $EXTRA_INFO must be a pointer to
3413     #MCodingInfoUTF.
3414 
3415     If the type is @b Miso_2022, $EXTRA_INFO must be a pointer to
3416     #MCodingInfoISO2022.
3417 
3418     If the type is #Mnil, the argument $RESETTER, $DECODER, and
3419     $ENCODER must be supplied.  $EXTRA_INFO is ignored.  Otherwise,
3420     they can be @c NULL and the m17n library provides proper defaults.
3421 
3422     <li> Key is @b Mcharsets, value is a plist
3423 
3424     The value specifies a list of charsets supported by the coding
3425     system.  The keys of the plist must be #Msymbol, and the values
3426     must be symbols representing charsets.
3427 
3428     <li> Key is @b Mflags, value is a plist
3429 
3430     If the type is @b Miso_2022, the value specifies flags to control
3431     the ISO 2022 interpreter.  The keys of the plist must be #Msymbol,
3432     and the values must be one of the following.
3433 
3434     <ul>
3435 
3436     <li> @b Mreset_at_eol
3437 
3438     If this flag exists, designation and invocation status is reset to
3439     the initial state at the end of line.
3440 
3441     <li> @b Mreset_at_cntl
3442 
3443     If this flag exists, designation and invocation status is reset to
3444     the initial state at a control character.
3445 
3446     <li> @b Meight_bit
3447 
3448     If this flag exists, the graphic plane right is used.
3449 
3450     <li> @b Mlong_form
3451 
3452     If this flag exists, the over-long escape sequences (ESC '$' '('
3453     \<final_byte\>) are used for designating the CCS JISX0208.1978,
3454     GB2312, and JISX0208.
3455 
3456     <li> @b Mdesignation_g0
3457 
3458     If this flag and @b Mfull_support exist, designate charsets not
3459     listed in the charset list to the graphic register G0.
3460 
3461     <li> @b Mdesignation_g1
3462 
3463     If this flag and @b Mfull_support exist, designate charsets not
3464     listed in the charset list to the graphic register G1.
3465 
3466     <li> @b Mdesignation_ctext
3467 
3468     If this flag and @b Mfull_support exist, designate charsets not
3469     listed in the charset list to a graphic register G0 or G1 based on
3470     the criteria of the Compound Text.
3471 
3472     <li> @b Mdesignation_ctext_ext
3473 
3474     If this flag and @b Mfull_support exist, designate charsets not
3475     listed in the charset list to a graphic register G0 or G1, or use
3476     extended segment for such charsets based on the criteria of the
3477     Compound Text.
3478 
3479     <li> @b Mlocking_shift
3480 
3481     If this flag exists, use locking shift.
3482 
3483     <li> @b Msingle_shift
3484 
3485     If this flag exists, use single shift.
3486 
3487     <li> @b Msingle_shift_7
3488 
3489     If this flag exists, use 7-bit single shift code (0x19).
3490 
3491     <li> @b Meuc_tw_shift
3492 
3493     If this flag exists, use a special shifting according to EUC-TW.
3494 
3495     <li> @b Miso_6429
3496 
3497     This flag is currently ignored.
3498 
3499     <li> @b Mrevision_number
3500 
3501     If this flag exists, use a revision number escape sequence to
3502     designate a charset that has a revision number.
3503 
3504     <li> @b Mfull_support
3505 
3506     If this flag exists, support all charsets registered in the
3507     International Registry.
3508 
3509     </ul>
3510 
3511     <li> Key is @b Mdesignation, value is a plist
3512 
3513     If the type is @b Miso_2022, the value specifies how to designate
3514     each supported charset.  The keys of the plist must be
3515     #Minteger, and the values must be numbers indicating a graphic
3516     register.  The Nth element value is for the Nth charset of the
3517     charset list.  The value 0..3 means that it is assumed that a
3518     charset is already designated to the graphic register 0..3.  The
3519     negative value G (-4..-1) means that a charset is not designated
3520     to any register at first, and if necessary, is designated to the
3521     (G+4) graphic register.
3522 
3523     <li> Key is @b Minvocation, value is a plist
3524 
3525     If the type is @b Miso_2022, the value specifies how to invoke
3526     each graphic register.  The plist length must be one or two.  The
3527     keys of the plist must be #Minteger, and the values must be
3528     numbers indicating a graphic register.  The value of the first
3529     element specifies which graphic register is invoked to the
3530     graphic plane left.  If the length is one, no graphic register is
3531     invoked to the graphic plane right.  Otherwise, the value of the
3532     second element specifies which graphic register is invoked to
3533     the graphic plane right.
3534 
3535     <li> Key is @b Mcode_unit, value is an integer
3536 
3537     If the type is @b Mutf, the value specifies the bit length of a
3538     code-unit.  It must be 8, 16, or 32.
3539 
3540     <li> Key is @b Mbom, value is a symbol
3541 
3542     If the type is @b Mutf and the code-unit bit length is 16 or 32,
3543     it specifies whether or not to use BOM (Byte Order Mark).  If the
3544     value is #Mnil (default), BOM is not used, else if the value is
3545     #Mmaybe, the existence of BOM is detected at decoding time, else
3546     BOM is used.
3547 
3548     <li> Key is @b Mlittle_endian, value is a symbol
3549 
3550     If the type is @b Mutf and the code-unit bit length is 16 or 32,
3551     it specifies whether or not the encoding is little endian.  If the
3552     value is #Mnil (default), it is big endian, else it is little
3553     endian.
3554 
3555     </ul>
3556 
3557     $RESETTER is a pointer to a function that resets a converter for
3558     the coding system to the initial status.  The pointed function is
3559     called with one argument, a pointer to a converter object.
3560 
3561     $DECODER is a pointer to a function that decodes a byte sequence
3562     according to the coding system.  The pointed function is called
3563     with four arguments:
3564 
3565 	@li A pointer to the byte sequence to decode.
3566 	@li The number of bytes to decode.
3567 	@li A pointer to an M-text to which the decoded characters are appended.
3568 	@li A pointer to a converter object.
3569 
3570     $DECODER must return 0 if it succeeds.  Otherwise it must return -1.
3571 
3572     $ENCODER is a pointer to a function that encodes an M-text
3573     according to the coding system.  The pointed function is called
3574     with six arguments:
3575 
3576 	@li A pointer to the M-text to encode.
3577 	@li The starting position of the encoding.
3578 	@li The ending position of the encoding.
3579 	@li A pointer to a memory area where the produced bytes are stored.
3580 	@li The size of the memory area.
3581 	@li A pointer to a converter object.
3582 
3583     $ENCODER must return 0 if it succeeds.  Otherwise it must return -1.
3584 
3585     $EXTRA_INFO is a pointer to a data structure that contains extra
3586     information about the coding system.  The type of the data
3587     structure depends on the value of the @c Mtype parameter in $PLIST.
3588 
3589     @return
3590 
3591     If the operation was successful, mconv_define_coding () returns a
3592     symbol whose name is $NAME.  If an error is detected, it returns
3593     #Mnil and assigns an error code to the external variable #merror_code.  */
3594 
3595 /***ja
3596     @brief �����ɷϤ��������.
3597 
3598     �ؿ� mconv_define_coding () �ϡ������������ɷϤ�������������
3599     $NAME �Ȥ���̾���Υ���ܥ��ͳ�ǥ��������Ǥ���褦�ˤ��롣 $PLIST
3600     �Ǥ�������륳���ɷϤΥѥ�᡼����ʲ��Τ褦�˻��ꤹ�롣
3601 
3602     <ul>
3603 
3604     <li> ������ @c Mtype ���ͤ�����ܥ�λ�
3605 
3606     �ͤϥ����ɷϤΥ����פ�ɽ����@b Mcharset, @b Mutf, @b Miso_2022, #Mnil
3607     �Τ����줫�Ǥʤ��ƤϤʤ�ʤ���
3608 
3609     �����פ� @b Mcharset �ʤ�� $EXTRA_INFO ��̵�뤵��롣
3610 
3611     �����פ� @b Mutf �ʤ�� $EXTRA_INFO �� #MCodingInfoUTF
3612     �ؤΥݥ����Ǥʤ��ƤϤʤ�ʤ���
3613 
3614     �����פ� @b Miso_2022�ʤ�� $EXTRA_INFO �� #MCodingInfoISO2022
3615     �ؤΥݥ����Ǥʤ��ƤϤʤ�ʤ���
3616 
3617     �����פ� #Mnil �ʤ�С����� $RESETTER, $DECODER, $ENCODER
3618     ��Ϳ���ʤ��ƤϤʤ�ʤ���$EXTRA_INFO ��̵�뤵��롣
3619     ����ʳ��ξ��ˤϤ����� @c NULL �Ǥ褯��
3620     m17n �饤�֥�꤬Ŭ�ڤʥǥե�����ͤ�Ϳ���롣
3621 
3622     <li> ������ @b Mcharsets ���ͤ� plist �λ�
3623 
3624     �ͤϤ��Υ����ɷϤǥ��ݡ��Ȥ����ʸ�����åȤΥꥹ�ȤǤ��롣plist�Υ�����
3625     #Msymbol���ͤ�ʸ�����åȤ�������ܥ�Ǥʤ��ƤϤʤ�ʤ���
3626 
3627     <li> ������ @b Mflags �ͤ� plist �λ�
3628 
3629     �����פ� @b Miso_2022 �ʤ�С������ͤ�, ISO 2022
3630     �����ץ꥿�Ѥ�����ե�å�������plist �Υ����� #Msymbol
3631     �Ǥ��ꡢ�ͤϰʲ��Τ����줫�Ǥ��롣
3632 
3633     <ul>
3634 
3635     <li> @b Mreset_at_eol
3636 
3637     ���Υե饰������С��޷�ʸ������λؼ���ƽФϹ����ǥꥻ�åȤ��������ξ��֤���롣
3638 
3639     <li> @b Mreset_at_cntl
3640 
3641     ���Υե饰������С��޷�ʸ������λؼ���ƽФ�����ʸ���˽в�ä������ǥꥻ�åȤ��������ξ��֤���롣
3642 
3643     <li> @b Meight_bit
3644 
3645     ���Υե饰������С��޷�ʸ������α�Ⱦ�̤��Ѥ����롣
3646 
3647     <li> @b Mlong_form
3648 
3649     ���Υե饰������С�ʸ������ JISX0208.1978, GB2312, JISX0208
3650     ��ؼ�����ݤ� over-long ���������ץ������� (ESC '$' '('
3651     \<final_byte\>) ���Ѥ����롣
3652 
3653     <li> @b Mdesignation_g0
3654 
3655     ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3656     G0 ����˻ؼ����롣
3657 
3658     <li> @b Mdesignation_g1
3659 
3660     ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3661     G1 ����˻ؼ����롣
3662 
3663     <li> @b Mdesignation_ctext
3664 
3665     ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3666     G0 ����ޤ��� G1 ����ˡ�����ѥ���ɥƥ����Ȥδ��ˤ��äƻؼ����롣
3667 
3668     <li> @b Mdesignation_ctext_ext
3669 
3670     ���Υե饰�� @b Mfull_support ������С�ʸ�����åȥꥹ�Ȥ˸����ʤ�ʸ�����åȤ�
3671     G0 ����ޤ��� G1 ����ˡ����뤤�ϳ�ĥ�������Ȥ˥���ѥ���ɥƥ����Ȥδ��ˤ��äƻؼ����롣
3672 
3673     <li> @b Mlocking_shift
3674 
3675     ���Υե饰������С���å������եȤ��Ѥ��롣
3676 
3677     <li> @b Msingle_shift
3678 
3679     ���Υե饰������С������륷�եȤ��Ѥ��롣
3680 
3681     <li> @b Msingle_shift_7
3682 
3683     ���Υե饰������С�7-bit �����륷�եȥ����� (0x19) ���Ѥ��롣
3684 
3685     <li> @b Meuc_tw_shift
3686 
3687     ���Υե饰������С�EUC-TW �˱�ä����̤ʥ��եȤ��Ѥ��롣
3688 
3689     <li> @b Miso_6429
3690 
3691     �������Ǥ��Ѥ����Ƥ��ʤ���
3692 
3693     <li> @b Mrevision_number
3694 
3695     ���Υե饰������С�revision number �����ʸ�����åȤ�ؼ�����ݤ�
3696     revision number ���������ץ������������Ѥ��롣
3697 
3698     <li> @b Mfull_support
3699 
3700     ���Υե饰������С�the International Registry
3701     ����Ͽ����Ƥ�����ʸ�����åȤ��ݡ��Ȥ��롣
3702 
3703     </ul>
3704 
3705     <li> ������ @b Mdesignation ���ͤ� plist �λ�
3706 
3707     �����פ� @b Miso_2022 �ʤ�С��ͤϳ�ʸ����ɤΤ褦�˻ؼ����뤫������
3708     plist �Υ����� #Minteger���ͤϽ����graphic register��
3709     ���������Ǥ��롣N���ܤ����Ǥ��ͤϡ�ʸ�����åȥꥹ�Ȥ� N
3710     ���ܤ�ʸ�����åȤ��б����롣�ͤ� 0..3 �Ǥ���С�ʸ�����åȤ����Ǥ�
3711     G0..G3 �˻ؼ� ����Ƥ��롣
3712 
3713     �ͤ���(-4..-1) �Ǥ���С�������֤Ǥ�ʸ�����åȤ��ɤ��ˤ�ؼ�����Ƥ��ʤ����ȡ�ɬ�פʺݤˤ�
3714     G0..G3 �Τ��줾��˻ؼ����뤳�Ȥ��̣���롣
3715 
3716     <li> ������ @b Minvocation ���ͤ� plist �λ�
3717 
3718     �����פ� @b Miso_2022 �ʤ�С��ͤϳƽ����ɤΤ褦�˸ƤӽФ���������
3719     plist ��Ĺ���� 1 �ʤ��� 2 �Ǥ��롣plist �Υ�����
3720     #Minteger���ͤϽ����graphic register)���������Ǥ��롣
3721     �ǽ�����Ǥ��ͤ��޷�ʸ�����纸Ⱦ�̤˸ƤӽФ���뽸�������
3722     plist ��Ĺ���� 1 �ʤ�С���Ⱦ�̤ˤϲ���ƤӽФ���ʤ���
3723     �����Ǥ���С����Ĥ�����Ǥ��ͤ��޷�ʸ�����籦Ⱦ�̤˸ƤӽФ���뽸�������
3724 
3725     <li> ������ @b Mcode_unit ���ͤ������ͤλ�
3726 
3727     �����פ� @b Mutf �ʤ�С��ͤϥ����ɥ�˥åȤΥӥå�Ĺ�Ǥ��ꡢ8, 16,
3728     32 �Τ����줫�Ǥ��롣
3729 
3730     <li> ������ @b Mbom ���ͤ�����ܥ�λ�
3731 
3732     �����פ� @b Mutf �ǥ����ɥ�˥åȤΥӥå�Ĺ�� 16 �� 32�ʤ�С��ͤ�
3733     BOM (Byte Order Mark) ����Ѥ��뤫�ɤ����������ͤ��ǥե�����ͤ�
3734     #Mnil �ʤ�С����Ѥ��ʤ����ͤ� #Mmaybe �ʤ�Хǥ����ɻ��� BOM
3735     �����뤫�ɤ�����Ĵ�٤롣����ʳ��ʤ�л��Ѥ��롣
3736 
3737     <li> ������ @b Mlittle_endian ���ͤ�����ܥ�λ�
3738 
3739     �����פ� @b Mutf �ǥ����ɥ�˥åȤΥӥå�Ĺ�� 16 �� 32
3740     �ʤ�С��ͤϥ������ɤ� little endian ���ɤ����������ͤ��ǥե�����ͤ�
3741     #Mnil �ʤ�� big endian �Ǥ��ꡢ�����Ǥʤ���� little endian �Ǥ��롣
3742 
3743     </ul>
3744 
3745     $RESETTER
3746     �Ϥ��Υ����ɷ��ѤΥ���С����������֤˥ꥻ�åȤ���ؿ��ؤΥݥ����Ǥ��롣
3747     ���δؿ��ϥ���С������֥������ȤؤΥݥ����Ȥ�����������Ȥ롣
3748 
3749     $DECODER �ϥХ�������Υ����ɷϤ˽��äƥǥ����ɤ���ؿ��ؤΥݥ����Ǥ��롣
3750     ���δؿ��ϰʲ��Σ�������Ȥ롣
3751 
3752 	@li �ǥ����ɤ���Х�����ؤΥݥ���
3753 	@li �ǥ����ɤ��٤��Х��ȿ�
3754 	@li �ǥ����ɷ�̤�ʸ�����ղä��� M-text �ؤΥݥ���
3755 	@li ����С������֥������ȤؤΥݥ���
3756 
3757     $DECODER �����������Ȥ��ˤ� 0 �����Ԥ����Ȥ��ˤ� -1
3758     ���֤��ʤ��ƤϤʤ�ʤ���
3759 
3760     $ENCODER �� M-text ���Υ����ɷϤ˽��äƥ������ɤ���ؿ��ؤΥݥ����Ǥ��롣
3761     ���δؿ��ϰʲ��Σ�������Ȥ롣
3762 
3763         @li �������ɤ���M-text �ؤΥݥ���
3764         @li M-text �Υ������ɳ��ϰ���
3765         @li M-text �Υ������ɽ�λ����
3766         @li ���������Х��Ȥ��ݻ���������ΰ�ؤΥݥ���
3767         @li �����ΰ�Υ�����
3768 	@li ����С������֥������ȤؤΥݥ���
3769 
3770     $ENCODER �����������Ȥ��ˤ� 0 �����Ԥ����Ȥ��ˤ� -1
3771     ���֤��ʤ��ƤϤʤ�ʤ���
3772 
3773     $EXTRA_INFO �ϥ����ǥ��������ƥ�˴ؤ����ɲþ����ޤ�ǡ�����¤�ؤΥݥ����Ǥ��롣
3774     ���Υǡ�����¤�η� $TYPE �˰�¸���롣
3775 
3776     @return
3777 
3778     ��������������� mconv_define_coding () �� $NAME
3779     �Ȥ���̾���Υ���ܥ���֤��� ���顼�����Ф��줿���� #Mnil
3780     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
3781       */
3782 
3783 /***
3784     @errors
3785     @c MERROR_CODING  */
3786 
3787 MSymbol
mconv_define_coding(const char * name,MPlist * plist,int (* resetter)(MConverter *),int (* decoder)(const unsigned char *,int,MText *,MConverter *),int (* encoder)(MText *,int,int,unsigned char *,int,MConverter *),void * extra_info)3788 mconv_define_coding (const char *name, MPlist *plist,
3789 		     int (*resetter) (MConverter *),
3790 		     int (*decoder) (const unsigned char *, int, MText *,
3791 				     MConverter *),
3792 		     int (*encoder) (MText *, int, int,
3793 				     unsigned char *, int,
3794 				     MConverter *),
3795 		     void *extra_info)
3796 {
3797   MSymbol sym = msymbol (name);
3798   int i;
3799   MCodingSystem *coding;
3800   MPlist *pl;
3801 
3802   MSTRUCT_MALLOC (coding, MERROR_CODING);
3803   coding->name = sym;
3804   if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3805     coding->type = Mcharset;
3806   pl = (MPlist *) mplist_get (plist, Mcharsets);
3807   if (! pl)
3808     MERROR (MERROR_CODING, Mnil);
3809   coding->ncharsets = mplist_length (pl);
3810   if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3811     coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3812   for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3813     {
3814       MSymbol charset_name;
3815 
3816       if (MPLIST_KEY (pl) != Msymbol)
3817 	MERROR (MERROR_CODING, Mnil);
3818       charset_name = MPLIST_SYMBOL (pl);
3819       if (! (coding->charsets[i] = MCHARSET (charset_name)))
3820 	MERROR (MERROR_CODING, Mnil);
3821     }
3822 
3823   coding->resetter = resetter;
3824   coding->decoder = decoder;
3825   coding->encoder = encoder;
3826   coding->ascii_compatible = 0;
3827   coding->extra_info = extra_info;
3828   coding->extra_spec = NULL;
3829   coding->ready = 0;
3830 
3831   if (coding->type == Mcharset)
3832     {
3833       if (! coding->resetter)
3834 	coding->resetter = reset_coding_charset;
3835       if (! coding->decoder)
3836 	coding->decoder = decode_coding_charset;
3837       if (! coding->encoder)
3838 	coding->encoder = encode_coding_charset;
3839     }
3840   else if (coding->type == Mutf)
3841     {
3842       MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3843       MSymbol val;
3844 
3845       if (! coding->resetter)
3846 	coding->resetter = reset_coding_utf;
3847 
3848       info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3849       if (info->code_unit_bits == 8)
3850 	{
3851 	  if (! coding->decoder)
3852 	    coding->decoder = decode_coding_utf_8;
3853 	  if (! coding->encoder)
3854 	    coding->encoder = encode_coding_utf_8;
3855 	}
3856       else if (info->code_unit_bits == 16)
3857 	{
3858 	  if (! coding->decoder)
3859 	    coding->decoder = decode_coding_utf_16;
3860 	  if (! coding->encoder)
3861 	    coding->encoder = encode_coding_utf_16;
3862 	}
3863       else if (info->code_unit_bits == 32)
3864 	{
3865 	  if (! coding->decoder)
3866 	    coding->decoder = decode_coding_utf_32;
3867 	  if (! coding->encoder)
3868 	    coding->encoder = encode_coding_utf_32;
3869 	}
3870       else
3871 	MERROR (MERROR_CODING, Mnil);
3872       val = (MSymbol) mplist_get (plist, Mbom);
3873       if (val == Mnil)
3874 	info->bom = 1;
3875       else if (val == Mmaybe)
3876 	info->bom = 0;
3877       else
3878 	info->bom = 2;
3879 
3880       info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3881       coding->extra_info = info;
3882     }
3883   else if (coding->type == Miso_2022)
3884     {
3885       MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3886 
3887       if (! coding->resetter)
3888 	coding->resetter = reset_coding_iso_2022;
3889       if (! coding->decoder)
3890 	coding->decoder = decode_coding_iso_2022;
3891       if (! coding->encoder)
3892 	coding->encoder = encode_coding_iso_2022;
3893 
3894       info->initial_invocation[0] = 0;
3895       info->initial_invocation[1] = -1;
3896       pl = (MPlist *) mplist_get (plist, Minvocation);
3897       if (pl)
3898 	{
3899 	  if (MPLIST_KEY (pl) != Minteger)
3900 	    MERROR (MERROR_CODING, Mnil);
3901 	  info->initial_invocation[0] = MPLIST_INTEGER (pl);
3902 	  if (! MPLIST_TAIL_P (pl))
3903 	    {
3904 	      pl = MPLIST_NEXT (pl);
3905 	      if (MPLIST_KEY (pl) != Minteger)
3906 		MERROR (MERROR_CODING, Mnil);
3907 	      info->initial_invocation[1] = MPLIST_INTEGER (pl);
3908 	    }
3909 	}
3910       memset (info->designations, 0, sizeof (info->designations));
3911       for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3912 	   i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3913 	   i++, pl = MPLIST_NEXT (pl))
3914 	info->designations[i] = MPLIST_INTEGER (pl);
3915 
3916       info->flags = 0;
3917       MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3918 	{
3919 	  MSymbol val;
3920 
3921 	  if (MPLIST_KEY (pl) != Msymbol)
3922 	    MERROR (MERROR_CODING, Mnil);
3923 	  val = MPLIST_SYMBOL (pl);
3924 	  if (val == Mreset_at_eol)
3925 	    info->flags |= MCODING_ISO_RESET_AT_EOL;
3926 	  else if (val == Mreset_at_cntl)
3927 	    info->flags |= MCODING_ISO_RESET_AT_CNTL;
3928 	  else if (val == Meight_bit)
3929 	    info->flags |= MCODING_ISO_EIGHT_BIT;
3930 	  else if (val == Mlong_form)
3931 	    info->flags |= MCODING_ISO_LOCKING_SHIFT;
3932 	  else if (val == Mdesignation_g0)
3933 	    info->flags |= MCODING_ISO_DESIGNATION_G0;
3934 	  else if (val == Mdesignation_g1)
3935 	    info->flags |= MCODING_ISO_DESIGNATION_G1;
3936 	  else if (val == Mdesignation_ctext)
3937 	    info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3938 	  else if (val == Mdesignation_ctext_ext)
3939 	    info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3940 	  else if (val == Mlocking_shift)
3941 	    info->flags |= MCODING_ISO_LOCKING_SHIFT;
3942 	  else if (val == Msingle_shift)
3943 	    info->flags |= MCODING_ISO_SINGLE_SHIFT;
3944 	  else if (val == Msingle_shift_7)
3945 	    info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3946 	  else if (val == Meuc_tw_shift)
3947 	    info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3948 	  else if (val == Miso_6429)
3949 	    info->flags |= MCODING_ISO_ISO6429;
3950 	  else if (val == Mrevision_number)
3951 	    info->flags |= MCODING_ISO_REVISION_NUMBER;
3952 	  else if (val == Mfull_support)
3953 	    info->flags |= MCODING_ISO_FULL_SUPPORT;
3954 	}
3955 
3956       coding->extra_info = info;
3957     }
3958   else
3959     {
3960       if (! coding->decoder || ! coding->encoder)
3961 	MERROR (MERROR_CODING, Mnil);
3962       if (! coding->resetter)
3963 	coding->ready = 1;
3964     }
3965 
3966   msymbol_put (sym, Mcoding, coding);
3967   msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3968   plist = (MPlist *) mplist_get (plist, Maliases);
3969   if (plist)
3970     {
3971       MPLIST_DO (pl, plist)
3972 	{
3973 	  MSymbol alias;
3974 
3975 	  if (MPLIST_KEY (pl) != Msymbol)
3976 	    continue;
3977 	  alias = MPLIST_SYMBOL (pl);
3978 	  msymbol_put (alias, Mcoding, coding);
3979 	  msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3980 	}
3981     }
3982 
3983   MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3984 
3985   return sym;
3986 }
3987 
3988 /*=*/
3989 
3990 /***en
3991     @brief Resolve coding system name.
3992 
    The mconv_resolve_coding () function returns $SYMBOL if it
    represents a coding system.  Otherwise, it canonicalizes $SYMBOL
    as a coding system name, and if the canonicalized name represents
    a coding system, returns it.  Otherwise, it returns #Mnil.  */
3997 /***ja
3998     @brief �����ɷϤ�̾�����褹��.
3999 
4000     �ؿ� mconv_resolve_coding () �� $SYMBOL �������ɷϤ����Ƥ���Ф�����֤���
4001     �����Ǥʤ���Х����ɷϤ�̾���Ȥ��� $SYMBOL
4002     ���������������줬�����ɷϤ�ɽ���Ƥ�������������� $SYMBOL ���֤���
4003     �����Ǥʤ����#Mnil ���֤���  */
4004 
4005 
4006 
4007 MSymbol
mconv_resolve_coding(MSymbol symbol)4008 mconv_resolve_coding (MSymbol symbol)
4009 {
4010   MCodingSystem *coding = find_coding (symbol);
4011 
4012   if (! coding)
4013     {
4014       symbol = msymbol__canonicalize (symbol);
4015       coding = find_coding (symbol);
4016     }
4017   return (coding ? coding->name : Mnil);
4018 }
4019 
4020 /*=*/
4021 
4022 
4023 /***en
4024     @brief List symbols representing coding systems.
4025 
    The mconv_list_codings () function makes an array of symbols
    representing coding systems, stores the pointer to the array in a
    place pointed to by $SYMBOLS, and returns the length of the array.  */
4029 /***ja
4030     @brief �����ɷϤ�ɽ�魯����ܥ�������.
4031 
4032     �ؿ� mchar_list_codings () �ϡ������ɷϤ�������ܥ���¤٤�������ꡢ
4033     $SYMBOLS �ǥݥ���Ȥ��줿���ˤ�������ؤΥݥ������֤��������Ĺ�����֤��� */
4034 
4035 int
mconv_list_codings(MSymbol ** symbols)4036 mconv_list_codings (MSymbol **symbols)
4037 {
4038   int i = coding_list.used + mplist_length (coding_definition_list);
4039   int j;
4040   MPlist *plist;
4041 
4042   MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
4043   i = 0;
4044   MPLIST_DO (plist, coding_definition_list)
4045     {
4046       MPlist *pl = MPLIST_VAL (plist);
4047       (*symbols)[i++] = MPLIST_SYMBOL (pl);
4048     }
4049   for (j = 0; j < coding_list.used; j++)
4050     if (! mplist_find_by_key (coding_definition_list,
4051 			      coding_list.codings[j]->name))
4052       (*symbols)[i++] = coding_list.codings[j]->name;
4053   return i;
4054 }
4055 
4056 /*=*/
4057 
4058 /***en
4059     @brief Create a code converter bound to a buffer.
4060 
4061     The mconv_buffer_converter () function creates a pointer to a code
4062     converter for coding system $NAME.  The code converter is bound
4063     to buffer area of $N bytes pointed to by $BUF.  Subsequent
4064     decodings and encodings are done to/from this buffer area.
4065 
4066     $NAME can be #Mnil.  In this case, a coding system associated
4067     with the current locale (LC_CTYPE) is used.
4068 
4069     @return
4070     If the operation was successful, mconv_buffer_converter () returns
4071     the created code converter.  Otherwise it returns @c NULL and
4072     assigns an error code to the external variable #merror_code.  */
4073 
4074 /***ja
4075     @brief �Хåե��˷���դ���줿�����ɥ���С�������.
4076 
4077     �ؿ� mconv_buffer_converter () �ϡ������ɷ� $NAME
4078     �ѤΥ����ɥ���С������롣���Υ����ɥ���С����ϡ�$BUF �Ǽ�������礭�� $N
4079     �Х��ȤΥХåե��ΰ�˷���դ����롣
4080     ����ʹߤΥǥ����ɤ���ӥ������ɤϡ����ΥХåե��ΰ���Ф��ƹԤʤ��롣
4081 
4082     $NAME �� #Mnil �Ǥ��äƤ�褤�����ξ��ϸ��ߤΥ�����
4083     (LC_CTYPE) �˴�Ϣ�դ���줿�����ɷϤ��Ȥ��롣
4084 
4085     @return
4086     �⤷��������������� mconv_buffer_converter () �� �������������ɥ���С������֤���
4087     �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4088     �˥��顼�����ɤ����ꤹ�롣
4089 
4090     @latexonly \IPAlabel{mconverter} @endlatexonly  */
4091 
4092 /***
4093     @errors
4094     @c MERROR_SYMBOL, @c MERROR_CODING
4095 
4096     @seealso
4097     mconv_stream_converter ()  */
4098 
4099 MConverter *
mconv_buffer_converter(MSymbol name,const unsigned char * buf,int n)4100 mconv_buffer_converter (MSymbol name, const unsigned char *buf, int n)
4101 {
4102   MCodingSystem *coding;
4103   MConverter *converter;
4104   MConverterStatus *internal;
4105 
4106   if (name == Mnil)
4107     name = mlocale_get_prop (mlocale__ctype, Mcoding);
4108   coding = find_coding (name);
4109   if (! coding)
4110     MERROR (MERROR_CODING, NULL);
4111   MSTRUCT_CALLOC (converter, MERROR_CODING);
4112   MSTRUCT_CALLOC (internal, MERROR_CODING);
4113   converter->internal_info = internal;
4114   internal->coding = coding;
4115   if (coding->resetter
4116       && (*coding->resetter) (converter) < 0)
4117     {
4118       free (internal);
4119       free (converter);
4120       MERROR (MERROR_CODING, NULL);
4121     }
4122 
4123   internal->unread = mtext ();
4124   internal->work_mt = mtext ();
4125   mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4126   internal->buf.in = buf;
4127   internal->used = 0;
4128   internal->bufsize = n;
4129   internal->binding = BINDING_BUFFER;
4130 
4131   return converter;
4132 }
4133 
4134 /*=*/
4135 
4136 /***en
4137     @brief Create a code converter bound to a stream.
4138 
4139     The mconv_stream_converter () function creates a pointer to a code
4140     converter for coding system $NAME.  The code converter is bound
4141     to stream $FP.  Subsequent decodings and encodings are done
4142     to/from this stream.
4143 
4144     $NAME can be #Mnil.  In this case, a coding system associated
4145     with the current locale (LC_CTYPE) is used.
4146 
4147     @return
4148     If the operation was successful, mconv_stream_converter ()
4149     returns the created code converter.  Otherwise it returns @c NULL
4150     and assigns an error code to the external variable
4151     #merror_code.  */
4152 
4153 /***ja
4154     @brief ���ȥ꡼��˷���դ���줿�����ɥ���С�������.
4155 
4156     �ؿ� mconv_stream_converter () �ϡ������ɷ� $NAME
4157     �ѤΥ����ɥ���С������롣���Υ����ɥ���С����ϡ����ȥ꡼�� $FP
4158     �˷���դ����롣
4159     ����ʹߤΥǥ����ɤ���ӥ������ɤϡ����Υ��ȥ꡼����Ф��ƹԤʤ��롣
4160 
4161     $NAME �� #Mnil �Ǥ��äƤ�褤�����ξ��ϸ��ߤΥ�����
4162     (LC_CTYPE) �˴�Ϣ�դ���줿�����ɷϤ��Ȥ��롣
4163 
4164     @return
4165     �⤷��������������С�mconv_stream_converter ()
4166     �Ϻ������������ɥ���С������֤��������Ǥʤ���� @c NULL
4167     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
4168 
4169     @latexonly \IPAlabel{mconverter} @endlatexonly  */
4170 
4171 /***
4172     @errors
4173     @c MERROR_SYMBOL, @c MERROR_CODING
4174 
4175     @seealso
4176     mconv_buffer_converter ()  */
4177 
4178 MConverter *
mconv_stream_converter(MSymbol name,FILE * fp)4179 mconv_stream_converter (MSymbol name, FILE *fp)
4180 {
4181   MCodingSystem *coding;
4182   MConverter *converter;
4183   MConverterStatus *internal;
4184 
4185   if (name == Mnil)
4186     name = mlocale_get_prop (mlocale__ctype, Mcoding);
4187   coding = find_coding (name);
4188   if (! coding)
4189     MERROR (MERROR_CODING, NULL);
4190   MSTRUCT_CALLOC (converter, MERROR_CODING);
4191   MSTRUCT_CALLOC (internal, MERROR_CODING);
4192   converter->internal_info = internal;
4193   internal->coding = coding;
4194   if (coding->resetter
4195       && (*coding->resetter) (converter) < 0)
4196     {
4197       free (internal);
4198       free (converter);
4199       MERROR (MERROR_CODING, NULL);
4200     }
4201 
4202   if (fseek (fp, 0, SEEK_CUR) < 0)
4203     {
4204       if (errno == EBADF)
4205 	{
4206 	  free (internal);
4207 	  free (converter);
4208 	  return NULL;
4209 	}
4210       internal->seekable = 0;
4211     }
4212   else
4213     internal->seekable = 1;
4214   internal->unread = mtext ();
4215   internal->work_mt = mtext ();
4216   mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4217   internal->fp = fp;
4218   internal->binding = BINDING_STREAM;
4219 
4220   return converter;
4221 }
4222 
4223 /*=*/
4224 
4225 /***en
4226     @brief Reset a code converter.
4227 
4228     The mconv_reset_converter () function resets code converter
4229     $CONVERTER to the initial state.
4230 
4231     @return
    If $CONVERTER->coding has its own resetter function,
4233     mconv_reset_converter () returns the result of that function
4234     applied to $CONVERTER.  Otherwise it returns 0.  */
4235 
4236 /***ja
4237     @brief �����ɥ���С�����ꥻ�åȤ���.
4238 
4239     �ؿ� mconv_reset_converter () �ϥ����ɥ���С��� $CONVERTER
4240     �������֤��᤹��
4241 
4242     @return
4243     �⤷ $CONVERTER->coding �˥ꥻ�å��Ѥδؿ����������Ƥ���ʤ�С�
4244     mconv_reset_converter () �Ϥ��δؿ��� $CONVERTER
4245     ��Ŭ�Ѥ�����̤��֤��������Ǥʤ����0���֤���  */
4246 
4247 int
mconv_reset_converter(MConverter * converter)4248 mconv_reset_converter (MConverter *converter)
4249 {
4250   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4251 
4252   converter->nchars = converter->nbytes = 0;
4253   converter->result = MCONVERSION_RESULT_SUCCESS;
4254   internal->carryover_bytes = 0;
4255   internal->used = 0;
4256   mtext_reset (internal->unread);
4257   if (internal->coding->resetter)
4258     return (*internal->coding->resetter) (converter);
4259   return 0;
4260 }
4261 
4262 /*=*/
4263 
4264 /***en
4265     @brief Free a code converter.
4266 
4267     The mconv_free_converter () function frees the code converter
4268     $CONVERTER.  */
4269 
4270 /***ja
4271     @brief �����ɥ���С������������.
4272 
4273     �ؿ� mconv_free_converter () �ϥ����ɥ���С��� $CONVERTER
4274     ��������롣  */
4275 
4276 void
mconv_free_converter(MConverter * converter)4277 mconv_free_converter (MConverter *converter)
4278 {
4279   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4280 
4281   M17N_OBJECT_UNREF (internal->work_mt);
4282   M17N_OBJECT_UNREF (internal->unread);
4283   free (internal);
4284   free (converter);
4285 }
4286 
4287 /*=*/
4288 
4289 /***en
4290     @brief Bind a buffer to a code converter.
4291 
4292     The mconv_rebind_buffer () function binds buffer area of $N bytes
4293     pointed to by $BUF to code converter $CONVERTER.  Subsequent
4294     decodings and encodings are done to/from this newly bound buffer
4295     area.
4296 
4297     @return
4298     This function always returns $CONVERTER.  */
4299 
4300 /***ja
4301     @brief �����ɥ���С����˥Хåե��ΰ�����դ���.
4302 
4303     �ؿ� mconv_rebind_buffer () �ϡ�$BUF �ˤ�äƻؤ��줿�礭�� $N
4304     �Х��ȤΥХåե��ΰ�����ɥ���С��� $CONVERTER �˷���դ��롣
4305     ����ʹߤΥǥ����ɤ���ӥ������ɤϡ����ο����˷���դ���줿�Хåե��ΰ���Ф��ƹԤʤ���褦�ˤʤ롣
4306 
4307     @return
4308     ���δؿ��Ͼ�� $CONVERTER ���֤���
4309 
4310     @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly  */
4311 
4312 /***
4313     @seealso
4314     mconv_rebind_stream () */
4315 
4316 MConverter *
mconv_rebind_buffer(MConverter * converter,const unsigned char * buf,int n)4317 mconv_rebind_buffer (MConverter *converter, const unsigned char *buf, int n)
4318 {
4319   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4320 
4321   internal->buf.in = buf;
4322   internal->used = 0;
4323   internal->bufsize = n;
4324   internal->binding = BINDING_BUFFER;
4325   return converter;
4326 }
4327 
4328 /*=*/
4329 
4330 /***en
4331     @brief Bind a stream to a code converter.
4332 
4333     The mconv_rebind_stream () function binds stream $FP to code
4334     converter $CONVERTER.  Following decodings and encodings are done
4335     to/from this newly bound stream.
4336 
4337     @return
4338     This function always returns $CONVERTER.  */
4339 
4340 /***ja
4341     @brief �����ɥ���С����˥��ȥ꡼������դ���.
4342 
4343     �ؿ� mconv_rebind_stream () �ϡ����ȥ꡼�� $FP �����ɥ���С���
4344     $CONVERTER �˷���դ��롣
4345     ����ʹߤΥǥ����ɤ���ӥ������ɤϡ����ο����˷���դ���줿���ȥ꡼����Ф��ƹԤʤ���褦�ˤʤ롣
4346 
4347     @return
4348     ���δؿ��Ͼ�� $CONVERTER ���֤���
4349 
4350     @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly  */
4351 
4352 /***
4353     @seealso
4354     mconv_rebind_buffer () */
4355 
4356 MConverter *
mconv_rebind_stream(MConverter * converter,FILE * fp)4357 mconv_rebind_stream (MConverter *converter, FILE *fp)
4358 {
4359   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4360 
4361   if (fseek (fp, 0, SEEK_CUR) < 0)
4362     {
4363       if (errno == EBADF)
4364 	return NULL;
4365       internal->seekable = 0;
4366     }
4367   else
4368     internal->seekable = 1;
4369   internal->fp = fp;
4370   internal->binding = BINDING_STREAM;
4371   return converter;
4372 }
4373 
4374 /*=*/
4375 
4376 /***en
4377     @brief Decode a byte sequence into an M-text.
4378 
4379     The mconv_decode () function decodes a byte sequence and appends
4380     the result at the end of M-text $MT.  The source byte sequence is
4381     taken from either the buffer area or the stream that is currently
4382     bound to $CONVERTER.
4383 
4384     @return
4385     If the operation was successful, mconv_decode () returns updated
4386     $MT.  Otherwise it returns @c NULL and assigns an error code to
4387     the external variable #merror_code.  */
4388 
4389 /***ja
4390     @brief �Х������ M-text �˥ǥ����ɤ���.
4391 
4392     �ؿ� mconv_decode () �ϡ��Х������ǥ����ɤ��Ƥ��η�̤� M-text
4393     $MT ���������ɲä��롣�ǥ����ɸ��ΥХ�����ϡ�$CONVERTER
4394     �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼�फ�����롣
4395 
4396     @return
4397     �⤷��������������С�mconv_decode () �Ϲ������줿 $MT ���֤���
4398     �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4399     �˥��顼�����ɤ����ꤹ�롣  */
4400 
4401 /***
4402     @errors
4403     @c MERROR_IO, @c MERROR_CODING
4404 
4405     @seealso
4406     mconv_rebind_buffer (), mconv_rebind_stream (),
4407     mconv_encode (), mconv_encode_range (),
4408     mconv_decode_buffer (), mconv_decode_stream ()  */
4409 
MText *
mconv_decode (MConverter *converter, MText *mt)
{
  /* Decode bytes from the buffer or stream bound to CONVERTER and
     append the characters to MT.  Characters previously pushed back
     into internal->unread are delivered first.  Returns MT when the
     final result is SUCCESS or INSUFFICIENT_SRC, NULL otherwise.  */
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
  /* Normalized limit: any non-positive converter->at_most becomes -1,
     which is treated below as "no limit".  */
  int at_most = converter->at_most > 0 ? converter->at_most : -1;
  int n;

  M_CHECK_READONLY (mt, NULL);

  /* The result is appended to MT in UTF-8 format.  */
  if (mt->format != MTEXT_FORMAT_UTF_8)
    mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);

  if (! mt->data)
    mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);

  converter->nchars = converter->nbytes = 0;
  converter->result = MCONVERSION_RESULT_SUCCESS;

  n = mtext_nchars (internal->unread);
  if (n > 0)
    {
      /* Deliver pushed-back characters, at most AT_MOST of them when a
	 limit is set.  */
      int limit = n;
      int i;

      if (at_most > 0 && at_most < limit)
	limit = at_most;

      /* Characters are taken from the end of `unread' backwards, so the
	 one appended last comes out first.  */
      for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
	mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
      /* Remove the delivered tail from `unread'.  */
      mtext_del (internal->unread, n + 1, internal->unread->nchars);
      if (at_most > 0)
	{
	  /* The limit was met by pushed-back characters alone.  */
	  if (at_most == limit)
	    return mt;
	  /* NOTE(review): converter->at_most is reduced by the number
	     of characters just delivered, while converter->nchars keeps
	     counting them; the stream loop below compares these two
	     fields directly.  Confirm against the decoders that this
	     stops at the intended character count.  */
	  converter->at_most -= converter->nchars;
	}
    }

  if (internal->binding == BINDING_BUFFER)
    {
      /* Decode the not-yet-consumed part of the bound buffer and
	 advance the consumed offset by converter->nbytes.  */
      (*internal->coding->decoder) (internal->buf.in + internal->used,
				    internal->bufsize - internal->used,
				    mt, converter);
      internal->used += converter->nbytes;
    }
  else if (internal->binding == BINDING_STREAM)
    {
      unsigned char work[CONVERT_WORKSIZE];
      /* The caller's last_block flag is saved, cleared while data is
	 still arriving, and put back at the end.  */
      int last_block = converter->last_block;
      /* Block reads are used only when there is no character limit and
	 the stream is seekable; otherwise one byte is read at a time.  */
      int use_fread = at_most < 0 && internal->seekable;

      converter->last_block = 0;
      while (1)
	{
	  int nbytes, prev_nbytes;

	  if (feof (internal->fp))
	    nbytes = 0;
	  else if (use_fread)
	    nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
			    internal->fp);
	  else
	    {
	      int c = getc (internal->fp);

	      if (c != EOF)
		work[0] = c, nbytes = 1;
	      else
		nbytes = 0;
	    }

	  if (ferror (internal->fp))
	    {
	      converter->result = MCONVERSION_RESULT_IO_ERROR;
	      break;
	    }

	  /* Nothing was read: hand the decoder the caller's last_block
	     flag for this final call.  */
	  if (nbytes == 0)
	    converter->last_block = last_block;
	  prev_nbytes = converter->nbytes;
	  (*internal->coding->decoder) (work, nbytes, mt, converter);
	  if (converter->nbytes - prev_nbytes < nbytes)
	    {
	      /* converter->nbytes grew by less than NBYTES: give the
		 remainder back to the stream (seek backwards, or push
		 the single byte back) and stop.  */
	      if (use_fread)
		fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
		       SEEK_CUR);
	      else
		ungetc (work[0], internal->fp);
	      break;
	    }
	  /* Stop when no bytes were read or the character limit has
	     been reached.  */
	  if (nbytes == 0
	      || (converter->at_most > 0
		  && converter->nchars == converter->at_most))
	    break;
	}
      converter->last_block = last_block;
    }
  else				/* internal->binding == BINDING_NONE */
    MERROR (MERROR_CODING, NULL);

  /* Write the normalized limit back; a non-positive value on entry is
     therefore left as -1.  */
  converter->at_most = at_most;
  return ((converter->result == MCONVERSION_RESULT_SUCCESS
	   || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
	  ? mt : NULL);
}
4515 
4516 /*=*/
4517 
4518 /***en
4519     @brief Decode a buffer area based on a coding system.
4520 
4521     The mconv_decode_buffer () function decodes $N bytes of the buffer
4522     area pointed to by $BUF based on the coding system $NAME.  A
4523     temporary code converter for decoding is automatically created
4524     and freed.
4525 
4526     @return
4527     If the operation was successful, mconv_decode_buffer ()
4528     returns the resulting M-text.  Otherwise it returns @c NULL and
4529     assigns an error code to the external variable #merror_code.  */
4530 
4531 /***ja
4532     @brief �����ɷϤ˴�Ť��ƥХåե��ΰ��ǥ����ɤ���.
4533 
4534     �ؿ� mconv_decode_buffer () �ϡ�$BUF �ˤ�äƻؤ��줿 $N
4535     �Х��ȤΥХåե��ΰ�������ɷ� $NAME �˴�Ť��ƥǥ����ɤ��롣
4536     �ǥ����ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4537 
4538     @return
4539     �⤷��������������С�mconv_decode_buffer () ������줿 M-text ���֤���
4540     �����Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4541     �˥��顼�����ɤ����ꤹ�롣  */
4542 
4543 /***
4544     @errors
4545     @c MERROR_IO, @c MERROR_CODING
4546 
4547     @seealso
4548     mconv_decode (), mconv_decode_stream ()  */
4549 
4550 MText *
mconv_decode_buffer(MSymbol name,const unsigned char * buf,int n)4551 mconv_decode_buffer (MSymbol name, const unsigned char *buf, int n)
4552 {
4553   MConverter *converter = mconv_buffer_converter (name, buf, n);
4554   MText *mt;
4555 
4556   if (! converter)
4557     return NULL;
4558   mt = mtext ();
4559   if (! mconv_decode (converter, mt))
4560     {
4561       M17N_OBJECT_UNREF (mt);
4562       mt = NULL;
4563     }
4564   mconv_free_converter (converter);
4565   return mt;
4566 }
4567 
4568 /*=*/
4569 
4570 /***en
4571     @brief Decode a stream input based on a coding system.
4572 
4573     The mconv_decode_stream () function decodes the entire byte
4574     sequence read in from stream $FP based on the coding system $NAME.
4575     A code converter for decoding is automatically created and freed.
4576 
4577     @return
4578     If the operation was successful, mconv_decode_stream () returns
4579     the resulting M-text.  Otherwise it returns @c NULL and assigns an
4580     error code to the external variable #merror_code.  */
4581 
4582 /***ja
4583     @brief �����ɷϤ˴�Ť��ƥ��ȥ꡼�����Ϥ�ǥ����ɤ���.
4584 
4585     �ؿ� mconv_decode_stream () �ϡ����ȥ꡼�� $FP
4586     �����ɤ߹��ޤ��Х��������Τ������ɷ� $NAME
4587     �˴�Ť��ƥǥ����ɤ��롣�ǥ����ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4588 
4589     @return
4590     �⤷��������������С�mconv_decode_stream () ������줿 M-text
4591     ���֤��������Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
4592     �˥��顼�����ɤ����ꤹ�롣  */
4593 
4594 /***
4595     @errors
4596     @c MERROR_IO, @c MERROR_CODING
4597 
4598     @seealso
4599     mconv_decode (), mconv_decode_buffer ()  */
4600 
4601 MText *
mconv_decode_stream(MSymbol name,FILE * fp)4602 mconv_decode_stream (MSymbol name, FILE *fp)
4603 {
4604   MConverter *converter = mconv_stream_converter (name, fp);
4605   MText *mt;
4606 
4607   if (! converter)
4608     return NULL;
4609   mt = mtext ();
4610   if (! mconv_decode (converter, mt))
4611     {
4612       M17N_OBJECT_UNREF (mt);
4613       mt = NULL;
4614     }
4615   mconv_free_converter (converter);
4616   return mt;
4617 }
4618 
4619 /*=*/
4620 
4621 /***en @brief Encode an M-text into a byte sequence.
4622 
4623     The mconv_encode () function encodes M-text $MT and writes the
4624     resulting byte sequence into the buffer area or the stream that is
4625     currently bound to code converter $CONVERTER.
4626 
4627     @return
4628     If the operation was successful, mconv_encode () returns the
4629     number of written bytes.  Otherwise it returns -1 and assigns an
4630     error code to the external variable #merror_code.  */
4631 
4632 /***ja
4633     @brief M-text ��Х�����˥������ɤ���.
4634 
4635     �ؿ� mconv_encode () �ϡ�M-text $MT �������ɤ��ơ������ɥ���С���
4636     $CONVERTER �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼�������줿�Х����������ࡣ
4637 
4638     @return
4639     �⤷��������������С�mconv_encode () �Ͻ����ޤ줿�Х��ȿ����֤���
4640     �����Ǥʤ���� -1 ���֤��������ѿ� #merror_code
4641     �˥��顼�����ɤ����ꤹ�롣  */
4642 
4643 /***
4644     @errors
4645     @c MERROR_IO, @c MERROR_CODING
4646 
4647     @seealso
4648     mconv_rebind_buffer (), mconv_rebind_stream(),
4649     mconv_decode (), mconv_encode_range ()  */
4650 
4651 int
mconv_encode(MConverter * converter,MText * mt)4652 mconv_encode (MConverter *converter, MText *mt)
4653 {
4654   return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4655 }
4656 
4657 /*=*/
4658 
4659 /***en
4660     @brief Encode a part of an M-text.
4661 
4662     The mconv_encode_range () function encodes the text between $FROM
4663     (inclusive) and $TO (exclusive) in M-text $MT and writes the
4664     resulting byte sequence into the buffer area or the stream that is
4665     currently bound to code converter $CONVERTER.
4666 
4667     @return
4668     If the operation was successful, mconv_encode_range () returns the
4669     number of written bytes. Otherwise it returns -1 and assigns an
4670     error code to the external variable #merror_code.  */
4671 
4672 /***ja
4673     @brief M-text �ΰ�����Х�����˥������ɤ���.
4674 
4675     �ؿ� mconv_encode_range () �ϡ�M-text $MT �� $FROM
4676     ��$FROM ���Τ�ޤ�ˤ��� $TO ��$TO���Τϴޤޤʤ���
4677     �ޤǤ��ϰϤΥƥ����Ȥ������ɤ��ơ������ɥ���С���
4678     $CONVERTER �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼�������줿�Х����������ࡣ
4679 
4680     @return
4681     �⤷��������������С�mconv_encode_range ()
4682     �Ͻ����ޤ줿�Х��ȿ����֤��������Ǥʤ���� -1
4683     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
4684 
4685 /***
4686     @errors
4687     @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4688 
4689     @seealso
4690     mconv_rebind_buffer (), mconv_rebind_stream(),
4691     mconv_decode (), mconv_encode ()  */
4692 
int
mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
{
  /* Encode the characters of MT between FROM and TO and write the
     bytes to the buffer or stream bound to CONVERTER.  Returns
     converter->nbytes when the final result is SUCCESS or
     INSUFFICIENT_DST, -1 otherwise.  */
  MConverterStatus *internal = (MConverterStatus *) converter->internal_info;

  M_CHECK_POS_X (mt, from, -1);
  M_CHECK_POS_X (mt, to, -1);
  /* A reversed range is treated as empty.  */
  if (to < from)
    to = from;

  /* Honor a positive at_most by shortening the range.  */
  if (converter->at_most > 0 && from + converter->at_most < to)
    to = from + converter->at_most;

  converter->nchars = converter->nbytes = 0;
  converter->result = MCONVERSION_RESULT_SUCCESS;

  /* Tag the range with the name of the coding system as its Mcoding
     text property.  */
  mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
  if (internal->binding == BINDING_BUFFER)
    {
      /* Encode straight into the unused part of the bound buffer and
	 advance the used offset by converter->nbytes.  */
      (*internal->coding->encoder) (mt, from, to,
				    internal->buf.out + internal->used,
				    internal->bufsize - internal->used,
				    converter);
      internal->used += converter->nbytes;
    }
  else if (internal->binding == BINDING_STREAM)
    {
      unsigned char work[CONVERT_WORKSIZE];

      /* Encode into WORK one piece at a time and write each piece to
	 the stream.  */
      while (from < to)
	{
	  int written = 0;
	  int prev_nbytes = converter->nbytes;
	  int this_nbytes;

	  (*internal->coding->encoder) (mt, from, to, work,
					CONVERT_WORKSIZE, converter);
	  /* Bytes produced by this call alone: converter->nbytes is
	     read here as a running total.  */
	  this_nbytes = converter->nbytes - prev_nbytes;
	  /* fwrite may write less than asked; retry until everything is
	     out or the stream reports an error.  */
	  while (written < this_nbytes)
	    {
	      int wrtn = fwrite (work + written, sizeof (unsigned char),
				 this_nbytes - written, internal->fp);

	      if (ferror (internal->fp))
		break;
	      written += wrtn;
	    }
	  if (written < this_nbytes)
	    {
	      converter->result = MCONVERSION_RESULT_IO_ERROR;
	      break;
	    }
	  /* NOTE(review): converter->nchars is added as is, whereas
	     nbytes above is handled as a running total.  If the
	     encoders also accumulate nchars across calls, FROM advances
	     too far from the third piece on -- confirm.  Also confirm
	     the loop terminates when an encoder call consumes no
	     characters (FROM would not move).  */
	  from += converter->nchars;
	}
    }
  else 				/* fail safe */
    MERROR (MERROR_CODING, -1);

  return ((converter->result == MCONVERSION_RESULT_SUCCESS
	   || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
	  ? converter->nbytes : -1);
}
4755 
4756 /*=*/
4757 
4758 /***en
4759     @brief Encode an M-text into a buffer area.
4760 
4761     The mconv_encode_buffer () function encodes M-text $MT based on
4762     coding system $NAME and writes the resulting byte sequence into the
4763     buffer area pointed to by $BUF.  At most $N bytes are written.  A
4764     temporary code converter for encoding is automatically created
4765     and freed.
4766 
4767     @return
4768     If the operation was successful, mconv_encode_buffer () returns
4769     the number of written bytes.  Otherwise it returns -1 and assigns
4770     an error code to the external variable #merror_code.  */
4771 
4772 /***ja
4773     @brief M-text �������ɤ��ƥХåե��ΰ�˽�����.
4774 
4775     �ؿ� mconv_encode_buffer () ��M-text $MT �����ɷ� $NAME
4776     �˴�Ť��ƥ������ɤ�������줿�Х������ $BUF �λؤ��Хåե��ΰ�˽����ࡣ
4777     $N �Ͻ��������Х��ȿ��Ǥ��롣
4778     �������ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4779 
4780     @return
4781     �⤷��������������С�mconv_encode_buffer () �Ͻ����ޤ줿�Х��ȿ����֤���
4782     �����Ǥʤ����-1���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
4783 
4784 /***
4785     @errors
4786     @c MERROR_IO, @c MERROR_CODING
4787 
4788     @seealso
4789     mconv_encode (), mconv_encode_stream ()  */
4790 
4791 int
mconv_encode_buffer(MSymbol name,MText * mt,unsigned char * buf,int n)4792 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4793 {
4794   MConverter *converter = mconv_buffer_converter (name, buf, n);
4795   int ret;
4796 
4797   if (! converter)
4798     return -1;
4799   ret = mconv_encode (converter, mt);
4800   mconv_free_converter (converter);
4801   return ret;
4802 }
4803 
4804 /*=*/
4805 
4806 /***en
4807     @brief Encode an M-text to write to a stream.
4808 
4809     The mconv_encode_stream () function encodes M-text $MT based on
4810     coding system $NAME and writes the resulting byte sequence to
4811     stream $FP.  A temporary code converter for encoding is
4812     automatically created and freed.
4813 
4814     @return
4815     If the operation was successful, mconv_encode_stream () returns
4816     the number of written bytes.  Otherwise it returns -1 and assigns
4817     an error code to the external variable #merror_code.  */
4818 
4819 /***ja
4820     @brief M-text �������ɤ��ƥ��ȥ꡼��˽�����.
4821 
4822     �ؿ� mconv_encode_stream () ��M-text $MT �����ɷ� $NAME
4823     �˴�Ť��ƥ������ɤ�������줿�Х�������ȥ꡼�� $FP
4824     �˽��Ф����������ɤ�ɬ�פʥ����ɥ���С����κ����Ȳ����ϼ�ưŪ�˹Ԥʤ��롣
4825 
4826     @return
4827     �⤷��������������С�mconv_encode_stream ()
4828     �Ͻ����ޤ줿�Х��ȿ����֤��������Ǥʤ���� -1
4829     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
4830 
4831 /***
4832     @errors
4833     @c MERROR_IO, @c MERROR_CODING
4834 
4835     @seealso
4836     mconv_encode (), mconv_encode_buffer (), mconv_encode_file ()  */
4837 
4838 int
mconv_encode_stream(MSymbol name,MText * mt,FILE * fp)4839 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4840 {
4841   MConverter *converter = mconv_stream_converter (name, fp);
4842   int ret;
4843 
4844   if (! converter)
4845     return -1;
4846   ret = mconv_encode (converter, mt);
4847   mconv_free_converter (converter);
4848   return ret;
4849 }
4850 
4851 /*=*/
4852 
4853 /***en
4854     @brief Read a character via a code converter.
4855 
4856     The mconv_getc () function reads one character from the buffer
4857     area or the stream that is currently bound to code converter
4858     $CONVERTER.  The decoder of $CONVERTER is used to decode the byte
4859     sequence.  The internal status of $CONVERTER is updated
4860     appropriately.
4861 
4862     @return
4863     If the operation was successful, mconv_getc () returns the
4864     character read in.  If the input source reaches EOF, it returns @c
4865     EOF without changing the external variable #merror_code.  If an
4866     error is detected, it returns @c EOF and assigns an error code to
4867     #merror_code.  */
4868 
4869 /***ja
4870     @brief �����ɥ���С�����ͳ�ǰ�ʸ�����ɤߤ���.
4871 
4872     �ؿ� mconv_getc () �ϡ������ɥ���С��� $CONVERTER
4873     �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼�फ��ʸ�������ɤ߹��ࡣ
4874     �Х�����Υǥ����ɤˤ� $CONVERTER �Υǥ��������Ѥ����롣
4875     $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4876 
4877     @return
4878     ��������������С�mconv_getc () ���ɤ߹��ޤ줿ʸ�����֤������ϸ���
4879     EOF ��ã�������ϡ������ѿ� #merror_code ���Ѥ����� @c EOF
4880     ���֤������顼�����Ф��줿���� @c EOF ���֤���#merror_code
4881     �˥��顼�����ɤ����ꤹ�롣  */
4882 
4883 /***
4884     @errors
4885     @c MERROR_CODING
4886 
4887     @seealso
4888     mconv_ungetc (), mconv_putc (), mconv_gets ()  */
4889 
4890 int
mconv_getc(MConverter * converter)4891 mconv_getc (MConverter *converter)
4892 {
4893   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4894   int at_most = converter->at_most;
4895 
4896   mtext_reset (internal->work_mt);
4897   converter->at_most = 1;
4898   mconv_decode (converter, internal->work_mt);
4899   converter->at_most = at_most;
4900   return (converter->nchars == 1
4901 	  ? STRING_CHAR (internal->work_mt->data)
4902 	  : EOF);
4903 }
4904 
4905 /*=*/
4906 
4907 /***en
4908     @brief Push a character back to a code converter.
4909 
4910     The mconv_ungetc () function pushes character $C back to code
    converter $CONVERTER.  Any number of characters can be pushed
    back.  The character pushed back last is read first by the
    subsequent mconv_getc () call.  The characters pushed back are
4914     registered only in $CONVERTER; they are not written to the input
4915     source.  The internal status of $CONVERTER is updated
4916     appropriately.
4917 
4918     @return
4919     If the operation was successful, mconv_ungetc () returns $C.
4920     Otherwise it returns @c EOF and assigns an error code to the
4921     external variable #merror_code.  */
4922 
4923 /***ja
4924     @brief �����ɥ���С����˰�ʸ���᤹.
4925 
4926     �ؿ� mconv_ungetc () �ϡ������ɥ���С��� $CONVERTER ��ʸ�� $C
4927     �����᤹���ᤵ���ʸ���������¤Ϥʤ������θ�� mconv_getc ()
4928     ��ƤӽФ����ݤˤϡ��Ǹ���ᤵ�줿ʸ�����ǽ���ɤޤ�롣�ᤵ�줿ʸ����
4929     $CONVERTER ���������ߤ���������Ǥ��ꡢ�ºݤ����ϸ��˽����ޤ��櫓�ǤϤʤ���
4930     $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4931 
4932     @return
4933     ��������������С�mconv_ungetc () �� $C ���֤��������Ǥʤ���� @c
4934     EOF ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
4935 
4936 /***
4937     @errors
4938     @c MERROR_CODING, @c MERROR_CHAR
4939 
4940     @seealso
4941     mconv_getc (), mconv_putc (), mconv_gets ()  */
4942 
4943 int
mconv_ungetc(MConverter * converter,int c)4944 mconv_ungetc (MConverter *converter, int c)
4945 {
4946   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4947 
4948   M_CHECK_CHAR (c, EOF);
4949 
4950   converter->result = MCONVERSION_RESULT_SUCCESS;
4951   mtext_cat_char (internal->unread, c);
4952   return c;
4953 }
4954 
4955 /*=*/
4956 
4957 /***en
4958     @brief Write a character via a code converter.
4959 
4960     The mconv_putc () function writes character $C to the buffer area
4961     or the stream that is currently bound to code converter
4962     $CONVERTER.  The encoder of $CONVERTER is used to encode the
4963     character.  The number of bytes actually written is set to the @c
4964     nbytes member of $CONVERTER.  The internal status of $CONVERTER
4965     is updated appropriately.
4966 
4967     @return
4968     If the operation was successful, mconv_putc () returns $C.
4969     If an error is detected, it returns @c EOF and assigns
4970     an error code to the external variable #merror_code.  */
4971 
4972 /***ja
4973     @brief �����ɥ���С������ͳ���ư�ʸ�����Ф�.
4974 
4975     �ؿ� mconv_putc () �ϡ������ɥ���С��� $CONVERTER
4976     �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼���ʸ�� $C
4977     ����Ф���ʸ���Υ������ɤˤ� $CONVERTER
4978     �Υ����������Ѥ����롣�ºݤ˽��Ф��줿�Х��ȿ��ϡ�$CONVERTER �Υ��С�
4979     @c nbytes �˥��åȤ���롣$CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
4980 
4981     @return
4982     ��������������С�mconv_putc () �� $C ���֤������顼�����Ф��줿����
4983     @c EOF ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
4984 
4985 /***
4986     @errors
4987     @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4988 
4989     @seealso
4990     mconv_getc (), mconv_ungetc (), mconv_gets ()  */
4991 
4992 int
mconv_putc(MConverter * converter,int c)4993 mconv_putc (MConverter *converter, int c)
4994 {
4995   MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4996 
4997   M_CHECK_CHAR (c, EOF);
4998   mtext_reset (internal->work_mt);
4999   mtext_cat_char (internal->work_mt, c);
5000   if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
5001     return EOF;
5002   return c;
5003 }
5004 
5005 /*=*/
5006 
5007 /***en
5008     @brief Read a line using a code converter.
5009 
5010     The mconv_gets () function reads one line from the buffer area or
5011     the stream that is currently bound to code converter $CONVERTER.
5012     The decoder of $CONVERTER is used for decoding.  The decoded
5013     character sequence is appended at the end of M-text $MT.  The
5014     final newline character in the original byte sequence is not
5015     appended.  The internal status of $CONVERTER is updated
5016     appropriately.
5017 
5018     @return
5019     If the operation was successful, mconv_gets () returns the
5020     modified $MT.  If it encounters EOF without reading a single
5021     character, it returns $MT without changing it.  If an error is
5022     detected, it returns @c NULL and assigns an error code to
5023     #merror_code.  */
5024 
5025 /***ja
5026     @brief �����ɥ���С�����Ȥäư���ɤ߹���.
5027 
5028     �ؿ� mconv_gets () �ϡ������ɥ���С��� $CONVERTER
5029     �˸��߷���դ����Ƥ���Хåե��ΰ褢�뤤�ϥ��ȥ꡼�फ�� 1 �Ԥ��ɤ߹��ࡣ
5030     �Х�����Υǥ����ɤˤ� $CONVERTER
5031     �Υǥ��������Ѥ����롣�ǥ����ɤ��줿ʸ����� M-text $MT
5032     ���������ɲä���롣���ΥХ�����ν�ü����ʸ�����ɲä���ʤ���
5033     $CONVERTER ���������֤�ɬ�פ˱����ƹ�������롣
5034 
5035     @return
5036     ��������������С�mconv_gets () ���ѹ����줿 $MT
5037     ���֤����⤷1ʸ�����ɤޤ��� EOF �������������ϡ�$MT
5038     ���ѹ������ˤ��Τޤ��֤������顼�����Ф��줿���� @c NULL ���֤���
5039     #merror_code �˥��顼�����ɤ����ꤹ�롣  */
5040 
5041 /***
5042     @errors
5043     @c MERROR_CODING
5044 
5045     @seealso
5046     mconv_getc (), mconv_ungetc (), mconv_putc ()  */
5047 
5048 MText *
mconv_gets(MConverter * converter,MText * mt)5049 mconv_gets (MConverter *converter, MText *mt)
5050 {
5051   int c;
5052 
5053   M_CHECK_READONLY (mt, NULL);
5054   if (mt->format != MTEXT_FORMAT_UTF_8)
5055     mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5056 
5057   while (1)
5058     {
5059       c = mconv_getc (converter);
5060       if (c == EOF || c == '\n')
5061 	break;
5062       mtext_cat_char (mt, c);
5063     }
5064   if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5065     /* mconv_getc () sets #merror_code */
5066     return NULL;
5067   return mt;
5068 }
5069 
5070 /*=*/
5071 
5072 /*** @} */
5073 
5074 /*
5075   Local Variables:
5076   coding: euc-japan
5077   End:
5078 */
5079