multi.c - OpenGrok cross reference for /dports/japanese/less/less-382.262.03.01/multi.c

/*
 * Copyright (c) 1994-2005  Kazushi (Jam) Marukawa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice in the documentation and/or other materials provided with
 *    the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


/*
 * Routines to manipulate a buffer that holds a string of multi-byte
 * characters.  They detect the character set of an input string, convert
 * it to internal codes, and convert those codes to other encodings for
 * display.
 */

#include "defines.h"
#include "less.h"

#include <stdio.h>
#include <assert.h>

#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif

#if JAPANESE
#include "kanji_map.h"
#include "unicode_map.h"
#endif
#include "unicode_type.h"

#define LESS 1

/* TODO: remove caller control_char(), change_control_char() and ecalloc() */
extern int control_char ();
extern void change_control_char ();
extern void* ecalloc ();


#if ISO

static void multi_reparse();
static int check_ft();


#if JAPANESE

int markwrongchar = 1;


/*
 * Macros for character detection.
 *
 * The IS* macros test raw input bytes against the byte ranges of one
 * encoding.  The UTF8_n macros assemble the payload bits of an n-byte
 * UTF-8 sequence into one integer.  Arguments may be evaluated more than
 * once, so they must not have side effects.
 */
/* Byte in 0x21..0x7e. */
#define ISJIS(c)		(0x21 <= (c) && (c) <= 0x7e)
/* Byte in 0xa1..0xfe. */
#define ISUJIS(c)		(0xa1 <= (c) && (c) <= 0xfe)
/* Byte is one of the two prefix bytes 0x8e / 0x8f. */
#define ISUJISSS(c)		((c) == 0x8e || (c) == 0x8f)
/* Two-byte UJIS kanji: both bytes in 0xa1..0xfe. */
#define ISUJISKANJI(c1,c2)	(ISUJIS(c1) && ISUJIS(c2))
#define ISUJISKANJI1(c)		(ISUJIS(c))
/* UJIS kana: prefix 0x8e followed by one byte in 0xa1..0xfe. */
#define ISUJISKANA(c1,c2)	((c1) == 0x8e && ISUJIS(c2))
#define ISUJISKANA1(c)		((c) == 0x8e)
/* UJIS supplementary kanji: prefix 0x8f followed by two bytes in 0xa1..0xfe. */
#define ISUJISKANJISUP(c1,c2,c3) ((c1) == 0x8f && ISUJIS(c2) && ISUJIS(c3))
#define ISUJISKANJISUP1(c)	((c) == 0x8f)
/* SJIS kanji: lead in 0x81..0x9f or 0xe0..0xfc, trail in 0x40..0xfc except 0x7f. */
#define ISSJISKANJI(c1,c2)	(((0x81 <= (c1) && (c1) <= 0x9f) || \
				  (0xe0 <= (c1) && (c1) <= 0xfc)) && \
				 (0x40 <= (c2) && (c2) <= 0xfc && (c2) != 0x7f))
#define ISSJISKANJI1(c)		((0x81 <= (c) && (c) <= 0x9f) || \
				 (0xe0 <= (c) && (c) <= 0xfc))
/* SJIS single-byte kana: 0xa1..0xdf. */
#define ISSJISKANA(c)		(0xa1 <= (c) && (c) <= 0xdf)
/* UTF-8 lead byte of a multi-byte sequence: 0xc0..0xfd. */
#define ISUTF8_HEAD(c)		(0xc0 <= (c) && (c) < 0xfe)
/* UTF-8 continuation byte: bit pattern 10xxxxxx. */
#define ISUTF8_REST(c)		(((c) & 0xc0) == 0x80)
/* ISUTF8_n: the bytes form a well-shaped n-byte UTF-8 sequence. */
#define ISUTF8_1(c)		((c) <= 0x7f)
#define ISUTF8_2(c1,c2)		(((c1) & 0xe0) == 0xc0 && ISUTF8_REST(c2))
#define ISUTF8_3(c1,c2,c3)	(((c1) & 0xf0) == 0xe0 && ISUTF8_REST(c2) && \
				 ISUTF8_REST(c3))
#define ISUTF8_4(c1,c2,c3,c4)	(((c1) & 0xf8) == 0xf0 && ISUTF8_REST(c2) && \
				 ISUTF8_REST(c3) && ISUTF8_REST(c4))
#define ISUTF8_5(c1,c2,c3,c4,c5) \
	(((c1) & 0xfc) == 0xf8 && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \
	 ISUTF8_REST(c4) && ISUTF8_REST(c5))
#define ISUTF8_6(c1,c2,c3,c4,c5,c6) \
	(((c1) & 0xfe) == 0xfc && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \
	 ISUTF8_REST(c4) && ISUTF8_REST(c5) && ISUTF8_REST(c6))

/*
 * UTF8_n: combine the payload bits of an n-byte sequence.  The lead byte
 * contributes its low 5/4/3/2/1 bits and every following byte its low
 * 6 bits.  No validation is done here; use the ISUTF8_n macros first.
 */
#define UTF8_2(c0, c1)                 ((((c0) & 0x1f) << 6) \
					| (((c1) & 0x3f)))
#define UTF8_3(c0, c1, c2)             ((((c0) & 0x0f) << 12) \
					| (((c1) & 0x3f) << 6)	\
					| ((c2) & 0x3f))
#define UTF8_4(c0, c1, c2, c3)         ((((c0) & 0x07) << 18) \
					| (((c1) & 0x3f) << 12)	\
					| (((c2) & 0x3f) << 6) \
					| ((c3) & 0x3f))
#define UTF8_5(c0, c1, c2, c3, c4)     ((((c0) & 0x03) << 24) \
					| (((c1) & 0x3f) << 18) \
					| (((c2) & 0x3f) << 12) \
					| (((c3) & 0x3f) << 6)	\
					| ((c4) & 0x3f))
#define UTF8_6(c0, c1, c2, c3, c4, c5) ((((c0) & 0x01) << 30)  \
					| (((c1) & 0x3f) << 24) \
					| (((c2) & 0x3f) << 18) \
					| (((c3) & 0x3f) << 12)	\
					| (((c4) & 0x3f) << 6)	\
					| ((c5) & 0x3f))
#endif


/*
 * Definitions for understanding the escape sequence.
 * The following escape sequences are understood by less:
 *  ESC 2/4 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F
 *  ESC 2/4 4/0,4/1,4/2
 *  ESC 2/6 F
 *  ESC 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F
 *  ESC 2/12 F		This is used in MULE.  Less supports this as input.
 *  0/14,0/15
 *  ESC 4/14,4/15,6/14,6/15,7/12,7/13,7/14
 *  8/14,8/15
 */
/*
 * Identifies an escape-sequence prefix.  The comment after each
 * enumerator shows the bytes of that prefix ("^[" is ESC); the numeric
 * part of the name is the column/row notation of the bytes following ESC.
 * NOTE(review): presumably stored in multibuf.eseq to record how much of
 * an escape sequence has been read so far -- confirm against the parser.
 */
enum escape_sequence {
    NOESC,		/* No */	ESC_,		/* ^[ */
    ESC_2_4,	/* ^[$ */	ESC_2_4_8,	/* ^[$( */
    ESC_2_4_9,	/* ^[$) */	ESC_2_4_10,	/* ^[$* */
    ESC_2_4_11,	/* ^[$+ */	ESC_2_4_13,	/* ^[$- */
    ESC_2_4_14,	/* ^[$. */	ESC_2_4_15,	/* ^[$/ */
    ESC_2_6,	/* ^[& */	ESC_2_8,	/* ^[( */
    ESC_2_9,	/* ^[) */	ESC_2_10,	/* ^[* */
    ESC_2_11,	/* ^[+ */	ESC_2_12,	/* ^[, */
    ESC_2_13,	/* ^[- */	ESC_2_14,	/* ^[. */
    ESC_2_15,	/* ^[/ */	ESC_5_11,	/* ^[[ */
};


/*
 * File-wide defaults.
 * NOTE(review): presumably copied into each newly created multibuf;
 * the code that does so is not in this part of the file -- confirm.
 */
static SETCHARSET def_scs = SCSASCII | SCSOTHERISO;	/* Default set of character sets */
static ENCSET def_input = ESISO7;	/* Default character set of left plane */
static ENCSET def_inputr = ESISO8;	/* Default character set of right plane */
static int def_gs[4] = {
    ASCII,				/* Default g0 plane status */
    WRONGCS,				/* Default g1 plane status */
    WRONGCS,				/* Default g2 plane status */
    WRONGCS				/* Default g3 plane status */
};

static ENCSET output = ESISO8;		/* Character set for output */
#if JAPANESE
static J_PRIORITY def_priority = PUTF8;	/* Which code was given priority. */
#endif

static UWidth utfwidth = UWIDTH_NORMAL;	/* default UTF-8 Width */

/* Position of a byte in the input; M_NULL_POS (-1) is the "no position" value. */
typedef POSITION m_position;
#define M_NULL_POS	((POSITION)(-1))

/*
 * Structure to represent character set information.
 *
 * This data set contains the current character sets and the other
 * information needed to keep the status of ISO-2022 escape sequences.
 */
struct m_status {
    /* Graphic sets */
    int gs[4];			/* Current g0..g3 plane sets. */
				/* gl, gr, and sg each refer to one of the 4 planes. */
    int gl;			/* Current gl plane status */
    int gr;			/* Current gr plane status */
    int sg;			/* Current status of single-shifted plane */
/* Value of a plane selector (gl, gr or sg) that selects no plane. */
#define WRONGPLANE		(-1)
/* True when selector `plane' (gl, gr or sg) of mp->ms selects a plane. */
#define ISVALIDPLANE(mp,plane)	((mp)->ms->plane != WRONGPLANE)
/*
 * Character set that applies to byte c: the single-shifted plane when
 * one is selected, otherwise gr for bytes with bit 7 set and gl for
 * the others.
 */
#define FINDCS(mp,c)	((mp)->ms->gs[(ISVALIDPLANE((mp), sg) ? (mp)->ms->sg : \
				 ((c) & 0x80) ? (mp)->ms->gr : (mp)->ms->gl)])
/* Character set held by the plane that selector `plane' refers to. */
#define PLANE2CS(mp,plane)	((mp)->ms->gs[(mp)->ms->plane])

    int irr;			/* Identify revised registration number */
};

/*
 * Per-stream parsing state: I/O encoding settings, escape-sequence and
 * character-set state, the raw input buffer, and the buffer of
 * internalized output.
 */
struct multibuf {
    struct {
	SETCHARSET scs;
	ENCSET input;
	ENCSET inputr;
    } io;

    ENCSET orig_io_right;
    int rotation_io_right;

    enum escape_sequence eseq;
    /*
     * Variables to control escape sequences as output.
     */
    int cs;			/* Current character set */
    struct m_status* ms;
#if JAPANESE
    J_PRIORITY priority;	/* Which code was given priority. */
    int sequence_counter;	/* Special counter to detect UJIS KANJI. */
#endif

    CHARSET icharset;		/* Last non ASCII character set of input */

    /*
     * Small buffers to hold all bytes of a multi-byte character while
     * it is being parsed.
     *
     * The multi_parse() function receives a sequence of bytes and buffers
     * it.  Each time multi_parse() recognizes a full data sequence that
     * represents one character, it converts the data into internal data
     * and returns the converted data.
     *
     * The caller must buffer it somewhere and output it using outbuf() or
     * outchar().  Those output functions convert internal data into
     * the appropriate data stream for the chosen output device.
     *
     * As internal data, we use char[] and CHARSET[] to keep the bytes and
     * the additional information, respectively.  We chose an ISO-2022
     * style data format as our internal data format because it is the
     * easiest to work with.  It has completely separate planes for each
     * character set.  This helps code conversion and other tasks a lot.
     * For example, we don't need to do any work to separate Chinese and
     * Japanese because they are separated from the beginning in ISO-2022,
     * whereas UTF-8 uses a single plane for all CJK character sets.
     */
    /*
     * Buffer for input/parsing
     */
    m_position lastpos;		/* position of last byte */
    m_position startpos;	/* position of first byte buffered */
    unsigned char inbuf[80];	/* indexed by position modulo its size (see INBUF*) */
    m_position laststartpos;	/* position of first byte buffered last time */
    int lastsg;			/* last single-shifted plane (ms->sg) */
    /*
     * Buffer for internalized/converted data
     */
    unsigned char multiint[80];	/* Byte data */
    CHARSET multics[80];	/* Character set data (no UJIS/SJIS/UTF */
				/* because all of them are converted into */
				/* internal data format) */
    int intindex;		/* Index of multiint */
};

/*
 * Accessors for mp->inbuf.  The buffer is indexed by input position
 * modulo the buffer size.
 *   INBUF(mp)     byte at lastpos (the most recently buffered byte)
 *   INBUFn(mp)    byte at startpos + n (n = 0..5)
 *   INBUFI(mp,i)  byte at position i
 */
#define INBUF(mp)	((mp)->inbuf[(mp)->lastpos%sizeof((mp)->inbuf)])
#define INBUF0(mp)	((mp)->inbuf[(mp)->startpos%sizeof((mp)->inbuf)])
#define INBUF1(mp)	((mp)->inbuf[((mp)->startpos+1)%sizeof((mp)->inbuf)])
#define INBUF2(mp)	((mp)->inbuf[((mp)->startpos+2)%sizeof((mp)->inbuf)])
#define INBUF3(mp)	((mp)->inbuf[((mp)->startpos+3)%sizeof((mp)->inbuf)])
#define INBUF4(mp)	((mp)->inbuf[((mp)->startpos+4)%sizeof((mp)->inbuf)])
#define INBUF5(mp)	((mp)->inbuf[((mp)->startpos+5)%sizeof((mp)->inbuf)])
#define INBUFI(mp,i)	((mp)->inbuf[(i)%sizeof((mp)->inbuf)])

/*
 * Return the UTYPE_* flag word for code point c.
 *
 * Code points below 0x20000 are looked up in utype_map[]; higher ranges
 * are classified by fixed range checks.  Negative values and every
 * range not listed below yield 0.
 */
static int unicode_type(c)
int c;
{
    if (c < 0)
	return 0;
    if (c < 0x20000)
	return utype_map[c];
    if (c < 0x40000)
	return (UTYPE_EXIST | UTYPE_WIDE);
    if (c < 0xe0000)
	return 0;
    if (c < 0xe0080)
	return (UTYPE_EXIST | UTYPE_FORMAT);
    if (c < 0xe01f0)
	return (UTYPE_EXIST | UTYPE_NSP_MODIFIER);

    /* 0xf0000..0xffffd and 0x100000..0x10fffd share one classification. */
    if ((c >= 0xf0000 && c <= 0xffffd) || (c >= 0x100000 && c <= 0x10fffd))
	return (UTYPE_EXIST | UTYPE_AMBIGUOUS);

    return 0;
}

/*
 * Choose the internal character-set tag for Unicode code point uc.
 *
 * Returns WRONGUCS_H when unicode_type() does not flag the code point as
 * existing or flags it as a control character, UTF8Z for non-spacing
 * modifiers, format characters and separators, UTF8W when the current
 * `utfwidth' policy treats the code point as wide, and UTF8 otherwise.
 */
static int
get_utfwidth(uc)
int uc;
{
    int utype = unicode_type(uc);

    /*
     * This must be a bit test.  Multiplying by UTYPE_EXIST is non-zero
     * for any non-zero type word, which would accept code points that
     * carry other flags but are not marked as existing.
     */
    if (!(utype & UTYPE_EXIST))
	return WRONGUCS_H;
    if (utype & UTYPE_CONTROL)
	return WRONGUCS_H;
    if (utype & (UTYPE_NSP_MODIFIER | UTYPE_FORMAT | UTYPE_SEPARATOR))
	return UTF8Z;

    switch (utfwidth) {
    case UWIDTH_NONE:
	break;
    case UWIDTH_NORMAL:
	if (utype & UTYPE_WIDE) return UTF8W;
	break;
    case UWIDTH_CJK:
	if (utype & (UTYPE_WIDE | UTYPE_AMBIGUOUS)) return UTF8W;
	break;
    case UWIDTH_JA:
	if (utype & (UTYPE_WIDE | UTYPE_AMBIGUOUS | UTYPE_JA)) return UTF8W;
	break;
    case UWIDTH_ALMOST:
	/* Everything outside the 7-bit range counts as wide. */
	if (uc >= 0x80) return UTF8W;
	break;
    case UWIDTH_ALL:
	return UTF8W;
    default:
	break;
    }

    return UTF8;
}

/*
 * Return the number of bytes that make up one character of character
 * set cs.
 *
 * A wrong character set always counts as one byte.  For the UTF-8, UJIS
 * and SJIS families (JAPANESE builds only) the length is derived from
 * the first buffered byte of mp.  For every other set it follows from
 * the set type and, for multi-byte sets, from bits 0x70 of the final
 * byte reported by CS2FT().  Combinations not handled here trip the
 * assertion.
 */
static int code_length(mp, cs)
MULBUF* mp;
CHARSET cs;
{
#if JAPANESE
    unsigned char head;
#endif

    if (CSISWRONG(cs))
	return 1;

#if JAPANESE
    switch (CS2CHARSET(cs)) {
    case UTF8:
    case UTF8Z:
    case UTF8W:
	head = INBUF0(mp);
	/* 0xfe/0xff and bytes below 0xc0 are not lead bytes. */
	if (head >= 0xfe || head < 0xc0)
	    return 1;
	if (head >= 0xfc)
	    return 6;
	if (head >= 0xf8)
	    return 5;
	if (head >= 0xf0)
	    return 4;
	if (head >= 0xe0)
	    return 3;
	return 2;
    case UJIS:
    case UJIS2000:
    case UJIS2004:
	head = INBUF0(mp);
	if (ISUJISKANJI1(head) || ISUJISKANA1(head))
	    return 2;
	if (ISUJISKANJISUP1(head))
	    return 3;
	return 1;
    case SJIS:
    case SJIS2000:
    case SJIS2004:
	head = INBUF0(mp);
	/* Single-byte kana and everything else are one byte long. */
	return ISSJISKANJI1(head) ? 2 : 1;
    }
#endif

    switch (CS2TYPE(cs))
    {
    case TYPE_94_CHARSET:
    case TYPE_96_CHARSET:
	return 1;
    case TYPE_94N_CHARSET:
    case TYPE_96N_CHARSET:
	switch (CS2FT(cs) & 0x70)
	{
	case 0x30:		/* for private use */
	case 0x40:
	case 0x50:
	    return 2;
	case 0x60:
	    return 3;
	case 0x70:
	    return 4;		/* or more bytes */
	}
    }
    assert(0);
    return (0);
}

/*
 * Move the first buffered input byte to the output buffer unchanged,
 * tagged as ASCII, and consume it from the input.
 */
static void noconv1(mp)
MULBUF *mp;
{
    int slot = mp->intindex;

    mp->multiint[slot] = INBUF0(mp);
    mp->multics[slot] = ASCII;
    mp->intindex = slot + 1;
    mp->startpos++;
}

/*
 * Move the first buffered input byte to the output buffer unchanged,
 * tagged as WRONGCS, and consume it from the input.
 */
static void wrongcs1(mp)
MULBUF *mp;
{
    int slot = mp->intindex;

    mp->multiint[slot] = INBUF0(mp);
    mp->multics[slot] = WRONGCS;
    mp->intindex = slot + 1;
    mp->startpos++;
}

/*
 * Append the two-byte wrong-character mark ('"' followed by '.', tagged
 * as a JISX0208KANJI character) to the output buffer and discard all
 * buffered input.
 */
static void put_wrongmark(mp)
MULBUF *mp;
{
    int slot = mp->intindex;

    mp->multiint[slot] = '"';
    mp->multics[slot] = JISX0208KANJI;
    mp->multiint[slot + 1] = '.';
    mp->multics[slot + 1] = REST_MASK | JISX0208KANJI;
    mp->intindex = slot + 2;

    /* flush buffer */
    mp->startpos = mp->lastpos + 1;
}

/*
 * Write WRONGUCS characters
 */
static void wrongucs(mp, uc)
MULBUF *mp;
int uc;
{
    if (markwrongchar) {
	put_wrongmark(mp);
	return;
    }

    if (uc < 0x80) {
	wrongcs1(mp);
    } else if (uc < 0x800) {
	mp->multiint[mp->intindex] = INBUF0(mp) & 0x9f;
	mp->multics[mp->intindex] = WRONGUCS_H;
	mp->multiint[mp->intindex + 1] = INBUF1(mp);
	mp->multics[mp->intindex + 1] = WRONGUCS_T | REST_MASK;
	mp->intindex += 2;
    } else if (uc < 0x10000) {
	mp->multiint[mp->intindex] = INBUF0(mp) & 0x8f;
	mp->multics[mp->intindex] = WRONGUCS_H;
	mp->multiint[mp->intindex + 1] = INBUF1(mp);
	mp->multics[mp->intindex + 1] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 2] = INBUF2(mp);
	mp->multics[mp->intindex + 2] = WRONGUCS_T | REST_MASK;
	mp->intindex += 3;
    } else if (uc < 0x200000) {
	mp->multiint[mp->intindex] = INBUF0(mp) & 0x87;
	mp->multics[mp->intindex] = WRONGUCS_H;
	mp->multiint[mp->intindex + 1] = INBUF1(mp);
	mp->multics[mp->intindex + 1] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 2] = INBUF2(mp);
	mp->multics[mp->intindex + 2] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 3] = INBUF3(mp);
	mp->multics[mp->intindex + 3] = WRONGUCS_T | REST_MASK;
	mp->intindex += 4;
    } else if (uc < 0x4000000) {
	mp->multiint[mp->intindex] = INBUF0(mp) & 0x83;
	mp->multics[mp->intindex] = WRONGUCS_H;
	mp->multiint[mp->intindex + 1] = INBUF1(mp);
	mp->multics[mp->intindex + 1] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 2] = INBUF2(mp);
	mp->multics[mp->intindex + 2] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 3] = INBUF3(mp);
	mp->multics[mp->intindex + 3] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 4] = INBUF4(mp);
	mp->multics[mp->intindex + 4] = WRONGUCS_T | REST_MASK;
	mp->intindex += 5;
    } else {
	mp->multiint[mp->intindex] = INBUF0(mp) & 0x81;
	mp->multics[mp->intindex] = WRONGUCS_H;
	mp->multiint[mp->intindex + 1] = INBUF1(mp);
	mp->multics[mp->intindex + 1] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 2] = INBUF2(mp);
	mp->multics[mp->intindex + 2] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 3] = INBUF3(mp);
	mp->multics[mp->intindex + 3] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 4] = INBUF3(mp);
	mp->multics[mp->intindex + 4] = WRONGUCS_M | REST_MASK;
	mp->multiint[mp->intindex + 5] = INBUF5(mp);
	mp->multics[mp->intindex + 5] = WRONGUCS_T | REST_MASK;
	mp->intindex += 5;
    }

    /* flush buffer */
    mp->startpos = mp->lastpos + 1;
}

/*
 * Reject the buffered input.
 *
 *  If less is not in marking mode, every buffered byte is passed through
 * wrongcs1() one at a time.
 *  If less is in marking mode, all buffered bytes are dropped and the
 * wrong-character mark is written to the output buffer.  The mark is the
 * same for every character set (single-byte, Japanese multi-byte,
 * GB2312, KSC5601 and anything else), so the set is not examined.
 */
static void wrongchar(mp)
MULBUF *mp;
{
    if (!markwrongchar) {
	while (mp->startpos <= mp->lastpos) {
	    wrongcs1(mp);
	}
	return;
    }

    /* Should I use one byte character, like '?' or '_'? */
    put_wrongmark(mp);
}

/*
 * Internalize input stream.
 * We recognized input data as using ISO coding set.
 */
static void internalize_iso(mp)
MULBUF *mp;
{
    register int i;
    m_position pos;
    m_position to;
    int intindex;
    int dummy;

    /*
     * If character set points empty character set, reject buffered data.
     */
    if (CSISWRONG(mp->cs)) {
	wrongcs1(mp);
	return;
    }

    /*
     * If character set points 94 or 94x94 character set, reject
     * DEL and SPACE codes in buffered data.
     */
    if (CS2TYPE(mp->cs) == TYPE_94_CHARSET ||
	CS2TYPE(mp->cs) == TYPE_94N_CHARSET) {
	unsigned char c = INBUF(mp);
	if ((c & 0x7f) == 0x7f) {
	    if (mp->lastpos - mp->startpos + 1 == 1) {
		wrongcs1(mp);
	    } else {
		wrongcs1(mp);
		multi_reparse(mp);
	    }
	    return;
	} else if ((c & 0x7f) == 0x20) {
	    /*
	     * A 0x20 (SPACE) code is wrong, but I treat it as
	     * a SPACE.
	     */
	    if (mp->lastpos - mp->startpos + 1 == 1) {
		noconv1(mp);
	    } else {
		wrongcs1(mp);
		multi_reparse(mp);
	    }
	    return;
	}
    }

    /*
     * Otherwise, keep buffering.
     */
    pos = mp->startpos;
    to = pos + code_length(mp, mp->cs) - 1;
    if (mp->lastpos < to) {
	return;		/* Not enough, so go back to fetch next data. */
    }

    /*
     * We buffered enough data for one character of multi byte characters.
     * Therefore, start to convert this buffered data into a first character.
     */
    intindex = mp->intindex;
    mp->multiint[intindex] = INBUFI(mp, pos) & 0x7f;
    mp->multics[intindex] = mp->cs;
    intindex++;
    for (pos++; pos <= to; pos++) {
	mp->multiint[intindex] = INBUFI(mp, pos) & 0x7f;
	mp->multics[intindex] = REST_MASK | mp->cs;
	intindex++;
    }

    /*
     *  codeset JIS X 0208:1990 validation
     */
    if (mp->cs == JISX0208_90KANJI && !(mp->io.scs & SCSJISX0208_1990)) {
	wrongchar(mp);
	return;
    }

    /*
     * Check newly converted code.  If it is not valid code,
     * less may mark it as not valid code.
     */
    if (chisvalid_cs(mp->io.scs,
		     &mp->multiint[mp->intindex],
		     &mp->multics[mp->intindex])) {
	mp->icharset = mp->cs;
	mp->intindex = intindex;
	mp->startpos = pos;
    } else {
	/*
	 * less ignore the undefined codes
	 */
	wrongchar(mp);
    }
}

#if JAPANESE
/*
 * Internalize input encoded in UJIS.
 *
 * Works on the 1 to 3 bytes currently buffered.  A 0x8e-prefixed kana
 * becomes a JISX0201KANA byte; a two-byte kanji or a 0x8f-prefixed
 * three-byte kanji is looked up with checkKANJI() and stored as a
 * two-byte internal character.  Codes unknown to checkKANJI() go to
 * wrongchar().
 *
 * Return 1 if the input was accepted (converted, rejected as a wrong
 * character, or still incomplete).
 * Return 0 if the buffered bytes do not match UJIS.
 */
static int internalize_ujis(mp)
MULBUF *mp;
{
    m_position nbytes = mp->lastpos - mp->startpos + 1;
    int b0, b1, b2;
    int kanji;

    if (nbytes == 1)
	return 1;		/* need the next byte */
    if (nbytes != 2 && nbytes != 3)
	return 0;

    b0 = INBUF0(mp);
    b1 = INBUF1(mp);

    if (nbytes == 2) {
	if (ISUJISKANA(b0, b1)) {
	    mp->cs = JISX0201KANA;
	    mp->icharset = UJIS;
	    mp->multiint[mp->intindex] = b1 & 0x7f;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->intindex += 1;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	}
	if (ISUJISKANJI(b0, b1)) {
	    kanji = checkKANJI(mp->io.scs,
			       SCSJISX0208_1983 | SCSJISX0208_1990
			       | SCSJISX0213_2000 | SCSJISX0213_2004,
			       b0, b1);
	} else if (ISUJISKANJISUP(b0, b1, 0xa1)) {
	    /* 0x8f prefix seen: tag the slot and wait for the third byte */
	    mp->multics[mp->intindex] = UJIS;
	    return 1;
	} else {
	    return 0;
	}
    } else {
	b2 = INBUF2(mp);
	if (!ISUJISKANJISUP(b0, b1, b2))
	    return 0;
	kanji = checkKANJI(mp->io.scs,
			   SCSJISX0213_2ND | SCSJISX0212_1990,
			   b1, b2);
    }

    if (kanji == 0) {
	/* undefined.  less ignore them */
	wrongchar(mp);
	return 1;
    }

    /* checkKANJI() result: character set above bit 16, two code bytes below. */
    mp->icharset = UJIS;
    mp->cs = (kanji >> 16) & 0x7fff;
    mp->multiint[mp->intindex] = (kanji >> 8) & 0x7f;
    mp->multics[mp->intindex] = mp->cs;
    mp->multiint[mp->intindex + 1] = kanji & 0x7f;
    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
    mp->intindex += 2;
    mp->startpos = mp->lastpos + 1;
    return 1;
}

/*
 * Internalize input stream encoded by SJIS encoding scheme.
 *
 * Return 1 if input is recognized well.
 * Return 0 if input is rejected.
 */
static int internalize_sjis(mp)
MULBUF *mp;
{
    if (mp->lastpos - mp->startpos + 1 == 1) {
	int c0 = INBUF(mp);
	if (ISSJISKANA(c0)) {
	    mp->cs = JISX0201KANA;
	    mp->icharset = SJIS;
	    mp->multiint[mp->intindex] = c0 & 0x7f;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->intindex += 1;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 2) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	if (ISSJISKANJI(c0, c1)) {
	    int ktype;

	    if (c0 < 0xf0) {
		/* JIS X 0213:2000 plane 1 or JIS X 0208:1997 */
		if (c0 <= 0x9f) c0 = (c0-0x81)*2 + 0x21;
		else            c0 = (c0-0xc1)*2 + 0x21;
		if (c1 <= 0x7e)      c1 -= 0x1f;
		else if (c1 <= 0x9e) c1 -= 0x20;
		else                 c1 -= 0x7e, c0 += 1;

		int ch = checkKANJI(mp->io.scs,
				    SCSJISX0208_1983 | SCSJISX0208_1990
				    | SCSJISX0213_2000 | SCSJISX0213_2004,
				    c0, c1);
		if (ch == 0) {
		    /* undefined.  less ignore them */
		    wrongchar(mp);
		    return 1;
		}
		mp->icharset = SJIS;
		mp->cs = (ch >> 16) & 0x7fff;
		mp->multiint[mp->intindex] = (ch >> 8) & 0x7f;
		mp->multics[mp->intindex] = mp->cs;
		mp->multiint[mp->intindex + 1] = ch & 0x7f;
		mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
		mp->intindex += 2;
		mp->startpos = mp->lastpos + 1;
		return 1;
	    } else {
		/* JIS X 0213:2000 plane 2 */
		if (c0 == 0xf0)
		    if (c1 <= 0x9e)  c0 = 0x21;
		    else             c0 = 0x27;
		else if (c0 == 0xf1) c0 = 0x23;
		else if (c0 == 0xf2)
		    if (c1 <= 0x9e)  c0 = 0x25;
		    else             c0 = 0x2b;
		else if (c0 == 0xf3) c0 = 0x2d;
		else if (c0 == 0xf4)
		    if (c1 <= 0x9e)  c0 = 0x2f;
		    else             c0 = 0x6d;
		else                 c0 = (c0 - 0xf5) * 2 + 0x6f;
		if (c1 <= 0x7e)      c1 -= 0x1f;
		else if (c1 <= 0x9e) c1 -= 0x20;
		else                 c1 -= 0x7e, c0 += 1;

		int ch = checkKANJI(mp->io.scs, SCSJISX0213_2ND, c0, c1);
		if (ch == 0) {
		    /* undefined.  less ignore them */
		    wrongchar(mp);
		    return 1;
		}
		mp->icharset = SJIS;
		mp->cs = (ch >> 16) & 0x7fff;
		mp->multiint[mp->intindex] = (ch >> 8) & 0x7f;
		mp->multics[mp->intindex] = mp->cs;
		mp->multiint[mp->intindex + 1] = ch & 0x7f;
		mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
		mp->intindex += 2;
		mp->startpos = mp->lastpos + 1;
		return 1;
	    }
	    /* data are recognized as kanji or wrong data, so return 1 */
	    return 1;
	}
    }
    /* return 0 because this data sequence is not matched to SJIS */
    return 0;
}

/*
 * Internalize input stream encoded by CP932 encoding scheme.
 *
 * Return 1 if input is recognized well.
 * Return 0 if input is rejected.
 */
static int internalize_cp932(mp)
MULBUF *mp;
{
    if (mp->lastpos - mp->startpos + 1 == 1) {
	int c0 = INBUF(mp);
	if (ISSJISKANA(c0)) {
	    mp->cs = JISX0201KANA;
	    mp->icharset = CP932;
	    mp->multiint[mp->intindex] = c0 & 0x7f;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->intindex += 1;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 2) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	if (ISSJISKANJI(c0, c1)) {
	    int ofs;

	    if (c0 <= 0x9f) c0 = (c0-0x81)*2 + 0x21;
	    else            c0 = (c0-0xc1)*2 + 0x21;
	    if (c1 <= 0x7e)      c1 -= 0x1f;
	    else if (c1 <= 0x9e) c1 -= 0x20;
	    else                 c1 -= 0x7e, c0 += 1;

	    ofs = (c0 - 0x21) * 94 + (c1 - 0x21);
	    if ((c0 < 0x30 && c0 != 0x2d
		 && ucode_kanji1[ofs] == ucode_cp932[ofs])
		|| (c0 >= 0x30 && c0 <= 0x74)) {
		int ch = checkKANJI(mp->io.scs,
				    SCSJISX0208_1983 | SCSJISX0208_1990,
				    c0, c1);
		if (ch == 0) {
		    /* undefined.  less ignore them */
		    wrongchar(mp);
		    return 1;
		}
		mp->icharset = CP932;
		mp->cs = (ch >> 16) & 0x7fff;
		mp->multiint[mp->intindex] = (ch >> 8) & 0x7f;
		mp->multics[mp->intindex] = mp->cs;
		mp->multiint[mp->intindex + 1] = ch & 0x7f;
		mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
		mp->intindex += 2;
		mp->startpos = mp->lastpos + 1;
		return 1;
	    } else {
		if (c0 > 0x7e) c0 -= 0x4f;
		int ch = checkKANJI(mp->io.scs, SCSCP932EX, c0, c1);
		if (ch == 0) {
		    /* undefined.  less ignore them */
		    wrongchar(mp);
		    return 1;
		}
		mp->icharset = CP932;
		mp->cs = (ch >> 16) & 0x7fff;
		mp->multiint[mp->intindex] = (ch >> 8) & 0x7f;
		mp->multics[mp->intindex] = mp->cs;
		mp->multiint[mp->intindex + 1] = ch & 0x7f;
		mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
		mp->intindex += 2;
		mp->startpos = mp->lastpos + 1;
		return 1;
	    }
	    /* data are recognized as kanji or wrong data, so return 1 */
	    return 1;
	}
    }
    /* return 0 because this data sequence is not matched to CP932 */
    return 0;
}

/*
 * Convert Unicode code point uc to a traditional code set and append
 * the result to the output buffer of mp.
 *
 * Only planes 0 and 2 have lookup tables (unicode0_map[] and
 * unicode2_map[]).  Single-byte sets are stored as one byte; multi-byte
 * sets are stored as a row byte and a cell byte derived from the table
 * value by division and remainder by 94.  The input position
 * (mp->startpos) is not touched.
 *
 * Return 1 if the input was converted.
 * Return 0 if the conversion failed; nothing is written in that case.
 */
static int ucs2codeset(mp, uc)
MULBUF *mp;
int uc;
{
    int plane;
    int code;
    int umap;
    int cs;
    int cc;

    /*
     * Negative values have no plane.  Rejecting them here keeps the
     * split below free of any dependence on how negative values are
     * masked or shifted.
     */
    if (uc < 0)
	return 0;
    plane = uc >> 16;
    code = uc & 0xffff;

    /*
     *  lookup unicode table
     */
    if (plane == 0)
	umap = unicode0_map[code];
    else if (plane == 2)
	umap = unicode2_map[code];
    else
	return 0;
    if (umap == U_error)
	return 0;

    /*
     *  check codeset
     */
    cs = UMAP_CS(umap);

    /*
     * buffering
     */
    cc = UMAP_CHAR(umap);
    switch (CS2TYPE(cs))
    {
    case TYPE_94_CHARSET:
    case TYPE_96_CHARSET:
	mp->icharset = UTF8;
	mp->multiint[mp->intindex] = cc & 0x7f;
	mp->multics[mp->intindex] = cs;
	mp->intindex += 1;
	return 1;
    case TYPE_94N_CHARSET:
    case TYPE_96N_CHARSET:
	mp->icharset = UTF8;
	mp->multiint[mp->intindex] = (cc / 94) + 0x21;
	mp->multics[mp->intindex] = cs;
	mp->multiint[mp->intindex + 1] = (cc % 94) + 0x21;
	mp->multics[mp->intindex + 1] = REST_MASK | cs;
	mp->intindex += 2;
	return 1;
    default:
	break;
    }

    return 0;
}

/*
 * Pairs of Unicode code points that map to one JISX0213KANJI1
 * character.
 *
 *   c1, c2  first and second byte of the character, each stored minus
 *           0x20 (ucs2codeset_combind() adds 0x20 when it emits them)
 *   u1      first code point of the pair
 *   u2      code point that must follow u1
 *
 * The table ends with an entry whose c1 is 0.
 */
static struct st_ucs_combining {
    int c1;
    int c2;
    int u1;
    int u2;
} jisx0213_comb[] = {
    { 4, 87, 0x304b, 0x309a, },
    { 4, 88, 0x304d, 0x309a, },
    { 4, 89, 0x304f, 0x309a, },
    { 4, 90, 0x3051, 0x309a, },
    { 4, 91, 0x3053, 0x309a, },
    { 5, 87, 0x30ab, 0x309a, },
    { 5, 88, 0x30ad, 0x309a, },
    { 5, 89, 0x30af, 0x309a, },
    { 5, 90, 0x30b1, 0x309a, },
    { 5, 91, 0x30b3, 0x309a, },
    { 5, 92, 0x30bb, 0x309a, },
    { 5, 93, 0x30c4, 0x309a, },
    { 5, 94, 0x30c8, 0x309a, },
    { 6, 88, 0x31f7, 0x309a, },
    { 11, 36, 0x00e6, 0x0300, },
    { 11, 40, 0x0254, 0x0300, },
    { 11, 41, 0x0254, 0x0301, },
    { 11, 42, 0x028c, 0x0300, },
    { 11, 43, 0x028c, 0x0301, },
    { 11, 44, 0x0259, 0x0300, },
    { 11, 45, 0x0259, 0x0301, },
    { 11, 46, 0x025a, 0x0300, },
    { 11, 47, 0x025a, 0x0301, },
    { 11, 69, 0x02e9, 0x02e5 },
    { 11, 70, 0x02e5, 0x02e9 },
    { 0, 0, 0, 0 },		/* terminator */
};

/*
 * Code point held back because it may be the first half of a
 * jisx0213_comb[] pair; 0 when nothing is held back.
 */
static int pending_ucs = 0;

/*
 * Convert and emit the held-back code point, if there is one.
 *
 * Return 1 if a code point was pending, 0 otherwise.  The result of the
 * conversion itself is not reported.
 */
static int
flush_pending_ucs(mp)
MULBUF *mp;
{
    if (!pending_ucs)
	return 0;

    ucs2codeset(mp, pending_ucs);
    pending_ucs = 0;
    return 1;
}

/*
 * Convert code point uc like ucs2codeset(), but join the code-point
 * pairs listed in jisx0213_comb[] into one JISX0213KANJI1 character.
 *
 *   uc < 0   drop any held-back code point; return 1.
 *   uc == 0  emit any held-back code point; return 1.
 *   uc > 0   when neither SCSJISX0213_2000 nor SCSJISX0213_2004 is
 *            enabled, convert uc directly.  Otherwise complete a pending
 *            pair, hold uc back if it can start a pair, or convert it.
 *
 * Return 1 if uc was consumed (converted or held back), else the result
 * of ucs2codeset().
 *
 * NOTE(review): the direct path returns without advancing mp->startpos,
 * while the final path advances it before the conversion is known to
 * have succeeded -- confirm that the callers expect both.
 */
static int
ucs2codeset_combind(mp, uc)
MULBUF *mp;
int uc;
{
    struct st_ucs_combining *entry;

    if (uc < 0) {
	pending_ucs = 0;
	return 1;
    }

    if (uc == 0) {
	(void) flush_pending_ucs(mp);
	return 1;
    }

    if (!(mp->io.scs & (SCSJISX0213_2000 | SCSJISX0213_2004)))
	return ucs2codeset(mp, uc);

    if (pending_ucs) {
	/* Does uc complete a pair with the held-back code point? */
	for (entry = jisx0213_comb; entry->c1; ++entry) {
	    if (entry->u1 != pending_ucs || entry->u2 != uc)
		continue;

	    mp->icharset = UTF8;
	    mp->cs = JISX0213KANJI1;
	    mp->multiint[mp->intindex] = entry->c1 + 0x20;
	    mp->multics[mp->intindex] = JISX0213KANJI1;
	    mp->multiint[mp->intindex + 1] = entry->c2 + 0x20;
	    mp->multics[mp->intindex + 1] = REST_MASK | JISX0213KANJI1;
	    mp->intindex += 2;
	    mp->startpos = mp->lastpos + 1;
	    pending_ucs = 0;
	    return 1;
	}
	/* No pair: emit the held-back code point alone, then handle uc. */
	(void) flush_pending_ucs(mp);
    }

    /* Hold uc back if it can be the first half of a pair. */
    for (entry = jisx0213_comb; entry->c1; ++entry) {
	if (entry->u1 != uc)
	    continue;

	mp->startpos = mp->lastpos + 1;
	pending_ucs = uc;
	return 1;
    }

    mp->startpos = mp->lastpos + 1;
    return ucs2codeset(mp, uc);
}

/*
 * Internalize input stream encoded by UTF8 encoding scheme.
 *
 * Return 1 if input is recognized well.
 * Return 0 if input is rejected.
 */
static int internalize_utf8(mp)
MULBUF *mp;
{
    int uc;
    int cs;

    if (mp->lastpos - mp->startpos + 1 == 1) {
	/* do nothing.  return 1 to get next byte */
	return 1;
    } else if (mp->lastpos - mp->startpos + 1 == 2) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	if (ISUTF8_2(c0, c1)) {
	    uc = UTF8_2(c0, c1);
	    if (ucs2codeset_combind(mp, uc))
		return 1;
	    if (!(mp->io.scs & SCSUTF8)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    cs = get_utfwidth(uc);
	    if (CSISWRONG(cs)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    mp->cs = cs;
	    mp->icharset = UTF8;
	    mp->multiint[mp->intindex] = c0;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->multiint[mp->intindex + 1] = c1;
	    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
	    mp->intindex += 2;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 3) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	int c2 = INBUF2(mp);
	if (ISUTF8_3(c0, c1, c2)) {
	    uc = UTF8_3(c0, c1, c2);
	    if (ucs2codeset_combind(mp, uc))
		return 1;
	    if (!(mp->io.scs & SCSUTF8)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    cs = get_utfwidth(uc);
	    if (CSISWRONG(cs)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    mp->cs = cs;
	    mp->icharset = UTF8;
	    mp->multiint[mp->intindex] = c0;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->multiint[mp->intindex + 1] = c1;
	    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 2] = c2;
	    mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
	    mp->intindex += 3;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2)) {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 4) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	int c2 = INBUF2(mp);
	int c3 = INBUF3(mp);
	if (ISUTF8_4(c0, c1, c2, c3)) {
	    uc = UTF8_4(c0, c1, c2, c3);
	    if (ucs2codeset_combind(mp, uc))
		return 1;
	    if (!(mp->io.scs & SCSUTF8)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    cs = get_utfwidth(uc);
	    if (CSISWRONG(cs)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    mp->cs = cs;
	    mp->icharset = UTF8;
	    mp->multiint[mp->intindex] = c0;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->multiint[mp->intindex + 1] = c1;
	    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 2] = c2;
	    mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 3] = c3;
	    mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
	    mp->intindex += 4;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) &&
		   ISUTF8_REST(c3)) {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 5) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	int c2 = INBUF2(mp);
	int c3 = INBUF3(mp);
	int c4 = INBUF4(mp);
	if (ISUTF8_5(c0, c1, c2, c3, c4)) {
	    uc = UTF8_5(c0, c1, c2, c3, c4);
	    if (ucs2codeset_combind(mp, uc))
		return 1;
	    if (!(mp->io.scs & SCSUTF8)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    cs = get_utfwidth(uc);
	    if (CSISWRONG(cs)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    mp->cs = cs;
	    mp->icharset = UTF8;
	    mp->multiint[mp->intindex] = c0;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->multiint[mp->intindex + 1] = c1;
	    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 2] = c2;
	    mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 3] = c3;
	    mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 4] = c4;
	    mp->multics[mp->intindex + 4] = REST_MASK | mp->cs;
	    mp->intindex += 5;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) &&
		   ISUTF8_REST(c3) && ISUTF8_REST(c4)) {
	    /* do nothing.  return 1 to get next byte */
	    return 1;
	}
    } else if (mp->lastpos - mp->startpos + 1 == 6) {
	int c0 = INBUF0(mp);
	int c1 = INBUF1(mp);
	int c2 = INBUF2(mp);
	int c3 = INBUF3(mp);
	int c4 = INBUF4(mp);
	int c5 = INBUF5(mp);
	if (ISUTF8_6(c0, c1, c2, c3, c4, c5)) {
	    uc = UTF8_6(c0, c1, c2, c3, c4, c5);
	    if (ucs2codeset_combind(mp, uc))
		return 1;
	    if (!(mp->io.scs & SCSUTF8)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    cs = get_utfwidth(uc);
	    if (CSISWRONG(cs)) {
		wrongucs(mp, uc);
		return 1;
	    }
	    mp->cs = cs;
	    mp->icharset = UTF8;
	    mp->multiint[mp->intindex] = c0;
	    mp->multics[mp->intindex] = mp->cs;
	    mp->multiint[mp->intindex + 1] = c1;
	    mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 2] = c2;
	    mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 3] = c3;
	    mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 4] = c4;
	    mp->multics[mp->intindex + 4] = REST_MASK | mp->cs;
	    mp->multiint[mp->intindex + 5] = c5;
	    mp->multics[mp->intindex + 5] = REST_MASK | mp->cs;
	    mp->intindex += 6;
	    mp->startpos = mp->lastpos + 1;
	    return 1;
	}
    }
    /* return 0 because this data sequence is not matched to UTF8 */
    return 0;
}

#endif

/*
 * Try to turn the bytes buffered between mp->startpos and mp->lastpos
 * into internal characters.
 *
 * The first half of the function handles a buffer that holds exactly one
 * byte; the second half handles a buffer to which a further byte has
 * just been added.  A byte that cannot be accepted is reported through
 * wrongcs1() and, when more than that byte may be buffered, the rest is
 * parsed again with multi_reparse().
 */
static void internalize(mp)
MULBUF *mp;
{
    int c = INBUF(mp);

    if (mp->lastpos - mp->startpos + 1 == 1) {
	/* no conversion requested for this half of the code space */
	if ((c <= 0x7f && mp->io.input == ESNOCONV) ||
	    (c >= 0x80 && mp->io.inputr == ESNOCONV)) {
#if JAPANESE
	    mp->sequence_counter = 0;
	    flush_pending_ucs(mp);
#endif
	    if (control_char(c)) {
		    wrongcs1(mp);
	    } else {
		    noconv1(mp);
	    }
	    return;
	} else if (c >= 0x80 && mp->io.inputr == ESNONE) {
	    /* no encoding is accepted for bytes with the high bit set */
#if JAPANESE
	    mp->sequence_counter = 0;
	    flush_pending_ucs(mp);
#endif
	    wrongcs1(mp);
	    return;
	}

	mp->cs = ASCII;
	if (c < 0x20) {
#if JAPANESE
	    mp->sequence_counter = 0;
	    flush_pending_ucs(mp);
#endif
	    wrongcs1(mp);
	    return;
	} else if (c <= 0x7f ||
		   ((mp->io.inputr & ESISO8)
		    && (0xa0 <= c && c <= 0xff)
		    && (mp->ms->sg != WRONGPLANE
			|| !CSISWRONG(mp->ms->gs[mp->ms->gr])))) {
	    /*
	     * 7 bit byte, or an 8 bit byte of ISO 2022 for which a
	     * single shift is active or a usable set is invoked to GR.
	     */
#if JAPANESE
	    mp->sequence_counter = 0;
	    flush_pending_ucs(mp);
#endif
	    /*
	     * Decide current character set.
	     */
	    mp->cs = FINDCS(mp, c);

	    /*
	     * Check cs that fit for output code set.
	     */
	    /* JIS cannot output JISX0212, JISX0213_2, or ISO2022 */
	    if ((output == ESJIS83) &&
		mp->cs != ASCII &&
		mp->cs != JISX0201KANA &&
		mp->cs != JISX0201ROMAN &&
		mp->cs != JISX0208_78KANJI &&
		mp->cs != JISX0208KANJI &&
		mp->cs != JISX0208_90KANJI &&
		mp->cs != JISX0213KANJI1 &&
		mp->cs != JISX02132004KANJI1) {
		wrongcs1(mp);
		multi_reparse(mp);
		return;
	    }

	    /* UJIS cannot output regular ISO2022 except JIS */
	    if ((output == ESUJIS) &&
		mp->cs != ASCII &&
		mp->cs != JISX0201KANA &&
		mp->cs != JISX0201ROMAN &&
		mp->cs != JISX0208_78KANJI &&
		mp->cs != JISX0208KANJI &&
		mp->cs != JISX0208_90KANJI &&
		mp->cs != JISX0212KANJISUP &&
		mp->cs != JISX0213KANJI1 &&
		mp->cs != JISX0213KANJI2 &&
		mp->cs != JISX02132004KANJI1) {
		wrongcs1(mp);
		multi_reparse(mp);
		return;
	    }

	    /* SJIS cannot output JISX0212 or ISO2022 */
	    if ((output == ESSJIS) &&
		mp->cs != ASCII &&
		mp->cs != JISX0201KANA &&
		mp->cs != JISX0201ROMAN &&
		mp->cs != JISX0208_78KANJI &&
		mp->cs != JISX0208KANJI &&
		mp->cs != JISX0208_90KANJI &&
		mp->cs != JISX0213KANJI1 &&
		mp->cs != JISX0213KANJI2 &&
		mp->cs != JISX02132004KANJI1) {
		wrongcs1(mp);
		multi_reparse(mp);
		return;
	    }

	    /* CP932 cannot output regular ISO2022 except JIS */
	    if ((output == ESCP932) &&
		mp->cs != ASCII &&
		mp->cs != JISX0201KANA &&
		mp->cs != JISX0201ROMAN &&
		mp->cs != JISX0208_78KANJI &&
		mp->cs != JISX0208KANJI &&
		mp->cs != JISX0208_90KANJI &&
		mp->cs != JISX0212KANJISUP &&
		mp->cs != JISX0213KANJI1 &&
		mp->cs != JISX0213KANJI2 &&
		mp->cs != JISX02132004KANJI1 &&
		mp->cs != CP932) {
		wrongcs1(mp);
		multi_reparse(mp);
		return;
	    }

	    /* remember the last non-ASCII character set that was seen */
	    if (mp->cs != ASCII)
		mp->icharset = mp->cs;

	    internalize_iso(mp);
	    return;
	} else if (control_char(c)) {
#if JAPANESE
	    mp->sequence_counter = 0;
	    flush_pending_ucs(mp);
#endif
	    wrongcs1(mp);
	    return;
	}
#if JAPANESE
	if (mp->priority == PSJIS && ISSJISKANA(c)) {
	    if (mp->io.inputr & ESUJIS) {
		/*
		 * Count consecutive bytes of this kind.  The counter is
		 * reset whenever an odd-numbered byte is not 0xa4.
		 */
		mp->sequence_counter++;
		if (mp->sequence_counter % 2 == 1 &&
		    INBUF0(mp) != 0xa4) /* ???? */
		{
		    mp->sequence_counter = 0;
		}
		if (mp->sequence_counter >= 6)
		    /*
		     * It looks like a sequence of UJIS
		     * hiragana.  Thus we give priority
		     * to not PSJIS.
		     */
		    mp->priority = PUJIS;
	    }
	    flush_pending_ucs(mp);
	    if (mp->io.inputr & ESCP932)
		internalize_cp932(mp);
	    else if (mp->io.inputr & ESSJIS)
		internalize_sjis(mp);
	    return;
	} else if (mp->io.inputr & (ESUJIS | ESSJIS | ESUTF8 | ESCP932)) {
	    /*
	     * May be the first byte of a multi byte character.
	     * Keep it buffered and wait for the next byte.
	     */
	    mp->sequence_counter = 0;
	    return;
	}
	mp->sequence_counter = 0;
#endif
	wrongcs1(mp);
	return;
    }

    /*
     * From here on more than one byte is buffered and c is the byte
     * that was added last.
     */
#if JAPANESE
    assert(mp->sequence_counter == 0);
#endif
    /*
     * NOTE(review): the flush_pending_ucs() calls below are not wrapped
     * in #if JAPANESE, unlike the ones in the single byte part above --
     * confirm that flush_pending_ucs() exists in builds without JAPANESE.
     */
    if (c < 0x20) {
	flush_pending_ucs(mp);
	wrongcs1(mp);
	multi_reparse(mp);
	return;
    } else if (mp->cs != ASCII &&
	       (c <= 0x7f ||
		((mp->io.inputr & ESISO8)
		 && (0xa0 <= c && c <= 0xff)
		 && (mp->ms->sg != WRONGPLANE
		     || !CSISWRONG(mp->ms->gs[mp->ms->gr]))))) {
	/*
	 * Following byte of an ISO 2022 multi byte character.  It must
	 * belong to the same character set as the bytes before it.
	 */
	flush_pending_ucs(mp);
	if (mp->cs != FINDCS(mp, c)) {
	    wrongcs1(mp);
	    multi_reparse(mp);
	} else {
	    internalize_iso(mp);
	}
	return;
    } else if (control_char(c)) {
	flush_pending_ucs(mp);
	wrongcs1(mp);
	multi_reparse(mp);
	return;
    }
#if JAPANESE
    if (mp->lastpos - mp->startpos + 1 == 2) {
	/* try the encoding that currently has priority first */
	if (mp->priority == PSJIS) {
	    if (mp->io.inputr & ESCP932) {
		if (internalize_cp932(mp))
		    return;
	    } else {
		if (internalize_sjis(mp)) {
		    return;
		}
	    }
	} else if (mp->priority == PUJIS) {
	    if (internalize_ujis(mp)) {
		return;
	    }
	} else if (mp->priority == PUTF8) {
	    if (internalize_utf8(mp)) {
		return;
	    }
	    flush_pending_ucs(mp);
	}

	/*
	 * Then try every enabled encoding in turn.  The one that
	 * accepts the bytes gets priority from now on.
	 */
	if (mp->io.inputr & ESUTF8) {
	    if (internalize_utf8(mp)) {
		mp->priority = PUTF8;
		return;
	    }
	    flush_pending_ucs(mp);
	}
	if (mp->io.inputr & ESUJIS) {
	    if (internalize_ujis(mp)) {
		mp->priority = PUJIS;
		return;
	    }
	}
	if (mp->io.inputr & ESSJIS) {
	    flush_pending_ucs(mp);
	    if (internalize_sjis(mp)) {
		mp->priority = PSJIS;
		return;
	    }
	}
	if (mp->io.inputr & ESCP932) {
	    flush_pending_ucs(mp);
	    if (internalize_cp932(mp)) {
		mp->priority = PSJIS;
		return;
	    }
	}
    } else if (mp->lastpos - mp->startpos + 1 == 3) {
	/* with three bytes buffered only UJIS and UTF8 are tried */
	if (mp->priority == PUJIS) {
	    if (internalize_ujis(mp)) {
		return;
	    }
	} else if (mp->priority == PUTF8) {
	    if (internalize_utf8(mp)) {
		return;
	    }
	    flush_pending_ucs(mp);
	}

	if (mp->io.inputr & ESUTF8) {
	    if (internalize_utf8(mp)) {
		mp->priority = PUTF8;
		return;
	    }
	    flush_pending_ucs(mp);
	}
	if (mp->io.inputr & ESUJIS) {
	    if (internalize_ujis(mp)) {
		mp->priority = PUJIS;
		return;
	    }
	}
    } else if (mp->lastpos - mp->startpos + 1 <= 6) {
	/* four to six bytes buffered: only UTF8 is tried */
	if (mp->io.inputr & ESUTF8) {
	    if (internalize_utf8(mp)) {
		mp->priority = PUTF8;
		return;
	    }
	    flush_pending_ucs(mp);
	}
    }
#endif
    /* nobody accepted the buffered bytes: reject the first, retry the rest */
    wrongcs1(mp);
    multi_reparse(mp);
}

/*
 * Check routines
 */

/*
 * Check the final byte `c' of a designation for a character set of the
 * given `type'.
 *
 * The byte is accepted when it names a set enabled in mp->io.scs
 * (ASCII is always accepted as a 94 character set), or when
 * SCSOTHERISO is enabled and the byte lies in 0x30..0x7e.
 *
 * Return 0 if accepted: the resulting character set, including a
 * revision announced earlier through mp->ms->irr, is stored in *plane,
 * mp->ms->irr is cleared and the escape sequence state returns to NOESC.
 * Return -1 if rejected; nothing is changed in that case.
 */
static int check_ft(mp, c, type, plane)
MULBUF *mp;
register int c;
int type;
int *plane;
{
    int accepted = 0;

    if (type == TYPE_94_CHARSET) {
	if (c == 'B') {
	    /* ASCII */
	    accepted = 1;
	} else if (c == 'I' || c == 'J') {
	    /* JIS X 0201 right half (Katakana) or left half (Roman) */
	    accepted = (mp->io.scs & SCSJISX0201_1976) != 0;
	}
    } else if (type == TYPE_94N_CHARSET) {
	switch (c) {
	case '@': /* JIS C 6226-1978 */
	    accepted = (mp->io.scs & SCSJISC6226_1978) != 0;
	    break;
	case 'B': /* JIS X 0208-1983, JIS X 0208:1990, or JIS X 0208:1997 */
	    accepted = (mp->io.scs &
			(SCSJISX0208_1983 | SCSJISX0208_1990)) != 0;
	    break;
	case 'D': /* JIS X 0212:1990 */
	    accepted = (mp->io.scs & SCSJISX0212_1990) != 0;
	    break;
	case 'O': /* JIS X 0213:2000 plane 1 */
	    accepted = (mp->io.scs & SCSJISX0213_2000) != 0;
	    break;
	case 'P': /* JIS X 0213:2000 plane 2 or JIS X 0213:2004 plane 2 */
	    accepted = (mp->io.scs &
			(SCSJISX0213_2000 | SCSJISX0213_2004)) != 0;
	    break;
	case 'Q': /* JIS X 0213:2004 plane 1 */
	    accepted = (mp->io.scs & SCSJISX0213_2004) != 0;
	    break;
	}
    }

    if (!accepted &&
	(mp->io.scs & SCSOTHERISO) && 0x30 <= c && c <= 0x7e) {
	/* accepting all other ISO, so OK */
	accepted = 1;
    }

    if (!accepted)
	return (-1);

    *plane = (mp->ms->irr ? IRR2CS(mp->ms->irr) : 0) | TYPE2CS(type) | FT2CS(c);
    mp->ms->irr = 0;
    mp->eseq = NOESC;
    return (0);
}

/*
 * Check the final byte `c' of an ESC '&' sequence.
 *
 * Return 0 if c lies in 0x40..0x7e: the revision is remembered in
 * mp->ms->irr and the escape sequence state returns to NOESC.
 * Return -1 otherwise.
 */
static int check_irr(mp, c)
MULBUF *mp;
register int c;
{
    if (c < 0x40 || c > 0x7e)
	return (-1);

    mp->ms->irr = CODE2IRR(c);
    mp->eseq = NOESC;
    return (0);
}

/*
 * Tell the control character table whether 0177 and 0377 are control
 * characters, depending on the character set that is currently in effect
 * for the left (0177) and the right (0377) half: they are not control
 * characters for a 96 or 96N character set and they are for a 94 or 94N
 * character set.  An active single shift (sg) takes precedence over gl
 * and gr.
 *
 * Nothing is done while an escape sequence is being read.
 */
static void fix_status_for_escape_sequence(mp)
MULBUF *mp;
{
    int shifted;

    if (mp->eseq != NOESC)
	return;

    /* left half */
    shifted = ISVALIDPLANE(mp, sg);
    switch (CS2TYPE(shifted ? PLANE2CS(mp, sg) : PLANE2CS(mp, gl))) {
    case TYPE_96_CHARSET:
    case TYPE_96N_CHARSET:
	change_control_char(0177, 0);
	break;
    case TYPE_94_CHARSET:
    case TYPE_94N_CHARSET:
	change_control_char(0177, 1);
	break;
    }

    /* right half */
    shifted = ISVALIDPLANE(mp, sg);
    switch (CS2TYPE(shifted ? PLANE2CS(mp, sg) : PLANE2CS(mp, gr))) {
    case TYPE_96_CHARSET:
    case TYPE_96N_CHARSET:
	change_control_char(0377, 0);
	break;
    case TYPE_94_CHARSET:
    case TYPE_94N_CHARSET:
	change_control_char(0377, 1);
	break;
    }
}

/*
 * Advance the escape sequence state machine (mp->eseq) by the byte that
 * was buffered last.
 *
 * Return 0 if the byte was handled here: it continued or completed an
 * escape sequence or a shift function, or a broken sequence was detected
 * and already dealt with.
 * Return -1 if no escape sequence is in progress and the byte does not
 * start one; the caller then has to treat it as a character.
 */
static int check_escape_sequence(mp)
MULBUF *mp;
{
    int c = INBUF(mp);

    switch (mp->eseq) {
    case ESC_:
	/* byte directly after ESC */
	switch (c) {
	case '$': mp->eseq = ESC_2_4; break;
	case '&': mp->eseq = ESC_2_6; break;
	case '(': mp->eseq = ESC_2_8; break;
	case ')': mp->eseq = ESC_2_9; break;
	case '*': mp->eseq = ESC_2_10; break;
	case '+': mp->eseq = ESC_2_11; break;
	case ',': mp->eseq = ESC_2_12; break;
	case '-': mp->eseq = ESC_2_13; break;
	case '.': mp->eseq = ESC_2_14; break;
	case '/': mp->eseq = ESC_2_15; break;
	case 'N': mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/break;
	case 'O': mp->ms->sg = 3; mp->eseq = NOESC; /*SS3*/break;
	case 'n': mp->ms->gl = 2; mp->eseq = NOESC; break;
	case 'o': mp->ms->gl = 3; mp->eseq = NOESC; break;
	/* the functions changing gr are accepted only with ESISO8 input */
	case '|': if (!(mp->io.inputr & ESISO8)) goto wrong;
		  mp->ms->gr = 3; mp->eseq = NOESC; break;
	case '}': if (!(mp->io.inputr & ESISO8)) goto wrong;
		  mp->ms->gr = 2; mp->eseq = NOESC; break;
	case '~': if (!(mp->io.inputr & ESISO8)) goto wrong;
		  mp->ms->gr = 1; mp->eseq = NOESC; break;
	case '[': mp->eseq = ESC_5_11; break;
	default:  goto wrong;
	}
	break;
    case ESC_2_4:
	/* after ESC '$': multi byte character set designation */
	switch (c) {
	case '(': mp->eseq = ESC_2_4_8; break;
	case ')': mp->eseq = ESC_2_4_9; break;
	case '*': mp->eseq = ESC_2_4_10; break;
	case '+': mp->eseq = ESC_2_4_11; break;
	case '-': mp->eseq = ESC_2_4_13; break;
	case '.': mp->eseq = ESC_2_4_14; break;
	case '/': mp->eseq = ESC_2_4_15; break;
	/* short form without intermediate byte, designates to gs[0] */
	case '@':
	case 'A':
	case 'B': if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[0])) == 0)
			break;
	/* intentionally falls through to default when check_ft() rejects */
	default:  goto wrong;
	}
	break;
    case ESC_2_6:
	if (check_irr(mp, c) == 0) break;
	goto wrong;
    /* 94 character sets to gs[0]..gs[3] */
    case ESC_2_8:
	if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[0])) == 0) break;
	goto wrong;
    case ESC_2_9:
	if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[1])) == 0) break;
	goto wrong;
    case ESC_2_10:
	if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[2])) == 0) break;
	goto wrong;
    case ESC_2_11:
	if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[3])) == 0) break;
	goto wrong;
    /* 96 character sets to gs[0]..gs[3] */
    case ESC_2_12:
	if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[0])) == 0) break;
	goto wrong;
    case ESC_2_13:
	if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[1])) == 0) break;
	goto wrong;
    case ESC_2_14:
	if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[2])) == 0) break;
	goto wrong;
    case ESC_2_15:
	if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[3])) == 0) break;
	goto wrong;
    /* 94N character sets to gs[0]..gs[3] */
    case ESC_2_4_8:
	if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[0])) == 0) break;
	goto wrong;
    case ESC_2_4_9:
	if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[1])) == 0) break;
	goto wrong;
    case ESC_2_4_10:
	if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[2])) == 0) break;
	goto wrong;
    case ESC_2_4_11:
	if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[3])) == 0) break;
	goto wrong;
    /* 96N character sets to gs[1]..gs[3] */
    case ESC_2_4_13:
	if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[1])) == 0) break;
	goto wrong;
    case ESC_2_4_14:
	if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[2])) == 0) break;
	goto wrong;
    case ESC_2_4_15:
	if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[3])) == 0) break;
	goto wrong;
    case ESC_5_11:
	/* after ESC '[': keep buffering until the ending byte shows up */
	if (mp->lastpos - 20 > mp->startpos) /* ESC sequence too long */
	    goto wrong;
	if (is_ansi_end(c))
	    goto disp_esc;
	if (!is_ansi_middle(c))
	    goto wrong;
	break;
    case NOESC:
	/*
	 * This sequence is wrong if we buffered some data.
	 */
	if (mp->lastpos > mp->startpos) {
	    switch (c) {
	    case 0033:
	    case 0016:
	    case 0017:
	    case 0031: goto wrong;
	    default:   goto wrongone;
	    }
	}
	/*
	 * Nothing is buffered.  So, check this sequence.
	 */
	switch (c) {
	case 0033: mp->eseq = ESC_; break;
	case 0016: mp->ms->gl = 1; mp->eseq = NOESC; break;
	case 0017: mp->ms->gl = 0; mp->eseq = NOESC; break;
	case 0031: mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/ break;
	/* 8 bit single shifts need ESISO8 input and a usable set to shift to */
	case 0216:
	    if (!(mp->io.inputr & ESISO8) || CSISWRONG(mp->ms->gs[2]))
		goto wrongone;
	    mp->ms->sg = 2;
	    mp->eseq = NOESC; /*SS2*/
	    break;
	case 0217:
	    if (!(mp->io.inputr & ESISO8) || CSISWRONG(mp->ms->gs[3]))
		goto wrongone;
	    mp->ms->sg = 3;
	    mp->eseq = NOESC; /*SS3*/
	    break;
	default:   goto wrongone;
	}
	break;
    default:
	assert(0);
    }
    if (mp->eseq == NOESC) {
	/* sequence completed: apply it and drop its bytes from the buffer */
	fix_status_for_escape_sequence(mp);
	mp->startpos = mp->lastpos + 1;
	return (0);
    }
    /* sequence still incomplete: its bytes stay buffered */
    return (0);
disp_esc:
    /* complete ESC '[' sequence: its first byte goes through wrongcs1() */
    if (mp->eseq != NOESC) {
	mp->eseq = NOESC;
	fix_status_for_escape_sequence(mp);
    }
    wrongcs1(mp);
    multi_reparse(mp);
    return (0);
wrong:
    /*
     * Broken sequence: emit its first byte marked as WRONG_ESC and parse
     * the remaining buffered bytes again.
     */
    if (mp->eseq != NOESC) {
	mp->eseq = NOESC;
	fix_status_for_escape_sequence(mp);
    }
    mp->multiint[mp->intindex] = INBUF0(mp);
    mp->multics[mp->intindex] = WRONG_ESC;
    mp->intindex++;
    mp->startpos++;
    multi_reparse(mp);
    return (0);
wrongone:
    /* not part of any escape sequence, the caller has to handle the byte */
    assert(mp->eseq == NOESC);
    return (-1);
}

/*
 * Named presets accepted by set_planeset().
 *
 * `name' is compared without regard to case.  `planeset' is the list of
 * escape sequences that set_planeset() runs to obtain the default
 * contents of the planes; "\\e" (backslash followed by 'e') stands for
 * the ESC character.  The table ends with an entry whose name is NULL.
 */
struct planeset {
    char *name;		/* preset name */
    char *planeset;	/* escape sequences designating the planes */
} planesets[] = {
    { "ascii",		""	},
    { "ctext",		"\\e-A"	},
    { "latin1",		"\\e-A"	},
    { "iso8859-1",	"\\e-A"	},
    { "latin2",		"\\e-B"	},
    { "iso8859-2",	"\\e-B"	},
    { "latin3",		"\\e-C"	},
    { "iso8859-3",	"\\e-C"	},
    { "latin4",		"\\e-D"	},
    { "iso8859-4",	"\\e-D"	},
    { "cyrillic",	"\\e-L"	},
    { "iso8859-5",	"\\e-L"	},
    { "arabic",		"\\e-G"	},
    { "iso8859-6",	"\\e-G"	},
    { "greek",		"\\e-F"	},
    { "iso8859-7",	"\\e-F"	},
    { "hebrew",		"\\e-H"	},
    { "iso8859-8",	"\\e-H"	},
    { "latin5",		"\\e-M"	},
    { "iso8859-9",	"\\e-M"	},
    { "latin6",		"\\e-V"	},
    { "iso8859-10",	"\\e-V"	},
    { "thai",		"\\e-T"	},
    { "iso8859-11",	"\\e-T"	},
    { "latin7",		"\\e-Y"	},
    { "iso8859-13",	"\\e-Y"	},
    { "latin8",		"\\e-_"	},
    { "iso8859-14",	"\\e-_"	},
    { "latin9",		"\\e-b"	},
    { "iso8859-15",	"\\e-b"	},
    { "latin10",	"\\e-f"	},
    { "iso8859-16",	"\\e-f"	},
    { "jisx0201",	"\\e(J\\e)I" },
    { "japanese",	"\\e$)B\\e*I\\e$+D" },
    { "ujis",		"\\e$)B\\e*I\\e$+D" },
    { "euc",		"\\e$)B\\e*I\\e$+D" },
    { "euc-jisx0213",	"\\e$)O\\e*I\\e$+P" },
    { NULL,		"" }
};

/*
 * Set the default contents of the four planes (def_gs[0..3]).
 *
 * `name' is either the name of an entry of planesets[] (compared without
 * regard to case) or directly a list of escape sequences in which "\e"
 * or "\E" (backslash plus letter) stands for the ESC character.
 *
 * The escape sequences are run through check_escape_sequence() on a
 * temporary MULBUF and the resulting planes become the defaults.
 *
 * Return 0 on success.
 * Return -1 if name is NULL or if the string contains anything that is
 * not an accepted escape sequence; the defaults are left untouched then.
 */
int set_planeset(name)
register char *name;
{
    register struct planeset *p;
    MULBUF *mp;
    int ret;
    int failed = 0;

    if (name == NULL) {
	return -1;
    }
    for (p = planesets; p->name != NULL; p++) {
	if (strcasecmp(name, p->name) == 0) {
	    name = p->planeset;
	    break;
	}
    }
    mp = new_multibuf();
    init_priority(mp);
    while (*name) {
	++mp->lastpos;
	if (*name == '\\' &&
	    (*(name + 1) == 'e' || *(name + 1) == 'E')) {
	    /* "\e" in the string means ESC */
	    INBUF(mp) = '\033';
	    name += 2;
	} else {
	    INBUF(mp) = *name++;
	}
	ret = check_escape_sequence(mp);
	/*
	 * ret < 0: the byte is not part of an escape sequence.
	 * intindex > 0: the parser produced output, i.e. it rejected
	 * a broken sequence.
	 */
	if (ret < 0 || mp->intindex > 0) {
	    failed = 1;
	    break;
	}
    }
    if (!failed) {
	def_gs[0] = mp->ms->gs[0];
	def_gs[1] = mp->ms->gs[1];
	def_gs[2] = mp->ms->gs[2];
	def_gs[3] = mp->ms->gs[3];
    }
    /*
     * new_multibuf() allocates the m_status separately, so it has to be
     * released separately as well.
     */
    free(mp->ms);
    free(mp);
    return failed ? -1 : 0;
}

/*
 * Record the default character sets (scs), the default input encodings
 * for the left (input) and right (inputr) half and the output encoding
 * (out).  new_multibuf() copies the defaults into every new MULBUF.
 *
 * When UTF-8 is among the right half input encodings the unicode map is
 * built as well.
 */
void init_def_scs_es(scs, input, inputr, out)
SETCHARSET scs;
ENCSET input;
ENCSET inputr;
ENCSET out;
{
    output = out;
    def_scs = scs;
    def_input = input;
    def_inputr = inputr;

    if ((inputr & ESUTF8) != 0) {
	make_unicode_map(scs, output & ESUTF8);
    }
}

/*
 * Set the default priority (def_priority) that init_priority() uses when
 * both a SJIS flavour and UJIS are enabled as input encodings.
 * pri must be PUJIS, PSJIS or PUTF8.
 *
 * Does nothing unless compiled with JAPANESE.
 */
void init_def_priority(pri)
J_PRIORITY pri;
{
#if JAPANESE
    assert(pri == PUJIS || pri == PSJIS || pri == PUTF8);
    def_priority = pri;
#endif
}

/*
 * Choose the encoding that is tried first for this buffer, based on the
 * right half input encodings that are enabled:
 *   SJIS or CP932 together with UJIS -> the configured default priority
 *   otherwise UTF8, then UJIS, then SJIS or CP932 -> that encoding
 *   none of them                     -> PNONE
 * The sequence counter is reset as well.
 *
 * Does nothing unless compiled with JAPANESE.
 */
void init_priority(mp)
MULBUF *mp;
{
#if JAPANESE
    int sjis_family = (mp->io.inputr & (ESSJIS | ESCP932)) != 0;
    int ujis = (mp->io.inputr & ESUJIS) != 0;

    if (sjis_family && ujis) {
	mp->priority = def_priority;
    } else if (mp->io.inputr & ESUTF8) {
	mp->priority = PUTF8;
    } else if (ujis) {
	mp->priority = PUJIS;
    } else if (sjis_family) {
	mp->priority = PSJIS;
    } else {
	mp->priority = PNONE;
    }
    mp->sequence_counter = 0;
#endif
}

/*
 * Return the encoding that currently has priority for this buffer.
 * Always PNONE unless compiled with JAPANESE.
 */
J_PRIORITY get_priority(mp)
MULBUF *mp;
{
#if JAPANESE
    return (mp->priority);
#else
    /* no Japanese encodings without JAPANESE, hence nothing to prefer */
    return (PNONE);
#endif
}

/*
 * Set the encoding that has priority for this buffer.
 * pri must be PSJIS, PUJIS, PUTF8 or PNONE.
 *
 * Does nothing unless compiled with JAPANESE.
 */
void set_priority(mp, pri)
MULBUF *mp;
J_PRIORITY pri;
{
#if JAPANESE
    assert(pri == PSJIS || pri == PUJIS || pri == PUTF8 || pri == PNONE);
    mp->priority = pri;
#endif
}

/*
 * Set the global utfwidth setting.
 * u must lie between UWIDTH_NONE and UWIDTH_ALL (both inclusive).
 */
void set_utfwidth(u)
UWidth u;
{
    assert(u >= UWIDTH_NONE && u <= UWIDTH_ALL);
    utfwidth = u;
}

/*
 * Allocate a new MULBUF together with its m_status, fill in the default
 * character sets and input encodings and initialize it with
 * init_multibuf().
 *
 * Both objects are allocated with ecalloc().
 */
MULBUF *new_multibuf()
{
    MULBUF *mp;

    mp = (MULBUF*) ecalloc(1, sizeof(MULBUF));
    mp->ms = (struct m_status*) ecalloc(1, sizeof(struct m_status));

    /* defaults set up by init_def_scs_es() */
    mp->io.scs = def_scs;
    mp->io.input = def_input;
    mp->io.inputr = def_inputr;
    mp->orig_io_right = def_inputr;
    mp->rotation_io_right = 0;

    mp->eseq = NOESC;
    init_multibuf(mp);
    return (mp);
}

/*
 * Empty the buffer: reset the input positions, the saved single shift
 * and the output index.  The ISO 2022 status (mp->ms) is not touched.
 */
void clear_multibuf(mp)
MULBUF *mp;
{
    /* input side */
    mp->startpos = 0;
    mp->laststartpos = 0;
    mp->lastpos = M_NULL_POS;
    mp->lastsg = WRONGPLANE;

    /* output side */
    mp->intindex = 0;
}

/*
 * Reset an ISO 2022 status to its initial state: the four planes hold
 * the defaults, plane 0 is invoked to the left half and plane 1 to the
 * right half, no single shift is active and no revision is announced.
 */
static void init_ms(ms)
struct m_status *ms;
{
    int plane;

    for (plane = 0; plane < 4; plane++) {
	ms->gs[plane] = def_gs[plane];
    }
    ms->gl = 0;
    ms->gr = 1;
    ms->sg = WRONGPLANE;
    ms->irr = 0;
}

/*
 * Bring a MULBUF back to its initial state: ASCII as current character
 * set, initial ISO 2022 status, no escape sequence in progress and an
 * empty buffer.
 */
void init_multibuf(mp)
MULBUF *mp;
{
    mp->cs = ASCII;
    mp->icharset = ASCII;

    init_ms(mp->ms);
    /* abandon any escape sequence that was in progress */
    mp->eseq = NOESC;
    fix_status_for_escape_sequence(mp);

#if JAPANESE
    mp->sequence_counter = 0;
#endif
    clear_multibuf(mp);
}

/*
 * Parse the byte that was just added to the buffer.
 *
 * When an ISO 2022 style input encoding is enabled the byte is first
 * offered to the escape sequence parser.  If that does not handle it,
 * the byte is treated as (part of) a character.
 */
static void check_new_buffered_byte(mp)
MULBUF *mp;
{
    m_position before = mp->startpos;

    if ((mp->io.input & (ESJIS83 | ESISO7 | ESISO8)) &&
	check_escape_sequence(mp) == 0) {
	/* the escape sequence parser took care of the byte */
	return;
    }

    /* it is not an escape sequence, try to use it as a character */
    internalize(mp);

    if (mp->startpos == before) {
	/* nothing left the buffer yet */
	return;
    }

    /*
     * A character was detected in internalize().  Remember the single
     * shift and clear it, since a single shift affects only one
     * character.
     */
    mp->lastsg = mp->ms->sg;
    if (mp->ms->sg == WRONGPLANE)
	return;
    mp->ms->sg = WRONGPLANE;
    fix_status_for_escape_sequence(mp);
}

/*
 * Parse everything that is still buffered once more.
 *
 * Called after a problem was found in the buffered data and the first
 * buffered byte has already been moved to the output.  The remaining
 * bytes, from mp->startpos up to mp->lastpos, are fed to the parser
 * again one by one.
 */
static void multi_reparse(mp)
MULBUF *mp;
{
    m_position end;

    /*
     * The byte that was just marked wrong used up any single shift,
     * so no single-shifted character set is left for the other bytes.
     */
    if (mp->ms->sg != WRONGPLANE) {
	mp->ms->sg = WRONGPLANE;
	fix_status_for_escape_sequence(mp);
    }

#if JAPANESE
    /*
     * Quick Japanese hack.
     * If the byte that was just marked wrong is a SJIS KANA our
     * prediction failed: turn it into a JIS X 0201 KANA after all and
     * fall back to SJIS.
     */
    if (mp->priority == PSJIS || (mp->io.inputr & (ESSJIS | ESCP932))) {
	if (CSISWRONG(mp->multics[mp->intindex - 1]) &&
	    ISSJISKANA(mp->multiint[mp->intindex - 1])) {
	    mp->priority = PSJIS;
	    mp->icharset = SJIS;
	    mp->cs = JISX0201KANA;
	    mp->multiint[mp->intindex - 1] &= 0x7f;
	    mp->multics[mp->intindex - 1] = mp->cs;
	}
    }
#endif

    /*
     * Feed the rest of the buffered data to the parser again.
     * mp->lastpos serves as the read position during that time and is
     * put back afterwards.
     */
    end = mp->lastpos;
    mp->lastpos = mp->startpos;
    while (mp->lastpos <= end) {
	check_new_buffered_byte(mp);
	mp->lastpos++;
    }
    mp->lastpos = end;
}

#if LESS
/*
 * Work out the parser status that is in effect at file position `pos'
 * by parsing the line containing pos from its beginning up to pos.
 * The parsed characters are thrown away; the read position of the file
 * is pos again afterwards.
 *
 * Nothing is done if seeking to pos fails or if pos is the beginning of
 * a line.
 */
void multi_find_cs(mp, pos)
MULBUF* mp;
m_position pos;
{
    int c;
    m_position lpos;

    if (ch_seek(pos) != 0)
	return;

    /*
     * Back up to the beginning of the line.
     */
    do {
	c = ch_back_get();
    } while (c != '\n' && c != EOI);
    if (c == '\n') {
	/* step over the newline again, it belongs to the previous line */
	(void)ch_forw_get();
    }

    lpos = ch_tell();
    if (lpos == pos)
	return;

    for (; lpos < pos; lpos++) {
	c = ch_forw_get();
	assert(c != EOI && c != '\n');
	multi_parse(mp, c, NULL_POSITION, NULL, NULL);
    }
    ch_seek(pos);
}
#endif

/* Compile time switch for the diagnostic output of multi_start_buffering(). */
#define DEBUG 0
#if DEBUG
/* Run time switch: the diagnostics are printed only while this is non-zero. */
int debug = 1;
#endif

/*
 * Manage m_status data structure to maintain ISO-2022 status of input stream.
 *
 * Prepare the (empty) buffer for input that starts at position `pos'.
 * If pos continues where the parser stopped, the status is kept and only
 * the saved single shift is restored.  Otherwise the status is rebuilt
 * from scratch.
 */
void multi_start_buffering(mp, pos)
MULBUF *mp;
m_position pos;
{
    int continuing;

    /* buffer must be empty */
    assert(mp->lastpos < mp->startpos);

    /*
     * pos == mp->lastpos+2 if this line is started after \n.
     * pos == mp->laststartpos if this line is started by a non-fit
     * character.
     * pos == somewhere else if this function is called after jump_loc().
     */
    continuing = (pos == mp->lastpos + 2 || pos == mp->laststartpos);

    if (!continuing) {
#if DEBUG
	if (debug) {
	    fprintf(stderr, "%qd, %qd, %qd, %qd\n", pos, mp->lastpos,
		mp->startpos, mp->laststartpos);
	    fprintf(stderr, "oct %qo, %qo, %qo, %qo\n", pos, mp->lastpos,
		mp->startpos, mp->laststartpos);
	}
#endif
	/* initialize m_status and find out the status valid at pos */
	init_multibuf(mp);
#if LESS
	multi_find_cs(mp, pos);
	clear_multibuf(mp);
#endif
	mp->laststartpos = pos;
    } else if (mp->ms->sg != mp->lastsg) {
	/* restore backed up sg */
	mp->ms->sg = mp->lastsg;
	fix_status_for_escape_sequence(mp);
    }

    /* adjust pointers */
    mp->startpos = pos;
    mp->lastpos = pos - 1;
}

/*
 * Add one byte to the buffer and parse it, or flush the buffer.
 *
 * c >= 0: the byte c, located at position pos (or directly behind the
 *         last byte if pos is NULL_POSITION), is buffered and parsed.
 * c < 0:  everything that is still buffered is forced out.
 *
 * If mpos is not NULL it receives the start position of the buffered
 * data as it was before parsing.  If mbd is not NULL it receives the
 * characters that became available through this call.  These are
 * dropped from the MULBUF afterwards in either case.
 */
void multi_parse(mp, c, pos, mbd, mpos)
MULBUF* mp;
int c;
m_position pos;
M_BUFDATA* mbd;
POSITION* mpos;
{
    if (c >= 0) {
	if (pos == NULL_POSITION) {
	    mp->lastpos++;
	} else {
	    assert(pos == mp->lastpos + 1);
	    mp->lastpos = pos;
	}
	INBUF(mp) = c;

	mp->laststartpos = mp->startpos;
	if (mpos != NULL) {
	    *mpos = mp->startpos;
	}

	/*
	 * The byte is in the buffer now, parse it.
	 */
	check_new_buffered_byte(mp);
    } else {
	if (mpos != NULL) {
	    *mpos = mp->startpos;
	}

	/*
	 * output pending unicode character
	 */
	flush_pending_ucs(mp);

	/*
	 * Force out all buffered bytes: abandon an unfinished escape
	 * sequence and reject byte after byte until nothing is left.
	 */
	if (mp->eseq != NOESC) {
	    mp->eseq = NOESC;
	    fix_status_for_escape_sequence(mp);
	}
	while (mp->startpos <= mp->lastpos) {
	    wrongcs1(mp);
	    multi_reparse(mp);
	}
    }

    /* hand over what has been produced and start over */
    if (mbd != NULL) {
	mbd->cbuf = mp->multiint;
	mbd->csbuf = mp->multics;
	mbd->byte = mp->intindex;
    }
    mp->intindex = 0;
}

/*
 * Flush buffered data.
 *
 * Calls multi_parse() with a negative character, which forces out all
 * buffered bytes.  mbd and mpos are passed on unchanged, so the caller
 * receives the flushed characters and the start position through them.
 */
void multi_flush(mp, mbd, mpos)
MULBUF* mp;
M_BUFDATA* mbd;
POSITION* mpos;
{
    multi_parse(mp, -1, NULL_POSITION, mbd, mpos);
}

/*
 * Discard buffered data.
 *
 * Same as multi_flush(), except that NULL is passed for both output
 * arguments so the flushed characters are not reported to the caller.
 */
void multi_discard(mp)
MULBUF* mp;
{
    multi_parse(mp, -1, NULL_POSITION, NULL, NULL);
}

/*
 * Change the input encodings of one buffer: `input' for the left half
 * and `inputr' for the right half.  When UTF-8 is among the right half
 * encodings the unicode map is built for the character sets of this
 * buffer.
 */
void set_codesets(mp, input, inputr)
MULBUF *mp;
ENCSET input;
ENCSET inputr;
{
    mp->io.inputr = inputr;
    mp->io.input = input;

    if ((inputr & ESUTF8) != 0) {
	make_unicode_map(mp->io.scs, output & ESUTF8);
    }
}

/*
 * Return string representation about multi bytes character
 * which was buffered.
 *
 * Well known code sets and character sets yield a constant name.  Any
 * other character set is described by its type and final byte, e.g.
 * "94(X)" or "96N(X)", followed by the revision number if there is one.
 * That description is built in a static buffer, so it is overwritten by
 * the next call.
 */
char *get_icharset_string(mp)
MULBUF *mp;
{
	static char buf[10];

	switch (mp->icharset)
	{
#if JAPANESE
	/*
	 * Code set
	 */
	case SJIS:		return ("SJIS");
	case SJIS2000:		return ("SJIS-2000");
	case SJIS2004:		return ("SJIS-2004");
	case CP932:             return ("CP932");
	case UJIS:		return ("UJIS");
	case UJIS2000:		return ("UJIS-2000");
	case UJIS2004:		return ("UJIS-2004");
	case UTF8Z:             return ("UTF-8");
	case UTF8:              return ("UTF-8");
	case UTF8W:             return ("UTF-8");
#endif
	/*
	 * Character set
	 */
	case ASCII:		return ("ASCII");
	case JISX0201KANA:	return ("JIS-KANA");
	case JISX0201ROMAN:	return ("JIS-ROMAN");
	case LATIN1:		return ("LATIN1");
	case LATIN2:		return ("LATIN2");
	case LATIN3:		return ("LATIN3");
	case LATIN4:		return ("LATIN4");
	case GREEK:		return ("GREEK");
	case ARABIC:		return ("ARABIC");
	case HEBREW:		return ("HEBREW");
	case CYRILLIC:		return ("CYRILLIC");
	case LATIN5:		return ("LATIN5");
	case THAI:		return ("THAI");
	case LATIN6:		return ("LATIN6");
	case LATIN7:		return ("LATIN7");
	case LATIN8:		return ("LATIN8");
	case LATIN9:		return ("LATIN9");
	case LATIN10:		return ("LATIN10");
	case JISX0208_78KANJI:	return ("KANJI:1978");
	case GB2312:		return ("GB2312");
	case JISX0208KANJI:	return ("KANJI:1983");
	case JISX0208_90KANJI:	return ("KANJI:1990");
	case KSC5601:		return ("KSC5601");
	case JISX0212KANJISUP:	return ("JIS-KANJISP");
	case JISX0213KANJI1:	return ("X0213:2000-1");
	case JISX0213KANJI2:	return ("X0213:2000-2");
	case JISX02132004KANJI1:return ("X0213:2004-1");
	}
	switch (CS2TYPE(mp->icharset))
	{
	case TYPE_94_CHARSET:
		strcpy(buf, "94( )");
		buf[3] = CS2FT(mp->icharset);
		break;
	case TYPE_96_CHARSET:
		strcpy(buf, "96( )");
		buf[3] = CS2FT(mp->icharset);
		break;
	case TYPE_94N_CHARSET:
		strcpy(buf, "94N( )");
		buf[4] = CS2FT(mp->icharset);
		break;
	case TYPE_96N_CHARSET:
		strcpy(buf, "96N( )");
		buf[4] = CS2FT(mp->icharset);
		break;
	default:
		assert(0);
		/*
		 * Reached only when assertions are compiled out.  Start
		 * from an empty string instead of appending to whatever
		 * the previous call left in the buffer.
		 */
		buf[0] = '\0';
		break;
	}
	if (CS2IRR(mp->icharset) > 0)
	{
		/* large enough for any int, not only for two digits */
		char num[24];

		sprintf(num, "%d", CS2IRR(mp->icharset));
		/* append the revision only if it fits into buf */
		if (strlen(buf) + strlen(num) < sizeof(buf))
			strcat(buf, num);
	}
	return (buf);
}

/*
 * Output side ISO 2022 status, shared by make_escape_sequence() and
 * convert_to_iso().
 */
static int old_gl_output_charset = ASCII; /* Last displayed character set */
static int old_gr_output_charset = WRONGCS;
static int old_shift = 0;	/* 1 after SO was sent, 0 after SI */

/*
 * Build the bytes that have to be sent before a character of `charset'
 * can be sent: a designation if needed and, unless the output is ESISO8,
 * a shift function (SO for 96 and 96N sets, SI for 94 and 94N sets) if
 * the other shift state is in effect.  The output side status above is
 * updated accordingly.  A wrong character set is treated as ASCII.
 *
 * Returns a NUL terminated string in a static buffer that is overwritten
 * by the next call.  The string may be empty.
 *
 * The longest result is ESC '&' revision ESC '$' '(' final plus one
 * shift byte, i.e. 8 bytes and the terminating NUL.
 */
static unsigned char *make_escape_sequence(charset)
int charset;
{
	static unsigned char p[9];
	/*
	 * Must start at 0: when no designation is generated below, only
	 * the shift byte and the terminating NUL are stored.
	 */
	int len = 0;

	if (CSISWRONG(charset))
	{
		charset = ASCII;
	}

	if (old_gl_output_charset != charset
	    || old_gr_output_charset != charset) {
		p[0] = '\033';
		len = 1;
		if ((output & (ESISO7 | ESISO8)) && CS2IRR(charset) > 0)
		{
			/* announce the revision first: ESC '&' revision */
			p[len] = '&';
			p[len + 1] = IRR2CODE(CS2IRR(charset));
			p[len + 2] = '\033';
			len += 3;
		}
		/*
		 * Call 94 or 94N character set to G0/GL plane.
		 * Call 96 or 96N character set to G1/GR plane.
		 */
		switch (CS2TYPE(charset))
		{
		case TYPE_94_CHARSET:
			p[len] = '(';
			p[len + 1] = CS2FT(charset);
			len += 2;
			old_gl_output_charset = charset;
			break;
		case TYPE_94N_CHARSET:
			switch (CS2FT(charset))
			{
			case '@':
			case 'A':
			case 'B':
				/* short form: ESC '$' final */
				p[len] = '$';
				p[len + 1] = CS2FT(charset);
				len += 2;
				break;
			default:
				p[len] = '$';
				p[len + 1] = '(';
				p[len + 2] = CS2FT(charset);
				len += 3;
				break;
			}
			old_gl_output_charset = charset;
			break;
		case TYPE_96_CHARSET:
			p[len] = '-';
			p[len + 1] = CS2FT(charset);
			len += 2;
			old_gr_output_charset = charset;
			break;
		case TYPE_96N_CHARSET:
			p[len] = '$';
			p[len + 1] = '-';
			p[len + 2] = CS2FT(charset);
			len += 3;
			old_gr_output_charset = charset;
			break;
		}
	}
	/*
	 * If output is not ESISO8, use SO and SI to call G1 to GL.
	 * Otherwise, we use GR directly, so no need to call G1
	 * since G1 is called GR already.
	 */
	if (!(output & ESISO8))
	{
		switch (CS2TYPE(charset))
		{
		case TYPE_94_CHARSET:
		case TYPE_94N_CHARSET:
			if (old_shift) {
				p[len] = '\017';
				len++;
				old_shift = 0;
			}
			break;
		case TYPE_96_CHARSET:
		case TYPE_96N_CHARSET:
			if (!old_shift) {
				p[len] = '\016';
				len++;
				old_shift = 1;
			}
			break;
		}
	}
	p[len] = '\0';
	return (p);
}

/*
 * Shared state of the convert_to_*() routines below.
 *  - cvbuffer: scratch buffer in which a character is accumulated and the
 *    converted result is returned.
 *  - cvindex: number of bytes of the current character stored so far;
 *    reset to 0 whenever a character is completed or abandoned.
 *  - nullcvbuffer: empty string returned when there is nothing to output
 *    yet (incomplete character) or the character set is not supported.
 */
static char cvbuffer[32];
static int cvindex = 0;
static char *nullcvbuffer = "";

/*
 * Convert one byte `c' of character set `cs' for ISO 2022 style output.
 *
 * Returns a NUL-terminated string in static storage: either the byte on
 * its own (when the required set is already invoked), or the escape
 * sequence produced by make_escape_sequence() followed by the byte.
 * CP932 and the UTF-8 pseudo sets are not supported and yield "".
 */
static char *convert_to_iso(c, cs)
int c;
int cs;
{
	static char single[2];
	unsigned char *esc;
	int invoked;

	/* With 8-bit output, 96/96N sets are sent through GR (high bit set). */
	if ((output & ESISO8) && c != 0 &&
	    (CS2TYPE(cs) == TYPE_96_CHARSET ||
	     CS2TYPE(cs) == TYPE_96N_CHARSET))
	{
		c |= 0x80;
	}

	single[0] = c;
	single[1] = '\0';

	cs = CS2CHARSET(cs);
	if (cs == CP932 || cs == UTF8 || cs == UTF8W || cs == UTF8Z)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	}
	if (CSISREST(cs))
		return (single);
	if (CSISWRONG(cs))
		cs = ASCII;

	/* Is the wanted set the one currently reachable for this byte? */
	if (c & 0x80)
		invoked = (cs == old_gr_output_charset);
	else
		invoked = (cs == old_gl_output_charset && old_shift == 0) ||
			  (cs == old_gr_output_charset && old_shift == 1);
	if (invoked)
		return (single);

	esc = make_escape_sequence(cs);
	strcpy(cvbuffer, (char *)esc);
	strcat(cvbuffer, single);
	return (cvbuffer);
}

/*
 * Convert one byte `c' of character set `cs' for JIS (ESJIS83) output.
 *
 * Bytes are collected in a private buffer, indexed by the shared cvindex,
 * until a whole character is available; "" is returned meanwhile.  A
 * complete character is returned either on its own, when its set equals
 * old_gl_output_charset, or prefixed by the escape sequence from
 * make_escape_sequence().  c == 0 resets the accumulator.
 */
static char *convert_to_jis(c, cs)
int c;
int cs;
{
	static char pending[3];
	unsigned char *esc;

	if (c == 0)
	{
		cvindex = 0;
		return (nullcvbuffer);
	}

	pending[cvindex++] = c;
	pending[cvindex] = '\0';

	if (CSISWRONG(cs))
		cs = ASCII;
	cs = CS2CHARSET(cs);

	if (cs == ASCII || cs == JISX0201ROMAN || cs == JISX0201KANA)
	{
		/* Single-byte sets: the character is complete at once. */
		assert(cvindex == 1);
		cvindex = 0;
	}
	else if (cs == JISX0208_78KANJI ||
		 cs == JISX0208KANJI || cs == JISX0208_90KANJI ||
		 cs == JISX0213KANJI1 || cs == JISX02132004KANJI1)
	{
		/* Double-byte sets: wait for the second byte. */
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);
		if (cs == JISX0208_78KANJI)
		{
			/* Output the 1978 set as its 1990 equivalent. */
			jis78to90(pending);
			cs = JISX0208_90KANJI;
		}
		else if (cs == JISX0208KANJI || cs == JISX0208_90KANJI)
		{
			/* Output as is. */
		}
		else
		{
			/* JIS X 0213 plane 1 is output as JIS X 0208. */
			cs = JISX0208KANJI;
		}
		cvindex = 0;
	}
	else if (cs == CP932 || cs == UTF8 || cs == UTF8W || cs == UTF8Z)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	}
	else
	{
		assert(0);
		cvindex = 0;
	}

	if (cs == old_gl_output_charset)
		return (pending);

	esc = make_escape_sequence(cs);
	strcpy(cvbuffer, (char *)esc);
	strcat(cvbuffer, pending);
	return (cvbuffer);
}

#if JAPANESE
/*
 * Convert one byte `c' of character set `cs' for UJIS (EUC-JP) output.
 *
 * Bytes are accumulated in cvbuffer; "" is returned until a character is
 * complete.  Results:
 *   ASCII / JIS X 0201 Roman : the byte unchanged
 *   JIS X 0201 Kana          : 0x8e, byte | 0x80
 *   JIS X 0208 / 0213 plane 1: both bytes with the high bit set
 *   JIS X 0212 / 0213 plane 2: 0x8f, then both bytes with the high bit set
 * CP932 and the UTF-8 pseudo sets are not supported and yield "".
 * c == 0 resets the accumulator.
 */
static char *convert_to_ujis(c, cs)
int c;
int cs;
{
	int two_byte_g1;
	int two_byte_g3;

	if (c == 0)
	{
		cvindex = 0;
		return (nullcvbuffer);
	}

	cvbuffer[cvindex++] = c;
	cvbuffer[cvindex] = '\0';

	if (CSISWRONG(cs))
		cs = ASCII;
	cs = CS2CHARSET(cs);

	if (cs == ASCII || cs == JISX0201ROMAN)
	{
		assert(cvindex == 1);
		cvindex = 0;
		return (cvbuffer);
	}

	if (cs == JISX0201KANA)
	{
		assert(cvindex == 1);
		/* Prefix the kana byte with SS2 (0x8e). */
		cvbuffer[1] = cvbuffer[0] | 0x80;
		cvbuffer[0] = 0x8e;
		cvbuffer[2] = '\0';
		cvindex = 0;
		return (cvbuffer);
	}

	two_byte_g1 = (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
		       cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
		       cs == JISX02132004KANJI1);
	if (two_byte_g1)
	{
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);
		if (cs == JISX0208_78KANJI)
			jis78to90(cvbuffer);
		cvbuffer[0] |= 0x80;
		cvbuffer[1] |= 0x80;
		cvindex = 0;
		return (cvbuffer);
	}

	two_byte_g3 = (cs == JISX0212KANJISUP || cs == JISX0213KANJI2);
	if (two_byte_g3)
	{
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);
		/* Shift both bytes right by one and prefix SS3 (0x8f). */
		cvbuffer[2] = cvbuffer[1] | 0x80;
		cvbuffer[1] = cvbuffer[0] | 0x80;
		cvbuffer[0] = 0x8f;
		cvbuffer[3] = '\0';
		cvindex = 0;
		return (cvbuffer);
	}

	if (cs == CP932 || cs == UTF8 || cs == UTF8W || cs == UTF8Z)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	}

	assert(0);
	cvindex = 0;
	return (cvbuffer);
}

/*
 * Convert one byte `c' of character set `cs' for Shift_JIS output.
 *
 * Bytes are accumulated in cvbuffer (position cvindex); "" is returned
 * until a character is complete, and the converted character is returned
 * in cvbuffer afterwards.  c == 0 resets the accumulator.
 * CP932 and the UTF-8 pseudo sets are not supported and yield "".
 */
static char *convert_to_sjis(c, cs)
int c;
int cs;
{
	if (c == 0)
	{
		cvindex = 0;
		return (nullcvbuffer);
	}

	cvbuffer[cvindex++] = c;
	cvbuffer[cvindex] = '\0';

	if (CSISWRONG(cs))
	{
		cs = ASCII;
	}

	cs = CS2CHARSET(cs);

	if (cs == ASCII || cs == JISX0201ROMAN)
	{
		/* Single byte, passed through unchanged. */
		assert(cvindex == 1);
		cvindex = 0;
		return (cvbuffer);
	} else if (cs == JISX0201KANA)
	{
		/* Single byte kana: output with the high bit set. */
		assert(cvindex == 1);
		cvbuffer[0] |= 0x80;
		cvindex = 0;
		return (cvbuffer);
	} else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
		   cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
		   cs == JISX02132004KANJI1)
	{
		register int c1, c2, c3;
		/*
		 * Lead byte table, indexed by (first byte / 2) + (first byte & 1).
		 * It has 64 entries (indices 0..63).
		 * NOTE(review): a first byte of 0x7f would index entry 64;
		 * presumably such bytes never reach here -- confirm with callers.
		 */
		static unsigned char table_sjis[] = {
			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
			   0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
			0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
			0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
			0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
			0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
			0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
		};

		/* Wait for the second byte of the character. */
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);
		if (cs == JISX0208_78KANJI)
			jis78to90(cvbuffer);
		c3 = cvbuffer[0] & 0x7f;	/* first byte, 7 bit */
		c1 = c3 & 1;			/* 1 when the first byte is odd */
		/* Trail byte: odd rows start at 0x40, even rows at 0x9e. */
		c2 = (cvbuffer[1] & 0x7f) + (c1 ? 0x40 - 0x21 : 0x9e - 0x21);
		c1 = table_sjis[c3 / 2 + c1];
		cvbuffer[0] = c1;
		/* Trail byte values from 0x7f upwards are shifted up by one. */
		cvbuffer[1] = c2 + (c2 >= 0x7f ? 1 : 0);
		cvindex = 0;
		return (cvbuffer);
	} else if (cs == JISX0213KANJI2)
	{
		register int c1, c2, c3;
		/* Wait for the second byte of the character. */
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);
		c3 = cvbuffer[0] & 0x7f;	/* first byte, 7 bit */
		c1 = c3 & 1;			/* 1 when the first byte is odd */
		/* Trail byte is computed exactly as for plane 1 above. */
		c2 = (cvbuffer[1] & 0x7f) +
		     (c1 ? 0x40 - 0x21 : 0x9e - 0x21);
		/* The lead byte depends on which group of rows is used. */
		if (c3 <= 0x25) {
			/* Map 1, 3, 4, and 5-KU */
			/* Note: 2-KU is rejected already. */
			c1 = (c3 - 0x21) / 2 + 0xf0;
		} else if (c3 == 0x28) {
			/* Map 8-KU */
			c1 = 0xf0;
		} else if (c3 <= 0x2f) {
			/* Map 12, 13, 14, and 15-KU */
			c1 = (c3 - 0x2b) / 2 + 0xf2;
		} else {
			/* Map 78-94 KU. */
			/* Note: 16-77 KU is rejected already. */
			c1 = (c3 - 0x6d) / 2 + 0xf4;
		}
		cvbuffer[0] = c1;
		cvbuffer[1] = c2 + (c2 >= 0x7f ? 1 : 0);
		cvindex = 0;
		return (cvbuffer);
	} else if (cs == CP932)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	} else if (cs == UTF8 || cs == UTF8W  || cs == UTF8Z)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	}
	/* Any other character set is unexpected here. */
	assert(0);
	cvindex = 0;
	return (cvbuffer);
}

/*
 * Convert one byte `c' of character set `cs' for CP932 output.
 *
 * Bytes are accumulated in cvbuffer; "" is returned until a character is
 * complete.  ASCII / JIS X 0201 Roman bytes pass through, JIS X 0201 Kana
 * bytes get the high bit set, and JIS X 0208 / CP932 double-byte
 * characters are turned into a lead/trail byte pair.  The UTF-8 pseudo
 * sets are not supported and yield "".  c == 0 resets the accumulator.
 */
static char *convert_to_cp932(c, cs)
int c;
int cs;
{
	int row, col;
	int lead_base, trail_add;

	if (c == 0)
	{
		cvindex = 0;
		return (nullcvbuffer);
	}

	cvbuffer[cvindex++] = c;
	cvbuffer[cvindex] = '\0';

	if (CSISWRONG(cs))
		cs = ASCII;
	cs = CS2CHARSET(cs);

	if (cs == ASCII || cs == JISX0201ROMAN)
	{
		assert(cvindex == 1);
		cvindex = 0;
		return (cvbuffer);
	}

	if (cs == JISX0201KANA)
	{
		assert(cvindex == 1);
		cvbuffer[0] |= 0x80;
		cvindex = 0;
		return (cvbuffer);
	}

	if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
	    cs == JISX0208_90KANJI || cs == CP932)
	{
		/* Double-byte character: wait for the second byte. */
		if (cvindex == 1)
			return (nullcvbuffer);
		assert(cvindex == 2);

		row = cvbuffer[0] & 0x7f;
		col = cvbuffer[1] & 0x7f;

		/* Only the CP932 set moves rows 0x30..0x74 up by 0x4f. */
		if (cs == CP932 && row >= 0x30 && row <= 0x74)
			row += 0x4f;

		if (row <= 0x5e)
			lead_base = 0x81;
		else
			lead_base = 0xc1;

		if (row & 1)
			trail_add = (col <= 0x5f) ? 0x1f : 0x20;
		else
			trail_add = 0x7e;

		cvbuffer[0] = (row - 0x21) / 2 + lead_base;
		cvbuffer[1] = col + trail_add;
		cvindex = 0;
		return (cvbuffer);
	}

	if (cs == UTF8 || cs == UTF8W || cs == UTF8Z)
	{
		/* not supported */
		cvindex = 0;
		return (nullcvbuffer);
	}

	assert(0);
	cvindex = 0;
	return (cvbuffer);
}

#endif

/*
 * Encode the code point `c' as UTF-8 into `buf' and NUL-terminate it.
 *
 * One to six bytes are produced (the original long forms for values of
 * 0x200000 and above are supported), so `buf' must have room for seven
 * bytes.  A negative `c' yields an empty string.  Returns `buf'.
 */
static char *convUTF8(buf, c)
char *buf;
int c;
{
    /* Marker bits and payload mask of the first byte, by sequence length. */
    static const unsigned char lead_mark[] =
	{ 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
    static const unsigned char lead_mask[] =
	{ 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
    int nbytes;
    int k;

    buf[0] = '\0';
    if (c < 0)
	return (buf);

    if (c < 0x80)
	nbytes = 1;
    else if (c < 0x800)
	nbytes = 2;
    else if (c < 0x10000)
	nbytes = 3;
    else if (c < 0x200000)
	nbytes = 4;
    else if (c < 0x4000000)
	nbytes = 5;
    else
	nbytes = 6;

    buf[nbytes] = '\0';

    /* Fill the continuation bytes from the tail, six bits at a time. */
    for (k = nbytes - 1; k > 0; --k) {
	buf[k] = (c & 0x3f) | 0x80;
	c >>= 6;
    }
    /* What is left of c goes into the first byte. */
    buf[0] = (c & lead_mask[nbytes]) | lead_mark[nbytes];

    return (buf);
}

/*
 * Handle JIS X 0213 plane 1 characters listed in the jisx0213_comb table.
 *
 * The first byte of the character is taken from cvbuffer[0], the second
 * is `c'.  When the pair is found in the table, cvbuffer is overwritten
 * with the UTF-8 form of the entry's u1, followed by that of u2 when u2
 * is positive, and 1 is returned.  Otherwise cvbuffer is left alone and
 * 0 is returned.  Without JAPANESE support the result is always 0.
 */
static int
need_combining_utf8(c, cs)
int c;
int cs;
{
#if JAPANESE
    struct st_ucs_combining *entry;
    int first, second;

    if (cs != JISX0213KANJI1 && cs != JISX02132004KANJI1)
	return 0;

    first = cvbuffer[0] & 0x7f;
    second = c & 0x7f;

    /* The table ends with an entry whose c1 is zero. */
    entry = jisx0213_comb;
    while (entry->c1) {
	if (entry->c1 + 0x20 == first && entry->c2 + 0x20 == second) {
	    convUTF8(cvbuffer, entry->u1);
	    if (entry->u2 > 0)
		convUTF8(cvbuffer + strlen(cvbuffer), entry->u2);
	    return 1;
	}
	++entry;
    }
#endif

    return 0;
}


/*
 * Convert one byte `c' of character set `cs' for UTF-8 output.
 *
 * Bytes are accumulated in cvbuffer (position cvindex).  While a
 * character is incomplete "" is returned; once it is complete its UTF-8
 * form is returned in cvbuffer and the accumulator is reset.
 * c == 0 resets the accumulator and returns "".
 */
static char *convert_to_utf8(c, cs)
int c;
int cs;
{
	if (c == 0)
	{
		cvindex = 0;
		return (nullcvbuffer);
	}

	cvbuffer[cvindex++] = c;
	cvbuffer[cvindex] = '\0';

	if (CSISWRONG(cs))
	{
		cs = ASCII;
	}

	cs = CS2CHARSET(cs);
	if (cs == ASCII)
	{
	    /* ASCII is already valid UTF-8. */
	    assert(cvindex == 1);
	    cvindex = 0;
	    return (cvbuffer);
	} else if (cs == UTF8 || cs == UTF8W  || cs == UTF8Z)
	{
	    /*
	     * Input is UTF-8 already: collect the bytes and pass the
	     * sequence through once its length, given by the head byte,
	     * has been reached.
	     */
	    if (ISUTF8_HEAD(c)) {
		assert(cvindex == 1);
		return (nullcvbuffer);
	    } else if (ISUTF8_REST(c)) {
		int head = cvbuffer[0];
		if ((head & 0xe0) == 0xc0) {
		    /* 2 byte sequence */
		    assert(cvindex == 2);
		    cvindex = 0;
		    return (cvbuffer);
		} else if ((head & 0xf0) == 0xe0) {
		    /* 3 byte sequence */
		    if (cvindex <= 2)
			return (nullcvbuffer);
		    assert(cvindex == 3);
		    cvindex = 0;
		    return (cvbuffer);
		} else if ((head & 0xf8) == 0xf0) {
		    /* 4 byte sequence */
		    if (cvindex <= 3)
			return (nullcvbuffer);
		    assert(cvindex == 4);
		    cvindex = 0;
		    return (cvbuffer);
		} else if ((head & 0xfc) == 0xf8) {
		    /* 5 byte sequence */
		    if (cvindex <= 4)
			return (nullcvbuffer);
		    assert(cvindex == 5);
		    cvindex = 0;
		    return (cvbuffer);
		} else if ((head & 0xfe) == 0xfc) {
		    /* 6 byte sequence */
		    if (cvindex <= 5)
			return (nullcvbuffer);
		    assert(cvindex == 6);
		    cvindex = 0;
		    return (cvbuffer);
		}
		assert(0);
	    }
	    /*
	     * A byte that is neither head nor rest falls through to the
	     * end of the function, which returns cvbuffer as it stands.
	     */
	} else if (cs == JISX0201ROMAN)
	{
	    /* Single byte: look up the code point in the JIS X 0201 map. */
	    assert(cvindex == 1);
	    cvindex = 0;
	    return convUTF8(cvbuffer, ucode_latin[UMAP_JISX0201][c]);
	} else if (cs == JISX0201KANA)
	{
	    /* Kana occupy the upper half of the same map. */
	    assert(cvindex == 1);
	    cvindex = 0;
	    return convUTF8(cvbuffer, ucode_latin[UMAP_JISX0201][c | 0x80]);
	} else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
                   cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
                   cs == JISX02132004KANJI1)
	{
	    int num;
	    /* Wait for the second byte of the character. */
	    if (cvindex == 1)
		return (nullcvbuffer);
	    assert(cvindex == 2);
	    /* Some JIS X 0213 characters are handled by a separate table. */
	    if (need_combining_utf8(c, cs)) {
		cvindex = 0;
		return (cvbuffer);
	    }
	    if (cs == JISX0208_78KANJI)
		jis78to90(cvbuffer);
	    /* Linear index into a table of 94 columns per row. */
	    num = (cvbuffer[0] - 0x21) * 94 + (cvbuffer[1] - 0x21);
	    /*
	     * NOTE(review): num == U_kanji is accepted here; whether that
	     * index is valid depends on how U_kanji relates to the table
	     * size -- confirm against unicode_map.h.
	     */
	    if (num < 0 || num > U_kanji) {
		cvindex = 0;
		return (nullcvbuffer);
	    }
	    cvindex = 0;
	    return convUTF8(cvbuffer, ucode_kanji1[num]);
        } else if (cs == JISX0212KANJISUP || cs == JISX0213KANJI2)
        {
	    int num;
	    /* Same scheme as above, using the second kanji table. */
	    if (cvindex == 1)
		return (nullcvbuffer);
	    assert(cvindex == 2);
	    num = (cvbuffer[0] - 0x21) * 94 + (cvbuffer[1] - 0x21);
	    if (num < 0 || num > U_kanji) {
		cvindex = 0;
		return (nullcvbuffer);
	    }
	    cvindex = 0;
	    return convUTF8(cvbuffer, ucode_kanji2[num]);
	} else if (cs == CP932)
	{
	    int num;
	    /* Same scheme as above, using the CP932 table. */
	    if (cvindex == 1)
		return (nullcvbuffer);
	    assert(cvindex == 2);
	    num = (cvbuffer[0] - 0x21) * 94 + (cvbuffer[1] - 0x21);
	    if (num < 0 || num > U_kanji) {
		cvindex = 0;
		return (nullcvbuffer);
	    }
	    cvindex = 0;
	    return convUTF8(cvbuffer, ucode_cp932[num]);
	} else {
	    /* Try the ISO 8859 maps; their upper half is indexed. */
	    int i;
	    for (i = UMAP_ISO8859_1; i <= UMAP_ISO8859_16; ++ i) {
		if (cs == iso8859_list[i]) {
		    assert(cvindex == 1);
		    cvindex = 0;
		    return convUTF8(cvbuffer, ucode_latin[i][c | 0x80]);
		}
	    }
	}
	/* No conversion applied: return the accumulated bytes unchanged. */
	cvindex = 0;
	return (cvbuffer);
}

/*
 * Convert one byte `c' of character set `cs' to the output encoding
 * selected by the `output' flags and return the resulting string.
 *
 * The string lives in static storage and may be empty while a multibyte
 * character is still incomplete.  A negative `c' is treated as c == 0
 * with character set ASCII.  When no known output encoding is selected,
 * the byte is returned unchanged.
 */
char *outchar(c, cs)
int c;
CHARSET cs;
{
	char *converted;

	if (c < 0)
	{
		c = 0;
		cs = ASCII;
	}

	if (output & (ESISO7 | ESISO8))
		converted = convert_to_iso(c, cs);
	else if (output & ESJIS83)
		converted = convert_to_jis(c, cs);
#if JAPANESE
	else if (output & ESUJIS)
		converted = convert_to_ujis(c, cs);
	else if (output & ESSJIS)
		converted = convert_to_sjis(c, cs);
	else if (output & ESCP932)
		converted = convert_to_cp932(c, cs);
#endif
	else if (output & ESUTF8)
		converted = convert_to_utf8(c, cs);
	else
	{
		/* No conversion selected: pass the byte through. */
		cvbuffer[0] = c;
		cvbuffer[1] = '\0';
		converted = cvbuffer;
	}
	return (converted);
}

/*
 * Convert the NUL-terminated byte string `p', all of character set `cs',
 * to the output encoding by feeding it byte by byte to outchar().
 *
 * Returns a NUL-terminated string in a static buffer of 1024 bytes which
 * is overwritten by the next call.
 *
 * The available room is checked BEFORE each converted sequence is
 * copied.  If the result does not fit, the assertion fires in debug
 * builds; in builds without assertions the output is truncated at the
 * last sequence that fitted instead of writing past the buffer.
 */
char *outbuf(p, cs)
unsigned char *p;
CHARSET cs;
{
	static char buffer[1024];
	size_t used = 0;

	while (*p != '\0')
	{
		char *s = outchar(*p++, cs);
		size_t n = strlen(s);
		/* One byte is always kept for the terminating NUL. */
		size_t room = sizeof(buffer) - 1 - used;

		assert(n <= room);
		if (n > room)
			break;
		memcpy(buffer + used, s, n);
		used += n;
	}
	buffer[used] = '\0';
	return (buffer);
}

/*
 * Return the width attributed to a byte of character set `cs':
 *   0 for the non-first bytes of a character (CSISREST) and for UTF8Z,
 *   1 for UTF8 and for 94/96 character sets,
 *   2 for UTF8W and for 94N/96N character sets.
 * The byte value `c' itself is not examined.
 */
int mwidth(c, cs)
int c;
CHARSET cs;
{
	int type;

	if (CSISREST(cs))
		return (0);

	/* The UTF-8 pseudo sets carry the width in the set itself. */
	if (cs == UTF8Z)
		return 0;
	if (cs == UTF8)
		return 1;
	if (cs == UTF8W)
		return 2;

	type = CS2TYPE(cs);
	if (type == TYPE_94_CHARSET || type == TYPE_96_CHARSET)
		return (1);
	if (type == TYPE_94N_CHARSET || type == TYPE_96N_CHARSET)
		return (2);

	assert(0);
	return (0);
}

/*
 * Advance mp->rotation_io_right to the next of eight settings, store the
 * corresponding value in mp->io.inputr and call init_priority(mp).
 *
 * Setting 0 restores mp->orig_io_right.  Setting 1 (utf-8) additionally
 * calls make_unicode_map().  Returns the name of the new setting.
 */
char *rotate_right_codeset(mp)
MULBUF *mp;
{
	char *name = NULL;

	mp->rotation_io_right = (mp->rotation_io_right + 1) % 8;

	switch (mp->rotation_io_right)
	{
	case 0:
		name = "original";
		mp->io.inputr = mp->orig_io_right;
		break;
	case 1:
		name = "utf-8";
		mp->io.inputr = ESUTF8;
		make_unicode_map(mp->io.scs, output & ESUTF8);
		break;
	case 2:
		name = "ujis";
		mp->io.inputr = ESUJIS;
		break;
	case 3:
		name = "sjis";
		mp->io.inputr = ESSJIS;
		break;
	case 4:
		name = "cp932";
		mp->io.inputr = ESCP932;
		break;
	case 5:
		name = "iso8";
		mp->io.inputr = ESISO8;
		break;
	case 6:
		name = "noconv";
		mp->io.inputr = ESNOCONV;
		break;
	case 7:
		name = "none";
		mp->io.inputr = ESNONE;
		break;
	default:
		assert(0);
		break;
	}

	init_priority(mp);
	return (name);
}

#endif

/*
 * Length of a string that is paired with a character set array.
 *
 * Without a character set array (cs == NULL) this is plain strlen().
 * Otherwise elements are counted until a position is reached where the
 * byte is NULCH and the character set is the null character set.
 */
int strlen_cs(str, cs)
char* str;
CHARSET* cs;
{
	int n;

	if (cs == NULL)
		return strlen(str);

	for (n = 0; str[n] != NULCH || !CSISNULLCS(cs[n]); n++)
		;
	return n;
}

/*
 * Number of bytes occupied by the character at the start of `chstr'.
 *
 * Without a character set array (cs == NULL) the answer is 0 for a NULL
 * or empty string and 1 otherwise.  With one, the answer is 0 at the end
 * of the string, else 1 plus the number of directly following elements
 * whose character set is marked CSISREST.
 */
int chlen_cs(chstr, cs)
char* chstr;
CHARSET* cs;
{
	int n;

	if (cs == NULL)
		return (chstr != NULL && *chstr != NULCH) ? 1 : 0;

	if (*chstr == NULCH && CSISNULLCS(*cs))
		return 0;

	/* The first byte always counts; add the trailing REST bytes. */
	for (n = 1; CSISREST(cs[n]); n++)
		;
	return n;
}

/*
 * Duplicate a string together with its character set array.
 *
 * Both copies are allocated with ecalloc() and include the terminating
 * element.  When `cs' is NULL, a character set array is created in which
 * every byte is ASCII, terminated by NULLCS.  The new character set
 * array is stored in *csout and the new string is returned.
 */
char* strdup_cs(str, cs, csout)
char* str;
CHARSET* cs;
CHARSET** csout;
{
	int len = strlen_cs(str, cs);
	char* new_str = (char *)ecalloc(len + 1, 1);
	CHARSET* new_cs = (CHARSET *)ecalloc(len + 1, sizeof(CHARSET));
	int k;

	memcpy(new_str, str, sizeof(char) * (len + 1));

	if (cs != NULL)
	{
		memcpy(new_cs, cs, sizeof(CHARSET) * (len + 1));
	}
	else
	{
		for (k = 0; k < len; k++)
			new_cs[k] = ASCII;
		new_cs[len] = NULLCS;
	}

	*csout = new_cs;
	return new_str;
}