1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2018, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
23 #define NKF_VERSION "2.1.5"
24 #define NKF_RELEASE_DATE "2018-12-15"
25 #define COPY_RIGHT \
26     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27     "Copyright (C) 1996-2018, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46    c2           0 means ASCII
47    JIS_X_0201_1976_K
48    ISO_8859_1
49    JIS_X_0208
50    EOF      all termination
51    c1           32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
57 #define         FIXED_MIME      7
58 #define         STRICT_MIME     8
59 
60 /* byte order */
/* Byte-order identifiers; input_endian and output_endian hold one of these. */
enum byte_order {
    ENDIAN_BIG = 1,
    ENDIAN_LITTLE,	/* 2 */
    ENDIAN_2143,	/* 3 */
    ENDIAN_3412		/* 4 */
};
67 
68 /* ASCII CODE */
69 
70 #define         BS      0x08
71 #define         TAB     0x09
72 #define         LF      0x0a
73 #define         CR      0x0d
74 #define         ESC     0x1b
75 #define         SP      0x20
76 #define         DEL     0x7f
77 #define         SI      0x0f
78 #define         SO      0x0e
79 #define         SS2     0x8e
80 #define         SS3     0x8f
81 #define         CRLF    0x0D0A
82 
83 
84 /* encodings */
85 
86 enum nkf_encodings {
87     ASCII,
88     ISO_8859_1,
89     ISO_2022_JP,
90     CP50220,
91     CP50221,
92     CP50222,
93     ISO_2022_JP_1,
94     ISO_2022_JP_3,
95     ISO_2022_JP_2004,
96     SHIFT_JIS,
97     WINDOWS_31J,
98     CP10001,
99     EUC_JP,
100     EUCJP_NKF,
101     CP51932,
102     EUCJP_MS,
103     EUCJP_ASCII,
104     SHIFT_JISX0213,
105     SHIFT_JIS_2004,
106     EUC_JISX0213,
107     EUC_JIS_2004,
108     UTF_8,
109     UTF_8N,
110     UTF_8_BOM,
111     UTF8_MAC,
112     UTF_16,
113     UTF_16BE,
114     UTF_16BE_BOM,
115     UTF_16LE,
116     UTF_16LE_BOM,
117     UTF_32,
118     UTF_32BE,
119     UTF_32BE_BOM,
120     UTF_32LE,
121     UTF_32LE_BOM,
122     BINARY,
123     NKF_ENCODING_TABLE_SIZE,
124     JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125     /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126     /* JIS_X_0208_1978   = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127     /* JIS_X_0208_1983   = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128     JIS_X_0208        = 0x1168, /* @B */
129     JIS_X_0212        = 0x1159, /* D */
130     /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131     JIS_X_0213_2 = 0x1229, /* P */
132     JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
/*
 * A base ("native") encoding: a display name together with the pair of
 * converter functions used to read and write it.
 */
typedef struct {
    const char *name;	/* encoding name, e.g. "EUC-JP" */
    nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);	/* input-side converter */
    void (*oconv)(nkf_char c2, nkf_char c1);	/* output-side converter */
} nkf_native_encoding;
152 
153 nkf_native_encoding NkfEncodingASCII =		{ "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP =	{ "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS =	{ "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP =		{ "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 =		{ "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 =		{ "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 =		{ "UTF-32", w_iconv32, w_oconv32 };
160 
/*
 * One supported encoding: its enum nkf_encodings id, its canonical name and
 * the native encoding whose converters it uses.
 */
typedef struct {
    const int id;	/* enum nkf_encodings value; -1 in the table terminator */
    const char *name;	/* canonical name, e.g. "Windows-31J" */
    const nkf_native_encoding *base_encoding;	/* provides iconv/oconv */
} nkf_encoding;
166 
/*
 * Master table of supported encodings.
 * nkf_enc_from_index() indexes this array directly by encoding id, so the
 * entries must stay in the same order as the leading members of
 * enum nkf_encodings (ASCII .. BINARY).  The final {-1, NULL, NULL} entry
 * is a terminator.
 */
nkf_encoding nkf_encoding_table[] = {
    {ASCII,		"US-ASCII",		&NkfEncodingASCII},
    {ISO_8859_1,	"ISO-8859-1",		&NkfEncodingASCII},
    {ISO_2022_JP,	"ISO-2022-JP",		&NkfEncodingISO_2022_JP},
    {CP50220,		"CP50220",		&NkfEncodingISO_2022_JP},
    {CP50221,		"CP50221",		&NkfEncodingISO_2022_JP},
    {CP50222,		"CP50222",		&NkfEncodingISO_2022_JP},
    {ISO_2022_JP_1,	"ISO-2022-JP-1",	&NkfEncodingISO_2022_JP},
    {ISO_2022_JP_3,	"ISO-2022-JP-3",	&NkfEncodingISO_2022_JP},
    {ISO_2022_JP_2004,	"ISO-2022-JP-2004",	&NkfEncodingISO_2022_JP},
    {SHIFT_JIS,		"Shift_JIS",		&NkfEncodingShift_JIS},
    {WINDOWS_31J,	"Windows-31J",		&NkfEncodingShift_JIS},
    {CP10001,		"CP10001",		&NkfEncodingShift_JIS},
    {EUC_JP,		"EUC-JP",		&NkfEncodingEUC_JP},
    {EUCJP_NKF,		"eucJP-nkf",		&NkfEncodingEUC_JP},
    {CP51932,		"CP51932",		&NkfEncodingEUC_JP},
    {EUCJP_MS,		"eucJP-MS",		&NkfEncodingEUC_JP},
    {EUCJP_ASCII,	"eucJP-ASCII",		&NkfEncodingEUC_JP},
    {SHIFT_JISX0213,	"Shift_JISX0213",	&NkfEncodingShift_JIS},
    {SHIFT_JIS_2004,	"Shift_JIS-2004",	&NkfEncodingShift_JIS},
    {EUC_JISX0213,	"EUC-JISX0213",		&NkfEncodingEUC_JP},
    {EUC_JIS_2004,	"EUC-JIS-2004",		&NkfEncodingEUC_JP},
    {UTF_8,		"UTF-8",		&NkfEncodingUTF_8},
    {UTF_8N,		"UTF-8N",		&NkfEncodingUTF_8},
    {UTF_8_BOM,		"UTF-8-BOM",		&NkfEncodingUTF_8},
    {UTF8_MAC,		"UTF8-MAC",		&NkfEncodingUTF_8},
    {UTF_16,		"UTF-16",		&NkfEncodingUTF_16},
    {UTF_16BE,		"UTF-16BE",		&NkfEncodingUTF_16},
    {UTF_16BE_BOM,	"UTF-16BE-BOM",		&NkfEncodingUTF_16},
    {UTF_16LE,		"UTF-16LE",		&NkfEncodingUTF_16},
    {UTF_16LE_BOM,	"UTF-16LE-BOM",		&NkfEncodingUTF_16},
    {UTF_32,		"UTF-32",		&NkfEncodingUTF_32},
    {UTF_32BE,		"UTF-32BE",		&NkfEncodingUTF_32},
    {UTF_32BE_BOM,	"UTF-32BE-BOM",		&NkfEncodingUTF_32},
    {UTF_32LE,		"UTF-32LE",		&NkfEncodingUTF_32},
    {UTF_32LE_BOM,	"UTF-32LE-BOM",		&NkfEncodingUTF_32},
    {BINARY,		"BINARY",		&NkfEncodingASCII},
    {-1,		NULL,			NULL}
};
206 
/*
 * Alias table mapping encoding names to enum nkf_encodings ids.
 * nkf_enc_find_index() scans it linearly with a case-insensitive
 * comparison and stops at the terminating {NULL, -1} entry.
 */
struct {
    const char *name;
    const int id;
} encoding_name_to_id_table[] = {
    {"US-ASCII",		ASCII},
    {"ASCII",			ASCII},
    {"646",			ASCII},
    {"ROMAN8",			ASCII},
    {"ISO-2022-JP",		ISO_2022_JP},
    {"ISO2022JP-CP932",		CP50220},
    {"CP50220",			CP50220},
    {"CP50221",			CP50221},
    {"CSISO2022JP",		CP50221},
    {"CP50222",			CP50222},
    {"ISO-2022-JP-1",		ISO_2022_JP_1},
    {"ISO-2022-JP-3",		ISO_2022_JP_3},
    {"ISO-2022-JP-2004",	ISO_2022_JP_2004},
    {"SHIFT_JIS",		SHIFT_JIS},
    {"SJIS",			SHIFT_JIS},
    {"MS_Kanji",		SHIFT_JIS},
    {"PCK",			SHIFT_JIS},
    {"WINDOWS-31J",		WINDOWS_31J},
    {"CSWINDOWS31J",		WINDOWS_31J},
    {"CP932",			WINDOWS_31J},
    {"MS932",			WINDOWS_31J},
    {"CP10001",			CP10001},
    {"EUCJP",			EUC_JP},
    {"EUC-JP",			EUC_JP},
    {"EUCJP-NKF",		EUCJP_NKF},
    {"CP51932",			CP51932},
    {"EUC-JP-MS",		EUCJP_MS},
    {"EUCJP-MS",		EUCJP_MS},
    {"EUCJPMS",			EUCJP_MS},
    {"EUC-JP-ASCII",		EUCJP_ASCII},
    {"EUCJP-ASCII",		EUCJP_ASCII},
    {"SHIFT_JISX0213",		SHIFT_JISX0213},
    {"SHIFT_JIS-2004",		SHIFT_JIS_2004},
    {"EUC-JISX0213",		EUC_JISX0213},
    {"EUC-JIS-2004",		EUC_JIS_2004},
    {"UTF-8",			UTF_8},
    {"UTF-8N",			UTF_8N},
    {"UTF-8-BOM",		UTF_8_BOM},
    {"UTF8-MAC",		UTF8_MAC},
    {"UTF-8-MAC",		UTF8_MAC},
    {"UTF-16",			UTF_16},
    {"UTF-16BE",		UTF_16BE},
    {"UTF-16BE-BOM",		UTF_16BE_BOM},
    {"UTF-16LE",		UTF_16LE},
    {"UTF-16LE-BOM",		UTF_16LE_BOM},
    {"UTF-32",			UTF_32},
    {"UTF-32BE",		UTF_32BE},
    {"UTF-32BE-BOM",		UTF_32BE_BOM},
    {"UTF-32LE",		UTF_32LE},
    {"UTF-32LE-BOM",		UTF_32LE_BOM},
    {"BINARY",			BINARY},
    {NULL,			-1}
};
264 
265 #if defined(DEFAULT_CODE_JIS)
266 #define	    DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define	    DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define	    DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define	    DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define	    DEFAULT_ENCIDX UTF_8
275 #endif
276 
277 
/*
 * Locale-independent ASCII character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that expression arguments
 * (e.g. `x ? a : b` or `a | b`) expand with the intended precedence.
 * Arguments are still evaluated more than once, so never pass an
 * expression with side effects.
 */
#define		is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; yields 0 for a non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of the low 16 bits of c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
			      (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
			       && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 
304 #define         HOLD_SIZE       1024
305 #if defined(INT_IS_SHORT)
306 #define         IOBUF_SIZE      2048
307 #else
308 #define         IOBUF_SIZE      16384
309 #endif
310 
311 #define         DEFAULT_J       'B'
312 #define         DEFAULT_R       'B'
313 
314 
315 #define         GETA1   0x22
316 #define         GETA2   0x2e
317 
318 
319 /* MIME preprocessor */
320 
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
323 #endif
324 
325 struct input_code{
326     const char *name;
327     nkf_char stat;
328     nkf_char score;
329     nkf_char index;
330     nkf_char buf[3];
331     void (*status_func)(struct input_code *, nkf_char);
332     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
333     int _file_stat;
334 };
335 
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
339 
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 /* UCS Mapping
342  * 0: Shift_JIS, eucJP-ascii
343  * 1: eucJP-ms
344  * 2: CP932, CP51932
345  * 3: CP10001
346  */
347 #define UCS_MAP_ASCII   0
348 #define UCS_MAP_MS      1
349 #define UCS_MAP_CP932   2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static  int     no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static  int     no_best_fit_chars_f = FALSE;
358 static  int     input_endian = ENDIAN_BIG;
359 static  int     input_bom_f = FALSE;
360 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
361 static  void    (*encode_fallback)(nkf_char c) = NULL;
362 static  void    w_status(struct input_code *, nkf_char);
363 #endif
364 #ifdef UTF8_OUTPUT_ENABLE
365 static  int     output_bom_f = FALSE;
366 static  int     output_endian = ENDIAN_BIG;
367 #endif
368 
369 static  void    std_putc(nkf_char c);
370 static  nkf_char     std_getc(FILE *f);
371 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
372 
373 static  nkf_char     broken_getc(FILE *f);
374 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
375 
376 static  nkf_char     mime_getc(FILE *f);
377 
378 static void mime_putc(nkf_char c);
379 
380 /* buffers */
381 
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 static unsigned char   stdibuf[IOBUF_SIZE];
384 static unsigned char   stdobuf[IOBUF_SIZE];
385 #endif
386 
387 #define NKF_UNSPECIFIED (-TRUE)
388 
389 /* flags */
static int             unbuf_f = FALSE;
static int             estab_f = FALSE;
static int             nop_f = FALSE;
static int             binmode_f = TRUE;       /* binary mode */
static int             rot_f = FALSE;          /* rot14/43 mode */
static int             hira_f = FALSE;          /* hira/kata henkan */
static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
static int             mimebuf_f = FALSE;      /* MIME buffered input */
static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
static int             iso8859_f = FALSE;      /* ISO8859 through */
static int             mimeout_f = FALSE;       /* base64 mode */
static int             x0201_f = NKF_UNSPECIFIED;   /* convert JIS X 0201 */
static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
405 
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
410 #endif
411 
412 #ifdef INPUT_OPTION
413 static int cap_f = FALSE;
414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416 
417 static int url_f = FALSE;
418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
420 #endif
421 
422 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
423 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
424 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
425 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
426 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
427 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
428 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
429 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
430 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
431 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
432 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
433 
434 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435 
436 #ifdef NUMCHAR_OPTION
437 static int numchar_f = FALSE;
438 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
439 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440 #endif
441 
442 #ifdef CHECK_OPTION
443 static int noout_f = FALSE;
444 static void no_putc(nkf_char c);
445 static int debug_f = FALSE;
446 static void debug(const char *str);
447 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448 #endif
449 
450 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451 static  void    set_input_codename(const char *codename);
452 
453 #ifdef EXEC_IO
454 static int exec_f = 0;
455 #endif
456 
457 #ifdef SHIFTJIS_CP932
458 /* invert IBM extended characters to others */
459 static int cp51932_f = FALSE;
460 
461 /* invert NEC-selected IBM extended characters to IBM extended characters */
462 static int cp932inv_f = TRUE;
463 
464 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465 #endif /* SHIFTJIS_CP932 */
466 
467 static int x0212_f = FALSE;
468 static int x0213_f = FALSE;
469 
470 static unsigned char prefix_table[256];
471 
472 static void e_status(struct input_code *, nkf_char);
473 static void s_status(struct input_code *, nkf_char);
474 
475 struct input_code input_code_list[] = {
476     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE
479     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480     {"UTF-16",     0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481     {"UTF-32",     0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482 #endif
483     {NULL,        0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484 };
485 
486 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int              base64_count = 0;
488 
489 /* X0208 -> ASCII converter */
490 
491 /* fold parameter */
492 static int             f_line = 0;    /* chars in line */
493 static int             f_prev = 0;
494 static int             fold_preserve_f = FALSE; /* preserve new lines */
495 static int             fold_f  = FALSE;
496 static int             fold_len  = 0;
497 
498 /* options */
499 static unsigned char   kanji_intro = DEFAULT_J;
500 static unsigned char   ascii_intro = DEFAULT_R;
501 
502 /* Folding */
503 
504 #define FOLD_MARGIN  10
505 #define DEFAULT_FOLD 60
506 
507 static int             fold_margin  = FOLD_MARGIN;
508 
509 /* process default */
510 
511 static nkf_char
no_connection2(ARG_UNUSED nkf_char c2,ARG_UNUSED nkf_char c1,ARG_UNUSED nkf_char c0)512 no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
513 {
514     fprintf(stderr,"nkf internal module connection failure.\n");
515     exit(EXIT_FAILURE);
516     return 0; /* LINT */
517 }
518 
519 static void
no_connection(nkf_char c2,nkf_char c1)520 no_connection(nkf_char c2, nkf_char c1)
521 {
522     no_connection2(c2,c1,0);
523 }
524 
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527 
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
535 
536 /* static redirections */
537 
538 static  void   (*o_putc)(nkf_char c) = std_putc;
539 
540 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
541 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
542 
543 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
545 
546 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547 
548 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
550 
551 /* for strict mime */
552 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554 
555 /* Global states */
556 static int output_mode = ASCII;    /* output kanji mode */
557 static int input_mode =  ASCII;    /* input kanji mode */
558 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
559 
560 /* X0201 / X0208 conversion tables */
561 
562 /* X0201 kana conversion table */
563 /* 90-9F A0-DF */
564 static const unsigned char cv[]= {
565     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581     0x00,0x00};
582 
583 
/* X0201 kana conversion table for dakuten */
585 /* 90-9F A0-DF */
586 static const unsigned char dv[]= {
587     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603     0x00,0x00};
604 
/* X0201 kana conversion table for han-dakuten */
606 /* 90-9F A0-DF */
607 static const unsigned char ev[]= {
608     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624     0x00,0x00};
625 
/* X0201 kana to X0213 conversion table for han-dakuten */
627 /* 90-9F A0-DF */
628 static const unsigned char ev_x0213[]= {
629     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634     0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635     0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636     0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637     0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638     0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645     0x00,0x00};
646 
647 
648 /* X0208 kigou conversion table */
649 /* 0x8140 - 0x819e */
650 static const unsigned char fv[] = {
651 
652     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664 } ;
665 
666 
667 
668 static int option_mode = 0;
669 static int             file_out_f = FALSE;
670 #ifdef OVERWRITE
671 static int             overwrite_f = FALSE;
672 static int             preserve_time_f = FALSE;
673 static int             backup_f = FALSE;
674 static char            *backup_suffix = "";
675 #endif
676 
677 static int eolmode_f = 0;   /* CR, LF, CRLF */
678 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679 static nkf_char prev_cr = 0; /* CR or 0 */
680 #ifdef EASYWIN /*Easy Win */
681 static int             end_check;
682 #endif /*Easy Win */
683 
/*
 * malloc() wrapper that never returns NULL.
 * A zero-byte request is rounded up to one byte.  If the allocation
 * fails, the error is reported with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size != 0 ? size : 1);

    if (mem == NULL) {
	perror("can't malloc");
	exit(EXIT_FAILURE);
    }
    return mem;
}
699 
/*
 * realloc() wrapper that never returns NULL.
 * A zero-byte request is rounded up to one byte.  If the reallocation
 * fails, the error is reported with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size != 0 ? size : 1);

    if (resized == NULL) {
	perror("can't realloc");
	exit(EXIT_FAILURE);
    }
    return resized;
}
713 
714 #define nkf_xfree(ptr) free(ptr)
715 
716 static int
nkf_str_caseeql(const char * src,const char * target)717 nkf_str_caseeql(const char *src, const char *target)
718 {
719     int i;
720     for (i = 0; src[i] && target[i]; i++) {
721 	if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722     }
723     if (src[i] || target[i]) return FALSE;
724     else return TRUE;
725 }
726 
727 static nkf_encoding*
nkf_enc_from_index(int idx)728 nkf_enc_from_index(int idx)
729 {
730     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731 	return 0;
732     }
733     return &nkf_encoding_table[idx];
734 }
735 
736 static int
nkf_enc_find_index(const char * name)737 nkf_enc_find_index(const char *name)
738 {
739     int i;
740     if (name[0] == 'X' && *(name+1) == '-') name += 2;
741     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742 	if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743 	    return encoding_name_to_id_table[i].id;
744 	}
745     }
746     return -1;
747 }
748 
749 static nkf_encoding*
nkf_enc_find(const char * name)750 nkf_enc_find(const char *name)
751 {
752     int idx = -1;
753     idx = nkf_enc_find_index(name);
754     if (idx < 0) return 0;
755     return nkf_enc_from_index(idx);
756 }
757 
758 #define nkf_enc_name(enc) (enc)->name
759 #define nkf_enc_to_index(enc) (enc)->id
760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
763 #define nkf_enc_asciicompat(enc) (\
764 				  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765 				  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
766 #define nkf_enc_unicode_p(enc) (\
767 				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768 				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769 				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
770 #define nkf_enc_cp5022x_p(enc) (\
771 				nkf_enc_to_index(enc) == CP50220 ||\
772 				nkf_enc_to_index(enc) == CP50221 ||\
773 				nkf_enc_to_index(enc) == CP50222)
774 
775 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character set of the current locale, or NULL
 * when the platform offers no way to query it.  On Windows and 32-bit
 * OS/2 the result points to a static buffer that is overwritten by the
 * next call.
 */
static const char*
nkf_locale_charmap(void)
{
#if defined(HAVE_LANGINFO_H)
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    sprintf(buf, "CP%d", GetACP());
    return buf;
#elif defined(__OS2__) && defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
#elif defined(__OS2__)
    /* OS/2 32bit */
    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
#else
    return NULL;
#endif
}
803 
804 static nkf_encoding*
nkf_locale_encoding(void)805 nkf_locale_encoding(void)
806 {
807     nkf_encoding *enc = 0;
808     const char *encname = nkf_locale_charmap();
809     if (encname)
810 	enc = nkf_enc_find(encname);
811     return enc;
812 }
813 #endif /* DEFAULT_CODE_LOCALE */
814 
815 static nkf_encoding*
nkf_utf8_encoding(void)816 nkf_utf8_encoding(void)
817 {
818     return &nkf_encoding_table[UTF_8];
819 }
820 
821 static nkf_encoding*
nkf_default_encoding(void)822 nkf_default_encoding(void)
823 {
824     nkf_encoding *enc = 0;
825 #ifdef DEFAULT_CODE_LOCALE
826     enc = nkf_locale_encoding();
827 #elif defined(DEFAULT_ENCIDX)
828     enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829 #endif
830     if (!enc) enc = nkf_utf8_encoding();
831     return enc;
832 }
833 
/* Fixed-capacity stack of nkf_char values (see nkf_buf_push / nkf_buf_pop). */
typedef struct {
    long capa;		/* number of elements ptr can hold */
    long len;		/* number of elements currently stored */
    nkf_char *ptr;	/* element storage */
} nkf_buf_t;
839 
840 static nkf_buf_t *
nkf_buf_new(int length)841 nkf_buf_new(int length)
842 {
843     nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844     buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845     buf->capa = length;
846     buf->len = 0;
847     return buf;
848 }
849 
850 #if 0
851 static void
852 nkf_buf_dispose(nkf_buf_t *buf)
853 {
854     nkf_xfree(buf->ptr);
855     nkf_xfree(buf);
856 }
857 #endif
858 
859 #define nkf_buf_length(buf) ((buf)->len)
860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
861 
862 static nkf_char
nkf_buf_at(nkf_buf_t * buf,int index)863 nkf_buf_at(nkf_buf_t *buf, int index)
864 {
865     assert(index <= buf->len);
866     return buf->ptr[index];
867 }
868 
869 static void
nkf_buf_clear(nkf_buf_t * buf)870 nkf_buf_clear(nkf_buf_t *buf)
871 {
872     buf->len = 0;
873 }
874 
875 static void
nkf_buf_push(nkf_buf_t * buf,nkf_char c)876 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
877 {
878     if (buf->capa <= buf->len) {
879 	exit(EXIT_FAILURE);
880     }
881     buf->ptr[buf->len++] = c;
882 }
883 
884 static nkf_char
nkf_buf_pop(nkf_buf_t * buf)885 nkf_buf_pop(nkf_buf_t *buf)
886 {
887     assert(!nkf_buf_empty_p(buf));
888     return buf->ptr[--buf->len];
889 }
890 
891 /* Normalization Form C */
892 #ifndef PERL_XS
893 #ifdef WIN32DLL
894 #define fprintf dllprintf
895 #endif
896 
897 static void
version(void)898 version(void)
899 {
900     fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901 }
902 
903 static void
usage(void)904 usage(void)
905 {
906     fprintf(HELP_OUTPUT,
907 	    "Usage:  nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908 #ifdef UTF8_OUTPUT_ENABLE
909 	    " j/s/e/w  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910 	    "          UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911 #else
912 #endif
913 #ifdef UTF8_INPUT_ENABLE
914 	    " J/S/E/W  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915 	    "          UTF option is -W[8,[16,32][B,L]]\n"
916 #else
917 	    " J/S/E    Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918 #endif
919 	    );
920     fprintf(HELP_OUTPUT,
921 	    " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922 	    " M[BQ]    MIME encode [B:base64 Q:quoted]\n"
923 	    " f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924 	    );
925     fprintf(HELP_OUTPUT,
926 	    " Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
927 	    "          1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
928 	    "          4: JISX0208 Katakana to JISX0201 Katakana\n"
929 	    " X,x      Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930 	    );
931     fprintf(HELP_OUTPUT,
932 	    " O        Output to File (DEFAULT 'nkf.out')\n"
933 	    " L[uwm]   Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934 	    );
935     fprintf(HELP_OUTPUT,
936 	    " --ic=<encoding>        Specify the input encoding\n"
937 	    " --oc=<encoding>        Specify the output encoding\n"
938 	    " --hiragana --katakana  Hiragana/Katakana Conversion\n"
939 	    " --katakana-hiragana    Converts each other\n"
940 	    );
941     fprintf(HELP_OUTPUT,
942 #ifdef INPUT_OPTION
943 	    " --{cap, url}-input     Convert hex after ':' or '%%'\n"
944 #endif
945 #ifdef NUMCHAR_OPTION
946 	    " --numchar-input        Convert Unicode Character Reference\n"
947 #endif
948 #ifdef UTF8_INPUT_ENABLE
949 	    " --fb-{skip, html, xml, perl, java, subchar}\n"
950 	    "                        Specify unassigned character's replacement\n"
951 #endif
952 	    );
953     fprintf(HELP_OUTPUT,
954 #ifdef OVERWRITE
955 	    " --in-place[=SUF]       Overwrite original files\n"
956 	    " --overwrite[=SUF]      Preserve timestamp of original files\n"
957 #endif
958 	    " -g --guess             Guess the input code\n"
959 	    " -v --version           Print the version\n"
960 	    " --help/-V              Print this help / configuration\n"
961 	    );
962     version();
963 }
964 
965 static void
show_configuration(void)966 show_configuration(void)
967 {
968     fprintf(HELP_OUTPUT,
969 	    "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
970 	    "  Compile-time options:\n"
971 	    "    Compiled at:                 " __DATE__ " " __TIME__ "\n"
972 	   );
973     fprintf(HELP_OUTPUT,
974 	    "    Default output encoding:     "
975 #ifdef DEFAULT_CODE_LOCALE
976 	    "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
977 #elif defined(DEFAULT_ENCIDX)
978 	    "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
979 #else
980 	    "NONE\n"
981 #endif
982 	   );
983     fprintf(HELP_OUTPUT,
984 	    "    Default output end of line:  "
985 #if DEFAULT_NEWLINE == CR
986 	    "CR"
987 #elif DEFAULT_NEWLINE == CRLF
988 	    "CRLF"
989 #else
990 	    "LF"
991 #endif
992 	    "\n"
993 	    "    Decode MIME encoded string:  "
994 #if MIME_DECODE_DEFAULT
995 	    "ON"
996 #else
997 	    "OFF"
998 #endif
999 	    "\n"
1000 	    "    Convert JIS X 0201 Katakana: "
1001 #if X0201_DEFAULT
1002 	    "ON"
1003 #else
1004 	    "OFF"
1005 #endif
1006 	    "\n"
1007 	    "    --help, --version output:    "
1008 #if HELP_OUTPUT_HELP_OUTPUT
1009 	    "HELP_OUTPUT"
1010 #else
1011 	    "STDOUT"
1012 #endif
1013 	    "\n");
1014 }
1015 #endif /*PERL_XS*/
1016 
1017 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * When `suffix` contains one or more '*', every '*' is replaced by
 * `filename` and all other characters of `suffix` are copied verbatim
 * (suffix "*.bak" and filename "a.txt" give "a.txt.bak").  Otherwise
 * `suffix` is appended to `filename`.
 *
 * Returns a NUL-terminated string allocated with nkf_xmalloc().
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    /* lengths stay in size_t: strlen() results must not be narrowed to int */
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the terminating NUL of suffix as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* every '*' is dropped from the suffix and replaced by one filename */
    backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
                                  + asterisk_count * filename_length + 1);
    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            /* write at the known offset instead of rescanning the buffer */
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1052 #endif
1053 
1054 #ifdef UTF8_INPUT_ENABLE
1055 static void
nkf_each_char_to_hex(void (* f)(nkf_char c2,nkf_char c1),nkf_char c)1056 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1057 {
1058     int shift = 20;
1059     c &= VALUE_MASK;
1060     while(shift >= 0){
1061 	if(c >= NKF_INT32_C(1)<<shift){
1062 	    while(shift >= 0){
1063 		(*f)(0, bin2hex(c>>shift));
1064 		shift -= 4;
1065 	    }
1066 	}else{
1067 	    shift -= 4;
1068 	}
1069     }
1070     return;
1071 }
1072 
1073 static void
encode_fallback_html(nkf_char c)1074 encode_fallback_html(nkf_char c)
1075 {
1076     (*oconv)(0, '&');
1077     (*oconv)(0, '#');
1078     c &= VALUE_MASK;
1079     if(c >= NKF_INT32_C(1000000))
1080 	(*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081     if(c >= NKF_INT32_C(100000))
1082 	(*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083     if(c >= 10000)
1084 	(*oconv)(0, 0x30+(c/10000  )%10);
1085     if(c >= 1000)
1086 	(*oconv)(0, 0x30+(c/1000   )%10);
1087     if(c >= 100)
1088 	(*oconv)(0, 0x30+(c/100    )%10);
1089     if(c >= 10)
1090 	(*oconv)(0, 0x30+(c/10     )%10);
1091     if(c >= 0)
1092 	(*oconv)(0, 0x30+ c         %10);
1093     (*oconv)(0, ';');
1094     return;
1095 }
1096 
1097 static void
encode_fallback_xml(nkf_char c)1098 encode_fallback_xml(nkf_char c)
1099 {
1100     (*oconv)(0, '&');
1101     (*oconv)(0, '#');
1102     (*oconv)(0, 'x');
1103     nkf_each_char_to_hex(oconv, c);
1104     (*oconv)(0, ';');
1105     return;
1106 }
1107 
1108 static void
encode_fallback_java(nkf_char c)1109 encode_fallback_java(nkf_char c)
1110 {
1111     (*oconv)(0, '\\');
1112     c &= VALUE_MASK;
1113     if(!nkf_char_unicode_bmp_p(c)){
1114         int high = (c >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
1115         int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
1116 	(*oconv)(0, 'u');
1117 	(*oconv)(0, bin2hex(high>>12));
1118 	(*oconv)(0, bin2hex(high>> 8));
1119 	(*oconv)(0, bin2hex(high>> 4));
1120 	(*oconv)(0, bin2hex(high    ));
1121 	(*oconv)(0, '\\');
1122 	(*oconv)(0, 'u');
1123 	(*oconv)(0, bin2hex(low>>12));
1124 	(*oconv)(0, bin2hex(low>> 8));
1125 	(*oconv)(0, bin2hex(low>> 4));
1126 	(*oconv)(0, bin2hex(low    ));
1127     }else{
1128 	(*oconv)(0, 'u');
1129 	(*oconv)(0, bin2hex(c>>12));
1130 	(*oconv)(0, bin2hex(c>> 8));
1131 	(*oconv)(0, bin2hex(c>> 4));
1132 	(*oconv)(0, bin2hex(c    ));
1133     }
1134     return;
1135 }
1136 
1137 static void
encode_fallback_perl(nkf_char c)1138 encode_fallback_perl(nkf_char c)
1139 {
1140     (*oconv)(0, '\\');
1141     (*oconv)(0, 'x');
1142     (*oconv)(0, '{');
1143     nkf_each_char_to_hex(oconv, c);
1144     (*oconv)(0, '}');
1145     return;
1146 }
1147 
1148 static void
encode_fallback_subchar(nkf_char c)1149 encode_fallback_subchar(nkf_char c)
1150 {
1151     c = unicode_subchar;
1152     (*oconv)((c>>8)&0xFF, c&0xFF);
1153     return;
1154 }
1155 #endif
1156 
/*
 * Table of long option names.
 *   name  - the long option; a trailing '=' marks options written with a value
 *   alias - a string of short option letters associated with the name; it is
 *           empty for many entries (presumably those are handled by dedicated
 *           code in the option parser -- confirm there)
 * The order of the entries is kept exactly as it was.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64", "jMB"},
    {"euc", "e"},
    {"euc-input", "E"},
    {"fj", "jm"},
    {"help", ""},
    {"jis", "j"},
    {"jis-input", "J"},
    {"mac", "sLm"},
    {"mime", "jM"},
    {"mime-input", "m"},
    {"msdos", "sLw"},
    {"sjis", "s"},
    {"sjis-input", "S"},
    {"unix", "eLu"},
    {"version", "v"},
    {"windows", "sLw"},
    {"hiragana", "h1"},
    {"katakana", "h2"},
    {"katakana-hiragana", "h3"},
    {"guess=", ""},
    {"guess", "g2"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars", ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};
1236 
1237 static void
set_input_encoding(nkf_encoding * enc)1238 set_input_encoding(nkf_encoding *enc)
1239 {
1240     switch (nkf_enc_to_index(enc)) {
1241     case ISO_8859_1:
1242 	iso8859_f = TRUE;
1243 	break;
1244     case CP50221:
1245     case CP50222:
1246 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1247     case CP50220:
1248 #ifdef SHIFTJIS_CP932
1249 	cp51932_f = TRUE;
1250 #endif
1251 #ifdef UTF8_OUTPUT_ENABLE
1252 	ms_ucs_map_f = UCS_MAP_CP932;
1253 #endif
1254 	break;
1255     case ISO_2022_JP_1:
1256 	x0212_f = TRUE;
1257 	break;
1258     case ISO_2022_JP_3:
1259 	x0212_f = TRUE;
1260 	x0213_f = TRUE;
1261 	break;
1262     case ISO_2022_JP_2004:
1263 	x0212_f = TRUE;
1264 	x0213_f = TRUE;
1265 	break;
1266     case SHIFT_JIS:
1267 	break;
1268     case WINDOWS_31J:
1269 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1270 #ifdef SHIFTJIS_CP932
1271 	cp51932_f = TRUE;
1272 #endif
1273 #ifdef UTF8_OUTPUT_ENABLE
1274 	ms_ucs_map_f = UCS_MAP_CP932;
1275 #endif
1276 	break;
1277 	break;
1278     case CP10001:
1279 #ifdef SHIFTJIS_CP932
1280 	cp51932_f = TRUE;
1281 #endif
1282 #ifdef UTF8_OUTPUT_ENABLE
1283 	ms_ucs_map_f = UCS_MAP_CP10001;
1284 #endif
1285 	break;
1286     case EUC_JP:
1287 	break;
1288     case EUCJP_NKF:
1289 	break;
1290     case CP51932:
1291 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1292 #ifdef SHIFTJIS_CP932
1293 	cp51932_f = TRUE;
1294 #endif
1295 #ifdef UTF8_OUTPUT_ENABLE
1296 	ms_ucs_map_f = UCS_MAP_CP932;
1297 #endif
1298 	break;
1299     case EUCJP_MS:
1300 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1301 #ifdef SHIFTJIS_CP932
1302 	cp51932_f = FALSE;
1303 #endif
1304 #ifdef UTF8_OUTPUT_ENABLE
1305 	ms_ucs_map_f = UCS_MAP_MS;
1306 #endif
1307 	break;
1308     case EUCJP_ASCII:
1309 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1310 #ifdef SHIFTJIS_CP932
1311 	cp51932_f = FALSE;
1312 #endif
1313 #ifdef UTF8_OUTPUT_ENABLE
1314 	ms_ucs_map_f = UCS_MAP_ASCII;
1315 #endif
1316 	break;
1317     case SHIFT_JISX0213:
1318     case SHIFT_JIS_2004:
1319 	x0213_f = TRUE;
1320 #ifdef SHIFTJIS_CP932
1321 	cp51932_f = FALSE;
1322 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1323 #endif
1324 	break;
1325     case EUC_JISX0213:
1326     case EUC_JIS_2004:
1327 	x0213_f = TRUE;
1328 #ifdef SHIFTJIS_CP932
1329 	cp51932_f = FALSE;
1330 #endif
1331 	break;
1332 #ifdef UTF8_INPUT_ENABLE
1333 #ifdef UNICODE_NORMALIZATION
1334     case UTF8_MAC:
1335 	nfc_f = TRUE;
1336 	break;
1337 #endif
1338     case UTF_16:
1339     case UTF_16BE:
1340     case UTF_16BE_BOM:
1341 	input_endian = ENDIAN_BIG;
1342 	break;
1343     case UTF_16LE:
1344     case UTF_16LE_BOM:
1345 	input_endian = ENDIAN_LITTLE;
1346 	break;
1347     case UTF_32:
1348     case UTF_32BE:
1349     case UTF_32BE_BOM:
1350 	input_endian = ENDIAN_BIG;
1351 	break;
1352     case UTF_32LE:
1353     case UTF_32LE_BOM:
1354 	input_endian = ENDIAN_LITTLE;
1355 	break;
1356 #endif
1357     }
1358 }
1359 
1360 static void
set_output_encoding(nkf_encoding * enc)1361 set_output_encoding(nkf_encoding *enc)
1362 {
1363     switch (nkf_enc_to_index(enc)) {
1364     case CP50220:
1365 #ifdef SHIFTJIS_CP932
1366 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1367 #endif
1368 #ifdef UTF8_OUTPUT_ENABLE
1369 	ms_ucs_map_f = UCS_MAP_CP932;
1370 #endif
1371 	break;
1372     case CP50221:
1373 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1374 #ifdef SHIFTJIS_CP932
1375 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1378 	ms_ucs_map_f = UCS_MAP_CP932;
1379 #endif
1380 	break;
1381     case ISO_2022_JP:
1382 #ifdef SHIFTJIS_CP932
1383 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1384 #endif
1385 	break;
1386     case ISO_2022_JP_1:
1387 	x0212_f = TRUE;
1388 #ifdef SHIFTJIS_CP932
1389 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390 #endif
1391 	break;
1392     case ISO_2022_JP_3:
1393     case ISO_2022_JP_2004:
1394 	x0212_f = TRUE;
1395 	x0213_f = TRUE;
1396 #ifdef SHIFTJIS_CP932
1397 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1398 #endif
1399 	break;
1400     case SHIFT_JIS:
1401 	break;
1402     case WINDOWS_31J:
1403 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1404 #ifdef UTF8_OUTPUT_ENABLE
1405 	ms_ucs_map_f = UCS_MAP_CP932;
1406 #endif
1407 	break;
1408     case CP10001:
1409 #ifdef UTF8_OUTPUT_ENABLE
1410 	ms_ucs_map_f = UCS_MAP_CP10001;
1411 #endif
1412 	break;
1413     case EUC_JP:
1414 	x0212_f = TRUE;
1415 #ifdef SHIFTJIS_CP932
1416 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1417 #endif
1418 #ifdef UTF8_OUTPUT_ENABLE
1419 	ms_ucs_map_f = UCS_MAP_ASCII;
1420 #endif
1421 	break;
1422     case EUCJP_NKF:
1423 	x0212_f = FALSE;
1424 #ifdef SHIFTJIS_CP932
1425 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1426 #endif
1427 #ifdef UTF8_OUTPUT_ENABLE
1428 	ms_ucs_map_f = UCS_MAP_ASCII;
1429 #endif
1430 	break;
1431     case CP51932:
1432 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1433 #ifdef SHIFTJIS_CP932
1434 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1435 #endif
1436 #ifdef UTF8_OUTPUT_ENABLE
1437 	ms_ucs_map_f = UCS_MAP_CP932;
1438 #endif
1439 	break;
1440     case EUCJP_MS:
1441 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1442 	x0212_f = TRUE;
1443 #ifdef UTF8_OUTPUT_ENABLE
1444 	ms_ucs_map_f = UCS_MAP_MS;
1445 #endif
1446 	break;
1447     case EUCJP_ASCII:
1448 	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1449 	x0212_f = TRUE;
1450 #ifdef UTF8_OUTPUT_ENABLE
1451 	ms_ucs_map_f = UCS_MAP_ASCII;
1452 #endif
1453 	break;
1454     case SHIFT_JISX0213:
1455     case SHIFT_JIS_2004:
1456 	x0213_f = TRUE;
1457 #ifdef SHIFTJIS_CP932
1458 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459 #endif
1460 	break;
1461     case EUC_JISX0213:
1462     case EUC_JIS_2004:
1463 	x0212_f = TRUE;
1464 	x0213_f = TRUE;
1465 #ifdef SHIFTJIS_CP932
1466 	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1467 #endif
1468 	break;
1469 #ifdef UTF8_OUTPUT_ENABLE
1470     case UTF_8_BOM:
1471 	output_bom_f = TRUE;
1472 	break;
1473     case UTF_16:
1474     case UTF_16BE_BOM:
1475 	output_bom_f = TRUE;
1476 	break;
1477     case UTF_16LE:
1478 	output_endian = ENDIAN_LITTLE;
1479 	output_bom_f = FALSE;
1480 	break;
1481     case UTF_16LE_BOM:
1482 	output_endian = ENDIAN_LITTLE;
1483 	output_bom_f = TRUE;
1484 	break;
1485     case UTF_32:
1486     case UTF_32BE_BOM:
1487 	output_bom_f = TRUE;
1488 	break;
1489     case UTF_32LE:
1490 	output_endian = ENDIAN_LITTLE;
1491 	output_bom_f = FALSE;
1492 	break;
1493     case UTF_32LE_BOM:
1494 	output_endian = ENDIAN_LITTLE;
1495 	output_bom_f = TRUE;
1496 	break;
1497 #endif
1498     }
1499 }
1500 
1501 static struct input_code*
find_inputcode_byfunc(nkf_char (* iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))1502 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1503 {
1504     if (iconv_func){
1505 	struct input_code *p = input_code_list;
1506 	while (p->name){
1507 	    if (iconv_func == p->iconv_func){
1508 		return p;
1509 	    }
1510 	    p++;
1511 	}
1512     }
1513     return 0;
1514 }
1515 
1516 static void
set_iconv(nkf_char f,nkf_char (* iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))1517 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1518 {
1519 #ifdef INPUT_CODE_FIX
1520     if (f || !input_encoding)
1521 #endif
1522 	if (estab_f != f){
1523 	    estab_f = f;
1524 	}
1525 
1526     if (iconv_func
1527 #ifdef INPUT_CODE_FIX
1528 	&& (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1529 #endif
1530        ){
1531 	iconv = iconv_func;
1532     }
1533 #ifdef CHECK_OPTION
1534     if (estab_f && iconv_for_check != iconv){
1535 	struct input_code *p = find_inputcode_byfunc(iconv);
1536 	if (p){
1537 	    set_input_codename(p->name);
1538 	    debug(p->name);
1539 	}
1540 	iconv_for_check = iconv;
1541     }
1542 #endif
1543 }
1544 
1545 #ifdef X0212_ENABLE
1546 static nkf_char
x0212_shift(nkf_char c)1547 x0212_shift(nkf_char c)
1548 {
1549     nkf_char ret = c;
1550     c &= 0x7f;
1551     if (is_eucg3(ret)){
1552 	if (0x75 <= c && c <= 0x7f){
1553 	    ret = c + (0x109 - 0x75);
1554 	}
1555     }else{
1556 	if (0x75 <= c && c <= 0x7f){
1557 	    ret = c + (0x113 - 0x75);
1558 	}
1559     }
1560     return ret;
1561 }
1562 
1563 
1564 static nkf_char
x0212_unshift(nkf_char c)1565 x0212_unshift(nkf_char c)
1566 {
1567     nkf_char ret = c;
1568     if (0x7f <= c && c <= 0x88){
1569 	ret = c + (0x75 - 0x7f);
1570     }else if (0x89 <= c && c <= 0x92){
1571 	ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1572     }
1573     return ret;
1574 }
1575 #endif /* X0212_ENABLE */
1576 
1577 static int
is_x0213_2_in_x0212(nkf_char c1)1578 is_x0213_2_in_x0212(nkf_char c1)
1579 {
1580     static const char x0213_2_table[] =
1581 	{0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1582     int ku = c1 - 0x20;
1583     if (ku <= 15)
1584 	return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1585     if (78 <= ku && ku <= 94)
1586 	return 1;
1587     return 0;
1588 }
1589 
1590 static nkf_char
e2s_conv(nkf_char c2,nkf_char c1,nkf_char * p2,nkf_char * p1)1591 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1592 {
1593     nkf_char ndx;
1594     if (is_eucg3(c2)){
1595 	ndx = c2 & 0x7f;
1596 	if (x0213_f && is_x0213_2_in_x0212(ndx)){
1597 	    if((0x21 <= ndx && ndx <= 0x2F)){
1598 		if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1599 		if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1600 		return 0;
1601 	    }else if(0x6E <= ndx && ndx <= 0x7E){
1602 		if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1603 		if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1604 		return 0;
1605 	    }
1606 	    return 1;
1607 	}
1608 #ifdef X0212_ENABLE
1609 	else if(nkf_isgraph(ndx)){
1610 	    nkf_char val = 0;
1611 	    const unsigned short *ptr;
1612 	    ptr = x0212_shiftjis[ndx - 0x21];
1613 	    if (ptr){
1614 		val = ptr[(c1 & 0x7f) - 0x21];
1615 	    }
1616 	    if (val){
1617 		c2 = val >> 8;
1618 		c1 = val & 0xff;
1619 		if (p2) *p2 = c2;
1620 		if (p1) *p1 = c1;
1621 		return 0;
1622 	    }
1623 	    c2 = x0212_shift(c2);
1624 	}
1625 #endif /* X0212_ENABLE */
1626     }
1627     if(0x7F < c2) return 1;
1628     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1629     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1630     return 0;
1631 }
1632 
1633 static nkf_char
s2e_conv(nkf_char c2,nkf_char c1,nkf_char * p2,nkf_char * p1)1634 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1635 {
1636 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1637     nkf_char val;
1638 #endif
1639     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1640     if (0xFC < c1) return 1;
1641 #ifdef SHIFTJIS_CP932
1642     if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
1643 	val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1644 	if (val){
1645 	    c2 = val >> 8;
1646 	    c1 = val & 0xff;
1647 	}
1648     }
1649     if (cp932inv_f
1650 	&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1651 	val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1652 	if (val){
1653 	    c2 = val >> 8;
1654 	    c1 = val & 0xff;
1655 	}
1656     }
1657 #endif /* SHIFTJIS_CP932 */
1658 #ifdef X0212_ENABLE
1659     if (!x0213_f && is_ibmext_in_sjis(c2)){
1660 	val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1661 	if (val){
1662 	    if (val > 0x7FFF){
1663 		c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1664 		c1 = val & 0xff;
1665 	    }else{
1666 		c2 = val >> 8;
1667 		c1 = val & 0xff;
1668 	    }
1669 	    if (p2) *p2 = c2;
1670 	    if (p1) *p1 = c1;
1671 	    return 0;
1672 	}
1673     }
1674 #endif
1675     if(c2 >= 0x80){
1676 	if(x0213_f && c2 >= 0xF0){
1677 	    if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1678 		c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1679 	    }else{ /* 78<=k<=94 */
1680 		c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1681 		if (0x9E < c1) c2++;
1682 	    }
1683 	}else{
1684 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
1685 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
1686 	    c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1687 	    if (0x9E < c1) c2++;
1688 	}
1689 	if (c1 < 0x9F)
1690 	    c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1691 	else {
1692 	    c1 = c1 - 0x7E;
1693 	}
1694     }
1695 
1696 #ifdef X0212_ENABLE
1697     c2 = x0212_unshift(c2);
1698 #endif
1699     if (p2) *p2 = c2;
1700     if (p1) *p1 = c1;
1701     return 0;
1702 }
1703 
1704 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1705 static void
nkf_unicode_to_utf8(nkf_char val,nkf_char * p1,nkf_char * p2,nkf_char * p3,nkf_char * p4)1706 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1707 {
1708     val &= VALUE_MASK;
1709     if (val < 0x80){
1710 	*p1 = val;
1711 	*p2 = 0;
1712 	*p3 = 0;
1713 	*p4 = 0;
1714     }else if (val < 0x800){
1715 	*p1 = 0xc0 | (val >> 6);
1716 	*p2 = 0x80 | (val & 0x3f);
1717 	*p3 = 0;
1718 	*p4 = 0;
1719     } else if (nkf_char_unicode_bmp_p(val)) {
1720 	*p1 = 0xe0 |  (val >> 12);
1721 	*p2 = 0x80 | ((val >>  6) & 0x3f);
1722 	*p3 = 0x80 | ( val        & 0x3f);
1723 	*p4 = 0;
1724     } else if (nkf_char_unicode_value_p(val)) {
1725 	*p1 = 0xf0 |  (val >> 18);
1726 	*p2 = 0x80 | ((val >> 12) & 0x3f);
1727 	*p3 = 0x80 | ((val >>  6) & 0x3f);
1728 	*p4 = 0x80 | ( val        & 0x3f);
1729     } else {
1730 	*p1 = 0;
1731 	*p2 = 0;
1732 	*p3 = 0;
1733 	*p4 = 0;
1734     }
1735 }
1736 
1737 static nkf_char
nkf_utf8_to_unicode(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)1738 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1739 {
1740     nkf_char wc;
1741     if (c1 <= 0x7F) {
1742 	/* single byte */
1743 	wc = c1;
1744     }
1745     else if (c1 <= 0xC1) {
1746 	/* trail byte or invalid */
1747 	return -1;
1748     }
1749     else if (c1 <= 0xDF) {
1750 	/* 2 bytes */
1751 	wc  = (c1 & 0x1F) << 6;
1752 	wc |= (c2 & 0x3F);
1753     }
1754     else if (c1 <= 0xEF) {
1755 	/* 3 bytes */
1756 	wc  = (c1 & 0x0F) << 12;
1757 	wc |= (c2 & 0x3F) << 6;
1758 	wc |= (c3 & 0x3F);
1759     }
1760     else if (c2 <= 0xF4) {
1761 	/* 4 bytes */
1762 	wc  = (c1 & 0x0F) << 18;
1763 	wc |= (c2 & 0x3F) << 12;
1764 	wc |= (c3 & 0x3F) << 6;
1765 	wc |= (c4 & 0x3F);
1766     }
1767     else {
1768 	return -1;
1769     }
1770     return wc;
1771 }
1772 #endif
1773 
1774 #ifdef UTF8_INPUT_ENABLE
1775 static int
unicode_to_jis_common2(nkf_char c1,nkf_char c0,const unsigned short * const * pp,nkf_char psize,nkf_char * p2,nkf_char * p1)1776 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1777 		       const unsigned short *const *pp, nkf_char psize,
1778 		       nkf_char *p2, nkf_char *p1)
1779 {
1780     nkf_char c2;
1781     const unsigned short *p;
1782     unsigned short val;
1783 
1784     if (pp == 0) return 1;
1785 
1786     c1 -= 0x80;
1787     if (c1 < 0 || psize <= c1) return 1;
1788     p = pp[c1];
1789     if (p == 0)  return 1;
1790 
1791     c0 -= 0x80;
1792     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1793     val = p[c0];
1794     if (val == 0) return 1;
1795     if (no_cp932ext_f && (
1796 			  (val>>8) == 0x2D || /* NEC special characters */
1797 			  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1798 			 )) return 1;
1799 
1800     c2 = val >> 8;
1801     if (val > 0x7FFF){
1802 	c2 &= 0x7f;
1803 	c2 |= PREFIX_EUCG3;
1804     }
1805     if (c2 == SO) c2 = JIS_X_0201_1976_K;
1806     c1 = val & 0xFF;
1807     if (p2) *p2 = c2;
1808     if (p1) *p1 = c1;
1809     return 0;
1810 }
1811 
1812 static int
unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char * p2,nkf_char * p1)1813 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1814 {
1815     const unsigned short *const *pp;
1816     const unsigned short *const *const *ppp;
1817     static const char no_best_fit_chars_table_C2[] =
1818     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1819 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1820 	1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1821 	0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1822     static const char no_best_fit_chars_table_C2_ms[] =
1823     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1824 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1825 	1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1826 	0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1827     static const char no_best_fit_chars_table_932_C2[] =
1828     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1829 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1830 	1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1831 	0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1832     static const char no_best_fit_chars_table_932_C3[] =
1833     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1834 	1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1835 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1836 	1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1837     nkf_char ret = 0;
1838 
1839     if(c2 < 0x80){
1840 	*p2 = 0;
1841 	*p1 = c2;
1842     }else if(c2 < 0xe0){
1843 	if(no_best_fit_chars_f){
1844 	    if(ms_ucs_map_f == UCS_MAP_CP932){
1845 		switch(c2){
1846 		case 0xC2:
1847 		    if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1848 		    break;
1849 		case 0xC3:
1850 		    if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1851 		    break;
1852 		}
1853 	    }else if(!cp932inv_f){
1854 		switch(c2){
1855 		case 0xC2:
1856 		    if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1857 		    break;
1858 		case 0xC3:
1859 		    if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1860 		    break;
1861 		}
1862 	    }else if(ms_ucs_map_f == UCS_MAP_MS){
1863 		if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1864 	    }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1865 		switch(c2){
1866 		case 0xC2:
1867 		    switch(c1){
1868 		    case 0xA2:
1869 		    case 0xA3:
1870 		    case 0xA5:
1871 		    case 0xA6:
1872 		    case 0xAC:
1873 		    case 0xAF:
1874 		    case 0xB8:
1875 			return 1;
1876 		    }
1877 		    break;
1878 		}
1879 	    }
1880 	}
1881 	pp =
1882 	    ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1883 	    ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1884 	    ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1885 	    x0213_f ? utf8_to_euc_2bytes_x0213 :
1886 	    utf8_to_euc_2bytes;
1887 	ret =  unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1888     }else if(c0 < 0xF0){
1889 	if(no_best_fit_chars_f){
1890 	    if(ms_ucs_map_f == UCS_MAP_CP932){
1891 		if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1892 	    }else if(ms_ucs_map_f == UCS_MAP_MS){
1893 		switch(c2){
1894 		case 0xE2:
1895 		    switch(c1){
1896 		    case 0x80:
1897 			if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1898 			break;
1899 		    case 0x88:
1900 			if(c0 == 0x92) return 1;
1901 			break;
1902 		    }
1903 		    break;
1904 		case 0xE3:
1905 		    if(c1 == 0x80 || c0 == 0x9C) return 1;
1906 		    break;
1907 		}
1908 	    }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1909 		switch(c2){
1910 		case 0xE3:
1911 		    switch(c1){
1912 		    case 0x82:
1913 			if(c0 == 0x94) return 1;
1914 			break;
1915 		    case 0x83:
1916 			if(c0 == 0xBB) return 1;
1917 			break;
1918 		    }
1919 		    break;
1920 		}
1921 	    }else{
1922 		switch(c2){
1923 		case 0xE2:
1924 		    switch(c1){
1925 		    case 0x80:
1926 			if(c0 == 0x95) return 1;
1927 			break;
1928 		    case 0x88:
1929 			if(c0 == 0xA5) return 1;
1930 			break;
1931 		    }
1932 		    break;
1933 		case 0xEF:
1934 		    switch(c1){
1935 		    case 0xBC:
1936 			if(c0 == 0x8D) return 1;
1937 			break;
1938 		    case 0xBD:
1939 			if(c0 == 0x9E && !cp932inv_f) return 1;
1940 			break;
1941 		    case 0xBF:
1942 			if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1943 			break;
1944 		    }
1945 		    break;
1946 		}
1947 	    }
1948 	}
1949 	ppp =
1950 	    ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1951 	    ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1952 	    ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1953 	    x0213_f ? utf8_to_euc_3bytes_x0213 :
1954 	    utf8_to_euc_3bytes;
1955 	ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1956     }else return -1;
1957 #ifdef SHIFTJIS_CP932
1958     if (!ret&& is_eucg3(*p2)) {
1959 	if (cp932inv_f) {
1960 	    if (encode_fallback) ret = 1;
1961 	}
1962 	else {
1963 	    nkf_char s2, s1;
1964 	    if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1965 		s2e_conv(s2, s1, p2, p1);
1966 	    }else{
1967 		ret = 1;
1968 	    }
1969 	}
1970     }
1971 #endif
1972     return ret;
1973 }
1974 
1975 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Scan the first `size` rows of `tbl` for one whose element [0] equals
 * `euc`; on the first match, element [2] of that row is assigned to the
 * variable `low`, which must exist in the scope where the macro is used.
 * `low` is left untouched when nothing matches.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
        int x0213_idx_; \
        for (x0213_idx_ = 0; x0213_idx_ < (size); x0213_idx_++) { \
            if ((tbl)[x0213_idx_][0] == (euc)) { \
                low = (tbl)[x0213_idx_][2]; \
                break; \
            } \
        } \
    } while (0)
1984 
1985 static nkf_char
e2w_conv(nkf_char c2,nkf_char c1)1986 e2w_conv(nkf_char c2, nkf_char c1)
1987 {
1988     const unsigned short *p;
1989 
1990     if (c2 == JIS_X_0201_1976_K) {
1991 	if (ms_ucs_map_f == UCS_MAP_CP10001) {
1992 	    switch (c1) {
1993 	    case 0x20:
1994 		return 0xA0;
1995 	    case 0x7D:
1996 		return 0xA9;
1997 	    }
1998 	}
1999 	p = euc_to_utf8_1byte;
2000 #ifdef X0212_ENABLE
2001     } else if (is_eucg3(c2)){
2002 	if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
2003 	    return 0xA6;
2004 	}
2005 	c2 = (c2&0x7f) - 0x21;
2006 	if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2007 	    p =
2008 		x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
2009 		x0212_to_utf8_2bytes[c2];
2010 	else
2011 	    return 0;
2012 #endif
2013     } else {
2014 	c2 &= 0x7f;
2015 	c2 = (c2&0x7f) - 0x21;
2016 	if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2017 	    p =
2018 		x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
2019 		ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
2020 		ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
2021 		euc_to_utf8_2bytes_ms[c2];
2022 	else
2023 	    return 0;
2024     }
2025     if (!p) return 0;
2026     c1 = (c1 & 0x7f) - 0x21;
2027     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2028 	nkf_char val = p[c1];
2029 	if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2030 	    nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2031 	    nkf_char low = 0;
2032 	    if (p==x0212_to_utf8_2bytes_x0213[c2]) {
2033 		X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
2034 	    } else {
2035 		X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
2036 	    }
2037 	    if (!low) return 0;
2038 	    return UTF16_TO_UTF32(val, low);
2039 	} else {
2040 	    return val;
2041 	}
2042     }
2043     return 0;
2044 }
2045 
2046 static nkf_char
e2w_combining(nkf_char comb,nkf_char c2,nkf_char c1)2047 e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
2048 {
2049     nkf_char euc;
2050     int i;
2051     for (i = 0; i < sizeof_x0213_combining_chars; i++)
2052 	if (x0213_combining_chars[i] == comb)
2053 	    break;
2054     if (i >= sizeof_x0213_combining_chars)
2055 	return 0;
2056     euc = (c2&0x7f)<<8 | (c1&0x7f);
2057     for (i = 0; i < sizeof_x0213_combining_table; i++)
2058 	if (x0213_combining_table[i][0] == euc)
2059 	    return x0213_combining_table[i][1];
2060     return 0;
2061 }
2062 #endif
2063 
2064 static nkf_char
w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char * p2,nkf_char * p1)2065 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2066 {
2067     nkf_char ret = 0;
2068 
2069     if (!c1){
2070 	*p2 = 0;
2071 	*p1 = c2;
2072     }else if (0xc0 <= c2 && c2 <= 0xef) {
2073 	ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
2074 #ifdef NUMCHAR_OPTION
2075 	if (ret > 0){
2076 	    if (p2) *p2 = 0;
2077 	    if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2078 	    ret = 0;
2079 	}
2080 #endif
2081     }
2082     return ret;
2083 }
2084 
2085 #ifdef UTF8_INPUT_ENABLE
2086 static nkf_char
w16e_conv(nkf_char val,nkf_char * p2,nkf_char * p1)2087 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
2088 {
2089     nkf_char c1, c2, c3, c4;
2090     nkf_char ret = 0;
2091     val &= VALUE_MASK;
2092     if (val < 0x80) {
2093 	*p2 = 0;
2094 	*p1 = val;
2095     }
2096     else if (nkf_char_unicode_bmp_p(val)){
2097 	nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2098 	ret =  unicode_to_jis_common(c1, c2, c3, p2, p1);
2099 	if (ret > 0){
2100 	    *p2 = 0;
2101 	    *p1 = nkf_char_unicode_new(val);
2102 	    ret = 0;
2103 	}
2104     }
2105     else {
2106 	int i;
2107 	if (x0213_f) {
2108 	    c1 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
2109 	    c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2110 	    for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2111 		if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2112 		    val = x0213_1_surrogate_table[i][0];
2113 		    *p2 = val >> 8;
2114 		    *p1 = val & 0xFF;
2115 		    return 0;
2116 		}
2117 	    for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2118 		if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2119 		    val = x0213_2_surrogate_table[i][0];
2120 		    *p2 = PREFIX_EUCG3 | (val >> 8);
2121 		    *p1 = val & 0xFF;
2122 		    return 0;
2123 		}
2124 	}
2125 	*p2 = 0;
2126 	*p1 = nkf_char_unicode_new(val);
2127     }
2128     return ret;
2129 }
2130 #endif
2131 
2132 static nkf_char
e_iconv(nkf_char c2,nkf_char c1,nkf_char c0)2133 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2134 {
2135     if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2136 	if (iso2022jp_f && !x0201_f) {
2137 	    c2 = GETA1; c1 = GETA2;
2138 	} else {
2139 	    c2 = JIS_X_0201_1976_K;
2140 	    c1 &= 0x7f;
2141 	}
2142 #ifdef X0212_ENABLE
2143     }else if (c2 == 0x8f){
2144 	if (c0 == 0){
2145 	    return -1;
2146 	}
2147 	if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2148 	    /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2149 	    c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2150 	    c2 = 0;
2151 	} else {
2152 	    c2 = (c2 << 8) | (c1 & 0x7f);
2153 	    c1 = c0 & 0x7f;
2154 #ifdef SHIFTJIS_CP932
2155 	    if (cp51932_f){
2156 		nkf_char s2, s1;
2157 		if (e2s_conv(c2, c1, &s2, &s1) == 0){
2158 		    s2e_conv(s2, s1, &c2, &c1);
2159 		    if (c2 < 0x100){
2160 			c1 &= 0x7f;
2161 			c2 &= 0x7f;
2162 		    }
2163 		}
2164 	    }
2165 #endif /* SHIFTJIS_CP932 */
2166 	}
2167 #endif /* X0212_ENABLE */
2168     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2169 	/* NOP */
2170     } else {
2171 	if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2172 	    /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2173 	    c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2174 	    c2 = 0;
2175 	} else {
2176 	    c1 &= 0x7f;
2177 	    c2 &= 0x7f;
2178 #ifdef SHIFTJIS_CP932
2179 	    if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2180 		nkf_char s2, s1;
2181 		if (e2s_conv(c2, c1, &s2, &s1) == 0){
2182 		    s2e_conv(s2, s1, &c2, &c1);
2183 		    if (c2 < 0x100){
2184 			c1 &= 0x7f;
2185 			c2 &= 0x7f;
2186 		    }
2187 		}
2188 	    }
2189 #endif /* SHIFTJIS_CP932 */
2190 	}
2191     }
2192     (*oconv)(c2, c1);
2193     return 0;
2194 }
2195 
2196 static nkf_char
s_iconv(ARG_UNUSED nkf_char c2,nkf_char c1,ARG_UNUSED nkf_char c0)2197 s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2198 {
2199     if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2200 	if (iso2022jp_f && !x0201_f) {
2201 	    c2 = GETA1; c1 = GETA2;
2202 	} else {
2203 	    c1 &= 0x7f;
2204 	}
2205     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2206 	/* NOP */
2207     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2208 	/* CP932 UDC */
2209 	if(c1 == 0x7F) return 0;
2210 	c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2211 	c2 = 0;
2212     } else {
2213 	nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2214 	if (ret) return ret;
2215     }
2216     (*oconv)(c2, c1);
2217     return 0;
2218 }
2219 
2220 static int
x0213_wait_combining_p(nkf_char wc)2221 x0213_wait_combining_p(nkf_char wc)
2222 {
2223     int i;
2224     for (i = 0; i < sizeof_x0213_combining_table; i++) {
2225 	if (x0213_combining_table[i][1] == wc) {
2226 	    return TRUE;
2227 	}
2228     }
2229     return FALSE;
2230 }
2231 
2232 static int
x0213_combining_p(nkf_char wc)2233 x0213_combining_p(nkf_char wc)
2234 {
2235     int i;
2236     for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2237 	if (x0213_combining_chars[i] == wc) {
2238 	    return TRUE;
2239 	}
2240     }
2241     return FALSE;
2242 }
2243 
2244 static nkf_char
w_iconv(nkf_char c1,nkf_char c2,nkf_char c3)2245 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2246 {
2247     nkf_char ret = 0, c4 = 0;
2248     static const char w_iconv_utf8_1st_byte[] =
2249     { /* 0xC0 - 0xFF */
2250 	20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2251 	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2252 	30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2253 	40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2254 
2255     if (c3 > 0xFF) {
2256 	c4 = c3 & 0xFF;
2257 	c3 >>= 8;
2258     }
2259 
2260     if (c1 < 0 || 0xff < c1) {
2261     }else if (c1 == 0) { /* 0 : 1 byte*/
2262 	c3 = 0;
2263     } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2264 	return 0;
2265     } else{
2266 	switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2267 	case 21:
2268 	    if (c2 < 0x80 || 0xBF < c2) return 0;
2269 	    break;
2270 	case 30:
2271 	    if (c3 == 0) return -1;
2272 	    if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2273 		return 0;
2274 	    break;
2275 	case 31:
2276 	case 33:
2277 	    if (c3 == 0) return -1;
2278 	    if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2279 		return 0;
2280 	    break;
2281 	case 32:
2282 	    if (c3 == 0) return -1;
2283 	    if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2284 		return 0;
2285 	    break;
2286 	case 40:
2287 	    if (c3 == 0) return -2;
2288 	    if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2289 		return 0;
2290 	    break;
2291 	case 41:
2292 	    if (c3 == 0) return -2;
2293 	    if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2294 		return 0;
2295 	    break;
2296 	case 42:
2297 	    if (c3 == 0) return -2;
2298 	    if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2299 		return 0;
2300 	    break;
2301 	default:
2302 	    return 0;
2303 	    break;
2304 	}
2305     }
2306     if (c1 == 0 || c1 == EOF){
2307     } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2308 	c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2309 	c1 = 0;
2310     } else {
2311 	if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2312 	    return -3;
2313 	ret = w2e_conv(c1, c2, c3, &c1, &c2);
2314     }
2315     if (ret == 0){
2316 	(*oconv)(c1, c2);
2317     }
2318     return ret;
2319 }
2320 
2321 static nkf_char
w_iconv_nocombine(nkf_char c1,nkf_char c2,nkf_char c3)2322 w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
2323 {
2324     /* continue from the line below 'return -3;' in w_iconv() */
2325     nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2326     if (ret == 0){
2327 	(*oconv)(c1, c2);
2328     }
2329     return ret;
2330 }
2331 
/*
 * Negative status codes returned by the unicode_iconv family.
 * The values are parenthesised so the macros expand safely inside any
 * larger expression.
 */
#define NKF_ICONV_INVALID_CODE_RANGE (-13)
#define NKF_ICONV_WAIT_COMBINING_CHAR (-14)
#define NKF_ICONV_NOT_COMBINED (-15)
2335 static size_t
unicode_iconv(nkf_char wc,int nocombine)2336 unicode_iconv(nkf_char wc, int nocombine)
2337 {
2338     nkf_char c1, c2;
2339     int ret = 0;
2340 
2341     if (wc < 0x80) {
2342 	c2 = 0;
2343 	c1 = wc;
2344     }else if ((wc>>11) == 27) {
2345 	/* unpaired surrogate */
2346 	return NKF_ICONV_INVALID_CODE_RANGE;
2347     }else if (wc < 0xFFFF) {
2348 	if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2349 	    return NKF_ICONV_WAIT_COMBINING_CHAR;
2350 	ret = w16e_conv(wc, &c2, &c1);
2351 	if (ret) return ret;
2352     }else if (wc < 0x10FFFF) {
2353 	c2 = 0;
2354 	c1 = nkf_char_unicode_new(wc);
2355     } else {
2356 	return NKF_ICONV_INVALID_CODE_RANGE;
2357     }
2358     (*oconv)(c2, c1);
2359     return 0;
2360 }
2361 
2362 static nkf_char
unicode_iconv_combine(nkf_char wc,nkf_char wc2)2363 unicode_iconv_combine(nkf_char wc, nkf_char wc2)
2364 {
2365     nkf_char c1, c2;
2366     int i;
2367 
2368     if (wc2 < 0x80) {
2369 	return NKF_ICONV_NOT_COMBINED;
2370     }else if ((wc2>>11) == 27) {
2371 	/* unpaired surrogate */
2372 	return NKF_ICONV_INVALID_CODE_RANGE;
2373     }else if (wc2 < 0xFFFF) {
2374 	if (!x0213_combining_p(wc2))
2375 	    return NKF_ICONV_NOT_COMBINED;
2376 	for (i = 0; i < sizeof_x0213_combining_table; i++) {
2377 	    if (x0213_combining_table[i][1] == wc &&
2378 		x0213_combining_table[i][2] == wc2) {
2379 		c2 = x0213_combining_table[i][0] >> 8;
2380 		c1 = x0213_combining_table[i][0] & 0x7f;
2381 		(*oconv)(c2, c1);
2382 		return 0;
2383 	    }
2384 	}
2385     }else if (wc2 < 0x10FFFF) {
2386 	return NKF_ICONV_NOT_COMBINED;
2387     } else {
2388 	return NKF_ICONV_INVALID_CODE_RANGE;
2389     }
2390     return NKF_ICONV_NOT_COMBINED;
2391 }
2392 
2393 static nkf_char
w_iconv_combine(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4,nkf_char c5,nkf_char c6)2394 w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
2395 {
2396     nkf_char wc, wc2;
2397     wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2398     wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2399     if (wc2 < 0)
2400 	return wc2;
2401     return unicode_iconv_combine(wc, wc2);
2402 }
2403 
/*
 * size_t status codes asking the caller for more input.  The cast
 * expressions are parenthesised so that the macros behave as a single
 * operand (e.g. under sizeof or a postfix operator).
 */
#define NKF_ICONV_NEED_ONE_MORE_BYTE ((size_t)-1)
#define NKF_ICONV_NEED_TWO_MORE_BYTES ((size_t)-2)
2406 static size_t
nkf_iconv_utf_16(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)2407 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2408 {
2409     nkf_char wc;
2410 
2411     if (c1 == EOF) {
2412 	(*oconv)(EOF, 0);
2413 	return 0;
2414     }
2415 
2416     if (input_endian == ENDIAN_BIG) {
2417 	if (0xD8 <= c1 && c1 <= 0xDB) {
2418 	    if (0xDC <= c3 && c3 <= 0xDF) {
2419 		wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2420 	    } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2421 	} else {
2422 	    wc = c1 << 8 | c2;
2423 	}
2424     } else {
2425 	if (0xD8 <= c2 && c2 <= 0xDB) {
2426 	    if (0xDC <= c4 && c4 <= 0xDF) {
2427 		wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2428 	    } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2429 	} else {
2430 	    wc = c2 << 8 | c1;
2431 	}
2432     }
2433 
2434     return (*unicode_iconv)(wc, FALSE);
2435 }
2436 
2437 static size_t
nkf_iconv_utf_16_combine(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)2438 nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2439 {
2440     nkf_char wc, wc2;
2441 
2442     if (input_endian == ENDIAN_BIG) {
2443 	if (0xD8 <= c3 && c3 <= 0xDB) {
2444 	    return NKF_ICONV_NOT_COMBINED;
2445 	} else {
2446 	    wc = c1 << 8 | c2;
2447 	    wc2 = c3 << 8 | c4;
2448 	}
2449     } else {
2450 	if (0xD8 <= c2 && c2 <= 0xDB) {
2451 	    return NKF_ICONV_NOT_COMBINED;
2452 	} else {
2453 	    wc = c2 << 8 | c1;
2454 	    wc2 = c4 << 8 | c3;
2455 	}
2456     }
2457 
2458     return unicode_iconv_combine(wc, wc2);
2459 }
2460 
2461 static size_t
nkf_iconv_utf_16_nocombine(nkf_char c1,nkf_char c2)2462 nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
2463 {
2464     nkf_char wc;
2465     if (input_endian == ENDIAN_BIG)
2466 	wc = c1 << 8 | c2;
2467     else
2468 	wc = c2 << 8 | c1;
2469     return (*unicode_iconv)(wc, TRUE);
2470 }
2471 
2472 static nkf_char
w_iconv16(nkf_char c2,nkf_char c1,ARG_UNUSED nkf_char c0)2473 w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2474 {
2475     (*oconv)(c2, c1);
2476     return 16; /* different from w_iconv32 */
2477 }
2478 
2479 static nkf_char
w_iconv32(nkf_char c2,nkf_char c1,ARG_UNUSED nkf_char c0)2480 w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2481 {
2482     (*oconv)(c2, c1);
2483     return 32; /* different from w_iconv16 */
2484 }
2485 
2486 static nkf_char
utf32_to_nkf_char(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)2487 utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2488 {
2489     nkf_char wc;
2490 
2491     switch(input_endian){
2492     case ENDIAN_BIG:
2493 	wc = c2 << 16 | c3 << 8 | c4;
2494 	break;
2495     case ENDIAN_LITTLE:
2496 	wc = c3 << 16 | c2 << 8 | c1;
2497 	break;
2498     case ENDIAN_2143:
2499 	wc = c1 << 16 | c4 << 8 | c3;
2500 	break;
2501     case ENDIAN_3412:
2502 	wc = c4 << 16 | c1 << 8 | c2;
2503 	break;
2504     default:
2505 	return NKF_ICONV_INVALID_CODE_RANGE;
2506     }
2507     return wc;
2508 }
2509 
2510 static size_t
nkf_iconv_utf_32(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)2511 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2512 {
2513     nkf_char wc;
2514 
2515     if (c1 == EOF) {
2516 	(*oconv)(EOF, 0);
2517 	return 0;
2518     }
2519 
2520     wc = utf32_to_nkf_char(c1, c2, c3, c4);
2521     if (wc < 0)
2522 	return wc;
2523 
2524     return (*unicode_iconv)(wc, FALSE);
2525 }
2526 
2527 static nkf_char
nkf_iconv_utf_32_combine(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4,nkf_char c5,nkf_char c6,nkf_char c7,nkf_char c8)2528 nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
2529 {
2530     nkf_char wc, wc2;
2531 
2532     wc = utf32_to_nkf_char(c1, c2, c3, c4);
2533     if (wc < 0)
2534 	return wc;
2535     wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2536     if (wc2 < 0)
2537 	return wc2;
2538 
2539     return unicode_iconv_combine(wc, wc2);
2540 }
2541 
2542 static size_t
nkf_iconv_utf_32_nocombine(nkf_char c1,nkf_char c2,nkf_char c3,nkf_char c4)2543 nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2544 {
2545     nkf_char wc;
2546 
2547     wc = utf32_to_nkf_char(c1, c2, c3, c4);
2548     return (*unicode_iconv)(wc, TRUE);
2549 }
2550 #endif
2551 
/*
 * Unless output_mode is already ASCII or ISO_8859_1, emit ESC ( followed by
 * ascii_intro and record `mode` as the new output_mode.
 */
#define output_ascii_escape_sequence(mode) do { \
	    if (!(output_mode == ASCII || output_mode == ISO_8859_1)) { \
		    (*o_putc)(ESC); \
		    (*o_putc)('('); \
		    (*o_putc)(ascii_intro); \
		    output_mode = (mode); \
	    } \
    } while (0)
2560 
2561 static void
output_escape_sequence(int mode)2562 output_escape_sequence(int mode)
2563 {
2564     if (output_mode == mode)
2565 	return;
2566     switch(mode) {
2567     case ISO_8859_1:
2568 	(*o_putc)(ESC);
2569 	(*o_putc)('.');
2570 	(*o_putc)('A');
2571 	break;
2572     case JIS_X_0201_1976_K:
2573 	(*o_putc)(ESC);
2574 	(*o_putc)('(');
2575 	(*o_putc)('I');
2576 	break;
2577     case JIS_X_0208:
2578 	(*o_putc)(ESC);
2579 	(*o_putc)('$');
2580 	(*o_putc)(kanji_intro);
2581 	break;
2582     case JIS_X_0212:
2583 	(*o_putc)(ESC);
2584 	(*o_putc)('$');
2585 	(*o_putc)('(');
2586 	(*o_putc)('D');
2587 	break;
2588     case JIS_X_0213_1:
2589 	(*o_putc)(ESC);
2590 	(*o_putc)('$');
2591 	(*o_putc)('(');
2592 	(*o_putc)('Q');
2593 	break;
2594     case JIS_X_0213_2:
2595 	(*o_putc)(ESC);
2596 	(*o_putc)('$');
2597 	(*o_putc)('(');
2598 	(*o_putc)('P');
2599 	break;
2600     }
2601     output_mode = mode;
2602 }
2603 
2604 static void
j_oconv(nkf_char c2,nkf_char c1)2605 j_oconv(nkf_char c2, nkf_char c1)
2606 {
2607 #ifdef NUMCHAR_OPTION
2608     if (c2 == 0 && nkf_char_unicode_p(c1)){
2609 	w16e_conv(c1, &c2, &c1);
2610 	if (c2 == 0 && nkf_char_unicode_p(c1)){
2611 	    c2 = c1 & VALUE_MASK;
2612 	    if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2613 		/* CP5022x UDC */
2614 		c1 &= 0xFFF;
2615 		c2 = 0x7F + c1 / 94;
2616 		c1 = 0x21 + c1 % 94;
2617 	    } else {
2618 		if (encode_fallback) (*encode_fallback)(c1);
2619 		return;
2620 	    }
2621 	}
2622     }
2623 #endif
2624     if (c2 == 0) {
2625 	output_ascii_escape_sequence(ASCII);
2626 	(*o_putc)(c1);
2627     }
2628     else if (c2 == EOF) {
2629 	output_ascii_escape_sequence(ASCII);
2630 	(*o_putc)(EOF);
2631     }
2632     else if (c2 == ISO_8859_1) {
2633 	output_ascii_escape_sequence(ISO_8859_1);
2634 	(*o_putc)(c1|0x80);
2635     }
2636     else if (c2 == JIS_X_0201_1976_K) {
2637 	output_escape_sequence(JIS_X_0201_1976_K);
2638 	(*o_putc)(c1);
2639 #ifdef X0212_ENABLE
2640     } else if (is_eucg3(c2)){
2641 	output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2642 	(*o_putc)(c2 & 0x7f);
2643 	(*o_putc)(c1);
2644 #endif
2645     } else {
2646 	if(ms_ucs_map_f
2647 	   ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2648 	   : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2649 	output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2650 	(*o_putc)(c2);
2651 	(*o_putc)(c1);
2652     }
2653 }
2654 
2655 static void
e_oconv(nkf_char c2,nkf_char c1)2656 e_oconv(nkf_char c2, nkf_char c1)
2657 {
2658     if (c2 == 0 && nkf_char_unicode_p(c1)){
2659 	w16e_conv(c1, &c2, &c1);
2660 	if (c2 == 0 && nkf_char_unicode_p(c1)){
2661 	    c2 = c1 & VALUE_MASK;
2662 	    if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2663 		/* eucJP-ms UDC */
2664 		c1 &= 0xFFF;
2665 		c2 = c1 / 94;
2666 		c2 += c2 < 10 ? 0x75 : 0x8FEB;
2667 		c1 = 0x21 + c1 % 94;
2668 		if (is_eucg3(c2)){
2669 		    (*o_putc)(0x8f);
2670 		    (*o_putc)((c2 & 0x7f) | 0x080);
2671 		    (*o_putc)(c1 | 0x080);
2672 		}else{
2673 		    (*o_putc)((c2 & 0x7f) | 0x080);
2674 		    (*o_putc)(c1 | 0x080);
2675 		}
2676 		return;
2677 	    } else {
2678 		if (encode_fallback) (*encode_fallback)(c1);
2679 		return;
2680 	    }
2681 	}
2682     }
2683 
2684     if (c2 == EOF) {
2685 	(*o_putc)(EOF);
2686     } else if (c2 == 0) {
2687 	output_mode = ASCII;
2688 	(*o_putc)(c1);
2689     } else if (c2 == JIS_X_0201_1976_K) {
2690 	output_mode = EUC_JP;
2691 	(*o_putc)(SS2); (*o_putc)(c1|0x80);
2692     } else if (c2 == ISO_8859_1) {
2693 	output_mode = ISO_8859_1;
2694 	(*o_putc)(c1 | 0x080);
2695 #ifdef X0212_ENABLE
2696     } else if (is_eucg3(c2)){
2697 	output_mode = EUC_JP;
2698 #ifdef SHIFTJIS_CP932
2699 	if (!cp932inv_f){
2700 	    nkf_char s2, s1;
2701 	    if (e2s_conv(c2, c1, &s2, &s1) == 0){
2702 		s2e_conv(s2, s1, &c2, &c1);
2703 	    }
2704 	}
2705 #endif
2706 	if (c2 == 0) {
2707 	    output_mode = ASCII;
2708 	    (*o_putc)(c1);
2709 	}else if (is_eucg3(c2)){
2710 	    if (x0212_f){
2711 		(*o_putc)(0x8f);
2712 		(*o_putc)((c2 & 0x7f) | 0x080);
2713 		(*o_putc)(c1 | 0x080);
2714 	    }
2715 	}else{
2716 	    (*o_putc)((c2 & 0x7f) | 0x080);
2717 	    (*o_putc)(c1 | 0x080);
2718 	}
2719 #endif
2720     } else {
2721 	if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2722 	    set_iconv(FALSE, 0);
2723 	    return; /* too late to rescue this char */
2724 	}
2725 	output_mode = EUC_JP;
2726 	(*o_putc)(c2 | 0x080);
2727 	(*o_putc)(c1 | 0x080);
2728     }
2729 }
2730 
2731 static void
s_oconv(nkf_char c2,nkf_char c1)2732 s_oconv(nkf_char c2, nkf_char c1)
2733 {
2734 #ifdef NUMCHAR_OPTION
2735     if (c2 == 0 && nkf_char_unicode_p(c1)){
2736 	w16e_conv(c1, &c2, &c1);
2737 	if (c2 == 0 && nkf_char_unicode_p(c1)){
2738 	    c2 = c1 & VALUE_MASK;
2739 	    if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2740 		/* CP932 UDC */
2741 		c1 &= 0xFFF;
2742 		c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2743 		c1 = c1 % 188;
2744 		c1 += 0x40 + (c1 > 0x3e);
2745 		(*o_putc)(c2);
2746 		(*o_putc)(c1);
2747 		return;
2748 	    } else {
2749 		if(encode_fallback)(*encode_fallback)(c1);
2750 		return;
2751 	    }
2752 	}
2753     }
2754 #endif
2755     if (c2 == EOF) {
2756 	(*o_putc)(EOF);
2757 	return;
2758     } else if (c2 == 0) {
2759 	output_mode = ASCII;
2760 	(*o_putc)(c1);
2761     } else if (c2 == JIS_X_0201_1976_K) {
2762 	output_mode = SHIFT_JIS;
2763 	(*o_putc)(c1|0x80);
2764     } else if (c2 == ISO_8859_1) {
2765 	output_mode = ISO_8859_1;
2766 	(*o_putc)(c1 | 0x080);
2767 #ifdef X0212_ENABLE
2768     } else if (is_eucg3(c2)){
2769 	output_mode = SHIFT_JIS;
2770 	if (e2s_conv(c2, c1, &c2, &c1) == 0){
2771 	    (*o_putc)(c2);
2772 	    (*o_putc)(c1);
2773 	}
2774 #endif
2775     } else {
2776 	if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2777 	    set_iconv(FALSE, 0);
2778 	    return; /* too late to rescue this char */
2779 	}
2780 	output_mode = SHIFT_JIS;
2781 	e2s_conv(c2, c1, &c2, &c1);
2782 
2783 #ifdef SHIFTJIS_CP932
2784 	if (cp932inv_f
2785 	    && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2786 	    nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2787 	    if (c){
2788 		c2 = c >> 8;
2789 		c1 = c & 0xff;
2790 	    }
2791 	}
2792 #endif /* SHIFTJIS_CP932 */
2793 
2794 	(*o_putc)(c2);
2795 	if (prefix_table[(unsigned char)c1]){
2796 	    (*o_putc)(prefix_table[(unsigned char)c1]);
2797 	}
2798 	(*o_putc)(c1);
2799     }
2800 }
2801 
2802 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Encode `val` with nkf_unicode_to_utf8() and write the bytes through
 * o_putc.  The first byte is always written; the remaining three only when
 * non-zero.  Uses (and overwrites) the variables c1, c2, c3 and c4 of the
 * enclosing scope.
 */
#define OUTPUT_UTF8(val) do { \
	nkf_unicode_to_utf8((val), &c1, &c2, &c3, &c4); \
	(*o_putc)(c1); \
	if (c2) { (*o_putc)(c2); } \
	if (c3) { (*o_putc)(c3); } \
	if (c4) { (*o_putc)(c4); } \
    } while (0)
2810 
2811 static void
w_oconv(nkf_char c2,nkf_char c1)2812 w_oconv(nkf_char c2, nkf_char c1)
2813 {
2814     nkf_char c3, c4;
2815     nkf_char val, val2;
2816 
2817     if (output_bom_f) {
2818 	output_bom_f = FALSE;
2819 	(*o_putc)('\357');
2820 	(*o_putc)('\273');
2821 	(*o_putc)('\277');
2822     }
2823 
2824     if (c2 == EOF) {
2825 	(*o_putc)(EOF);
2826 	return;
2827     }
2828 
2829     if (c2 == 0 && nkf_char_unicode_p(c1)){
2830 	val = c1 & VALUE_MASK;
2831 	OUTPUT_UTF8(val);
2832 	return;
2833     }
2834 
2835     if (c2 == 0) {
2836 	(*o_putc)(c1);
2837     } else {
2838 	val = e2w_conv(c2, c1);
2839 	if (val){
2840 	    val2 = e2w_combining(val, c2, c1);
2841 	    if (val2)
2842 		OUTPUT_UTF8(val2);
2843 	    OUTPUT_UTF8(val);
2844 	}
2845     }
2846 }
2847 
/*
 * Write the two bytes of one UTF-16 code unit through o_putc:
 * `lo` first when output_endian is ENDIAN_LITTLE, `hi` first otherwise.
 */
#define OUTPUT_UTF16_BYTES(lo, hi) do { \
	if (output_endian != ENDIAN_LITTLE) { \
	    (*o_putc)(hi); \
	    (*o_putc)(lo); \
	} else { \
	    (*o_putc)(lo); \
	    (*o_putc)(hi); \
	} \
    } while (0)
2857 
/*
 * Write `val` as UTF-16 through OUTPUT_UTF16_BYTES: one code unit for a
 * BMP value, a surrogate pair for a larger value up to UNICODE_MAX, and
 * nothing beyond that.  `val` must be a modifiable variable (it is masked
 * in place), and c1 / c2 of the enclosing scope are used as scratch.
 */
#define OUTPUT_UTF16(val) do { \
	if (!nkf_char_unicode_bmp_p(val)) { \
	    val &= VALUE_MASK; \
	    if (val <= UNICODE_MAX) { \
		c2 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */ \
		c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
		OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
		OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
	    } \
	} else { \
	    c2 = (val >> 8) & 0xff; \
	    c1 = val & 0xff; \
	    OUTPUT_UTF16_BYTES(c1, c2); \
	} \
    } while (0)
2873 
2874 static void
w_oconv16(nkf_char c2,nkf_char c1)2875 w_oconv16(nkf_char c2, nkf_char c1)
2876 {
2877     if (output_bom_f) {
2878 	output_bom_f = FALSE;
2879 	OUTPUT_UTF16_BYTES(0xFF, 0xFE);
2880     }
2881 
2882     if (c2 == EOF) {
2883 	(*o_putc)(EOF);
2884 	return;
2885     }
2886 
2887     if (c2 == 0 && nkf_char_unicode_p(c1)) {
2888 	OUTPUT_UTF16(c1);
2889     } else if (c2) {
2890 	nkf_char val, val2;
2891 	val = e2w_conv(c2, c1);
2892 	if (!val) return;
2893 	val2 = e2w_combining(val, c2, c1);
2894 	if (val2)
2895 	    OUTPUT_UTF16(val2);
2896 	OUTPUT_UTF16(val);
2897     } else {
2898 	OUTPUT_UTF16_BYTES(c1, c2);
2899     }
2900 }
2901 
/*
 * Write `c` as one four-byte UTF-32 unit through o_putc.  Only the low 24
 * bits of `c` are used; the most significant byte written is always 0.
 * Byte order follows output_endian (little endian only for ENDIAN_LITTLE).
 */
#define OUTPUT_UTF32(c) do { \
	if (output_endian != ENDIAN_LITTLE) { \
	    (*o_putc)(0); \
	    (*o_putc)(((c) >> 16) & 0xFF); \
	    (*o_putc)(((c) >>  8) & 0xFF); \
	    (*o_putc)( (c)        & 0xFF); \
	} else { \
	    (*o_putc)( (c)        & 0xFF); \
	    (*o_putc)(((c) >>  8) & 0xFF); \
	    (*o_putc)(((c) >> 16) & 0xFF); \
	    (*o_putc)(0); \
	} \
    } while (0)
2915 
2916 static void
w_oconv32(nkf_char c2,nkf_char c1)2917 w_oconv32(nkf_char c2, nkf_char c1)
2918 {
2919     if (output_bom_f) {
2920 	output_bom_f = FALSE;
2921 	if (output_endian == ENDIAN_LITTLE){
2922 	    (*o_putc)(0xFF);
2923 	    (*o_putc)(0xFE);
2924 	    (*o_putc)(0);
2925 	    (*o_putc)(0);
2926 	}else{
2927 	    (*o_putc)(0);
2928 	    (*o_putc)(0);
2929 	    (*o_putc)(0xFE);
2930 	    (*o_putc)(0xFF);
2931 	}
2932     }
2933 
2934     if (c2 == EOF) {
2935 	(*o_putc)(EOF);
2936 	return;
2937     }
2938 
2939     if (c2 == ISO_8859_1) {
2940 	c1 |= 0x80;
2941     } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2942 	c1 &= VALUE_MASK;
2943     } else if (c2) {
2944 	nkf_char val, val2;
2945 	val = e2w_conv(c2, c1);
2946 	if (!val) return;
2947 	val2 = e2w_combining(val, c2, c1);
2948 	if (val2)
2949 	    OUTPUT_UTF32(val2);
2950 	c1 = val;
2951     }
2952     OUTPUT_UTF32(c1);
2953 }
2954 #endif
2955 
/* Bit flags or-ed into input_code.score by set_code_score(). */
#define SCORE_L2       (1 << 0)  /* Kanji Level 2 */
#define SCORE_KANA     (1 << 1)  /* Halfwidth Katakana */
#define SCORE_DEPEND   (1 << 2)  /* MD Characters */
#define SCORE_CP932    (1 << 3)  /* IBM extended characters */
#define SCORE_X0212    (1 << 4)  /* JIS X 0212 */
#define SCORE_X0213    (1 << 5)  /* JIS X 0213 */
#define SCORE_NO_EXIST (1 << 6)  /* Undefined Characters */
#define SCORE_iMIME    (1 << 7)  /* MIME selected */
#define SCORE_ERROR    (1 << 8)  /* Error */

/* score assigned by status_reset() */
#define SCORE_INIT (SCORE_iMIME)
2967 
2968 static const nkf_char score_table_A0[] = {
2969     0, 0, 0, 0,
2970     0, 0, 0, 0,
2971     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2972     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213,
2973 };
2974 
2975 static const nkf_char score_table_F0[] = {
2976     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2977     SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213,
2978     SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2979     SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR,
2980 };
2981 
2982 static const nkf_char score_table_8FA0[] = {
2983     0, SCORE_X0213, SCORE_X0212, SCORE_X0213,
2984     SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212,
2985     SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2986     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2987 };
2988 
2989 static const nkf_char score_table_8FE0[] = {
2990     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2991     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2992     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2993     SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213,
2994 };
2995 
2996 static const nkf_char score_table_8FF0[] = {
2997     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212,
2998     SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2999     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
3000     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
3001 };
3002 
3003 static void
set_code_score(struct input_code * ptr,nkf_char score)3004 set_code_score(struct input_code *ptr, nkf_char score)
3005 {
3006     if (ptr){
3007 	ptr->score |= score;
3008     }
3009 }
3010 
3011 static void
clr_code_score(struct input_code * ptr,nkf_char score)3012 clr_code_score(struct input_code *ptr, nkf_char score)
3013 {
3014     if (ptr){
3015 	ptr->score &= ~score;
3016     }
3017 }
3018 
3019 static void
code_score(struct input_code * ptr)3020 code_score(struct input_code *ptr)
3021 {
3022     nkf_char c2 = ptr->buf[0];
3023     nkf_char c1 = ptr->buf[1];
3024     if (c2 < 0){
3025 	set_code_score(ptr, SCORE_ERROR);
3026     }else if (c2 == SS2){
3027 	set_code_score(ptr, SCORE_KANA);
3028     }else if (c2 == 0x8f){
3029 	if ((c1 & 0x70) == 0x20){
3030 	    set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3031 	}else if ((c1 & 0x70) == 0x60){
3032 	    set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3033 	}else if ((c1 & 0x70) == 0x70){
3034 	    set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3035 	}else{
3036 	    set_code_score(ptr, SCORE_X0212);
3037 	}
3038 #ifdef UTF8_OUTPUT_ENABLE
3039     }else if (!e2w_conv(c2, c1)){
3040 	set_code_score(ptr, SCORE_NO_EXIST);
3041 #endif
3042     }else if ((c2 & 0x70) == 0x20){
3043 	set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3044     }else if ((c2 & 0x70) == 0x70){
3045 	set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3046     }else if ((c2 & 0x70) >= 0x50){
3047 	set_code_score(ptr, SCORE_L2);
3048     }
3049 }
3050 
3051 static void
status_disable(struct input_code * ptr)3052 status_disable(struct input_code *ptr)
3053 {
3054     ptr->stat = -1;
3055     ptr->buf[0] = -1;
3056     code_score(ptr);
3057     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
3058 }
3059 
3060 static void
status_push_ch(struct input_code * ptr,nkf_char c)3061 status_push_ch(struct input_code *ptr, nkf_char c)
3062 {
3063     ptr->buf[ptr->index++] = c;
3064 }
3065 
3066 static void
status_clear(struct input_code * ptr)3067 status_clear(struct input_code *ptr)
3068 {
3069     ptr->stat = 0;
3070     ptr->index = 0;
3071 }
3072 
3073 static void
status_reset(struct input_code * ptr)3074 status_reset(struct input_code *ptr)
3075 {
3076     status_clear(ptr);
3077     ptr->score = SCORE_INIT;
3078 }
3079 
3080 static void
status_reinit(struct input_code * ptr)3081 status_reinit(struct input_code *ptr)
3082 {
3083     status_reset(ptr);
3084     ptr->_file_stat = 0;
3085 }
3086 
3087 static void
status_check(struct input_code * ptr,nkf_char c)3088 status_check(struct input_code *ptr, nkf_char c)
3089 {
3090     if (c <= DEL && estab_f){
3091 	status_reset(ptr);
3092     }
3093 }
3094 
3095 static void
s_status(struct input_code * ptr,nkf_char c)3096 s_status(struct input_code *ptr, nkf_char c)
3097 {
3098     switch(ptr->stat){
3099     case -1:
3100 	status_check(ptr, c);
3101 	break;
3102     case 0:
3103 	if (c <= DEL){
3104 	    break;
3105 	}else if (nkf_char_unicode_p(c)){
3106 	    break;
3107 	}else if (0xa1 <= c && c <= 0xdf){
3108 	    status_push_ch(ptr, SS2);
3109 	    status_push_ch(ptr, c);
3110 	    code_score(ptr);
3111 	    status_clear(ptr);
3112 	}else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3113 	    ptr->stat = 1;
3114 	    status_push_ch(ptr, c);
3115 	}else if (0xed <= c && c <= 0xee){
3116 	    ptr->stat = 3;
3117 	    status_push_ch(ptr, c);
3118 #ifdef SHIFTJIS_CP932
3119 	}else if (is_ibmext_in_sjis(c)){
3120 	    ptr->stat = 2;
3121 	    status_push_ch(ptr, c);
3122 #endif /* SHIFTJIS_CP932 */
3123 #ifdef X0212_ENABLE
3124 	}else if (0xf0 <= c && c <= 0xfc){
3125 	    ptr->stat = 1;
3126 	    status_push_ch(ptr, c);
3127 #endif /* X0212_ENABLE */
3128 	}else{
3129 	    status_disable(ptr);
3130 	}
3131 	break;
3132     case 1:
3133 	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3134 	    status_push_ch(ptr, c);
3135 	    s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3136 	    code_score(ptr);
3137 	    status_clear(ptr);
3138 	}else{
3139 	    status_disable(ptr);
3140 	}
3141 	break;
3142     case 2:
3143 #ifdef SHIFTJIS_CP932
3144 	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3145 	    status_push_ch(ptr, c);
3146 	    if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
3147 		set_code_score(ptr, SCORE_CP932);
3148 		status_clear(ptr);
3149 		break;
3150 	    }
3151 	}
3152 #endif /* SHIFTJIS_CP932 */
3153 	status_disable(ptr);
3154 	break;
3155     case 3:
3156 	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3157 	    status_push_ch(ptr, c);
3158 	    s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3159 	    set_code_score(ptr, SCORE_CP932);
3160 	    status_clear(ptr);
3161 	}else{
3162 	    status_disable(ptr);
3163 	}
3164 	break;
3165     }
3166 }
3167 
3168 static void
e_status(struct input_code * ptr,nkf_char c)3169 e_status(struct input_code *ptr, nkf_char c)
3170 {
3171     switch (ptr->stat){
3172     case -1:
3173 	status_check(ptr, c);
3174 	break;
3175     case 0:
3176 	if (c <= DEL){
3177 	    break;
3178 	}else if (nkf_char_unicode_p(c)){
3179 	    break;
3180 	}else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
3181 	    ptr->stat = 1;
3182 	    status_push_ch(ptr, c);
3183 #ifdef X0212_ENABLE
3184 	}else if (0x8f == c){
3185 	    ptr->stat = 2;
3186 	    status_push_ch(ptr, c);
3187 #endif /* X0212_ENABLE */
3188 	}else{
3189 	    status_disable(ptr);
3190 	}
3191 	break;
3192     case 1:
3193 	if (0xa1 <= c && c <= 0xfe){
3194 	    status_push_ch(ptr, c);
3195 	    code_score(ptr);
3196 	    status_clear(ptr);
3197 	}else{
3198 	    status_disable(ptr);
3199 	}
3200 	break;
3201 #ifdef X0212_ENABLE
3202     case 2:
3203 	if (0xa1 <= c && c <= 0xfe){
3204 	    ptr->stat = 1;
3205 	    status_push_ch(ptr, c);
3206 	}else{
3207 	    status_disable(ptr);
3208 	}
3209 #endif /* X0212_ENABLE */
3210     }
3211 }
3212 
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance the state machine in `ptr` by one input byte c.
 *
 *   stat -1     detector disabled; see status_check()
 *   stat  0     between characters
 *   stat  1, 2  lead byte 0xc0-0xdf / 0xe0-0xef buffered; the sequence is
 *               converted with w2e_conv() and scored once `stat`
 *               continuation bytes have arrived
 *   stat  3     lead byte 0xf0-0xf4 buffered; continuation bytes are only
 *               collected, the sequence is never scored
 */
static void
w_status(struct input_code *ptr, nkf_char c)
{
    int follow;

    switch (ptr->stat) {
    case -1:
	status_check(ptr, c);
	break;

    case 0:
	if (c <= DEL || nkf_char_unicode_p(c)) {
	    /* these values do not move the detector */
	    break;
	}
	if (0xc0 <= c && c <= 0xdf) {
	    follow = 1;
	} else if (0xe0 <= c && c <= 0xef) {
	    follow = 2;
	} else if (0xf0 <= c && c <= 0xf4) {
	    follow = 3;
	} else {
	    follow = 0;
	}
	if (!follow) {
	    status_disable(ptr);
	    break;
	}
	ptr->stat = follow;
	status_push_ch(ptr, c);
	break;

    case 1:
    case 2:
	if (c < 0x80 || 0xbf < c) {
	    status_disable(ptr);
	    break;
	}
	status_push_ch(ptr, c);
	if (ptr->index > ptr->stat) {
	    /* sequence complete; EF BB BF (the BOM) is converted but not scored */
	    int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
		       && ptr->buf[2] == 0xbf);
	    w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
		     &ptr->buf[0], &ptr->buf[1]);
	    if (!bom) {
		code_score(ptr);
	    }
	    status_clear(ptr);
	}
	break;

    case 3:
	if (c < 0x80 || 0xbf < c) {
	    status_disable(ptr);
	    break;
	}
	if (ptr->index < ptr->stat) {
	    status_push_ch(ptr, c);
	} else {
	    status_clear(ptr);
	}
	break;
    }
}
#endif
3271 
3272 static void
code_status(nkf_char c)3273 code_status(nkf_char c)
3274 {
3275     int action_flag = 1;
3276     struct input_code *result = 0;
3277     struct input_code *p = input_code_list;
3278     while (p->name){
3279 	if (!p->status_func) {
3280 	    ++p;
3281 	    continue;
3282 	}
3283 	if (!p->status_func)
3284 	    continue;
3285 	(p->status_func)(p, c);
3286 	if (p->stat > 0){
3287 	    action_flag = 0;
3288 	}else if(p->stat == 0){
3289 	    if (result){
3290 		action_flag = 0;
3291 	    }else{
3292 		result = p;
3293 	    }
3294 	}
3295 	++p;
3296     }
3297 
3298     if (action_flag){
3299 	if (result && !estab_f){
3300 	    set_iconv(TRUE, result->iconv_func);
3301 	}else if (c <= DEL){
3302 	    struct input_code *ptr = input_code_list;
3303 	    while (ptr->name){
3304 		status_reset(ptr);
3305 		++ptr;
3306 	    }
3307 	}
3308     }
3309 }
3310 
3311 typedef struct {
3312     nkf_buf_t *std_gc_buf;
3313     nkf_char broken_state;
3314     nkf_buf_t *broken_buf;
3315     nkf_char mimeout_state;
3316     nkf_buf_t *nfc_buf;
3317 } nkf_state_t;
3318 
3319 static nkf_state_t *nkf_state = NULL;
3320 
3321 #define STD_GC_BUFSIZE (256)
3322 
3323 static void
nkf_state_init(void)3324 nkf_state_init(void)
3325 {
3326     if (nkf_state) {
3327 	nkf_buf_clear(nkf_state->std_gc_buf);
3328 	nkf_buf_clear(nkf_state->broken_buf);
3329 	nkf_buf_clear(nkf_state->nfc_buf);
3330     }
3331     else {
3332 	nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3333 	nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3334 	nkf_state->broken_buf = nkf_buf_new(3);
3335 	nkf_state->nfc_buf = nkf_buf_new(9);
3336     }
3337     nkf_state->broken_state = 0;
3338     nkf_state->mimeout_state = 0;
3339 }
3340 
3341 #ifndef WIN32DLL
3342 static nkf_char
std_getc(FILE * f)3343 std_getc(FILE *f)
3344 {
3345     if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3346 	return nkf_buf_pop(nkf_state->std_gc_buf);
3347     }
3348     return getc(f);
3349 }
3350 #endif /*WIN32DLL*/
3351 
3352 static nkf_char
std_ungetc(nkf_char c,ARG_UNUSED FILE * f)3353 std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3354 {
3355     nkf_buf_push(nkf_state->std_gc_buf, c);
3356     return c;
3357 }
3358 
3359 #ifndef WIN32DLL
3360 static void
std_putc(nkf_char c)3361 std_putc(nkf_char c)
3362 {
3363     if(c!=EOF)
3364 	putchar(c);
3365 }
3366 #endif /*WIN32DLL*/
3367 
3368 static nkf_char   hold_buf[HOLD_SIZE*2];
3369 static int             hold_count = 0;
3370 static nkf_char
push_hold_buf(nkf_char c2)3371 push_hold_buf(nkf_char c2)
3372 {
3373     if (hold_count >= HOLD_SIZE*2)
3374 	return (EOF);
3375     hold_buf[hold_count++] = c2;
3376     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3377 }
3378 
3379 static int
h_conv(FILE * f,nkf_char c1,nkf_char c2)3380 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3381 {
3382     int ret;
3383     int hold_index;
3384     int fromhold_count;
3385     nkf_char c3, c4;
3386 
3387     /** it must NOT be in the kanji shifte sequence      */
3388     /** it must NOT be written in JIS7                   */
3389     /** and it must be after 2 byte 8bit code            */
3390 
3391     hold_count = 0;
3392     push_hold_buf(c1);
3393     push_hold_buf(c2);
3394 
3395     while ((c2 = (*i_getc)(f)) != EOF) {
3396 	if (c2 == ESC){
3397 	    (*i_ungetc)(c2,f);
3398 	    break;
3399 	}
3400 	code_status(c2);
3401 	if (push_hold_buf(c2) == EOF || estab_f) {
3402 	    break;
3403 	}
3404     }
3405 
3406     if (!estab_f) {
3407 	struct input_code *p = input_code_list;
3408 	struct input_code *result = p;
3409 	if (c2 == EOF) {
3410 	    code_status(c2);
3411 	}
3412 	while (p->name) {
3413 	    if (p->status_func && p->score < result->score) {
3414 		result = p;
3415 	    }
3416 	    p++;
3417 	}
3418 	set_iconv(TRUE, result->iconv_func);
3419     }
3420 
3421 
3422     /** now,
3423      ** 1) EOF is detected, or
3424      ** 2) Code is established, or
3425      ** 3) Buffer is FULL (but last word is pushed)
3426      **
3427      ** in 1) and 3) cases, we continue to use
3428      ** Kanji codes by oconv and leave estab_f unchanged.
3429      **/
3430 
3431     ret = c2;
3432     hold_index = 0;
3433     while (hold_index < hold_count){
3434 	c1 = hold_buf[hold_index++];
3435 	if (nkf_char_unicode_p(c1)) {
3436 	    (*oconv)(0, c1);
3437 	    continue;
3438 	}
3439 	else if (c1 <= DEL){
3440 	    (*iconv)(0, c1, 0);
3441 	    continue;
3442 	}else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3443 	    (*iconv)(JIS_X_0201_1976_K, c1, 0);
3444 	    continue;
3445 	}
3446 	fromhold_count = 1;
3447 	if (hold_index < hold_count){
3448 	    c2 = hold_buf[hold_index++];
3449 	    fromhold_count++;
3450 	}else{
3451 	    c2 = (*i_getc)(f);
3452 	    if (c2 == EOF){
3453 		c4 = EOF;
3454 		break;
3455 	    }
3456 	    code_status(c2);
3457 	}
3458 	c3 = 0;
3459 	switch ((*iconv)(c1, c2, 0)) {  /* can be EUC/SJIS/UTF-8 */
3460 	case -2:
3461 	    /* 4 bytes UTF-8 */
3462 	    if (hold_index < hold_count){
3463 		c3 = hold_buf[hold_index++];
3464 	    } else if ((c3 = (*i_getc)(f)) == EOF) {
3465 		ret = EOF;
3466 		break;
3467 	    }
3468 	    code_status(c3);
3469 	    if (hold_index < hold_count){
3470 		c4 = hold_buf[hold_index++];
3471 	    } else if ((c4 = (*i_getc)(f)) == EOF) {
3472 		c3 = ret = EOF;
3473 		break;
3474 	    }
3475 	    code_status(c4);
3476 	    (*iconv)(c1, c2, (c3<<8)|c4);
3477 	    break;
3478 	case -3:
3479 	    /* 4 bytes UTF-8 (check combining character) */
3480 	    if (hold_index < hold_count){
3481 		c3 = hold_buf[hold_index++];
3482 		fromhold_count++;
3483 	    } else if ((c3 = (*i_getc)(f)) == EOF) {
3484 		w_iconv_nocombine(c1, c2, 0);
3485 		break;
3486 	    }
3487 	    if (hold_index < hold_count){
3488 		c4 = hold_buf[hold_index++];
3489 		fromhold_count++;
3490 	    } else if ((c4 = (*i_getc)(f)) == EOF) {
3491 		w_iconv_nocombine(c1, c2, 0);
3492 		if (fromhold_count <= 2)
3493 		    (*i_ungetc)(c3,f);
3494 		else
3495 		    hold_index--;
3496 		continue;
3497 	    }
3498 	    if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3499 		w_iconv_nocombine(c1, c2, 0);
3500 		if (fromhold_count <= 2) {
3501 		    (*i_ungetc)(c4,f);
3502 		    (*i_ungetc)(c3,f);
3503 		} else if (fromhold_count == 3) {
3504 		    (*i_ungetc)(c4,f);
3505 		    hold_index--;
3506 		} else {
3507 		    hold_index -= 2;
3508 		}
3509 	    }
3510 	    break;
3511 	case -1:
3512 	    /* 3 bytes EUC or UTF-8 */
3513 	    if (hold_index < hold_count){
3514 		c3 = hold_buf[hold_index++];
3515 		fromhold_count++;
3516 	    } else if ((c3 = (*i_getc)(f)) == EOF) {
3517 		ret = EOF;
3518 		break;
3519 	    } else {
3520 		code_status(c3);
3521 	    }
3522 	    if ((*iconv)(c1, c2, c3) == -3) {
3523 		/* 6 bytes UTF-8 (check combining character) */
3524 		nkf_char c5, c6;
3525 		if (hold_index < hold_count){
3526 		    c4 = hold_buf[hold_index++];
3527 		    fromhold_count++;
3528 		} else if ((c4 = (*i_getc)(f)) == EOF) {
3529 		    w_iconv_nocombine(c1, c2, c3);
3530 		    continue;
3531 		}
3532 		if (hold_index < hold_count){
3533 		    c5 = hold_buf[hold_index++];
3534 		    fromhold_count++;
3535 		} else if ((c5 = (*i_getc)(f)) == EOF) {
3536 		    w_iconv_nocombine(c1, c2, c3);
3537 		    if (fromhold_count == 4)
3538 			hold_index--;
3539 		    else
3540 			(*i_ungetc)(c4,f);
3541 		    continue;
3542 		}
3543 		if (hold_index < hold_count){
3544 		    c6 = hold_buf[hold_index++];
3545 		    fromhold_count++;
3546 		} else if ((c6 = (*i_getc)(f)) == EOF) {
3547 		    w_iconv_nocombine(c1, c2, c3);
3548 		    if (fromhold_count == 5) {
3549 			hold_index -= 2;
3550 		    } else if (fromhold_count == 4) {
3551 			hold_index--;
3552 			(*i_ungetc)(c5,f);
3553 		    } else {
3554 			(*i_ungetc)(c5,f);
3555 			(*i_ungetc)(c4,f);
3556 		    }
3557 		    continue;
3558 		}
3559 		if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3560 		    w_iconv_nocombine(c1, c2, c3);
3561 		    if (fromhold_count == 6) {
3562 			hold_index -= 3;
3563 		    } else if (fromhold_count == 5) {
3564 			hold_index -= 2;
3565 			(*i_ungetc)(c6,f);
3566 		    } else if (fromhold_count == 4) {
3567 			hold_index--;
3568 			(*i_ungetc)(c6,f);
3569 			(*i_ungetc)(c5,f);
3570 		    } else {
3571 			(*i_ungetc)(c6,f);
3572 			(*i_ungetc)(c5,f);
3573 			(*i_ungetc)(c4,f);
3574 		    }
3575 		}
3576 	    }
3577 	    break;
3578 	}
3579 	if (c3 == EOF) break;
3580     }
3581     return ret;
3582 }
3583 
3584 /*
3585  * Check and Ignore BOM
3586  */
3587 static void
check_bom(FILE * f)3588 check_bom(FILE *f)
3589 {
3590     int c2;
3591     input_bom_f = FALSE;
3592     switch(c2 = (*i_getc)(f)){
3593     case 0x00:
3594 	if((c2 = (*i_getc)(f)) == 0x00){
3595 	    if((c2 = (*i_getc)(f)) == 0xFE){
3596 		if((c2 = (*i_getc)(f)) == 0xFF){
3597 		    if(!input_encoding){
3598 			set_iconv(TRUE, w_iconv32);
3599 		    }
3600 		    if (iconv == w_iconv32) {
3601 			input_bom_f = TRUE;
3602 			input_endian = ENDIAN_BIG;
3603 			return;
3604 		    }
3605 		    (*i_ungetc)(0xFF,f);
3606 		}else (*i_ungetc)(c2,f);
3607 		(*i_ungetc)(0xFE,f);
3608 	    }else if(c2 == 0xFF){
3609 		if((c2 = (*i_getc)(f)) == 0xFE){
3610 		    if(!input_encoding){
3611 			set_iconv(TRUE, w_iconv32);
3612 		    }
3613 		    if (iconv == w_iconv32) {
3614 			input_endian = ENDIAN_2143;
3615 			return;
3616 		    }
3617 		    (*i_ungetc)(0xFF,f);
3618 		}else (*i_ungetc)(c2,f);
3619 		(*i_ungetc)(0xFF,f);
3620 	    }else (*i_ungetc)(c2,f);
3621 	    (*i_ungetc)(0x00,f);
3622 	}else (*i_ungetc)(c2,f);
3623 	(*i_ungetc)(0x00,f);
3624 	break;
3625     case 0xEF:
3626 	if((c2 = (*i_getc)(f)) == 0xBB){
3627 	    if((c2 = (*i_getc)(f)) == 0xBF){
3628 		if(!input_encoding){
3629 		    set_iconv(TRUE, w_iconv);
3630 		}
3631 		if (iconv == w_iconv) {
3632 		    input_bom_f = TRUE;
3633 		    return;
3634 		}
3635 		(*i_ungetc)(0xBF,f);
3636 	    }else (*i_ungetc)(c2,f);
3637 	    (*i_ungetc)(0xBB,f);
3638 	}else (*i_ungetc)(c2,f);
3639 	(*i_ungetc)(0xEF,f);
3640 	break;
3641     case 0xFE:
3642 	if((c2 = (*i_getc)(f)) == 0xFF){
3643 	    if((c2 = (*i_getc)(f)) == 0x00){
3644 		if((c2 = (*i_getc)(f)) == 0x00){
3645 		    if(!input_encoding){
3646 			set_iconv(TRUE, w_iconv32);
3647 		    }
3648 		    if (iconv == w_iconv32) {
3649 			input_endian = ENDIAN_3412;
3650 			return;
3651 		    }
3652 		    (*i_ungetc)(0x00,f);
3653 		}else (*i_ungetc)(c2,f);
3654 		(*i_ungetc)(0x00,f);
3655 	    }else (*i_ungetc)(c2,f);
3656 	    if(!input_encoding){
3657 		set_iconv(TRUE, w_iconv16);
3658 	    }
3659 	    if (iconv == w_iconv16) {
3660 		input_endian = ENDIAN_BIG;
3661 		input_bom_f = TRUE;
3662 		return;
3663 	    }
3664 	    (*i_ungetc)(0xFF,f);
3665 	}else (*i_ungetc)(c2,f);
3666 	(*i_ungetc)(0xFE,f);
3667 	break;
3668     case 0xFF:
3669 	if((c2 = (*i_getc)(f)) == 0xFE){
3670 	    if((c2 = (*i_getc)(f)) == 0x00){
3671 		if((c2 = (*i_getc)(f)) == 0x00){
3672 		    if(!input_encoding){
3673 			set_iconv(TRUE, w_iconv32);
3674 		    }
3675 		    if (iconv == w_iconv32) {
3676 			input_endian = ENDIAN_LITTLE;
3677 			input_bom_f = TRUE;
3678 			return;
3679 		    }
3680 		    (*i_ungetc)(0x00,f);
3681 		}else (*i_ungetc)(c2,f);
3682 		(*i_ungetc)(0x00,f);
3683 	    }else (*i_ungetc)(c2,f);
3684 	    if(!input_encoding){
3685 		set_iconv(TRUE, w_iconv16);
3686 	    }
3687 	    if (iconv == w_iconv16) {
3688 		input_endian = ENDIAN_LITTLE;
3689 		input_bom_f = TRUE;
3690 		return;
3691 	    }
3692 	    (*i_ungetc)(0xFE,f);
3693 	}else (*i_ungetc)(c2,f);
3694 	(*i_ungetc)(0xFF,f);
3695 	break;
3696     default:
3697 	(*i_ungetc)(c2,f);
3698 	break;
3699     }
3700 }
3701 
3702 static nkf_char
broken_getc(FILE * f)3703 broken_getc(FILE *f)
3704 {
3705     nkf_char c, c1;
3706 
3707     if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3708 	return nkf_buf_pop(nkf_state->broken_buf);
3709     }
3710     c = (*i_bgetc)(f);
3711     if (c=='$' && nkf_state->broken_state != ESC
3712 	&& (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3713 	c1= (*i_bgetc)(f);
3714 	nkf_state->broken_state = 0;
3715 	if (c1=='@'|| c1=='B') {
3716 	    nkf_buf_push(nkf_state->broken_buf, c1);
3717 	    nkf_buf_push(nkf_state->broken_buf, c);
3718 	    return ESC;
3719 	} else {
3720 	    (*i_bungetc)(c1,f);
3721 	    return c;
3722 	}
3723     } else if (c=='(' && nkf_state->broken_state != ESC
3724 	       && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3725 	c1= (*i_bgetc)(f);
3726 	nkf_state->broken_state = 0;
3727 	if (c1=='J'|| c1=='B') {
3728 	    nkf_buf_push(nkf_state->broken_buf, c1);
3729 	    nkf_buf_push(nkf_state->broken_buf, c);
3730 	    return ESC;
3731 	} else {
3732 	    (*i_bungetc)(c1,f);
3733 	    return c;
3734 	}
3735     } else {
3736 	nkf_state->broken_state = c;
3737 	return c;
3738     }
3739 }
3740 
3741 static nkf_char
broken_ungetc(nkf_char c,ARG_UNUSED FILE * f)3742 broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3743 {
3744     if (nkf_buf_length(nkf_state->broken_buf) < 2)
3745 	nkf_buf_push(nkf_state->broken_buf, c);
3746     return c;
3747 }
3748 
3749 static void
eol_conv(nkf_char c2,nkf_char c1)3750 eol_conv(nkf_char c2, nkf_char c1)
3751 {
3752     if (guess_f && input_eol != EOF) {
3753 	if (c2 == 0 && c1 == LF) {
3754 	    if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3755 	    else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3756 	} else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3757 	else if (!prev_cr);
3758 	else if (!input_eol) input_eol = CR;
3759 	else if (input_eol != CR) input_eol = EOF;
3760     }
3761     if (prev_cr || (c2 == 0 && c1 == LF)) {
3762 	prev_cr = 0;
3763 	if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3764 	if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3765     }
3766     if (c2 == 0 && c1 == CR) prev_cr = CR;
3767     else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3768 }
3769 
3770 static void
put_newline(void (* func)(nkf_char))3771 put_newline(void (*func)(nkf_char))
3772 {
3773     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3774       case CRLF:
3775 	(*func)(0x0D);
3776 	(*func)(0x0A);
3777 	break;
3778       case CR:
3779 	(*func)(0x0D);
3780 	break;
3781       case LF:
3782 	(*func)(0x0A);
3783 	break;
3784     }
3785 }
3786 
3787 static void
oconv_newline(void (* func)(nkf_char,nkf_char))3788 oconv_newline(void (*func)(nkf_char, nkf_char))
3789 {
3790     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3791       case CRLF:
3792 	(*func)(0, 0x0D);
3793 	(*func)(0, 0x0A);
3794 	break;
3795       case CR:
3796 	(*func)(0, 0x0D);
3797 	break;
3798       case LF:
3799 	(*func)(0, 0x0A);
3800 	break;
3801     }
3802 }
3803 
/*
   Values of fold_state computed inside fold_conv()

   LF  add newline and output the character
   CR  add newline and output nothing
   SP  output a single space
   0   skip (output nothing)
   1   (or anything else) normal output

   Fold state kept in f_prev (the previous character)

   >0x80 Japanese (X0208/X0201)
   <0x80 ASCII
   LF    new line
   SP    space

   This fold algorithm does not preserve leading space in a line.
   This is the main difference from fmt.
 */
3823 
/* Display width used for folding: 2 columns when c2 is nonzero, else 1. c1 is ignored. */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3825 
3826 static void
fold_conv(nkf_char c2,nkf_char c1)3827 fold_conv(nkf_char c2, nkf_char c1)
3828 {
3829     nkf_char prev0;
3830     nkf_char fold_state;
3831 
3832     if (c1== CR && !fold_preserve_f) {
3833 	fold_state=0;  /* ignore cr */
3834     }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3835 	f_prev = LF;
3836 	fold_state=0;  /* ignore cr */
3837     } else if (c1== BS) {
3838 	if (f_line>0) f_line--;
3839 	fold_state =  1;
3840     } else if (c2==EOF && f_line != 0) {    /* close open last line */
3841 	fold_state = LF;
3842     } else if ((c1==LF && !fold_preserve_f)
3843 	       || ((c1==CR||(c1==LF&&f_prev!=CR))
3844 		   && fold_preserve_f)) {
3845 	/* new line */
3846 	if (fold_preserve_f) {
3847 	    f_prev = c1;
3848 	    f_line = 0;
3849 	    fold_state =  CR;
3850 	} else if ((f_prev == c1)
3851 		   || (f_prev == LF)
3852 		  ) {        /* duplicate newline */
3853 	    if (f_line) {
3854 		f_line = 0;
3855 		fold_state =  LF;    /* output two newline */
3856 	    } else {
3857 		f_line = 0;
3858 		fold_state =  1;
3859 	    }
3860 	} else  {
3861 	    if (f_prev&0x80) {     /* Japanese? */
3862 		f_prev = c1;
3863 		fold_state =  0;       /* ignore given single newline */
3864 	    } else if (f_prev==SP) {
3865 		fold_state =  0;
3866 	    } else {
3867 		f_prev = c1;
3868 		if (++f_line<=fold_len)
3869 		    fold_state =  SP;
3870 		else {
3871 		    f_line = 0;
3872 		    fold_state =  CR;        /* fold and output nothing */
3873 		}
3874 	    }
3875 	}
3876     } else if (c1=='\f') {
3877 	f_prev = LF;
3878 	f_line = 0;
3879 	fold_state =  LF;            /* output newline and clear */
3880     } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3881 	/* X0208 kankaku or ascii space */
3882 	if (f_prev == SP) {
3883 	    fold_state = 0;         /* remove duplicate spaces */
3884 	} else {
3885 	    f_prev = SP;
3886 	    if (++f_line<=fold_len)
3887 		fold_state = SP;         /* output ASCII space only */
3888 	    else {
3889 		f_prev = SP; f_line = 0;
3890 		fold_state = CR;        /* fold and output nothing */
3891 	    }
3892 	}
3893     } else {
3894 	prev0 = f_prev; /* we still need this one... , but almost done */
3895 	f_prev = c1;
3896 	if (c2 || c2 == JIS_X_0201_1976_K)
3897 	    f_prev |= 0x80;  /* this is Japanese */
3898 	f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
3899 	if (f_line<=fold_len) {   /* normal case */
3900 	    fold_state = 1;
3901 	} else {
3902 	    if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3903 		f_line = char_size(c2,c1);
3904 		fold_state =  LF;       /* We can't wait, do fold now */
3905 	    } else if (c2 == JIS_X_0201_1976_K) {
3906 		/* simple kinsoku rules  return 1 means no folding  */
3907 		if (c1==(0xde&0x7f)) fold_state = 1; /* $B!+(B*/
3908 		else if (c1==(0xdf&0x7f)) fold_state = 1; /* $B!,(B*/
3909 		else if (c1==(0xa4&0x7f)) fold_state = 1; /* $B!#(B*/
3910 		else if (c1==(0xa3&0x7f)) fold_state = 1; /* $B!$(B*/
3911 		else if (c1==(0xa1&0x7f)) fold_state = 1; /* $B!W(B*/
3912 		else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3913 		else if (SP<=c1 && c1<=(0xdf&0x7f)) {      /* X0201 */
3914 		    f_line = 1;
3915 		    fold_state = LF;/* add one new f_line before this character */
3916 		} else {
3917 		    f_line = 1;
3918 		    fold_state = LF;/* add one new f_line before this character */
3919 		}
3920 	    } else if (c2==0) {
3921 		/* kinsoku point in ASCII */
3922 		if (  c1==')'||    /* { [ ( */
3923 		    c1==']'||
3924 		    c1=='}'||
3925 		    c1=='.'||
3926 		    c1==','||
3927 		    c1=='!'||
3928 		    c1=='?'||
3929 		    c1=='/'||
3930 		    c1==':'||
3931 		    c1==';') {
3932 		    fold_state = 1;
3933 		    /* just after special */
3934 		} else if (!is_alnum(prev0)) {
3935 		    f_line = char_size(c2,c1);
3936 		    fold_state = LF;
3937 		} else if ((prev0==SP) ||   /* ignored new f_line */
3938 			   (prev0==LF)||        /* ignored new f_line */
3939 			   (prev0&0x80)) {        /* X0208 - ASCII */
3940 		    f_line = char_size(c2,c1);
3941 		    fold_state = LF;/* add one new f_line before this character */
3942 		} else {
3943 		    fold_state = 1;  /* default no fold in ASCII */
3944 		}
3945 	    } else {
3946 		if (c2=='!') {
3947 		    if (c1=='"')  fold_state = 1; /* $B!"(B */
3948 		    else if (c1=='#')  fold_state = 1; /* $B!#(B */
3949 		    else if (c1=='W')  fold_state = 1; /* $B!W(B */
3950 		    else if (c1=='K')  fold_state = 1; /* $B!K(B */
3951 		    else if (c1=='$')  fold_state = 1; /* $B!$(B */
3952 		    else if (c1=='%')  fold_state = 1; /* $B!%(B */
3953 		    else if (c1=='\'') fold_state = 1; /* $B!\(B */
3954 		    else if (c1=='(')  fold_state = 1; /* $B!((B */
3955 		    else if (c1==')')  fold_state = 1; /* $B!)(B */
3956 		    else if (c1=='*')  fold_state = 1; /* $B!*(B */
3957 		    else if (c1=='+')  fold_state = 1; /* $B!+(B */
3958 		    else if (c1==',')  fold_state = 1; /* $B!,(B */
3959 		    /* default no fold in kinsoku */
3960 		    else {
3961 			fold_state = LF;
3962 			f_line = char_size(c2,c1);
3963 			/* add one new f_line before this character */
3964 		    }
3965 		} else {
3966 		    f_line = char_size(c2,c1);
3967 		    fold_state = LF;
3968 		    /* add one new f_line before this character */
3969 		}
3970 	    }
3971 	}
3972     }
3973     /* terminator process */
3974     switch(fold_state) {
3975     case LF:
3976 	oconv_newline(o_fconv);
3977 	(*o_fconv)(c2,c1);
3978 	break;
3979     case 0:
3980 	return;
3981     case CR:
3982 	oconv_newline(o_fconv);
3983 	break;
3984     case TAB:
3985     case SP:
3986 	(*o_fconv)(0,SP);
3987 	break;
3988     default:
3989 	(*o_fconv)(c2,c1);
3990     }
3991 }
3992 
3993 static nkf_char z_prev2=0,z_prev1=0;
3994 
3995 static void
z_conv(nkf_char c2,nkf_char c1)3996 z_conv(nkf_char c2, nkf_char c1)
3997 {
3998 
3999     /* if (c2) c1 &= 0x7f; assertion */
4000 
4001     if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
4002 	(*o_zconv)(c2,c1);
4003 	return;
4004     }
4005 
4006     if (x0201_f) {
4007 	if (z_prev2 == JIS_X_0201_1976_K) {
4008 	    if (c2 == JIS_X_0201_1976_K) {
4009 		if (c1 == (0xde&0x7f)) { /* $BByE@(B */
4010 		    z_prev2 = 0;
4011 		    (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
4012 		    return;
4013 		} else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
4014 		    z_prev2 = 0;
4015 		    (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
4016 		    return;
4017 		} else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
4018 		    z_prev2 = 0;
4019 		    (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
4020 		    return;
4021 		}
4022 	    }
4023 	    z_prev2 = 0;
4024 	    (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
4025 	}
4026 	if (c2 == JIS_X_0201_1976_K) {
4027 	    if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
4028 		/* wait for $BByE@(B or $BH>ByE@(B */
4029 		z_prev1 = c1;
4030 		z_prev2 = c2;
4031 		return;
4032 	    } else {
4033 		(*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
4034 		return;
4035 	    }
4036 	}
4037     }
4038 
4039     if (c2 == EOF) {
4040 	(*o_zconv)(c2, c1);
4041 	return;
4042     }
4043 
4044     if (alpha_f&1 && c2 == 0x23) {
4045 	/* JISX0208 Alphabet */
4046 	c2 = 0;
4047     } else if (c2 == 0x21) {
4048 	/* JISX0208 Kigou */
4049 	if (0x21==c1) {
4050 	    if (alpha_f&2) {
4051 		c2 = 0;
4052 		c1 = SP;
4053 	    } else if (alpha_f&4) {
4054 		(*o_zconv)(0, SP);
4055 		(*o_zconv)(0, SP);
4056 		return;
4057 	    }
4058 	} else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4059 	    c2 =  0;
4060 	    c1 = fv[c1-0x20];
4061 	}
4062     }
4063 
4064     if (alpha_f&8 && c2 == 0) {
4065 	/* HTML Entity */
4066 	const char *entity = 0;
4067 	switch (c1){
4068 	case '>': entity = "&gt;"; break;
4069 	case '<': entity = "&lt;"; break;
4070 	case '\"': entity = "&quot;"; break;
4071 	case '&': entity = "&amp;"; break;
4072 	}
4073 	if (entity){
4074 	    while (*entity) (*o_zconv)(0, *entity++);
4075 	    return;
4076 	}
4077     }
4078 
4079     if (alpha_f & 16) {
4080 	/* JIS X 0208 Katakana to JIS X 0201 Katakana */
4081 	if (c2 == 0x21) {
4082 	    nkf_char c = 0;
4083 	    switch (c1) {
4084 	    case 0x23:
4085 		/* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4086 		c = 0xA1;
4087 		break;
4088 	    case 0x56:
4089 		/* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4090 		c = 0xA2;
4091 		break;
4092 	    case 0x57:
4093 		/* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4094 		c = 0xA3;
4095 		break;
4096 	    case 0x22:
4097 		/* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4098 		c = 0xA4;
4099 		break;
4100 	    case 0x26:
4101 		/* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4102 		c = 0xA5;
4103 		break;
4104 	    case 0x3C:
4105 		/* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4106 		c = 0xB0;
4107 		break;
4108 	    case 0x2B:
4109 		/* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4110 		c = 0xDE;
4111 		break;
4112 	    case 0x2C:
4113 		/* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4114 		c = 0xDF;
4115 		break;
4116 	    }
4117 	    if (c) {
4118 		(*o_zconv)(JIS_X_0201_1976_K, c);
4119 		return;
4120 	    }
4121 	} else if (c2 == 0x25) {
4122 	    /* JISX0208 Katakana */
4123 	    static const int fullwidth_to_halfwidth[] =
4124 	    {
4125 		0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4126 		0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4127 		0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4128 		0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4129 		0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4130 		0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4131 		0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4132 		0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4133 		0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4134 		0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4135 		0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4136 		0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4137 	    };
4138 	    if (fullwidth_to_halfwidth[c1-0x20]){
4139 		c2 = fullwidth_to_halfwidth[c1-0x20];
4140 		(*o_zconv)(JIS_X_0201_1976_K, c2>>8);
4141 		if (c2 & 0xFF) {
4142 		    (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
4143 		}
4144 		return;
4145 	    }
4146 	} else if (c2 == 0 && nkf_char_unicode_p(c1) &&
4147 	    ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */
4148 	    (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
4149 	    return;
4150 	}
4151     }
4152     (*o_zconv)(c2,c1);
4153 }
4154 
4155 
/*
 * rot13(c): rotate ASCII letters by 13 places within their own case;
 * every other value is returned unchanged.
 * The argument is evaluated more than once, so it must have no side effects.
 */
#define rot13(c)  ( \
		   ((c) < 'A')   ? (c) : \
		   ((c) <= 'M')  ? ((c) + 13) : \
		   ((c) <= 'Z')  ? ((c) - 13) : \
		   ((c) < 'a')   ? (c) : \
		   ((c) <= 'm')  ? ((c) + 13) : \
		   ((c) <= 'z')  ? ((c) - 13) : \
		   (c) \
		  )

/*
 * rot47(c): rotate the 94 values '!'..'~' by 47 places;
 * every other value is returned unchanged.
 * The argument is evaluated more than once, so it must have no side effects.
 */
#define  rot47(c) ( \
		   ((c) < '!')  ? (c) : \
		   ((c) <= 'O') ? ((c) + 47) : \
		   ((c) <= '~') ? ((c) - 47) : \
		   (c) \
		  )
4172 
4173 static void
rot_conv(nkf_char c2,nkf_char c1)4174 rot_conv(nkf_char c2, nkf_char c1)
4175 {
4176     if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
4177 	c1 = rot13(c1);
4178     } else if (c2) {
4179 	c1 = rot47(c1);
4180 	c2 = rot47(c2);
4181     }
4182     (*o_rot_conv)(c2,c1);
4183 }
4184 
4185 static void
hira_conv(nkf_char c2,nkf_char c1)4186 hira_conv(nkf_char c2, nkf_char c1)
4187 {
4188     if (hira_f & 1) {
4189 	if (c2 == 0x25) {
4190 	    if (0x20 < c1 && c1 < 0x74) {
4191 		c2 = 0x24;
4192 		(*o_hira_conv)(c2,c1);
4193 		return;
4194 	    } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
4195 		c2 = 0;
4196 		c1 = nkf_char_unicode_new(0x3094);
4197 		(*o_hira_conv)(c2,c1);
4198 		return;
4199 	    }
4200 	} else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4201 	    c1 += 2;
4202 	    (*o_hira_conv)(c2,c1);
4203 	    return;
4204 	}
4205     }
4206     if (hira_f & 2) {
4207 	if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
4208 	    c2 = 0x25;
4209 	    c1 = 0x74;
4210 	} else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4211 	    c2 = 0x25;
4212 	} else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4213 	    c1 -= 2;
4214 	}
4215     }
4216     (*o_hira_conv)(c2,c1);
4217 }
4218 
4219 
4220 static void
iso2022jp_check_conv(nkf_char c2,nkf_char c1)4221 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4222 {
4223 #define RANGE_NUM_MAX 18
4224     static const nkf_char range[RANGE_NUM_MAX][2] = {
4225 	{0x222f, 0x2239,},
4226 	{0x2242, 0x2249,},
4227 	{0x2251, 0x225b,},
4228 	{0x226b, 0x2271,},
4229 	{0x227a, 0x227d,},
4230 	{0x2321, 0x232f,},
4231 	{0x233a, 0x2340,},
4232 	{0x235b, 0x2360,},
4233 	{0x237b, 0x237e,},
4234 	{0x2474, 0x247e,},
4235 	{0x2577, 0x257e,},
4236 	{0x2639, 0x2640,},
4237 	{0x2659, 0x267e,},
4238 	{0x2742, 0x2750,},
4239 	{0x2772, 0x277e,},
4240 	{0x2841, 0x287e,},
4241 	{0x4f54, 0x4f7e,},
4242 	{0x7425, 0x747e},
4243     };
4244     nkf_char i;
4245     nkf_char start, end, c;
4246 
4247     if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4248 	c2 = GETA1;
4249 	c1 = GETA2;
4250     }
4251     if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4252 	c2 = GETA1;
4253 	c1 = GETA2;
4254     }
4255 
4256     for (i = 0; i < RANGE_NUM_MAX; i++) {
4257 	start = range[i][0];
4258 	end   = range[i][1];
4259 	c     = (c2 << 8) + c1;
4260 	if (c >= start && c <= end) {
4261 	    c2 = GETA1;
4262 	    c1 = GETA2;
4263 	}
4264     }
4265     (*o_iso2022jp_check_conv)(c2,c1);
4266 }
4267 
4268 
/*
 * MIME encoded-word preambles recognized on input and written on output,
 * e.g. the leading part of  =?ISO-2022-JP?B?HOGE HOGE?=
 *
 * "\075" is the octal escape for '='.  The list is NULL-terminated.
 * Entry i of this table belongs together with entry i of
 * mime_priority_func[], mime_encode[] and mime_encode_method[], so the
 * four tables must be kept in step (including the UTF8_INPUT_ENABLE
 * entries).  The ISO-2022-JP "B" preamble is listed twice; the two
 * entries differ only in their mime_encode[] value.
 */

static const unsigned char *mime_pattern[] = {
    (const unsigned char *)"\075?EUC-JP?B?",
    (const unsigned char *)"\075?SHIFT_JIS?B?",
    (const unsigned char *)"\075?ISO-8859-1?Q?",
    (const unsigned char *)"\075?ISO-8859-1?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?Q?",
#if defined(UTF8_INPUT_ENABLE)
    (const unsigned char *)"\075?UTF-8?B?",
    (const unsigned char *)"\075?UTF-8?Q?",
#endif
    (const unsigned char *)"\075?US-ASCII?Q?",
    NULL
};
4286 
4287 
/*
 * Marker used to raise the priority of the matching input code.
 *
 * Indexed like mime_pattern[]: entry i is the input converter selected
 * when preamble i has been recognized (0 where none is given).
 */
nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
    e_iconv, s_iconv, 0, 0, 0, 0, 0,
#if defined(UTF8_INPUT_ENABLE)
    w_iconv, w_iconv,
#endif
    0,
};
4296 
/*
 * Output mode associated with each mime_pattern[] entry (same index).
 * open_mime() searches this table for the current mode to pick the
 * preamble to write.  The trailing 0 lines up with the NULL terminator of
 * mime_pattern[].
 */
static const nkf_char mime_encode[] = {
    EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
#if defined(UTF8_INPUT_ENABLE)
    UTF_8, UTF_8,
#endif
    ASCII,
    0
};
4305 
/*
 * Transfer encoding of each mime_pattern[] entry (same index):
 * 'B' or 'Q', matching the letter in the preamble text.  The trailing 0
 * lines up with the NULL terminator of mime_pattern[].
 */
static const nkf_char mime_encode_method[] = {
    'B', 'B','Q', 'B', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE)
    'B', 'Q',
#endif
    'Q',
    0
};
4314 
4315 
/* MIME preprocessor fifo */

#define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
#define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
/* Access slot n of the ring; the index is wrapped with MIME_BUF_MASK, so
   the counters below may run freely (including wrapping below zero). */
#define mime_input_buf(n)        mime_input_state.buf[(n)&MIME_BUF_MASK]
static struct {
    unsigned char buf[MIME_BUF_SIZE];
    unsigned int  top;   /* next position handed out by mime_getc */
    unsigned int  last;  /* decoded */
    unsigned int  input; /* undecoded */
} mime_input_state;
/* Input converter saved by mime_begin_strict and restored by
   unswitch_mime_getc; NULL when nothing is saved. */
static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;

/* Size of the recovery buffer in mime_begin_strict and upper bound on the
   number of preamble characters examined by mime_begin. */
#define MAXRECOVER 20
4330 
4331 static void
mime_input_buf_unshift(nkf_char c)4332 mime_input_buf_unshift(nkf_char c)
4333 {
4334     mime_input_buf(--mime_input_state.top) = (unsigned char)c;
4335 }
4336 
4337 static nkf_char
mime_ungetc(nkf_char c,ARG_UNUSED FILE * f)4338 mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
4339 {
4340     mime_input_buf_unshift(c);
4341     return c;
4342 }
4343 
4344 static nkf_char
mime_ungetc_buf(nkf_char c,FILE * f)4345 mime_ungetc_buf(nkf_char c, FILE *f)
4346 {
4347     if (mimebuf_f)
4348 	(*i_mungetc_buf)(c,f);
4349     else
4350 	mime_input_buf(--mime_input_state.input) = (unsigned char)c;
4351     return c;
4352 }
4353 
4354 static nkf_char
mime_getc_buf(FILE * f)4355 mime_getc_buf(FILE *f)
4356 {
4357     /* we don't keep eof of mime_input_buf, because it contains ?= as
4358        a terminator. It was checked in mime_integrity. */
4359     return ((mimebuf_f)?
4360 	    (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
4361 }
4362 
4363 static void
switch_mime_getc(void)4364 switch_mime_getc(void)
4365 {
4366     if (i_getc!=mime_getc) {
4367 	i_mgetc = i_getc; i_getc = mime_getc;
4368 	i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4369 	if(mime_f==STRICT_MIME) {
4370 	    i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4371 	    i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4372 	}
4373     }
4374 }
4375 
4376 static void
unswitch_mime_getc(void)4377 unswitch_mime_getc(void)
4378 {
4379     if(mime_f==STRICT_MIME) {
4380 	i_mgetc = i_mgetc_buf;
4381 	i_mungetc = i_mungetc_buf;
4382     }
4383     i_getc = i_mgetc;
4384     i_ungetc = i_mungetc;
4385     if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4386     mime_iconv_back = NULL;
4387 }
4388 
4389 static nkf_char
mime_integrity(FILE * f,const unsigned char * p)4390 mime_integrity(FILE *f, const unsigned char *p)
4391 {
4392     nkf_char c,d;
4393     unsigned int q;
4394     /* In buffered mode, read until =? or NL or buffer full
4395      */
4396     mime_input_state.input = mime_input_state.top;
4397     mime_input_state.last = mime_input_state.top;
4398 
4399     while(*p) mime_input_buf(mime_input_state.input++) = *p++;
4400     d = 0;
4401     q = mime_input_state.input;
4402     while((c=(*i_getc)(f))!=EOF) {
4403 	if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4404 	    break;   /* buffer full */
4405 	}
4406 	if (c=='=' && d=='?') {
4407 	    /* checked. skip header, start decode */
4408 	    mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4409 	    /* mime_last_input = mime_input_state.input; */
4410 	    mime_input_state.input = q;
4411 	    switch_mime_getc();
4412 	    return 1;
4413 	}
4414 	if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4415 	    break;
4416 	/* Should we check length mod 4? */
4417 	mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4418 	d=c;
4419     }
4420     /* In case of Incomplete MIME, no MIME decode  */
4421     mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4422     mime_input_state.last = mime_input_state.input;     /* point undecoded buffer */
4423     mime_decode_mode = 1;              /* no decode on mime_input_buf last in mime_getc */
4424     switch_mime_getc();         /* anyway we need buffered getc */
4425     return 1;
4426 }
4427 
/*
 * Strict recognition of a MIME encoded-word preamble; called after "=?"
 * has been read.
 *
 * Input is compared, ignoring case of the input character, against the
 * entries of mime_pattern[].  Every character consumed is kept in r[] so
 * that it can be written out unchanged if no pattern matches.
 *
 * On success mime_decode_mode is set to the pattern's method letter, the
 * current input converter is saved in mime_iconv_back and the converter
 * for the pattern is installed.  For 'B' in buffered mode (unbuf_f clear)
 * the rest is delegated to mime_integrity(); otherwise the MIME input
 * hooks are installed and mimebuf_f is set to TRUE.
 *
 * Returns the result of mime_integrity(), or the last character read.
 */
static nkf_char
mime_begin_strict(FILE *f)
{
    nkf_char c1 = 0;
    int i,j,k;
    const unsigned char *p,*q;
    nkf_char r[MAXRECOVER];    /* recovery buffer, max mime pattern length */

    mime_decode_mode = FALSE;
    /* =? has been checked */
    j = 0;
    p = mime_pattern[j];
    r[0]='='; r[1]='?';

    /* the loop stops at the pattern's terminating NUL (not > SP) */
    for(i=2;p[i]>SP;i++) {                   /* start at =? */
	if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
	    /* pattern fails, try next one */
	    q = p;
	    /* look for a later pattern that agrees with the failed one on
	       positions 2..i-1 and matches the new character at position i */
	    while (mime_pattern[++j]) {
		p = mime_pattern[j];
		for(k=2;k<i;k++)              /* assume length(p) > i */
		    if (p[k]!=q[k]) break;
		if (k==i && nkf_toupper(c1)==p[k]) break;
	    }
	    p = mime_pattern[j];
	    if (p) continue;  /* found next one, continue */
	    /* all fails, output from recovery buffer */
	    (*i_ungetc)(c1,f);
	    for(j=0;j<i;j++) {
		(*oconv)(0,r[j]);
	    }
	    return c1;
	}
    }
    /* every pattern ends in "?X?", so p[i-2] is the method letter X */
    mime_decode_mode = p[i-2];

    mime_iconv_back = iconv;
    set_iconv(FALSE, mime_priority_func[j]);
    clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);

    if (mime_decode_mode=='B') {
	mimebuf_f = unbuf_f;
	if (!unbuf_f) {
	    /* do MIME integrity check */
	    return mime_integrity(f,mime_pattern[j]);
	}
    }
    switch_mime_getc();
    mimebuf_f = TRUE;
    return c1;
}
4479 
/*
 * Non-strict recognition of a MIME encoded-word preamble; called after
 * "=?" has been read.
 *
 * The charset name is not checked: up to MAXRECOVER characters are read
 * and copied into the MIME FIFO, looking for "?B?" / "?Q?" (either case)
 * after a run of alphanumerics, '-', '_', SP, CR or LF.
 *
 *  - Recognized: mime_decode_mode is 'B' or 'Q' and `last` is reset to its
 *    value on entry, which discards the buffered preamble.
 *  - Not recognized: mime_decode_mode is set to 1, so the buffered text is
 *    replayed unchanged by mime_getc.
 *
 * The MIME input hooks are installed in both cases.  The return value is
 * the last character read and is only useful for detecting EOF.
 */
static nkf_char
mime_begin(FILE *f)
{
    nkf_char c1 = 0;
    int i,k;

    /* In NONSTRICT mode, only =? is checked. In case of failure, we  */
    /* re-read and convert again from mime_buffer.  */

    /* =? has been checked */
    k = mime_input_state.last;   /* remember where the preamble starts */
    mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
    for(i=2;i<MAXRECOVER;i++) {                   /* start at =? */
	/* We accept any character type even if it is broken by new lines */
	c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	if (c1==LF||c1==SP||c1==CR||
	    c1=='-'||c1=='_'||is_alnum(c1)) continue;
	if (c1=='=') {
	    /* Failed. But this could be another MIME preamble */
	    (*i_ungetc)(c1,f);
	    mime_input_state.last--;
	    break;
	}
	if (c1!='?') break;
	else {
	    /* c1=='?' */
	    c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	    if (!(++i<MAXRECOVER) || c1==EOF) break;
	    if (c1=='b'||c1=='B') {
		mime_decode_mode = 'B';
	    } else if (c1=='q'||c1=='Q') {
		mime_decode_mode = 'Q';
	    } else {
		break;
	    }
	    c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	    if (!(++i<MAXRECOVER) || c1==EOF) break;
	    if (c1!='?') {
		/* method letter not followed by '?': not a preamble */
		mime_decode_mode = FALSE;
	    }
	    break;
	}
    }
    switch_mime_getc();
    if (!mime_decode_mode) {
	/* false MIME preamble, restart from mime_buffer */
	mime_decode_mode = 1;  /* no decode, but read from the mime_buffer */
	/* Since we are in MIME mode until buffer becomes empty,    */
	/* we never go into mime_begin again for a while.           */
	return c1;
    }
    /* discard mime preamble, and goto MIME mode */
    mime_input_state.last = k;
    /* do no MIME integrity check */
    return c1;   /* used only for checking EOF */
}
4536 
4537 #ifdef CHECK_OPTION
4538 static void
no_putc(ARG_UNUSED nkf_char c)4539 no_putc(ARG_UNUSED nkf_char c)
4540 {
4541     ;
4542 }
4543 
4544 static void
debug(const char * str)4545 debug(const char *str)
4546 {
4547     if (debug_f){
4548 	fprintf(stderr, "%s\n", str ? str : "NULL");
4549     }
4550 }
4551 #endif
4552 
4553 static void
set_input_codename(const char * codename)4554 set_input_codename(const char *codename)
4555 {
4556     if (!input_codename) {
4557 	input_codename = codename;
4558     } else if (strcmp(codename, input_codename) != 0) {
4559 	input_codename = "";
4560     }
4561 }
4562 
4563 static const char*
get_guessed_code(void)4564 get_guessed_code(void)
4565 {
4566     if (input_codename && !*input_codename) {
4567 	input_codename = "BINARY";
4568     } else {
4569 	struct input_code *p = find_inputcode_byfunc(iconv);
4570 	if (!input_codename) {
4571 	    input_codename = "ASCII";
4572 	} else if (strcmp(input_codename, "Shift_JIS") == 0) {
4573 	    if (p->score & (SCORE_DEPEND|SCORE_CP932))
4574 		input_codename = "CP932";
4575 	} else if (strcmp(input_codename, "EUC-JP") == 0) {
4576 	    if (p->score & SCORE_X0213)
4577 		input_codename = "EUC-JIS-2004";
4578 	    else if (p->score & (SCORE_X0212))
4579 		input_codename = "EUCJP-MS";
4580 	    else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4581 		input_codename = "CP51932";
4582 	} else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4583 	    if (p->score & (SCORE_KANA))
4584 		input_codename = "CP50221";
4585 	    else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4586 		input_codename = "CP50220";
4587 	}
4588     }
4589     return input_codename;
4590 }
4591 
4592 #if !defined(PERL_XS) && !defined(WIN32DLL)
4593 static void
print_guessed_code(char * filename)4594 print_guessed_code(char *filename)
4595 {
4596     if (filename != NULL) printf("%s: ", filename);
4597     if (input_codename && !*input_codename) {
4598 	printf("BINARY\n");
4599     } else {
4600 	input_codename = get_guessed_code();
4601 	if (guess_f == 1) {
4602 	    printf("%s\n", input_codename);
4603 	} else {
4604 	    printf("%s%s%s%s\n",
4605 		   input_codename,
4606 		   iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4607 		   input_endian == ENDIAN_LITTLE ? " LE" :
4608 		   input_endian == ENDIAN_BIG ? " BE" :
4609 		   "[BUG]",
4610 		   input_bom_f ? " (BOM)" : "",
4611 		   input_eol == CR   ? " (CR)" :
4612 		   input_eol == LF   ? " (LF)" :
4613 		   input_eol == CRLF ? " (CRLF)" :
4614 		   input_eol == EOF  ? " (MIXED NL)" :
4615 		   "");
4616 	}
4617     }
4618 }
4619 #endif /*WIN32DLL*/
4620 
4621 #ifdef INPUT_OPTION
4622 
4623 static nkf_char
hex_getc(nkf_char ch,FILE * f,nkf_char (* g)(FILE * f),nkf_char (* u)(nkf_char c,FILE * f))4624 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4625 {
4626     nkf_char c1, c2, c3;
4627     c1 = (*g)(f);
4628     if (c1 != ch){
4629 	return c1;
4630     }
4631     c2 = (*g)(f);
4632     if (!nkf_isxdigit(c2)){
4633 	(*u)(c2, f);
4634 	return c1;
4635     }
4636     c3 = (*g)(f);
4637     if (!nkf_isxdigit(c3)){
4638 	(*u)(c2, f);
4639 	(*u)(c3, f);
4640 	return c1;
4641     }
4642     return (hex2bin(c2) << 4) | hex2bin(c3);
4643 }
4644 
4645 static nkf_char
cap_getc(FILE * f)4646 cap_getc(FILE *f)
4647 {
4648     return hex_getc(':', f, i_cgetc, i_cungetc);
4649 }
4650 
4651 static nkf_char
cap_ungetc(nkf_char c,FILE * f)4652 cap_ungetc(nkf_char c, FILE *f)
4653 {
4654     return (*i_cungetc)(c, f);
4655 }
4656 
4657 static nkf_char
url_getc(FILE * f)4658 url_getc(FILE *f)
4659 {
4660     return hex_getc('%', f, i_ugetc, i_uungetc);
4661 }
4662 
4663 static nkf_char
url_ungetc(nkf_char c,FILE * f)4664 url_ungetc(nkf_char c, FILE *f)
4665 {
4666     return (*i_uungetc)(c, f);
4667 }
4668 #endif
4669 
4670 #ifdef NUMCHAR_OPTION
4671 static nkf_char
numchar_getc(FILE * f)4672 numchar_getc(FILE *f)
4673 {
4674     nkf_char (*g)(FILE *) = i_ngetc;
4675     nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4676     int i = 0, j;
4677     nkf_char buf[12];
4678     nkf_char c = -1;
4679 
4680     buf[i] = (*g)(f);
4681     if (buf[i] == '&'){
4682 	buf[++i] = (*g)(f);
4683 	if (buf[i] == '#'){
4684 	    c = 0;
4685 	    buf[++i] = (*g)(f);
4686 	    if (buf[i] == 'x' || buf[i] == 'X'){
4687 		for (j = 0; j < 7; j++){
4688 		    buf[++i] = (*g)(f);
4689 		    if (!nkf_isxdigit(buf[i])){
4690 			if (buf[i] != ';'){
4691 			    c = -1;
4692 			}
4693 			break;
4694 		    }
4695 		    c <<= 4;
4696 		    c |= hex2bin(buf[i]);
4697 		}
4698 	    }else{
4699 		for (j = 0; j < 8; j++){
4700 		    if (j){
4701 			buf[++i] = (*g)(f);
4702 		    }
4703 		    if (!nkf_isdigit(buf[i])){
4704 			if (buf[i] != ';'){
4705 			    c = -1;
4706 			}
4707 			break;
4708 		    }
4709 		    c *= 10;
4710 		    c += hex2bin(buf[i]);
4711 		}
4712 	    }
4713 	}
4714     }
4715     if (c != -1){
4716 	return nkf_char_unicode_new(c);
4717     }
4718     while (i > 0){
4719 	(*u)(buf[i], f);
4720 	--i;
4721     }
4722     return buf[0];
4723 }
4724 
4725 static nkf_char
numchar_ungetc(nkf_char c,FILE * f)4726 numchar_ungetc(nkf_char c, FILE *f)
4727 {
4728     return (*i_nungetc)(c, f);
4729 }
4730 #endif
4731 
4732 #ifdef UNICODE_NORMALIZATION
4733 
/*
 * getc hook that replaces byte sequences listed in normalization_table[]
 * (.nfd form) by their .nfc form.
 *
 * One byte is read through i_nfc_getc.  EOF, values above 0xFF and bytes
 * of the form 10xxxxxx are returned untouched.  Otherwise the byte is put
 * into nkf_state->nfc_buf and the table is binary-searched on its .nfd
 * key, reading further input bytes on demand while a key is being
 * compared.  On a hit the buffer content is replaced by the entry's .nfc
 * bytes.  Finally all buffered bytes but the first are pushed back through
 * i_nfc_ungetc (last byte first) and the first byte is returned.
 *
 * NOTE(review): the binary search presumes normalization_table[] is sorted
 * by its .nfd bytes -- confirm against the table definition.
 */
static nkf_char
nfc_getc(FILE *f)
{
    nkf_char (*g)(FILE *f) = i_nfc_getc;
    nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
    nkf_buf_t *buf = nkf_state->nfc_buf;
    const unsigned char *array;
    int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
    nkf_char c = (*g)(f);

    if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;

    nkf_buf_push(buf, c);
    /* The outer loop runs again after a replacement (lower <= upper still
       holds then, and the bounds are not reset); it ends once the search
       range is empty. */
    do {
	while (lower <= upper) {
	    int mid = (lower+upper) / 2;
	    int len;
	    array = normalization_table[mid].nfd;
	    /* compare the key byte by byte; len == 0 after the loop means
	       "no match", len > 0 means the whole key matched */
	    for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
		if (len >= nkf_buf_length(buf)) {
		    /* key is longer than what is buffered: read one more */
		    c = (*g)(f);
		    if (c == EOF) {
			len = 0;
			lower = 1, upper = 0;   /* empty range: stop searching */
			break;
		    }
		    nkf_buf_push(buf, c);
		}
		if (array[len] != nkf_buf_at(buf, len)) {
		    if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
		    else  upper = mid - 1;
		    len = 0;
		    break;
		}
	    }
	    if (len > 0) {
		/* hit: replace the buffered bytes by the composed form */
		int i;
		array = normalization_table[mid].nfc;
		nkf_buf_clear(buf);
		for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
		    nkf_buf_push(buf, array[i]);
		break;
	    }
	}
    } while (lower <= upper);

    /* keep only the first byte; the rest is re-read by later calls */
    while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
    c = nkf_buf_pop(buf);

    return c;
}
4785 
4786 static nkf_char
nfc_ungetc(nkf_char c,FILE * f)4787 nfc_ungetc(nkf_char c, FILE *f)
4788 {
4789     return (*i_nfc_ungetc)(c, f);
4790 }
4791 #endif /* UNICODE_NORMALIZATION */
4792 
4793 
4794 static nkf_char
base64decode(nkf_char c)4795 base64decode(nkf_char c)
4796 {
4797     int             i;
4798     if (c > '@') {
4799 	if (c < '[') {
4800 	    i = c - 'A';                        /* A..Z 0-25 */
4801 	} else if (c == '_') {
4802 	    i = '?'         /* 63 */ ;          /* _  63 */
4803 	} else {
4804 	    i = c - 'G'     /* - 'a' + 26 */ ;  /* a..z 26-51 */
4805 	}
4806     } else if (c > '/') {
4807 	i = c - '0' + '4'   /* - '0' + 52 */ ;  /* 0..9 52-61 */
4808     } else if (c == '+' || c == '-') {
4809 	i = '>'             /* 62 */ ;          /* + and -  62 */
4810     } else {
4811 	i = '?'             /* 63 */ ;          /* / 63 */
4812     }
4813     return (i);
4814 }
4815 
/*
 * getc hook active while a MIME encoded-word is being read (installed by
 * switch_mime_getc).  Returns the next input character with Q or Base64
 * encoding removed.
 *
 * Order of processing:
 *  1. Bytes waiting in the MIME FIFO (top != last) are returned first.
 *  2. If mime_decode_mode is 1 or FALSE the MIME hooks are removed and the
 *     character comes from the restored i_getc.
 *  3. mime_decode_mode 'Q': quoted-printable style decoding.
 *  4. mime_decode_mode 'B': four Base64 characters are decoded into up to
 *     three bytes, which are appended to the FIFO; the first is returned.
 *
 * "?=" ends the encoded-word in both modes.  White space following it is
 * collected; it is dropped when the next character is '=' and pushed back
 * otherwise.  NOTE(review): dropping it presumably joins adjacent
 * encoded-words as RFC 2047 requires -- confirm.
 */
static nkf_char
mime_getc(FILE *f)
{
    nkf_char c1, c2, c3, c4, cc;
    nkf_char t1, t2, t3, t4, mode, exit_mode;
    nkf_char lwsp_count;
    char *lwsp_buf;
    char *lwsp_buf_new;
    nkf_char lwsp_size = 128;

    if (mime_input_state.top != mime_input_state.last) {  /* Something is in FIFO */
	return  mime_input_buf(mime_input_state.top++);
    }
    if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
	/* nothing left to replay or decode: leave MIME mode */
	mime_decode_mode=FALSE;
	unswitch_mime_getc();
	return (*i_getc)(f);
    }

    /* mode to fall back to when decoding stops; with FIXED_MIME the
       current mode is kept */
    if (mimebuf_f == FIXED_MIME)
	exit_mode = mime_decode_mode;
    else
	exit_mode = FALSE;
    if (mime_decode_mode == 'Q') {
	if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
      restart_mime_q:
	/* '_' stands for a space inside a Q encoded-word */
	if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
	if (c1<=SP || DEL<=c1) {
	    mime_decode_mode = exit_mode; /* prepare for quit */
	    return c1;
	}
	/* ordinary character: only '=' (and '?' outside FIXED_MIME) need
	   further inspection */
	if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
	    return c1;
	}

	mime_decode_mode = exit_mode; /* prepare for quit */
	if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
	if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
	    /* end Q encoding */
	    input_mode = exit_mode;
	    lwsp_count = 0;
	    lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
	    /* collect the white space that follows the encoded-word */
	    while ((c1=(*i_getc)(f))!=EOF) {
		switch (c1) {
		case LF:
		case CR:
		    if (c1==LF) {
			/* a line break followed by a blank is turned into
			   a single SP and scanning goes on */
			if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			    i_ungetc(SP,f);
			    continue;
			} else {
			    i_ungetc(c1,f);
			}
			c1 = LF;
		    } else {
			if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
			    if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
				i_ungetc(SP,f);
				continue;
			    } else {
				i_ungetc(c1,f);
			    }
			    i_ungetc(LF,f);
			} else {
			    i_ungetc(c1,f);
			}
			c1 = CR;
		    }
		    break;
		case SP:
		case TAB:
		    lwsp_buf[lwsp_count] = (unsigned char)c1;
		    /* grow the buffer once the index has passed lwsp_size;
		       the 5 spare bytes keep the write above in bounds */
		    if (lwsp_count++>lwsp_size){
			lwsp_size <<= 1;
			lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
			lwsp_buf = lwsp_buf_new;
		    }
		    continue;
		}
		break;
	    }
	    /* unless the next character is '=', give the white space back:
	       push c1 and all but the first blank, return the first blank */
	    if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
		i_ungetc(c1,f);
		for(lwsp_count--;lwsp_count>0;lwsp_count--)
		    i_ungetc(lwsp_buf[lwsp_count],f);
		c1 = lwsp_buf[0];
	    }
	    nkf_xfree(lwsp_buf);
	    return c1;
	}
	if (c1=='='&&c2<SP) { /* this is soft wrap */
	    while((c1 =  (*i_mgetc)(f)) <=SP) {
		if (c1 == EOF) return (EOF);
	    }
	    mime_decode_mode = 'Q'; /* still in MIME */
	    goto restart_mime_q;
	}
	if (c1=='?') {
	    /* '?' not followed by '=': literal '?' */
	    mime_decode_mode = 'Q'; /* still in MIME */
	    (*i_mungetc)(c2,f);
	    return c1;
	}
	/* "=XY": two hex digits */
	if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
	if (c2<=SP) return c2;
	mime_decode_mode = 'Q'; /* still in MIME */
	return ((hex2bin(c2)<<4) + hex2bin(c3));
    }

    if (mime_decode_mode != 'B') {
	mime_decode_mode = FALSE;
	return (*i_mgetc)(f);
    }


    /* Base64 encoding */
    /*
       MIME allows line break in the middle of
       Base64, but we are very pessimistic in decoding
       in unbuf mode because MIME encoded code may broken by
       less or editor's control sequence (such as ESC-[-K) in unbuffered
       mode. ignore incomplete MIME.
     */
    mode = mime_decode_mode;
    mime_decode_mode = exit_mode;  /* prepare for quit */

    /* c1: skip white space and control characters */
    while ((c1 = (*i_mgetc)(f))<=SP) {
	if (c1==EOF)
	    return (EOF);
    }
  mime_c2_retry:
    /* c2..c4: white space is skipped unless STRICT_MIME, where it ends
       decoding and is returned */
    if ((c2 = (*i_mgetc)(f))<=SP) {
	if (c2==EOF)
	    return (EOF);
	if (mime_f != STRICT_MIME) goto mime_c2_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c2;
    }
    if ((c1 == '?') && (c2 == '=')) {
	/* end of the Base64 encoded-word */
	input_mode = ASCII;
	lwsp_count = 0;
	lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
	/* collect the white space that follows the encoded-word */
	while ((c1=(*i_getc)(f))!=EOF) {
	    switch (c1) {
	    case LF:
	    case CR:
		if (c1==LF) {
		    if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			i_ungetc(SP,f);
			continue;
		    } else {
			i_ungetc(c1,f);
		    }
		    c1 = LF;
		} else {
		    /* NOTE(review): unlike the Q branch, the character
		       after CR is not compared with LF here and a second
		       character is read unconditionally -- confirm this
		       is intended */
		    if ((c1=(*i_getc)(f))!=EOF) {
			if (c1==SP) {
			    i_ungetc(SP,f);
			    continue;
			} else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			    i_ungetc(SP,f);
			    continue;
			} else {
			    i_ungetc(c1,f);
			}
			i_ungetc(LF,f);
		    } else {
			i_ungetc(c1,f);
		    }
		    c1 = CR;
		}
		break;
	    case SP:
	    case TAB:
		lwsp_buf[lwsp_count] = (unsigned char)c1;
		if (lwsp_count++>lwsp_size){
		    lwsp_size <<= 1;
		    lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
		    lwsp_buf = lwsp_buf_new;
		}
		continue;
	    }
	    break;
	}
	/* unless the next character is '=', give the white space back */
	if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
	    i_ungetc(c1,f);
	    for(lwsp_count--;lwsp_count>0;lwsp_count--)
		i_ungetc(lwsp_buf[lwsp_count],f);
	    c1 = lwsp_buf[0];
	}
	nkf_xfree(lwsp_buf);
	return c1;
    }
  mime_c3_retry:
    if ((c3 = (*i_mgetc)(f))<=SP) {
	if (c3==EOF)
	    return (EOF);
	if (mime_f != STRICT_MIME) goto mime_c3_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c3;
    }
  mime_c4_retry:
    if ((c4 = (*i_mgetc)(f))<=SP) {
	if (c4==EOF)
	    return (EOF);
	if (mime_f != STRICT_MIME) goto mime_c4_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c4;
    }

    mime_decode_mode = mode; /* still in MIME sigh... */

    /* BASE 64 decoding */

    /* four 6-bit values -> up to three bytes, appended to the FIFO;
       '=' padding in c3/c4 shortens the output */
    t1 = 0x3f & base64decode(c1);
    t2 = 0x3f & base64decode(c2);
    t3 = 0x3f & base64decode(c3);
    t4 = 0x3f & base64decode(c4);
    cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
    if (c2 != '=') {
	mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
	if (c3 != '=') {
	    mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	    cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
	    if (c4 != '=')
		mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	}
    } else {
	/* no data byte could be formed */
	return c1;
    }
    return  mime_input_buf(mime_input_state.top++);
}
5048 
/* Base64 alphabet: index = 6-bit value, element = output character. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

#define MIMEOUT_BUF_LENGTH 74
/* Characters held back by the MIME output stage; `count` is the number of
   valid bytes at the start of `buf`. */
static struct {
    unsigned char buf[MIMEOUT_BUF_LENGTH+1];
    int count;
} mimeout_state;
5057 
5058 /*nkf_char mime_lastchar2, mime_lastchar1;*/
5059 
5060 static void
open_mime(nkf_char mode)5061 open_mime(nkf_char mode)
5062 {
5063     const unsigned char *p;
5064     int i;
5065     int j;
5066     p  = mime_pattern[0];
5067     for(i=0;mime_pattern[i];i++) {
5068 	if (mode == mime_encode[i]) {
5069 	    p = mime_pattern[i];
5070 	    break;
5071 	}
5072     }
5073     mimeout_mode = mime_encode_method[i];
5074     i = 0;
5075     if (base64_count>45) {
5076 	if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
5077 	    (*o_mputc)(mimeout_state.buf[i]);
5078 	    i++;
5079 	}
5080 	put_newline(o_mputc);
5081 	(*o_mputc)(SP);
5082 	base64_count = 1;
5083 	if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
5084 	    i++;
5085 	}
5086     }
5087     for (;i<mimeout_state.count;i++) {
5088 	if (nkf_isspace(mimeout_state.buf[i])) {
5089 	    (*o_mputc)(mimeout_state.buf[i]);
5090 	    base64_count ++;
5091 	} else {
5092 	    break;
5093 	}
5094     }
5095     while(*p) {
5096 	(*o_mputc)(*p++);
5097 	base64_count ++;
5098     }
5099     j = mimeout_state.count;
5100     mimeout_state.count = 0;
5101     for (;i<j;i++) {
5102 	mime_putc(mimeout_state.buf[i]);
5103     }
5104 }
5105 
5106 static void
mime_prechar(nkf_char c2,nkf_char c1)5107 mime_prechar(nkf_char c2, nkf_char c1)
5108 {
5109     if (mimeout_mode > 0){
5110 	if (c2 == EOF){
5111 	    if (base64_count + mimeout_state.count/3*4> 73){
5112 		(*o_base64conv)(EOF,0);
5113 		oconv_newline(o_base64conv);
5114 		(*o_base64conv)(0,SP);
5115 		base64_count = 1;
5116 	    }
5117 	} else {
5118 	    if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
5119 		(*o_base64conv)(EOF,0);
5120 		oconv_newline(o_base64conv);
5121 		(*o_base64conv)(0,SP);
5122 		base64_count = 1;
5123 		mimeout_mode = -1;
5124 	    }
5125 	}
5126     } else if (c2) {
5127 	if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
5128 	    mimeout_mode =  (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
5129 	    open_mime(output_mode);
5130 	    (*o_base64conv)(EOF,0);
5131 	    oconv_newline(o_base64conv);
5132 	    (*o_base64conv)(0,SP);
5133 	    base64_count = 1;
5134 	    mimeout_mode = -1;
5135 	}
5136     }
5137 }
5138 
5139 static void
close_mime(void)5140 close_mime(void)
5141 {
5142     (*o_mputc)('?');
5143     (*o_mputc)('=');
5144     base64_count += 2;
5145     mimeout_mode = 0;
5146 }
5147 
5148 static void
eof_mime(void)5149 eof_mime(void)
5150 {
5151     switch(mimeout_mode) {
5152     case 'Q':
5153     case 'B':
5154 	break;
5155     case 2:
5156 	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
5157 	(*o_mputc)('=');
5158 	(*o_mputc)('=');
5159 	base64_count += 3;
5160 	break;
5161     case 1:
5162 	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
5163 	(*o_mputc)('=');
5164 	base64_count += 2;
5165 	break;
5166     }
5167     if (mimeout_mode > 0) {
5168 	if (mimeout_f!=FIXED_MIME) {
5169 	    close_mime();
5170 	} else if (mimeout_mode != 'Q')
5171 	    mimeout_mode = 'B';
5172     }
5173 }
5174 
5175 static void
mimeout_addchar(nkf_char c)5176 mimeout_addchar(nkf_char c)
5177 {
5178     switch(mimeout_mode) {
5179     case 'Q':
5180 	if (c==CR||c==LF) {
5181 	    (*o_mputc)(c);
5182 	    base64_count = 0;
5183 	} else if(!nkf_isalnum(c)) {
5184 	    (*o_mputc)('=');
5185 	    (*o_mputc)(bin2hex(((c>>4)&0xf)));
5186 	    (*o_mputc)(bin2hex((c&0xf)));
5187 	    base64_count += 3;
5188 	} else {
5189 	    (*o_mputc)(c);
5190 	    base64_count++;
5191 	}
5192 	break;
5193     case 'B':
5194 	nkf_state->mimeout_state=c;
5195 	(*o_mputc)(basis_64[c>>2]);
5196 	mimeout_mode=2;
5197 	base64_count ++;
5198 	break;
5199     case 2:
5200 	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5201 	nkf_state->mimeout_state=c;
5202 	mimeout_mode=1;
5203 	base64_count ++;
5204 	break;
5205     case 1:
5206 	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5207 	(*o_mputc)(basis_64[c & 0x3F]);
5208 	mimeout_mode='B';
5209 	base64_count += 2;
5210 	break;
5211     default:
5212 	(*o_mputc)(c);
5213 	base64_count++;
5214 	break;
5215     }
5216 }
5217 
5218 static void
mime_putc(nkf_char c)5219 mime_putc(nkf_char c)
5220 {
5221     int i, j;
5222     nkf_char lastchar;
5223 
5224     if (mimeout_f == FIXED_MIME){
5225 	if (mimeout_mode == 'Q'){
5226 	    if (base64_count > 71){
5227 		if (c!=CR && c!=LF) {
5228 		    (*o_mputc)('=');
5229 		    put_newline(o_mputc);
5230 		}
5231 		base64_count = 0;
5232 	    }
5233 	}else{
5234 	    if (base64_count > 71){
5235 		eof_mime();
5236 		put_newline(o_mputc);
5237 		base64_count = 0;
5238 	    }
5239 	    if (c == EOF) { /* c==EOF */
5240 		eof_mime();
5241 	    }
5242 	}
5243 	if (c != EOF) { /* c==EOF */
5244 	    mimeout_addchar(c);
5245 	}
5246 	return;
5247     }
5248 
5249     /* mimeout_f != FIXED_MIME */
5250 
5251     if (c == EOF) { /* c==EOF */
5252 	if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5253 	j = mimeout_state.count;
5254 	mimeout_state.count = 0;
5255 	i = 0;
5256 	if (mimeout_mode > 0) {
5257 	    if (!nkf_isblank(mimeout_state.buf[j-1])) {
5258 		for (;i<j;i++) {
5259 		    if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5260 			break;
5261 		    }
5262 		    mimeout_addchar(mimeout_state.buf[i]);
5263 		}
5264 		eof_mime();
5265 		for (;i<j;i++) {
5266 		    mimeout_addchar(mimeout_state.buf[i]);
5267 		}
5268 	    } else {
5269 		for (;i<j;i++) {
5270 		    mimeout_addchar(mimeout_state.buf[i]);
5271 		}
5272 		eof_mime();
5273 	    }
5274 	} else {
5275 	    for (;i<j;i++) {
5276 		mimeout_addchar(mimeout_state.buf[i]);
5277 	    }
5278 	}
5279 	return;
5280     }
5281 
5282     if (mimeout_state.count > 0){
5283 	lastchar = mimeout_state.buf[mimeout_state.count - 1];
5284     }else{
5285 	lastchar = -1;
5286     }
5287 
5288     if (mimeout_mode=='Q') {
5289 	if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
5290 	    if (c == CR || c == LF) {
5291 		close_mime();
5292 		(*o_mputc)(c);
5293 		base64_count = 0;
5294 		return;
5295 	    } else if (c <= SP) {
5296 		close_mime();
5297 		if (base64_count > 70) {
5298 		    put_newline(o_mputc);
5299 		    base64_count = 0;
5300 		}
5301 		if (!nkf_isblank(c)) {
5302 		    (*o_mputc)(SP);
5303 		    base64_count++;
5304 		}
5305 	    } else {
5306 		if (base64_count > 70) {
5307 		    close_mime();
5308 		    put_newline(o_mputc);
5309 		    (*o_mputc)(SP);
5310 		    base64_count = 1;
5311 		    open_mime(output_mode);
5312 		}
5313 		if (!nkf_noescape_mime(c)) {
5314 		    mimeout_addchar(c);
5315 		    return;
5316 		}
5317 	    }
5318 	    if (c != 0x1B) {
5319 		(*o_mputc)(c);
5320 		base64_count++;
5321 		return;
5322 	    }
5323 	}
5324     }
5325 
5326     if (mimeout_mode <= 0) {
5327 	if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5328 		    output_mode == UTF_8)) {
5329 	    if (nkf_isspace(c)) {
5330 		int flag = 0;
5331 		if (mimeout_mode == -1) {
5332 		    flag = 1;
5333 		}
5334 		if (c==CR || c==LF) {
5335 		    if (flag) {
5336 			open_mime(output_mode);
5337 			output_mode = 0;
5338 		    } else {
5339 			base64_count = 0;
5340 		    }
5341 		}
5342 		for (i=0;i<mimeout_state.count;i++) {
5343 		    (*o_mputc)(mimeout_state.buf[i]);
5344 		    if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
5345 			base64_count = 0;
5346 		    }else{
5347 			base64_count++;
5348 		    }
5349 		}
5350 		if (flag) {
5351 		    eof_mime();
5352 		    base64_count = 0;
5353 		    mimeout_mode = 0;
5354 		}
5355 		mimeout_state.buf[0] = (char)c;
5356 		mimeout_state.count = 1;
5357 	    }else{
5358 		if (base64_count > 1
5359 		    && base64_count + mimeout_state.count > 76
5360 		    && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
5361 		    static const char *str = "boundary=\"";
5362 		    static int len = 10;
5363 		    i = 0;
5364 
5365 		    for (; i < mimeout_state.count - len; ++i) {
5366 			if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
5367 			    i += len - 2;
5368 			    break;
5369 			}
5370 		    }
5371 
5372 		    if (i == 0 || i == mimeout_state.count - len) {
5373 			put_newline(o_mputc);
5374 			base64_count = 0;
5375 			if (!nkf_isspace(mimeout_state.buf[0])){
5376 			    (*o_mputc)(SP);
5377 			    base64_count++;
5378 			}
5379 		    }
5380 		    else {
5381 			int j;
5382 			for (j = 0; j <= i; ++j) {
5383 			    (*o_mputc)(mimeout_state.buf[j]);
5384 			}
5385 			put_newline(o_mputc);
5386 			base64_count = 1;
5387 			for (; j <= mimeout_state.count; ++j) {
5388 			    mimeout_state.buf[j - i] = mimeout_state.buf[j];
5389 			}
5390 			mimeout_state.count -= i;
5391 		    }
5392 		}
5393 		mimeout_state.buf[mimeout_state.count++] = (char)c;
5394 		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5395 		    open_mime(output_mode);
5396 		}
5397 	    }
5398 	    return;
5399 	}else{
5400 	    if (lastchar==CR || lastchar == LF){
5401 		for (i=0;i<mimeout_state.count;i++) {
5402 		    (*o_mputc)(mimeout_state.buf[i]);
5403 		}
5404 		base64_count = 0;
5405 		mimeout_state.count = 0;
5406 	    }
5407 	    if (lastchar==SP) {
5408 		for (i=0;i<mimeout_state.count-1;i++) {
5409 		    (*o_mputc)(mimeout_state.buf[i]);
5410 		    base64_count++;
5411 		}
5412 		mimeout_state.buf[0] = SP;
5413 		mimeout_state.count = 1;
5414 	    }
5415 	    open_mime(output_mode);
5416 	}
5417     }else{
5418 	/* mimeout_mode == 'B', 1, 2 */
5419 	if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5420 		    output_mode == UTF_8)) {
5421 	    if (lastchar == CR || lastchar == LF){
5422 		if (nkf_isblank(c)) {
5423 		    for (i=0;i<mimeout_state.count;i++) {
5424 			mimeout_addchar(mimeout_state.buf[i]);
5425 		    }
5426 		    mimeout_state.count = 0;
5427 		} else {
5428 		    eof_mime();
5429 		    for (i=0;i<mimeout_state.count;i++) {
5430 			(*o_mputc)(mimeout_state.buf[i]);
5431 		    }
5432 		    base64_count = 0;
5433 		    mimeout_state.count = 0;
5434 		}
5435 		mimeout_state.buf[mimeout_state.count++] = (char)c;
5436 		return;
5437 	    }
5438 	    if (nkf_isspace(c)) {
5439 		for (i=0;i<mimeout_state.count;i++) {
5440 		    if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5441 			eof_mime();
5442 			for (i=0;i<mimeout_state.count;i++) {
5443 			    (*o_mputc)(mimeout_state.buf[i]);
5444 			    base64_count++;
5445 			}
5446 			mimeout_state.count = 0;
5447 		    }
5448 		}
5449 		mimeout_state.buf[mimeout_state.count++] = (char)c;
5450 		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5451 		    eof_mime();
5452 		    for (j=0;j<mimeout_state.count;j++) {
5453 			(*o_mputc)(mimeout_state.buf[j]);
5454 			base64_count++;
5455 		    }
5456 		    mimeout_state.count = 0;
5457 		}
5458 		return;
5459 	    }
5460 	    if (mimeout_state.count>0 && SP<c && c!='=') {
5461 		mimeout_state.buf[mimeout_state.count++] = (char)c;
5462 		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5463 		    j = mimeout_state.count;
5464 		    mimeout_state.count = 0;
5465 		    for (i=0;i<j;i++) {
5466 			mimeout_addchar(mimeout_state.buf[i]);
5467 		    }
5468 		}
5469 		return;
5470 	    }
5471 	}
5472     }
5473     if (mimeout_state.count>0) {
5474 	j = mimeout_state.count;
5475 	mimeout_state.count = 0;
5476 	for (i=0;i<j;i++) {
5477 	    if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5478 		break;
5479 	    mimeout_addchar(mimeout_state.buf[i]);
5480 	}
5481 	if (i<j) {
5482 	    eof_mime();
5483 	    base64_count=0;
5484 	    for (;i<j;i++) {
5485 		(*o_mputc)(mimeout_state.buf[i]);
5486 	    }
5487 	    open_mime(output_mode);
5488 	}
5489     }
5490     mimeout_addchar(c);
5491 }
5492 
5493 static void
base64_conv(nkf_char c2,nkf_char c1)5494 base64_conv(nkf_char c2, nkf_char c1)
5495 {
5496     mime_prechar(c2, c1);
5497     (*o_base64conv)(c2,c1);
5498 }
5499 
5500 #ifdef HAVE_ICONV_H
/*
 * One iconv(3) based converter: the conversion descriptor together with
 * the input and output staging buffers and their allocated sizes.
 */
typedef struct nkf_iconv_t {
    iconv_t cd;			/* descriptor returned by iconv_open() */
    char *input_buffer;		/* bytes waiting to be converted */
    size_t input_buffer_size;	/* allocated size of input_buffer */
    char *output_buffer;	/* converted bytes waiting to be written */
    size_t output_buffer_size;	/* allocated size of output_buffer */
} nkf_iconv_t;
5508 
5509 static nkf_iconv_t
nkf_iconv_new(char * tocode,char * fromcode)5510 nkf_iconv_new(char *tocode, char *fromcode)
5511 {
5512     nkf_iconv_t converter;
5513 
5514     converter->input_buffer_size = IOBUF_SIZE;
5515     converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5516     converter->output_buffer_size = IOBUF_SIZE * 2;
5517     converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5518     converter->cd = iconv_open(tocode, fromcode);
5519     if (converter->cd == (iconv_t)-1)
5520     {
5521 	switch (errno) {
5522 	case EINVAL:
5523 	    perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5524 	    return -1;
5525 	default:
5526 	    perror("can't iconv_open");
5527 	}
5528     }
5529 }
5530 
5531 static size_t
nkf_iconv_convert(nkf_iconv_t * converter,FILE * input)5532 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5533 {
5534     size_t invalid = (size_t)0;
5535     char *input_buffer = converter->input_buffer;
5536     size_t input_length = (size_t)0;
5537     char *output_buffer = converter->output_buffer;
5538     size_t output_length = converter->output_buffer_size;
5539     int c;
5540 
5541     do {
5542 	if (c != EOF) {
5543 	    while ((c = (*i_getc)(f)) != EOF) {
5544 		input_buffer[input_length++] = c;
5545 		if (input_length < converter->input_buffer_size) break;
5546 	    }
5547 	}
5548 
5549 	size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5550 	while (output_length-- > 0) {
5551 	    (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5552 	}
5553 	if (ret == (size_t) - 1) {
5554 	    switch (errno) {
5555 	    case EINVAL:
5556 		if (input_buffer != converter->input_buffer)
5557 		    memmove(converter->input_buffer, input_buffer, input_length);
5558 		break;
5559 	    case E2BIG:
5560 		converter->output_buffer_size *= 2;
5561 		output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5562 		if (output_buffer == NULL) {
5563 		    perror("can't realloc");
5564 		    return -1;
5565 		}
5566 		converter->output_buffer = output_buffer;
5567 		break;
5568 	    default:
5569 		perror("can't iconv");
5570 		return -1;
5571 	    }
5572 	} else {
5573 	    invalid += ret;
5574 	}
5575     } while (1);
5576 
5577     return invalid;
5578 }
5579 
5580 
5581 static void
nkf_iconv_close(nkf_iconv_t * convert)5582 nkf_iconv_close(nkf_iconv_t *convert)
5583 {
5584     nkf_xfree(converter->inbuf);
5585     nkf_xfree(converter->outbuf);
5586     iconv_close(converter->cd);
5587 }
5588 #endif
5589 
5590 
5591 static void
reinit(void)5592 reinit(void)
5593 {
5594     {
5595 	struct input_code *p = input_code_list;
5596 	while (p->name){
5597 	    status_reinit(p++);
5598 	}
5599     }
5600     unbuf_f = FALSE;
5601     estab_f = FALSE;
5602     nop_f = FALSE;
5603     binmode_f = TRUE;
5604     rot_f = FALSE;
5605     hira_f = FALSE;
5606     alpha_f = FALSE;
5607     mime_f = MIME_DECODE_DEFAULT;
5608     mime_decode_f = FALSE;
5609     mimebuf_f = FALSE;
5610     broken_f = FALSE;
5611     iso8859_f = FALSE;
5612     mimeout_f = FALSE;
5613     x0201_f = NKF_UNSPECIFIED;
5614     iso2022jp_f = FALSE;
5615 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5616     ms_ucs_map_f = UCS_MAP_ASCII;
5617 #endif
5618 #ifdef UTF8_INPUT_ENABLE
5619     no_cp932ext_f = FALSE;
5620     no_best_fit_chars_f = FALSE;
5621     encode_fallback = NULL;
5622     unicode_subchar  = '?';
5623     input_endian = ENDIAN_BIG;
5624 #endif
5625 #ifdef UTF8_OUTPUT_ENABLE
5626     output_bom_f = FALSE;
5627     output_endian = ENDIAN_BIG;
5628 #endif
5629 #ifdef UNICODE_NORMALIZATION
5630     nfc_f = FALSE;
5631 #endif
5632 #ifdef INPUT_OPTION
5633     cap_f = FALSE;
5634     url_f = FALSE;
5635     numchar_f = FALSE;
5636 #endif
5637 #ifdef CHECK_OPTION
5638     noout_f = FALSE;
5639     debug_f = FALSE;
5640 #endif
5641     guess_f = 0;
5642 #ifdef EXEC_IO
5643     exec_f = 0;
5644 #endif
5645 #ifdef SHIFTJIS_CP932
5646     cp51932_f = TRUE;
5647     cp932inv_f = TRUE;
5648 #endif
5649 #ifdef X0212_ENABLE
5650     x0212_f = FALSE;
5651     x0213_f = FALSE;
5652 #endif
5653     {
5654 	int i;
5655 	for (i = 0; i < 256; i++){
5656 	    prefix_table[i] = 0;
5657 	}
5658     }
5659     hold_count = 0;
5660     mimeout_state.count = 0;
5661     mimeout_mode = 0;
5662     base64_count = 0;
5663     f_line = 0;
5664     f_prev = 0;
5665     fold_preserve_f = FALSE;
5666     fold_f = FALSE;
5667     fold_len = 0;
5668     kanji_intro = DEFAULT_J;
5669     ascii_intro = DEFAULT_R;
5670     fold_margin  = FOLD_MARGIN;
5671     o_zconv = no_connection;
5672     o_fconv = no_connection;
5673     o_eol_conv = no_connection;
5674     o_rot_conv = no_connection;
5675     o_hira_conv = no_connection;
5676     o_base64conv = no_connection;
5677     o_iso2022jp_check_conv = no_connection;
5678     o_putc = std_putc;
5679     i_getc = std_getc;
5680     i_ungetc = std_ungetc;
5681     i_bgetc = std_getc;
5682     i_bungetc = std_ungetc;
5683     o_mputc = std_putc;
5684     i_mgetc = std_getc;
5685     i_mungetc  = std_ungetc;
5686     i_mgetc_buf = std_getc;
5687     i_mungetc_buf = std_ungetc;
5688     output_mode = ASCII;
5689     input_mode =  ASCII;
5690     mime_decode_mode = FALSE;
5691     file_out_f = FALSE;
5692     eolmode_f = 0;
5693     input_eol = 0;
5694     prev_cr = 0;
5695     option_mode = 0;
5696     z_prev2=0,z_prev1=0;
5697 #ifdef CHECK_OPTION
5698     iconv_for_check = 0;
5699 #endif
5700     input_codename = NULL;
5701     input_encoding = NULL;
5702     output_encoding = NULL;
5703     nkf_state_init();
5704 #ifdef WIN32DLL
5705     reinitdll();
5706 #endif /*WIN32DLL*/
5707 }
5708 
5709 static int
module_connection(void)5710 module_connection(void)
5711 {
5712     if (input_encoding) set_input_encoding(input_encoding);
5713     if (!output_encoding) {
5714 	output_encoding = nkf_default_encoding();
5715     }
5716     if (!output_encoding) {
5717 	if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5718 	else return -1;
5719     }
5720     set_output_encoding(output_encoding);
5721     oconv = nkf_enc_to_oconv(output_encoding);
5722     o_putc = std_putc;
5723     if (nkf_enc_unicode_p(output_encoding))
5724 	output_mode = UTF_8;
5725 
5726     if (x0201_f == NKF_UNSPECIFIED) {
5727 	x0201_f = X0201_DEFAULT;
5728     }
5729 
5730     /* replace continuation module, from output side */
5731 
5732     /* output redirection */
5733 #ifdef CHECK_OPTION
5734     if (noout_f || guess_f){
5735 	o_putc = no_putc;
5736     }
5737 #endif
5738     if (mimeout_f) {
5739 	o_mputc = o_putc;
5740 	o_putc = mime_putc;
5741 	if (mimeout_f == TRUE) {
5742 	    o_base64conv = oconv; oconv = base64_conv;
5743 	}
5744 	/* base64_count = 0; */
5745     }
5746 
5747     if (eolmode_f || guess_f) {
5748 	o_eol_conv = oconv; oconv = eol_conv;
5749     }
5750     if (rot_f) {
5751 	o_rot_conv = oconv; oconv = rot_conv;
5752     }
5753     if (iso2022jp_f) {
5754 	o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5755     }
5756     if (hira_f) {
5757 	o_hira_conv = oconv; oconv = hira_conv;
5758     }
5759     if (fold_f) {
5760 	o_fconv = oconv; oconv = fold_conv;
5761 	f_line = 0;
5762     }
5763     if (alpha_f || x0201_f) {
5764 	o_zconv = oconv; oconv = z_conv;
5765     }
5766 
5767     i_getc = std_getc;
5768     i_ungetc = std_ungetc;
5769     /* input redirection */
5770 #ifdef INPUT_OPTION
5771     if (cap_f){
5772 	i_cgetc = i_getc; i_getc = cap_getc;
5773 	i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5774     }
5775     if (url_f){
5776 	i_ugetc = i_getc; i_getc = url_getc;
5777 	i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5778     }
5779 #endif
5780 #ifdef NUMCHAR_OPTION
5781     if (numchar_f){
5782 	i_ngetc = i_getc; i_getc = numchar_getc;
5783 	i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5784     }
5785 #endif
5786 #ifdef UNICODE_NORMALIZATION
5787     if (nfc_f){
5788 	i_nfc_getc = i_getc; i_getc = nfc_getc;
5789 	i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5790     }
5791 #endif
5792     if (mime_f && mimebuf_f==FIXED_MIME) {
5793 	i_mgetc = i_getc; i_getc = mime_getc;
5794 	i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5795     }
5796     if (broken_f & 1) {
5797 	i_bgetc = i_getc; i_getc = broken_getc;
5798 	i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5799     }
5800     if (input_encoding) {
5801 	set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5802     } else {
5803 	set_iconv(FALSE, e_iconv);
5804     }
5805 
5806     {
5807 	struct input_code *p = input_code_list;
5808 	while (p->name){
5809 	    status_reinit(p++);
5810 	}
5811     }
5812     return 0;
5813 }
5814 
5815 /*
5816    Conversion main loop. Code detection only.
5817  */
5818 
5819 #if !defined(PERL_XS) && !defined(WIN32DLL)
5820 static nkf_char
noconvert(FILE * f)5821 noconvert(FILE *f)
5822 {
5823     nkf_char    c;
5824 
5825     if (nop_f == 2)
5826 	module_connection();
5827     while ((c = (*i_getc)(f)) != EOF)
5828 	(*o_putc)(c);
5829     (*o_putc)(EOF);
5830     return 1;
5831 }
5832 #endif
5833 
/*
 * Control-flow shorthands for the byte loop in kanji_convert().
 * NEXT, SKIP, MORE and LAST expand to `continue` or `break` and therefore
 * act on the innermost enclosing loop.  SKIP and MORE expand to two
 * statements, so they must only be used inside braces.
 */
#define NEXT continue        /* no output, get next */
#define SKIP c2=0;continue        /* drop the pending first byte, get next */
#define MORE c2=c1;continue  /* need one more byte */
#define SEND (void)0         /* output c1 and c2, get next */
#define LAST break           /* end of loop, go closing  */
/*
 * Switch input_mode, clear the caller's local shift_mode, and record
 * "ISO-2022-JP" as the input code name.
 */
#define set_input_mode(mode) do { \
    input_mode = (mode); \
    shift_mode = 0; \
    set_input_codename("ISO-2022-JP"); \
    debug("ISO-2022-JP"); \
} while (0)
5845 
5846 static int
kanji_convert(FILE * f)5847 kanji_convert(FILE *f)
5848 {
5849     nkf_char c1=0, c2=0, c3=0, c4=0;
5850     int shift_mode = 0; /* 0, 1, 2, 3 */
5851     int g2 = 0;
5852     int is_8bit = FALSE;
5853 
5854     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5855 	is_8bit = TRUE;
5856     }
5857 
5858     input_mode = ASCII;
5859     output_mode = ASCII;
5860 
5861     if (module_connection() < 0) {
5862 #if !defined(PERL_XS) && !defined(WIN32DLL)
5863 	fprintf(stderr, "no output encoding given\n");
5864 #endif
5865 	return -1;
5866     }
5867     check_bom(f);
5868 
5869 #ifdef UTF8_INPUT_ENABLE
5870     if(iconv == w_iconv32){
5871 	while ((c1 = (*i_getc)(f)) != EOF &&
5872 	       (c2 = (*i_getc)(f)) != EOF &&
5873 	       (c3 = (*i_getc)(f)) != EOF &&
5874 	       (c4 = (*i_getc)(f)) != EOF) {
5875 	    nkf_char c5, c6, c7, c8;
5876 	    if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5877 		if ((c5 = (*i_getc)(f)) != EOF &&
5878 		    (c6 = (*i_getc)(f)) != EOF &&
5879 		    (c7 = (*i_getc)(f)) != EOF &&
5880 		    (c8 = (*i_getc)(f)) != EOF) {
5881 		    if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5882 			(*i_ungetc)(c8, f);
5883 			(*i_ungetc)(c7, f);
5884 			(*i_ungetc)(c6, f);
5885 			(*i_ungetc)(c5, f);
5886 			nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5887 		    }
5888 		} else {
5889 		    nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5890 		}
5891 	    }
5892 	}
5893 	goto finished;
5894     }
5895     else if (iconv == w_iconv16) {
5896 	while ((c1 = (*i_getc)(f)) != EOF &&
5897 	       (c2 = (*i_getc)(f)) != EOF) {
5898 	    size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5899 	    if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5900 		(c3 = (*i_getc)(f)) != EOF &&
5901 		(c4 = (*i_getc)(f)) != EOF) {
5902 		nkf_iconv_utf_16(c1, c2, c3, c4);
5903 	    } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5904 		if ((c3 = (*i_getc)(f)) != EOF &&
5905 		    (c4 = (*i_getc)(f)) != EOF) {
5906 		    if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5907 			(*i_ungetc)(c4, f);
5908 			(*i_ungetc)(c3, f);
5909 			nkf_iconv_utf_16_nocombine(c1, c2);
5910 		    }
5911 		} else {
5912 		    nkf_iconv_utf_16_nocombine(c1, c2);
5913 		}
5914 	    }
5915 	}
5916 	goto finished;
5917     }
5918 #endif
5919 
5920     while ((c1 = (*i_getc)(f)) != EOF) {
5921 #ifdef INPUT_CODE_FIX
5922 	if (!input_encoding)
5923 #endif
5924 	    code_status(c1);
5925 	if (c2) {
5926 	    /* second byte */
5927 	    if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
5928 		/* in case of 8th bit is on */
5929 		if (!estab_f&&!mime_decode_mode) {
5930 		    /* in case of not established yet */
5931 		    /* It is still ambiguous */
5932 		    if (h_conv(f, c2, c1)==EOF) {
5933 			LAST;
5934 		    }
5935 		    else {
5936 			SKIP;
5937 		    }
5938 		}
5939 		else {
5940 		    /* in case of already established */
5941 		    if (c1 < 0x40) {
5942 			/* ignore bogus code */
5943 			SKIP;
5944 		    } else {
5945 			SEND;
5946 		    }
5947 		}
5948 	    }
5949 	    else {
5950 		/* 2nd byte of 7 bit code or SJIS */
5951 		SEND;
5952 	    }
5953 	}
5954 	else if (nkf_char_unicode_p(c1)) {
5955 	    (*oconv)(0, c1);
5956 	    NEXT;
5957 	}
5958 	else {
5959 	    /* first byte */
5960 	    if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5961 		/* CP5022x */
5962 		MORE;
5963 	    }else if (input_codename && input_codename[0] == 'I' &&
5964 		    0xA1 <= c1 && c1 <= 0xDF) {
5965 		/* JIS X 0201 Katakana in 8bit JIS */
5966 		c2 = JIS_X_0201_1976_K;
5967 		c1 &= 0x7f;
5968 		SEND;
5969 	    } else if (c1 > DEL) {
5970 		/* 8 bit code */
5971 		if (!estab_f && !iso8859_f) {
5972 		    /* not established yet */
5973 		    MORE;
5974 		} else { /* estab_f==TRUE */
5975 		    if (iso8859_f) {
5976 			c2 = ISO_8859_1;
5977 			c1 &= 0x7f;
5978 			SEND;
5979 		    }
5980 		    else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5981 			     (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5982 			/* JIS X 0201 */
5983 			c2 = JIS_X_0201_1976_K;
5984 			c1 &= 0x7f;
5985 			SEND;
5986 		    }
5987 		    else {
5988 			/* already established */
5989 			MORE;
5990 		    }
5991 		}
5992 	    } else if (SP < c1 && c1 < DEL) {
5993 		/* in case of Roman characters */
5994 		if (shift_mode) {
5995 		    /* output 1 shifted byte */
5996 		    if (iso8859_f) {
5997 			c2 = ISO_8859_1;
5998 			SEND;
5999 		    } else if (nkf_byte_jisx0201_katakana_p(c1)){
6000 			/* output 1 shifted byte */
6001 			c2 = JIS_X_0201_1976_K;
6002 			SEND;
6003 		    } else {
6004 			/* look like bogus code */
6005 			SKIP;
6006 		    }
6007 		} else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
6008 			   input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
6009 		    /* in case of Kanji shifted */
6010 		    MORE;
6011 		} else if (c1 == '=' && mime_f && !mime_decode_mode) {
6012 		    /* Check MIME code */
6013 		    if ((c1 = (*i_getc)(f)) == EOF) {
6014 			(*oconv)(0, '=');
6015 			LAST;
6016 		    } else if (c1 == '?') {
6017 			/* =? is mime conversion start sequence */
6018 			if(mime_f == STRICT_MIME) {
6019 			    /* check in real detail */
6020 			    if (mime_begin_strict(f) == EOF)
6021 				LAST;
6022 			    SKIP;
6023 			} else if (mime_begin(f) == EOF)
6024 			    LAST;
6025 			SKIP;
6026 		    } else {
6027 			(*oconv)(0, '=');
6028 			(*i_ungetc)(c1,f);
6029 			SKIP;
6030 		    }
6031 		} else {
6032 		    /* normal ASCII code */
6033 		    SEND;
6034 		}
6035 	    } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
6036 		shift_mode = 0;
6037 		SKIP;
6038 	    } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
6039 		shift_mode = 1;
6040 		SKIP;
6041 	    } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
6042 		if ((c1 = (*i_getc)(f)) == EOF) {
6043 		    (*oconv)(0, ESC);
6044 		    LAST;
6045 		}
6046 		else if (c1 == '&') {
6047 		    /* IRR */
6048 		    if ((c1 = (*i_getc)(f)) == EOF) {
6049 			LAST;
6050 		    } else {
6051 			SKIP;
6052 		    }
6053 		}
6054 		else if (c1 == '$') {
6055 		    /* GZDMx */
6056 		    if ((c1 = (*i_getc)(f)) == EOF) {
6057 			/* don't send bogus code
6058 			   (*oconv)(0, ESC);
6059 			   (*oconv)(0, '$'); */
6060 			LAST;
6061 		    } else if (c1 == '@' || c1 == 'B') {
6062 			/* JIS X 0208 */
6063 			set_input_mode(JIS_X_0208);
6064 			SKIP;
6065 		    } else if (c1 == '(') {
6066 			/* GZDM4 */
6067 			if ((c1 = (*i_getc)(f)) == EOF) {
6068 			    /* don't send bogus code
6069 			       (*oconv)(0, ESC);
6070 			       (*oconv)(0, '$');
6071 			       (*oconv)(0, '(');
6072 			     */
6073 			    LAST;
6074 			} else if (c1 == '@'|| c1 == 'B') {
6075 			    /* JIS X 0208 */
6076 			    set_input_mode(JIS_X_0208);
6077 			    SKIP;
6078 #ifdef X0212_ENABLE
6079 			} else if (c1 == 'D'){
6080 			    set_input_mode(JIS_X_0212);
6081 			    SKIP;
6082 #endif /* X0212_ENABLE */
6083 			} else if (c1 == 'O' || c1 == 'Q'){
6084 			    set_input_mode(JIS_X_0213_1);
6085 			    SKIP;
6086 			} else if (c1 == 'P'){
6087 			    set_input_mode(JIS_X_0213_2);
6088 			    SKIP;
6089 			} else {
6090 			    /* could be some special code */
6091 			    (*oconv)(0, ESC);
6092 			    (*oconv)(0, '$');
6093 			    (*oconv)(0, '(');
6094 			    (*oconv)(0, c1);
6095 			    SKIP;
6096 			}
6097 		    } else if (broken_f&0x2) {
6098 			/* accept any ESC-(-x as broken code ... */
6099 			input_mode = JIS_X_0208;
6100 			shift_mode = 0;
6101 			SKIP;
6102 		    } else {
6103 			(*oconv)(0, ESC);
6104 			(*oconv)(0, '$');
6105 			(*oconv)(0, c1);
6106 			SKIP;
6107 		    }
6108 		} else if (c1 == '(') {
6109 		    /* GZD4 */
6110 		    if ((c1 = (*i_getc)(f)) == EOF) {
6111 			/* don't send bogus code
6112 			   (*oconv)(0, ESC);
6113 			   (*oconv)(0, '('); */
6114 			LAST;
6115 		    }
6116 		    else if (c1 == 'I') {
6117 			/* JIS X 0201 Katakana */
6118 			set_input_mode(JIS_X_0201_1976_K);
6119 			shift_mode = 1;
6120 			SKIP;
6121 		    }
6122 		    else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
6123 			/* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
6124 			set_input_mode(ASCII);
6125 			SKIP;
6126 		    }
6127 		    else if (broken_f&0x2) {
6128 			set_input_mode(ASCII);
6129 			SKIP;
6130 		    }
6131 		    else {
6132 			(*oconv)(0, ESC);
6133 			(*oconv)(0, '(');
6134 			SEND;
6135 		    }
6136 		}
6137 		else if (c1 == '.') {
6138 		    /* G2D6 */
6139 		    if ((c1 = (*i_getc)(f)) == EOF) {
6140 			LAST;
6141 		    }
6142 		    else if (c1 == 'A') {
6143 			/* ISO-8859-1 */
6144 			g2 = ISO_8859_1;
6145 			SKIP;
6146 		    }
6147 		    else {
6148 			(*oconv)(0, ESC);
6149 			(*oconv)(0, '.');
6150 			SEND;
6151 		    }
6152 		}
6153 		else if (c1 == 'N') {
6154 		    /* SS2 */
6155 		    c1 = (*i_getc)(f);
6156 		    if (g2 == ISO_8859_1) {
6157 			c2 = ISO_8859_1;
6158 			SEND;
6159 		    }else{
6160 			(*i_ungetc)(c1, f);
6161 			/* lonely ESC  */
6162 			(*oconv)(0, ESC);
6163 			SEND;
6164 		    }
6165 		}
6166 		else {
6167 		    i_ungetc(c1,f);
6168 		    /* lonely ESC  */
6169 		    (*oconv)(0, ESC);
6170 		    SKIP;
6171 		}
6172 	    } else if (c1 == ESC && iconv == s_iconv) {
6173 		/* ESC in Shift_JIS */
6174 		if ((c1 = (*i_getc)(f)) == EOF) {
6175 		    (*oconv)(0, ESC);
6176 		    LAST;
6177 		} else if (c1 == '$') {
6178 		    /* J-PHONE emoji */
6179 		    if ((c1 = (*i_getc)(f)) == EOF) {
6180 			LAST;
6181 		    } else if (('E' <= c1 && c1 <= 'G') ||
6182 			       ('O' <= c1 && c1 <= 'Q')) {
6183 			/*
6184 			   NUM : 0 1 2 3 4 5
6185 			   BYTE: G E F O P Q
6186 			   C%7 : 1 6 0 2 3 4
6187 			   C%7 : 0 1 2 3 4 5 6
6188 			   NUM : 2 0 3 4 5 X 1
6189 			 */
6190 			static const nkf_char jphone_emoji_first_table[7] =
6191 			{0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6192 			c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
6193 			if ((c1 = (*i_getc)(f)) == EOF) LAST;
6194 			while (SP <= c1 && c1 <= 'z') {
6195 			    (*oconv)(0, c1 + c3);
6196 			    if ((c1 = (*i_getc)(f)) == EOF) LAST;
6197 			}
6198 			SKIP;
6199 		    }
6200 		    else {
6201 			(*oconv)(0, ESC);
6202 			(*oconv)(0, '$');
6203 			SEND;
6204 		    }
6205 		}
6206 		else {
6207 		    i_ungetc(c1,f);
6208 		    /* lonely ESC  */
6209 		    (*oconv)(0, ESC);
6210 		    SKIP;
6211 		}
6212 	    } else if (c1 == LF || c1 == CR) {
6213 		if (broken_f&4) {
6214 		    input_mode = ASCII; set_iconv(FALSE, 0);
6215 		    SEND;
6216 		} else if (mime_decode_f && !mime_decode_mode){
6217 		    if (c1 == LF) {
6218 			if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
6219 			    i_ungetc(SP,f);
6220 			    continue;
6221 			} else {
6222 			    i_ungetc(c1,f);
6223 			}
6224 			c1 = LF;
6225 			SEND;
6226 		    } else  { /* if (c1 == CR)*/
6227 			if ((c1=(*i_getc)(f))!=EOF) {
6228 			    if (c1==SP) {
6229 				i_ungetc(SP,f);
6230 				continue;
6231 			    } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
6232 				i_ungetc(SP,f);
6233 				continue;
6234 			    } else {
6235 				i_ungetc(c1,f);
6236 			    }
6237 			    i_ungetc(LF,f);
6238 			} else {
6239 			    i_ungetc(c1,f);
6240 			}
6241 			c1 = CR;
6242 			SEND;
6243 		    }
6244 		}
6245 	    } else
6246 		SEND;
6247 	}
6248 	/* send: */
6249 	switch(input_mode){
6250 	case ASCII:
6251 	    switch ((*iconv)(c2, c1, 0)) {  /* can be EUC / SJIS / UTF-8 */
6252 	    case -2:
6253 		/* 4 bytes UTF-8 */
6254 		if ((c3 = (*i_getc)(f)) != EOF) {
6255 		    code_status(c3);
6256 		    c3 <<= 8;
6257 		    if ((c4 = (*i_getc)(f)) != EOF) {
6258 			code_status(c4);
6259 			(*iconv)(c2, c1, c3|c4);
6260 		    }
6261 		}
6262 		break;
6263 	    case -3:
6264 		/* 4 bytes UTF-8 (check combining character) */
6265 		if ((c3 = (*i_getc)(f)) != EOF) {
6266 		    if ((c4 = (*i_getc)(f)) != EOF) {
6267 			if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6268 			    (*i_ungetc)(c4, f);
6269 			    (*i_ungetc)(c3, f);
6270 			    w_iconv_nocombine(c2, c1, 0);
6271 			}
6272 		    } else {
6273 			(*i_ungetc)(c3, f);
6274 			w_iconv_nocombine(c2, c1, 0);
6275 		    }
6276 		} else {
6277 		    w_iconv_nocombine(c2, c1, 0);
6278 		}
6279 		break;
6280 	    case -1:
6281 		/* 3 bytes EUC or UTF-8 */
6282 		if ((c3 = (*i_getc)(f)) != EOF) {
6283 		    code_status(c3);
6284 		    if ((*iconv)(c2, c1, c3) == -3) {
6285 			/* 6 bytes UTF-8 (check combining character) */
6286 			nkf_char c5, c6;
6287 			if ((c4 = (*i_getc)(f)) != EOF) {
6288 			    if ((c5 = (*i_getc)(f)) != EOF) {
6289 				if ((c6 = (*i_getc)(f)) != EOF) {
6290 				    if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6291 					(*i_ungetc)(c6, f);
6292 					(*i_ungetc)(c5, f);
6293 					(*i_ungetc)(c4, f);
6294 					w_iconv_nocombine(c2, c1, c3);
6295 				    }
6296 				} else {
6297 				    (*i_ungetc)(c5, f);
6298 				    (*i_ungetc)(c4, f);
6299 				    w_iconv_nocombine(c2, c1, c3);
6300 				}
6301 			    } else {
6302 				(*i_ungetc)(c4, f);
6303 				w_iconv_nocombine(c2, c1, c3);
6304 			    }
6305 			} else {
6306 			    w_iconv_nocombine(c2, c1, c3);
6307 			}
6308 		    }
6309 		}
6310 		break;
6311 	    }
6312 	    break;
6313 	case JIS_X_0208:
6314 	case JIS_X_0213_1:
6315 	    if (ms_ucs_map_f &&
6316 		0x7F <= c2 && c2 <= 0x92 &&
6317 		0x21 <= c1 && c1 <= 0x7E) {
6318 		/* CP932 UDC */
6319 		c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
6320 		c2 = 0;
6321 	    }
6322 	    (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
6323 	    break;
6324 #ifdef X0212_ENABLE
6325 	case JIS_X_0212:
6326 	    (*oconv)(PREFIX_EUCG3 | c2, c1);
6327 	    break;
6328 #endif /* X0212_ENABLE */
6329 	case JIS_X_0213_2:
6330 	    (*oconv)(PREFIX_EUCG3 | c2, c1);
6331 	    break;
6332 	default:
6333 	    (*oconv)(input_mode, c1);  /* other special case */
6334 	}
6335 
6336 	c2 = 0;
6337 	c3 = 0;
6338 	continue;
6339 	/* goto next_word */
6340     }
6341 
6342 finished:
6343     /* epilogue */
6344     (*iconv)(EOF, 0, 0);
6345     if (!input_codename)
6346     {
6347 	if (is_8bit) {
6348 	    struct input_code *p = input_code_list;
6349 	    struct input_code *result = p;
6350 	    while (p->name){
6351 		if (p->score < result->score) result = p;
6352 		++p;
6353 	    }
6354 	    set_input_codename(result->name);
6355 #ifdef CHECK_OPTION
6356 	    debug(result->name);
6357 #endif
6358 	}
6359     }
6360     return 0;
6361 }
6362 
6363 /*
6364  * int options(unsigned char *cp)
6365  *
6366  * return values:
6367  *    0: success
6368  *   -1: ArgumentError
6369  */
6370 static int
options(unsigned char * cp)6371 options(unsigned char *cp)
6372 {
6373     nkf_char i, j;
6374     unsigned char *p;
6375     unsigned char *cp_back = NULL;
6376     nkf_encoding *enc;
6377 
6378     if (option_mode==1)
6379 	return 0;
6380     while(*cp && *cp++!='-');
6381     while (*cp || cp_back) {
6382 	if(!*cp){
6383 	    cp = cp_back;
6384 	    cp_back = NULL;
6385 	    continue;
6386 	}
6387 	p = 0;
6388 	switch (*cp++) {
6389 	case '-':  /* literal options */
6390 	    if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
6391 		option_mode = 1;
6392 		return 0;
6393 	    }
6394 	    for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
6395 		p = (unsigned char *)long_option[i].name;
6396 		for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
6397 		if (*p == cp[j] || cp[j] == SP){
6398 		    p = &cp[j] + 1;
6399 		    break;
6400 		}
6401 		p = 0;
6402 	    }
6403 	    if (p == 0) {
6404 #if !defined(PERL_XS) && !defined(WIN32DLL)
6405 		fprintf(stderr, "unknown long option: --%s\n", cp);
6406 #endif
6407 		return -1;
6408 	    }
6409 	    while(*cp && *cp != SP && cp++);
6410 	    if (long_option[i].alias[0]){
6411 		cp_back = cp;
6412 		cp = (unsigned char *)long_option[i].alias;
6413 	    }else{
6414 #ifndef PERL_XS
6415 		if (strcmp(long_option[i].name, "help") == 0){
6416 		    usage();
6417 		    exit(EXIT_SUCCESS);
6418 		}
6419 #endif
6420 		if (strcmp(long_option[i].name, "ic=") == 0){
6421 		    enc = nkf_enc_find((char *)p);
6422 		    if (!enc) continue;
6423 		    input_encoding = enc;
6424 		    continue;
6425 		}
6426 		if (strcmp(long_option[i].name, "oc=") == 0){
6427 		    enc = nkf_enc_find((char *)p);
6428 		    /* if (enc <= 0) continue; */
6429 		    if (!enc) continue;
6430 		    output_encoding = enc;
6431 		    continue;
6432 		}
6433 		if (strcmp(long_option[i].name, "guess=") == 0){
6434 		    if (p[0] == '0' || p[0] == '1') {
6435 			guess_f = 1;
6436 		    } else {
6437 			guess_f = 2;
6438 		    }
6439 		    continue;
6440 		}
6441 #ifdef OVERWRITE
6442 		if (strcmp(long_option[i].name, "overwrite") == 0){
6443 		    file_out_f = TRUE;
6444 		    overwrite_f = TRUE;
6445 		    preserve_time_f = TRUE;
6446 		    continue;
6447 		}
6448 		if (strcmp(long_option[i].name, "overwrite=") == 0){
6449 		    file_out_f = TRUE;
6450 		    overwrite_f = TRUE;
6451 		    preserve_time_f = TRUE;
6452 		    backup_f = TRUE;
6453 		    backup_suffix = (char *)p;
6454 		    continue;
6455 		}
6456 		if (strcmp(long_option[i].name, "in-place") == 0){
6457 		    file_out_f = TRUE;
6458 		    overwrite_f = TRUE;
6459 		    preserve_time_f = FALSE;
6460 		    continue;
6461 		}
6462 		if (strcmp(long_option[i].name, "in-place=") == 0){
6463 		    file_out_f = TRUE;
6464 		    overwrite_f = TRUE;
6465 		    preserve_time_f = FALSE;
6466 		    backup_f = TRUE;
6467 		    backup_suffix = (char *)p;
6468 		    continue;
6469 		}
6470 #endif
6471 #ifdef INPUT_OPTION
6472 		if (strcmp(long_option[i].name, "cap-input") == 0){
6473 		    cap_f = TRUE;
6474 		    continue;
6475 		}
6476 		if (strcmp(long_option[i].name, "url-input") == 0){
6477 		    url_f = TRUE;
6478 		    continue;
6479 		}
6480 #endif
6481 #ifdef NUMCHAR_OPTION
6482 		if (strcmp(long_option[i].name, "numchar-input") == 0){
6483 		    numchar_f = TRUE;
6484 		    continue;
6485 		}
6486 #endif
6487 #ifdef CHECK_OPTION
6488 		if (strcmp(long_option[i].name, "no-output") == 0){
6489 		    noout_f = TRUE;
6490 		    continue;
6491 		}
6492 		if (strcmp(long_option[i].name, "debug") == 0){
6493 		    debug_f = TRUE;
6494 		    continue;
6495 		}
6496 #endif
6497 		if (strcmp(long_option[i].name, "cp932") == 0){
6498 #ifdef SHIFTJIS_CP932
6499 		    cp51932_f = TRUE;
6500 		    cp932inv_f = -TRUE;
6501 #endif
6502 #ifdef UTF8_OUTPUT_ENABLE
6503 		    ms_ucs_map_f = UCS_MAP_CP932;
6504 #endif
6505 		    continue;
6506 		}
6507 		if (strcmp(long_option[i].name, "no-cp932") == 0){
6508 #ifdef SHIFTJIS_CP932
6509 		    cp51932_f = FALSE;
6510 		    cp932inv_f = FALSE;
6511 #endif
6512 #ifdef UTF8_OUTPUT_ENABLE
6513 		    ms_ucs_map_f = UCS_MAP_ASCII;
6514 #endif
6515 		    continue;
6516 		}
6517 #ifdef SHIFTJIS_CP932
6518 		if (strcmp(long_option[i].name, "cp932inv") == 0){
6519 		    cp932inv_f = -TRUE;
6520 		    continue;
6521 		}
6522 #endif
6523 
6524 #ifdef X0212_ENABLE
6525 		if (strcmp(long_option[i].name, "x0212") == 0){
6526 		    x0212_f = TRUE;
6527 		    continue;
6528 		}
6529 #endif
6530 
6531 #ifdef EXEC_IO
6532 		if (strcmp(long_option[i].name, "exec-in") == 0){
6533 		    exec_f = 1;
6534 		    return 0;
6535 		}
6536 		if (strcmp(long_option[i].name, "exec-out") == 0){
6537 		    exec_f = -1;
6538 		    return 0;
6539 		}
6540 #endif
6541 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6542 		if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6543 		    no_cp932ext_f = TRUE;
6544 		    continue;
6545 		}
6546 		if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6547 		    no_best_fit_chars_f = TRUE;
6548 		    continue;
6549 		}
6550 		if (strcmp(long_option[i].name, "fb-skip") == 0){
6551 		    encode_fallback = NULL;
6552 		    continue;
6553 		}
6554 		if (strcmp(long_option[i].name, "fb-html") == 0){
6555 		    encode_fallback = encode_fallback_html;
6556 		    continue;
6557 		}
6558 		if (strcmp(long_option[i].name, "fb-xml") == 0){
6559 		    encode_fallback = encode_fallback_xml;
6560 		    continue;
6561 		}
6562 		if (strcmp(long_option[i].name, "fb-java") == 0){
6563 		    encode_fallback = encode_fallback_java;
6564 		    continue;
6565 		}
6566 		if (strcmp(long_option[i].name, "fb-perl") == 0){
6567 		    encode_fallback = encode_fallback_perl;
6568 		    continue;
6569 		}
6570 		if (strcmp(long_option[i].name, "fb-subchar") == 0){
6571 		    encode_fallback = encode_fallback_subchar;
6572 		    continue;
6573 		}
6574 		if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6575 		    encode_fallback = encode_fallback_subchar;
6576 		    unicode_subchar = 0;
6577 		    if (p[0] != '0'){
6578 			/* decimal number */
6579 			for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6580 			    unicode_subchar *= 10;
6581 			    unicode_subchar += hex2bin(p[i]);
6582 			}
6583 		    }else if(p[1] == 'x' || p[1] == 'X'){
6584 			/* hexadecimal number */
6585 			for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6586 			    unicode_subchar <<= 4;
6587 			    unicode_subchar |= hex2bin(p[i]);
6588 			}
6589 		    }else{
6590 			/* octal number */
6591 			for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6592 			    unicode_subchar *= 8;
6593 			    unicode_subchar += hex2bin(p[i]);
6594 			}
6595 		    }
6596 		    w16e_conv(unicode_subchar, &i, &j);
6597 		    unicode_subchar = i<<8 | j;
6598 		    continue;
6599 		}
6600 #endif
6601 #ifdef UTF8_OUTPUT_ENABLE
6602 		if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6603 		    ms_ucs_map_f = UCS_MAP_MS;
6604 		    continue;
6605 		}
6606 #endif
6607 #ifdef UNICODE_NORMALIZATION
6608 		if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6609 		    nfc_f = TRUE;
6610 		    continue;
6611 		}
6612 #endif
6613 		if (strcmp(long_option[i].name, "prefix=") == 0){
6614 		    if (nkf_isgraph(p[0])){
6615 			for (i = 1; nkf_isgraph(p[i]); i++){
6616 			    prefix_table[p[i]] = p[0];
6617 			}
6618 		    }
6619 		    continue;
6620 		}
6621 #if !defined(PERL_XS) && !defined(WIN32DLL)
6622 		fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6623 #endif
6624 		return -1;
6625 	    }
6626 	    continue;
6627 	case 'b':           /* buffered mode */
6628 	    unbuf_f = FALSE;
6629 	    continue;
6630 	case 'u':           /* non bufferd mode */
6631 	    unbuf_f = TRUE;
6632 	    continue;
6633 	case 't':           /* transparent mode */
6634 	    if (*cp=='1') {
6635 		/* alias of -t */
6636 		cp++;
6637 		nop_f = TRUE;
6638 	    } else if (*cp=='2') {
6639 		/*
6640 		 * -t with put/get
6641 		 *
6642 		 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6643 		 *
6644 		 */
6645 		cp++;
6646 		nop_f = 2;
6647 	    } else
6648 		nop_f = TRUE;
6649 	    continue;
6650 	case 'j':           /* JIS output */
6651 	case 'n':
6652 	    output_encoding = nkf_enc_from_index(ISO_2022_JP);
6653 	    continue;
6654 	case 'e':           /* AT&T EUC output */
6655 	    output_encoding = nkf_enc_from_index(EUCJP_NKF);
6656 	    continue;
6657 	case 's':           /* SJIS output */
6658 	    output_encoding = nkf_enc_from_index(SHIFT_JIS);
6659 	    continue;
6660 	case 'l':           /* ISO8859 Latin-1 support, no conversion */
6661 	    iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
6662 	    input_encoding = nkf_enc_from_index(ISO_8859_1);
6663 	    continue;
6664 	case 'i':           /* Kanji IN ESC-$-@/B */
6665 	    if (*cp=='@'||*cp=='B')
6666 		kanji_intro = *cp++;
6667 	    continue;
6668 	case 'o':           /* ASCII IN ESC-(-J/B/H */
6669 	    /* ESC ( H was used in initial JUNET messages */
6670 	    if (*cp=='J'||*cp=='B'||*cp=='H')
6671 		ascii_intro = *cp++;
6672 	    continue;
6673 	case 'h':
6674 	    /*
6675 	       bit:1   katakana->hiragana
6676 	       bit:2   hiragana->katakana
6677 	     */
6678 	    if ('9'>= *cp && *cp>='0')
6679 		hira_f |= (*cp++ -'0');
6680 	    else
6681 		hira_f |= 1;
6682 	    continue;
6683 	case 'r':
6684 	    rot_f = TRUE;
6685 	    continue;
6686 #if defined(MSDOS) || defined(__OS2__)
6687 	case 'T':
6688 	    binmode_f = FALSE;
6689 	    continue;
6690 #endif
6691 #ifndef PERL_XS
6692 	case 'V':
6693 	    show_configuration();
6694 	    exit(EXIT_SUCCESS);
6695 	    break;
6696 	case 'v':
6697 	    version();
6698 	    exit(EXIT_SUCCESS);
6699 	    break;
6700 #endif
6701 #ifdef UTF8_OUTPUT_ENABLE
6702 	case 'w':           /* UTF-{8,16,32} output */
6703 	    if (cp[0] == '8') {
6704 		cp++;
6705 		if (cp[0] == '0'){
6706 		    cp++;
6707 		    output_encoding = nkf_enc_from_index(UTF_8N);
6708 		} else {
6709 		    output_bom_f = TRUE;
6710 		    output_encoding = nkf_enc_from_index(UTF_8_BOM);
6711 		}
6712 	    } else {
6713 		int enc_idx;
6714 		if ('1'== cp[0] && '6'==cp[1]) {
6715 		    cp += 2;
6716 		    enc_idx = UTF_16;
6717 		} else if ('3'== cp[0] && '2'==cp[1]) {
6718 		    cp += 2;
6719 		    enc_idx = UTF_32;
6720 		} else {
6721 		    output_encoding = nkf_enc_from_index(UTF_8);
6722 		    continue;
6723 		}
6724 		if (cp[0]=='L') {
6725 		    cp++;
6726 		    output_endian = ENDIAN_LITTLE;
6727 		    output_bom_f = TRUE;
6728 		} else if (cp[0] == 'B') {
6729 		    cp++;
6730 		    output_bom_f = TRUE;
6731 		}
6732 		if (cp[0] == '0'){
6733 		    output_bom_f = FALSE;
6734 		    cp++;
6735 		    enc_idx = enc_idx == UTF_16
6736 			? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6737 			: (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6738 		} else {
6739 		    enc_idx = enc_idx == UTF_16
6740 			? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6741 			: (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6742 		}
6743 		output_encoding = nkf_enc_from_index(enc_idx);
6744 	    }
6745 	    continue;
6746 #endif
6747 #ifdef UTF8_INPUT_ENABLE
6748 	case 'W':           /* UTF input */
6749 	    if (cp[0] == '8') {
6750 		cp++;
6751 		input_encoding = nkf_enc_from_index(UTF_8);
6752 	    }else{
6753 		int enc_idx;
6754 		if ('1'== cp[0] && '6'==cp[1]) {
6755 		    cp += 2;
6756 		    input_endian = ENDIAN_BIG;
6757 		    enc_idx = UTF_16;
6758 		} else if ('3'== cp[0] && '2'==cp[1]) {
6759 		    cp += 2;
6760 		    input_endian = ENDIAN_BIG;
6761 		    enc_idx = UTF_32;
6762 		} else {
6763 		    input_encoding = nkf_enc_from_index(UTF_8);
6764 		    continue;
6765 		}
6766 		if (cp[0]=='L') {
6767 		    cp++;
6768 		    input_endian = ENDIAN_LITTLE;
6769 		} else if (cp[0] == 'B') {
6770 		    cp++;
6771 		    input_endian = ENDIAN_BIG;
6772 		}
6773 		enc_idx = (enc_idx == UTF_16
6774 		    ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6775 		    : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6776 		input_encoding = nkf_enc_from_index(enc_idx);
6777 	    }
6778 	    continue;
6779 #endif
6780 	    /* Input code assumption */
6781 	case 'J':   /* ISO-2022-JP input */
6782 	    input_encoding = nkf_enc_from_index(ISO_2022_JP);
6783 	    continue;
6784 	case 'E':   /* EUC-JP input */
6785 	    input_encoding = nkf_enc_from_index(EUCJP_NKF);
6786 	    continue;
6787 	case 'S':   /* Shift_JIS input */
6788 	    input_encoding = nkf_enc_from_index(SHIFT_JIS);
6789 	    continue;
6790 	case 'Z':   /* Convert X0208 alphabet to asii */
6791 	    /* alpha_f
6792 	       bit:0   Convert JIS X 0208 Alphabet to ASCII
6793 	       bit:1   Convert Kankaku to one space
6794 	       bit:2   Convert Kankaku to two spaces
6795 	       bit:3   Convert HTML Entity
6796 	       bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6797 	     */
6798 	    while ('0'<= *cp && *cp <='4') {
6799 		alpha_f |= 1 << (*cp++ - '0');
6800 	    }
6801 	    alpha_f |= 1;
6802 	    continue;
6803 	case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
6804 	    x0201_f = FALSE;    /* No X0201->X0208 conversion */
6805 	    /* accept  X0201
6806 	       ESC-(-I     in JIS, EUC, MS Kanji
6807 	       SI/SO       in JIS, EUC, MS Kanji
6808 	       SS2         in EUC, JIS, not in MS Kanji
6809 	       MS Kanji (0xa0-0xdf)
6810 	       output  X0201
6811 	       ESC-(-I     in JIS (0x20-0x5f)
6812 	       SS2         in EUC (0xa0-0xdf)
6813 	       0xa0-0xd    in MS Kanji (0xa0-0xdf)
6814 	     */
6815 	    continue;
6816 	case 'X':   /* Convert X0201 kana to X0208 */
6817 	    x0201_f = TRUE;
6818 	    continue;
6819 	case 'F':   /* prserve new lines */
6820 	    fold_preserve_f = TRUE;
6821 	case 'f':   /* folding -f60 or -f */
6822 	    fold_f = TRUE;
6823 	    fold_len = 0;
6824 	    while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6825 		fold_len *= 10;
6826 		fold_len += *cp++ - '0';
6827 	    }
6828 	    if (!(0<fold_len && fold_len<BUFSIZ))
6829 		fold_len = DEFAULT_FOLD;
6830 	    if (*cp=='-') {
6831 		fold_margin = 0;
6832 		cp++;
6833 		while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6834 		    fold_margin *= 10;
6835 		    fold_margin += *cp++ - '0';
6836 		}
6837 	    }
6838 	    continue;
6839 	case 'm':   /* MIME support */
6840 	    /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6841 	    if (*cp=='B'||*cp=='Q') {
6842 		mime_decode_mode = *cp++;
6843 		mimebuf_f = FIXED_MIME;
6844 	    } else if (*cp=='N') {
6845 		mime_f = TRUE; cp++;
6846 	    } else if (*cp=='S') {
6847 		mime_f = STRICT_MIME; cp++;
6848 	    } else if (*cp=='0') {
6849 		mime_decode_f = FALSE;
6850 		mime_f = FALSE; cp++;
6851 	    } else {
6852 		mime_f = STRICT_MIME;
6853 	    }
6854 	    continue;
6855 	case 'M':   /* MIME output */
6856 	    if (*cp=='B') {
6857 		mimeout_mode = 'B';
6858 		mimeout_f = FIXED_MIME; cp++;
6859 	    } else if (*cp=='Q') {
6860 		mimeout_mode = 'Q';
6861 		mimeout_f = FIXED_MIME; cp++;
6862 	    } else {
6863 		mimeout_f = TRUE;
6864 	    }
6865 	    continue;
6866 	case 'B':   /* Broken JIS support */
6867 	    /*  bit:0   no ESC JIS
6868 	       bit:1   allow any x on ESC-(-x or ESC-$-x
6869 	       bit:2   reset to ascii on NL
6870 	     */
6871 	    if ('9'>= *cp && *cp>='0')
6872 		broken_f |= 1<<(*cp++ -'0');
6873 	    else
6874 		broken_f |= TRUE;
6875 	    continue;
6876 #ifndef PERL_XS
6877 	case 'O':/* for Output file */
6878 	    file_out_f = TRUE;
6879 	    continue;
6880 #endif
6881 	case 'c':/* add cr code */
6882 	    eolmode_f = CRLF;
6883 	    continue;
6884 	case 'd':/* delete cr code */
6885 	    eolmode_f = LF;
6886 	    continue;
6887 	case 'I':   /* ISO-2022-JP output */
6888 	    iso2022jp_f = TRUE;
6889 	    continue;
6890 	case 'L':  /* line mode */
6891 	    if (*cp=='u') {         /* unix */
6892 		eolmode_f = LF; cp++;
6893 	    } else if (*cp=='m') { /* mac */
6894 		eolmode_f = CR; cp++;
6895 	    } else if (*cp=='w') { /* windows */
6896 		eolmode_f = CRLF; cp++;
6897 	    } else if (*cp=='0') { /* no conversion  */
6898 		eolmode_f = 0; cp++;
6899 	    }
6900 	    continue;
6901 #ifndef PERL_XS
6902 	case 'g':
6903 	    if ('2' <= *cp && *cp <= '9') {
6904 		guess_f = 2;
6905 		cp++;
6906 	    } else if (*cp == '0' || *cp == '1') {
6907 		guess_f = 1;
6908 		cp++;
6909 	    } else {
6910 		guess_f = 1;
6911 	    }
6912 	    continue;
6913 #endif
6914 	case SP:
6915 	    /* module multiple options in a string are allowed for Perl module  */
6916 	    while(*cp && *cp++!='-');
6917 	    continue;
6918 	default:
6919 #if !defined(PERL_XS) && !defined(WIN32DLL)
6920 	    fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6921 #endif
6922 	    /* bogus option but ignored */
6923 	    return -1;
6924 	}
6925     }
6926     return 0;
6927 }
6928 
6929 #ifdef WIN32DLL
6930 #include "nkf32dll.c"
6931 #elif defined(PERL_XS)
6932 #else /* WIN32DLL */
6933 int
main(int argc,char ** argv)6934 main(int argc, char **argv)
6935 {
6936     FILE  *fin;
6937     unsigned char  *cp;
6938 
6939     char *outfname = NULL;
6940     char *origfname;
6941 
6942 #ifdef EASYWIN /*Easy Win */
6943     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6944 #endif
6945 #ifdef DEFAULT_CODE_LOCALE
6946     setlocale(LC_CTYPE, "");
6947 #endif
6948     nkf_state_init();
6949 
6950     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6951 	cp = (unsigned char *)*argv;
6952 	options(cp);
6953 #ifdef EXEC_IO
6954 	if (exec_f){
6955 	    int fds[2], pid;
6956 	    if (pipe(fds) < 0 || (pid = fork()) < 0){
6957 		abort();
6958 	    }
6959 	    if (pid == 0){
6960 		if (exec_f > 0){
6961 		    close(fds[0]);
6962 		    dup2(fds[1], 1);
6963 		}else{
6964 		    close(fds[1]);
6965 		    dup2(fds[0], 0);
6966 		}
6967 		execvp(argv[1], &argv[1]);
6968 	    }
6969 	    if (exec_f > 0){
6970 		close(fds[1]);
6971 		dup2(fds[0], 0);
6972 	    }else{
6973 		close(fds[0]);
6974 		dup2(fds[1], 1);
6975 	    }
6976 	    argc = 0;
6977 	    break;
6978 	}
6979 #endif
6980     }
6981 
6982     if (guess_f) {
6983 #ifdef CHECK_OPTION
6984 	int debug_f_back = debug_f;
6985 #endif
6986 #ifdef EXEC_IO
6987 	int exec_f_back = exec_f;
6988 #endif
6989 #ifdef X0212_ENABLE
6990 	int x0212_f_back = x0212_f;
6991 #endif
6992 	int x0213_f_back = x0213_f;
6993 	int guess_f_back = guess_f;
6994 	reinit();
6995 	guess_f = guess_f_back;
6996 	mime_f = FALSE;
6997 #ifdef CHECK_OPTION
6998 	debug_f = debug_f_back;
6999 #endif
7000 #ifdef EXEC_IO
7001 	exec_f = exec_f_back;
7002 #endif
7003 	x0212_f = x0212_f_back;
7004 	x0213_f = x0213_f_back;
7005     }
7006 
7007     if (binmode_f == TRUE)
7008 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7009 	if (freopen("","wb",stdout) == NULL)
7010 	    return (-1);
7011 #else
7012     setbinmode(stdout);
7013 #endif
7014 
7015     if (unbuf_f)
7016 	setbuf(stdout, (char *) NULL);
7017     else
7018 	setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
7019 
7020     if (argc == 0) {
7021 	if (binmode_f == TRUE)
7022 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7023 	    if (freopen("","rb",stdin) == NULL) return (-1);
7024 #else
7025 	setbinmode(stdin);
7026 #endif
7027 	setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
7028 	if (nop_f)
7029 	    noconvert(stdin);
7030 	else {
7031 	    kanji_convert(stdin);
7032 	    if (guess_f) print_guessed_code(NULL);
7033 	}
7034     } else {
7035 	int nfiles = argc;
7036 	int is_argument_error = FALSE;
7037 	while (argc--) {
7038 	    input_codename = NULL;
7039 	    input_eol = 0;
7040 #ifdef CHECK_OPTION
7041 	    iconv_for_check = 0;
7042 #endif
7043 	    if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
7044 		perror(*(argv-1));
7045 		is_argument_error = TRUE;
7046 		continue;
7047 	    } else {
7048 #ifdef OVERWRITE
7049 		int fd = 0;
7050 		int fd_backup = 0;
7051 #endif
7052 
7053 		/* reopen file for stdout */
7054 		if (file_out_f == TRUE) {
7055 #ifdef OVERWRITE
7056 		    if (overwrite_f){
7057 			outfname = nkf_xmalloc(strlen(origfname)
7058 					  + strlen(".nkftmpXXXXXX")
7059 					  + 1);
7060 			strcpy(outfname, origfname);
7061 #ifdef MSDOS
7062 			{
7063 			    int i;
7064 			    for (i = strlen(outfname); i; --i){
7065 				if (outfname[i - 1] == '/'
7066 				    || outfname[i - 1] == '\\'){
7067 				    break;
7068 				}
7069 			    }
7070 			    outfname[i] = '\0';
7071 			}
7072 			strcat(outfname, "ntXXXXXX");
7073 			mktemp(outfname);
7074 			fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7075 				  S_IREAD | S_IWRITE);
7076 #else
7077 			strcat(outfname, ".nkftmpXXXXXX");
7078 			fd = mkstemp(outfname);
7079 #endif
7080 			if (fd < 0
7081 			    || (fd_backup = dup(fileno(stdout))) < 0
7082 			    || dup2(fd, fileno(stdout)) < 0
7083 			   ){
7084 			    perror(origfname);
7085 			    return -1;
7086 			}
7087 		    }else
7088 #endif
7089 		    if(argc == 1) {
7090 			outfname = *argv++;
7091 			argc--;
7092 		    } else {
7093 			outfname = "nkf.out";
7094 		    }
7095 
7096 		    if(freopen(outfname, "w", stdout) == NULL) {
7097 			perror (outfname);
7098 			return (-1);
7099 		    }
7100 		    if (binmode_f == TRUE) {
7101 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7102 			if (freopen("","wb",stdout) == NULL)
7103 			    return (-1);
7104 #else
7105 			setbinmode(stdout);
7106 #endif
7107 		    }
7108 		}
7109 		if (binmode_f == TRUE)
7110 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7111 		    if (freopen("","rb",fin) == NULL)
7112 			return (-1);
7113 #else
7114 		setbinmode(fin);
7115 #endif
7116 		setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
7117 		if (nop_f)
7118 		    noconvert(fin);
7119 		else {
7120 		    char *filename = NULL;
7121 		    kanji_convert(fin);
7122 		    if (nfiles > 1) filename = origfname;
7123 		    if (guess_f) print_guessed_code(filename);
7124 		}
7125 		fclose(fin);
7126 #ifdef OVERWRITE
7127 		if (overwrite_f) {
7128 		    struct stat     sb;
7129 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7130 		    time_t tb[2];
7131 #else
7132 		    struct utimbuf  tb;
7133 #endif
7134 
7135 		    fflush(stdout);
7136 		    close(fd);
7137 		    if (dup2(fd_backup, fileno(stdout)) < 0){
7138 			perror("dup2");
7139 		    }
7140 		    if (stat(origfname, &sb)) {
7141 			fprintf(stderr, "Can't stat %s\n", origfname);
7142 		    }
7143 		    /* $B%Q!<%_%C%7%g%s$rI|85(B */
7144 		    if (chmod(outfname, sb.st_mode)) {
7145 			fprintf(stderr, "Can't set permission %s\n", outfname);
7146 		    }
7147 
7148 		    /* $B%?%$%`%9%?%s%W$rI|85(B */
7149 		    if(preserve_time_f){
7150 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7151 			tb[0] = tb[1] = sb.st_mtime;
7152 			if (utime(outfname, tb)) {
7153 			    fprintf(stderr, "Can't set timestamp %s\n", outfname);
7154 			}
7155 #else
7156 			tb.actime  = sb.st_atime;
7157 			tb.modtime = sb.st_mtime;
7158 			if (utime(outfname, &tb)) {
7159 			    fprintf(stderr, "Can't set timestamp %s\n", outfname);
7160 			}
7161 #endif
7162 		    }
7163 		    if(backup_f){
7164 			char *backup_filename = get_backup_filename(backup_suffix, origfname);
7165 #ifdef MSDOS
7166 			unlink(backup_filename);
7167 #endif
7168 			if (rename(origfname, backup_filename)) {
7169 			    perror(backup_filename);
7170 			    fprintf(stderr, "Can't rename %s to %s\n",
7171 				    origfname, backup_filename);
7172 			}
7173 			nkf_xfree(backup_filename);
7174 		    }else{
7175 #ifdef MSDOS
7176 			if (unlink(origfname)){
7177 			    perror(origfname);
7178 			}
7179 #endif
7180 		    }
7181 		    if (rename(outfname, origfname)) {
7182 			perror(origfname);
7183 			fprintf(stderr, "Can't rename %s to %s\n",
7184 				outfname, origfname);
7185 		    }
7186 		    nkf_xfree(outfname);
7187 		}
7188 #endif
7189 	    }
7190 	}
7191 	if (is_argument_error)
7192 	    return(-1);
7193     }
7194 #ifdef EASYWIN /*Easy Win */
7195     if (file_out_f == FALSE)
7196 	scanf("%d",&end_check);
7197     else
7198 	fclose(stdout);
7199 #else /* for Other OS */
7200     if (file_out_f == TRUE)
7201 	fclose(stdout);
7202 #endif /*Easy Win */
7203     return (0);
7204 }
7205 #endif /* WIN32DLL */
7206