Ruby  2.5.0dev(2017-10-22revision60238)
nkf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2013, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
23 #define NKF_VERSION "2.1.4"
24 #define NKF_RELEASE_DATE "2015-12-12"
25 #define COPY_RIGHT \
26  "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27  "Copyright (C) 1996-2015, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46  c2 0 means ASCII
47  JIS_X_0201_1976_K
48  ISO_8859_1
49  JIS_X_0208
50  EOF all termination
51  c1 32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
57 #define FIXED_MIME 7
58 #define STRICT_MIME 8
59 
60 /* byte order */
61 enum byte_order {
66 };
67 
68 /* ASCII CODE */
69 
70 #define BS 0x08 /* backspace */
71 #define TAB 0x09 /* horizontal tab */
72 #define LF 0x0a /* line feed */
73 #define CR 0x0d /* carriage return */
74 #define ESC 0x1b /* escape */
75 #define SP 0x20 /* space */
76 #define DEL 0x7f /* delete */
77 #define SI 0x0f /* shift in */
78 #define SO 0x0e /* shift out */
79 #define SS2 0x8e /* single shift 2 */
80 #define SS3 0x8f /* single shift 3; is_eucg3() tests for it in the high byte */
81 #define CRLF 0x0D0A /* CR and LF packed into one value (not a single byte) */
82 
83 
84 /* encodings */
85 
124  JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125  /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126  /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127  /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128  JIS_X_0208 = 0x1168, /* @B */
129  JIS_X_0212 = 0x1159, /* D */
130  /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131  JIS_X_0213_2 = 0x1229, /* P */
132  JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
/*
 * Forward declarations of the per-encoding converters.
 * Input converters (*_iconv) take three nkf_char values (c2, c1, c0) and
 * return an nkf_char; output converters (*_oconv) take (c2, c1) and return
 * nothing.  They are referenced by the NkfEncoding* descriptors.
 */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
147 typedef struct {
148  const char *name;
149  nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150  void (*oconv)(nkf_char c2, nkf_char c1);
152 
/*
 * Native (base) encoding descriptors: { name, input converter, output converter }.
 * ASCII and EUC-JP share e_iconv/e_oconv; ISO-2022-JP also reads through
 * e_iconv but writes through j_oconv.
 */
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
160 
161 typedef struct {
162  const int id;
163  const char *name;
165 } nkf_encoding;
166 
168  {ASCII, "US-ASCII", &NkfEncodingASCII},
169  {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170  {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171  {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172  {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173  {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174  {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175  {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176  {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177  {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178  {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179  {CP10001, "CP10001", &NkfEncodingShift_JIS},
180  {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181  {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182  {CP51932, "CP51932", &NkfEncodingEUC_JP},
183  {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184  {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185  {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186  {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187  {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188  {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189  {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190  {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191  {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192  {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193  {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194  {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195  {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196  {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197  {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198  {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199  {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200  {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201  {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202  {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203  {BINARY, "BINARY", &NkfEncodingASCII},
204  {-1, NULL, NULL}
205 };
206 
207 struct {
208  const char *name;
209  const int id;
211  {"US-ASCII", ASCII},
212  {"ASCII", ASCII},
213  {"646", ASCII},
214  {"ROMAN8", ASCII},
215  {"ISO-2022-JP", ISO_2022_JP},
216  {"ISO2022JP-CP932", CP50220},
217  {"CP50220", CP50220},
218  {"CP50221", CP50221},
219  {"CSISO2022JP", CP50221},
220  {"CP50222", CP50222},
221  {"ISO-2022-JP-1", ISO_2022_JP_1},
222  {"ISO-2022-JP-3", ISO_2022_JP_3},
223  {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224  {"SHIFT_JIS", SHIFT_JIS},
225  {"SJIS", SHIFT_JIS},
226  {"MS_Kanji", SHIFT_JIS},
227  {"PCK", SHIFT_JIS},
228  {"WINDOWS-31J", WINDOWS_31J},
229  {"CSWINDOWS31J", WINDOWS_31J},
230  {"CP932", WINDOWS_31J},
231  {"MS932", WINDOWS_31J},
232  {"CP10001", CP10001},
233  {"EUCJP", EUC_JP},
234  {"EUC-JP", EUC_JP},
235  {"EUCJP-NKF", EUCJP_NKF},
236  {"CP51932", CP51932},
237  {"EUC-JP-MS", EUCJP_MS},
238  {"EUCJP-MS", EUCJP_MS},
239  {"EUCJPMS", EUCJP_MS},
240  {"EUC-JP-ASCII", EUCJP_ASCII},
241  {"EUCJP-ASCII", EUCJP_ASCII},
242  {"SHIFT_JISX0213", SHIFT_JISX0213},
243  {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244  {"EUC-JISX0213", EUC_JISX0213},
245  {"EUC-JIS-2004", EUC_JIS_2004},
246  {"UTF-8", UTF_8},
247  {"UTF-8N", UTF_8N},
248  {"UTF-8-BOM", UTF_8_BOM},
249  {"UTF8-MAC", UTF8_MAC},
250  {"UTF-8-MAC", UTF8_MAC},
251  {"UTF-16", UTF_16},
252  {"UTF-16BE", UTF_16BE},
253  {"UTF-16BE-BOM", UTF_16BE_BOM},
254  {"UTF-16LE", UTF_16LE},
255  {"UTF-16LE-BOM", UTF_16LE_BOM},
256  {"UTF-32", UTF_32},
257  {"UTF-32BE", UTF_32BE},
258  {"UTF-32BE-BOM", UTF_32BE_BOM},
259  {"UTF-32LE", UTF_32LE},
260  {"UTF-32LE-BOM", UTF_32LE_BOM},
261  {"BINARY", BINARY},
262  {NULL, -1}
263 };
264 
/*
 * Map the DEFAULT_CODE_* build option to an encoding index.
 * DEFAULT_ENCIDX is left undefined when none of these options is set;
 * nkf_default_encoding() checks for that with #elif defined(DEFAULT_ENCIDX).
 */
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
275 #endif
276 
277 
/*
 * Locale-independent character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that expression arguments
 * (e.g. `a | b` or `x ? y : z`) expand with the intended precedence.
 * Arguments are still evaluated more than once, so never pass an
 * expression with side effects (`*p++`, a function call that reads input).
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* hex digit -> value 0..15; any non-hex character yields 0 */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* low nibble of c -> upper-case hex digit */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* true when the second-lowest byte of c2 (taken as unsigned short) is SS3 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* true for CR, LF and the printable characters other than ? = _ ( ) . and '"' */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 
304 #define HOLD_SIZE 1024
/* Size of the static stdibuf/stdobuf arrays; smaller when int is 16 bits. */
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
307 #else
308 #define IOBUF_SIZE 16384
309 #endif
310 
/* Initial values of kanji_intro (DEFAULT_J) and ascii_intro (DEFAULT_R). */
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
313 
314 
/* NOTE(review): presumably the two bytes of the GETA substitute character
 * mentioned in the iso2022jp_f comment -- confirm against its users. */
315 #define GETA1 0x22
316 #define GETA2 0x2e
317 
318 
319 /* MIME preprocessor */
320 
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
323 #endif
324 
325 struct input_code{
326  const char *name;
331  void (*status_func)(struct input_code *, nkf_char);
334 };
335 
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
339 
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 /* UCS Mapping
342  * 0: Shift_JIS, eucJP-ascii
343  * 1: eucJP-ms
344  * 2: CP932, CP51932
345  * 3: CP10001
346  */
347 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_MS 1
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static int no_best_fit_chars_f = FALSE;
358 static int input_endian = ENDIAN_BIG;
359 static int input_bom_f = FALSE;
360 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
361 static void (*encode_fallback)(nkf_char c) = NULL;
362 static void w_status(struct input_code *, nkf_char);
363 #endif
364 #ifdef UTF8_OUTPUT_ENABLE
365 static int output_bom_f = FALSE;
366 static int output_endian = ENDIAN_BIG;
367 #endif
368 
369 static void std_putc(nkf_char c);
370 static nkf_char std_getc(FILE *f);
371 static nkf_char std_ungetc(nkf_char c,FILE *f);
372 
373 static nkf_char broken_getc(FILE *f);
374 static nkf_char broken_ungetc(nkf_char c,FILE *f);
375 
376 static nkf_char mime_getc(FILE *f);
377 
378 static void mime_putc(nkf_char c);
379 
380 /* buffers */
381 
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 static unsigned char stdibuf[IOBUF_SIZE];
384 static unsigned char stdobuf[IOBUF_SIZE];
385 #endif
386 
387 #define NKF_UNSPECIFIED (-TRUE)
388 
389 /* flags */
390 static int unbuf_f = FALSE;
391 static int estab_f = FALSE;
392 static int nop_f = FALSE;
393 static int binmode_f = TRUE; /* binary mode */
394 static int rot_f = FALSE; /* rot14/43 mode */
395 static int hira_f = FALSE; /* hiragana/katakana conversion */
396 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
397 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
398 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
399 static int mimebuf_f = FALSE; /* MIME buffered input */
400 static int broken_f = FALSE; /* convert ESC-less broken JIS */
401 static int iso8859_f = FALSE; /* ISO8859 through */
402 static int mimeout_f = FALSE; /* base64 mode */
403 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201; starts as NKF_UNSPECIFIED */
404 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405 
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
410 #endif
411 
412 #ifdef INPUT_OPTION
413 static int cap_f = FALSE;
414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416 
417 static int url_f = FALSE;
418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
420 #endif
421 
/*
 * nkf_char tagging.
 * The top byte (CLASS_MASK) of an nkf_char holds a class tag and the low
 * 24 bits (VALUE_MASK) hold the value; CLASS_UNICODE marks a raw Unicode
 * code point.  PREFIX_EUCG3 puts 0x8F in the second-lowest byte.
 * Macro parameters are parenthesized so expression arguments are safe.
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/*
 * Combine a UTF-16 surrogate pair into a code point.
 * 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000, i.e. the surrogate
 * bases are removed and the supplementary-plane offset added in one step.
 */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435 
436 #ifdef NUMCHAR_OPTION
437 static int numchar_f = FALSE;
438 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
439 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440 #endif
441 
442 #ifdef CHECK_OPTION
443 static int noout_f = FALSE;
444 static void no_putc(nkf_char c);
445 static int debug_f = FALSE;
446 static void debug(const char *str);
447 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448 #endif
449 
450 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451 static void set_input_codename(const char *codename);
452 
453 #ifdef EXEC_IO
454 static int exec_f = 0;
455 #endif
456 
457 #ifdef SHIFTJIS_CP932
458 /* invert IBM extended characters to others */
459 static int cp51932_f = FALSE;
460 
461 /* invert NEC-selected IBM extended characters to IBM extended characters */
462 static int cp932inv_f = TRUE;
463 
464 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465 #endif /* SHIFTJIS_CP932 */
466 
467 static int x0212_f = FALSE;
468 static int x0213_f = FALSE;
469 
470 static unsigned char prefix_table[256];
471 
472 static void e_status(struct input_code *, nkf_char);
473 static void s_status(struct input_code *, nkf_char);
474 
476  {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477  {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE
479  {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480  {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481  {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482 #endif
483  {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484 };
485 
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
488 
489 /* X0208 -> ASCII converter */
490 
491 /* fold parameter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
497 
498 /* options */
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
501 
502 /* Folding */
503 
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
506 
507 static int fold_margin = FOLD_MARGIN;
508 
509 /* process default */
510 
511 static nkf_char
512 no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
513 {
514  fprintf(stderr,"nkf internal module connection failure.\n");
515  exit(EXIT_FAILURE);
516  return 0; /* LINT */
517 }
518 
519 static void
520 no_connection(nkf_char c2, nkf_char c1)
521 {
522  no_connection2(c2,c1,0);
523 }
524 
/*
 * Currently connected input and output converters.  Both start out
 * pointing at the failure stubs, so using them before a real converter is
 * installed aborts with an internal-error message.
 */
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527 
/* Further (c2, c1) output hooks, likewise initialised to no_connection. */
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
535 
536 /* static redirections */
537 
538 static void (*o_putc)(nkf_char c) = std_putc;
539 
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
542 
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
545 
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547 
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
550 
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554 
555 /* Global states */
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
559 
560 /* X0201 / X0208 conversion tables */
561 
562 /* X0201 kana conversion table */
563 /* 90-9F A0-DF */
564 static const unsigned char cv[]= {
565  0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566  0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567  0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568  0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569  0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570  0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571  0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572  0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573  0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574  0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575  0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576  0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577  0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578  0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579  0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580  0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581  0x00,0x00};
582 
583 
584 /* X0201 kana conversion table for dakuten */
585 /* 90-9F A0-DF */
586 static const unsigned char dv[]= {
587  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591  0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592  0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593  0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594  0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595  0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596  0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597  0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598  0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603  0x00,0x00};
604 
605 /* X0201 kana conversion table for han-dakuten */
606 /* 90-9F A0-DF */
607 static const unsigned char ev[]= {
608  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618  0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619  0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624  0x00,0x00};
625 
626 /* X0201 kana to X0213 conversion table for han-dakuten */
627 /* 90-9F A0-DF */
628 static const unsigned char ev_x0213[]= {
629  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634  0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635  0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636  0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637  0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638  0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645  0x00,0x00};
646 
647 
648 /* X0208 kigou conversion table */
649 /* 0x8140 - 0x819e */
650 static const unsigned char fv[] = {
651 
652  0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653  0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654  0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655  0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656  0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657  0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658  0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659  0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660  0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662  0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664 } ;
665 
666 
667 
668 static int option_mode = 0;
669 static int file_out_f = FALSE;
670 #ifdef OVERWRITE
671 static int overwrite_f = FALSE;
672 static int preserve_time_f = FALSE;
673 static int backup_f = FALSE;
674 static char *backup_suffix = "";
675 #endif
676 
677 static int eolmode_f = 0; /* CR, LF, CRLF */
678 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679 static nkf_char prev_cr = 0; /* CR or 0 */
680 #ifdef EASYWIN /*Easy Win */
681 static int end_check;
682 #endif /*Easy Win */
683 
/*
 * malloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte.  If the allocation
 * fails, the error is reported through perror() and the process exits
 * with EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size != 0 ? size : 1);

    if (!mem) {
        perror("can't malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}
699 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte.  If the reallocation
 * fails, the error is reported through perror() and the process exits
 * with EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size != 0 ? size : 1);

    if (!resized) {
        perror("can't realloc");
        exit(EXIT_FAILURE);
    }
    return resized;
}
713 
714 #define nkf_xfree(ptr) free(ptr)
715 
716 static int
717 nkf_str_caseeql(const char *src, const char *target)
718 {
719  int i;
720  for (i = 0; src[i] && target[i]; i++) {
721  if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722  }
723  if (src[i] || target[i]) return FALSE;
724  else return TRUE;
725 }
726 
727 static nkf_encoding*
728 nkf_enc_from_index(int idx)
729 {
730  if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731  return 0;
732  }
733  return &nkf_encoding_table[idx];
734 }
735 
736 static int
737 nkf_enc_find_index(const char *name)
738 {
739  int i;
740  if (name[0] == 'X' && *(name+1) == '-') name += 2;
741  for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742  if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743  return encoding_name_to_id_table[i].id;
744  }
745  }
746  return -1;
747 }
748 
749 static nkf_encoding*
750 nkf_enc_find(const char *name)
751 {
752  int idx = -1;
753  idx = nkf_enc_find_index(name);
754  if (idx < 0) return 0;
755  return nkf_enc_from_index(idx);
756 }
757 
/*
 * Accessors and predicates for nkf_encoding pointers.
 * enc must be a valid (non-NULL) pointer; the predicates evaluate it
 * more than once.
 */
758 #define nkf_enc_name(enc) (enc)->name
759 #define nkf_enc_to_index(enc) (enc)->id
760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* true when the base encoding is the ASCII or the ISO-2022-JP descriptor */
763 #define nkf_enc_asciicompat(enc) (\
764  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* true when the base encoding is UTF-8, UTF-16 or UTF-32 */
766 #define nkf_enc_unicode_p(enc) (\
767  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* true when the encoding id is CP50220, CP50221 or CP50222 */
770 #define nkf_enc_cp5022x_p(enc) (\
771  nkf_enc_to_index(enc) == CP50220 ||\
772  nkf_enc_to_index(enc) == CP50221 ||\
773  nkf_enc_to_index(enc) == CP50222)
774 
775 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the character-set name of the current locale / code page.
 *
 *  - HAVE_LANGINFO_H: whatever nl_langinfo(CODESET) reports.
 *  - __WIN32__:       "CP<n>" for the ANSI code page from GetACP().
 *  - __OS2__ (32bit): "Shift_JIS" for code pages 932/943, else "CP<n>".
 *  - otherwise (and OS/2 1.x): NULL.
 *
 * The Windows and OS/2 results live in a static buffer: the caller must
 * not free them, and a later call overwrites the previous result.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned UINT, so print it as unsigned */
    sprintf(buf, "CP%u", (unsigned int)GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
# else
    /* OS/2 32bit */
    static char buf[16];
    /* zero-initialised so a failed query never yields an indeterminate value */
    ULONG ulCP[1] = { 0 }, ulncp = 0;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#else
    return NULL;
#endif
}
803 
804 static nkf_encoding*
805 nkf_locale_encoding(void)
806 {
807  nkf_encoding *enc = 0;
808  const char *encname = nkf_locale_charmap();
809  if (encname)
810  enc = nkf_enc_find(encname);
811  return enc;
812 }
813 #endif /* DEFAULT_CODE_LOCALE */
814 
815 static nkf_encoding*
816 nkf_utf8_encoding(void)
817 {
818  return &nkf_encoding_table[UTF_8];
819 }
820 
821 static nkf_encoding*
822 nkf_default_encoding(void)
823 {
824  nkf_encoding *enc = 0;
825 #ifdef DEFAULT_CODE_LOCALE
826  enc = nkf_locale_encoding();
827 #elif defined(DEFAULT_ENCIDX)
828  enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829 #endif
830  if (!enc) enc = nkf_utf8_encoding();
831  return enc;
832 }
833 
834 typedef struct {
835  long capa;
836  long len;
838 } nkf_buf_t;
839 
840 static nkf_buf_t *
841 nkf_buf_new(int length)
842 {
843  nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844  buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845  buf->capa = length;
846  buf->len = 0;
847  return buf;
848 }
849 
/* Compiled out: buffers created by nkf_buf_new() are currently never released here. */
850 #if 0
/* Free the element storage and then the buffer header itself. */
851 static void
852 nkf_buf_dispose(nkf_buf_t *buf)
853 {
854  nkf_xfree(buf->ptr);
855  nkf_xfree(buf);
856 }
857 #endif
858 
/* Number of elements currently stored in buf / whether that number is zero. */
859 #define nkf_buf_length(buf) ((buf)->len)
860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
861 
862 static nkf_char
863 nkf_buf_at(nkf_buf_t *buf, int index)
864 {
865  assert(index <= buf->len);
866  return buf->ptr[index];
867 }
868 
869 static void
870 nkf_buf_clear(nkf_buf_t *buf)
871 {
872  buf->len = 0;
873 }
874 
875 static void
876 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
877 {
878  if (buf->capa <= buf->len) {
879  exit(EXIT_FAILURE);
880  }
881  buf->ptr[buf->len++] = c;
882 }
883 
884 static nkf_char
885 nkf_buf_pop(nkf_buf_t *buf)
886 {
887  assert(!nkf_buf_empty_p(buf));
888  return buf->ptr[--buf->len];
889 }
890 
891 /* Normalization Form C */
892 #ifndef PERL_XS
893 #ifdef WIN32DLL
894 #define fprintf dllprintf
895 #endif
896 
897 static void
898 version(void)
899 {
900  fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901 }
902 
903 static void
904 usage(void)
905 {
906  fprintf(HELP_OUTPUT,
907  "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908 #ifdef UTF8_OUTPUT_ENABLE
909  " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910  " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911 #else
912 #endif
913 #ifdef UTF8_INPUT_ENABLE
914  " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915  " UTF option is -W[8,[16,32][B,L]]\n"
916 #else
917  " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918 #endif
919  );
920  fprintf(HELP_OUTPUT,
921  " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922  " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923  " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924  );
925  fprintf(HELP_OUTPUT,
926  " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927  " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928  " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929  " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930  );
931  fprintf(HELP_OUTPUT,
932  " O Output to File (DEFAULT 'nkf.out')\n"
933  " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934  );
935  fprintf(HELP_OUTPUT,
936  " --ic=<encoding> Specify the input encoding\n"
937  " --oc=<encoding> Specify the output encoding\n"
938  " --hiragana --katakana Hiragana/Katakana Conversion\n"
939  " --katakana-hiragana Converts each other\n"
940  );
941  fprintf(HELP_OUTPUT,
942 #ifdef INPUT_OPTION
943  " --{cap, url}-input Convert hex after ':' or '%%'\n"
944 #endif
945 #ifdef NUMCHAR_OPTION
946  " --numchar-input Convert Unicode Character Reference\n"
947 #endif
948 #ifdef UTF8_INPUT_ENABLE
949  " --fb-{skip, html, xml, perl, java, subchar}\n"
950  " Specify unassigned character's replacement\n"
951 #endif
952  );
953  fprintf(HELP_OUTPUT,
954 #ifdef OVERWRITE
955  " --in-place[=SUF] Overwrite original files\n"
956  " --overwrite[=SUF] Preserve timestamp of original files\n"
957 #endif
958  " -g --guess Guess the input code\n"
959  " -v --version Print the version\n"
960  " --help/-V Print this help / configuration\n"
961  );
962  version();
963 }
964 
/*
 * Print a summary of the compile-time configuration to HELP_OUTPUT:
 * build date/time, default output encoding, default end-of-line,
 * MIME-decoding default, JIS X 0201 Katakana conversion default and the
 * help output stream.
 */
965 static void
966 show_configuration(void)
967 {
968  fprintf(HELP_OUTPUT,
969  "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
970  " Compile-time options:\n"
971  " Compiled at: " __DATE__ " " __TIME__ "\n"
972  );
973  fprintf(HELP_OUTPUT,
974  " Default output encoding: "
975 #ifdef DEFAULT_CODE_LOCALE
976  "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
977 #elif defined(DEFAULT_ENCIDX)
978  "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
979 #else
980  "NONE\n"
981 #endif
982  );
983  fprintf(HELP_OUTPUT,
984  " Default output end of line: "
985 #if DEFAULT_NEWLINE == CR
986  "CR"
987 #elif DEFAULT_NEWLINE == CRLF
988  "CRLF"
989 #else
990  "LF"
991 #endif
992  "\n"
993  " Decode MIME encoded string: "
/* NOTE(review): listing line 994 is missing here; the #else/#endif below
 * need an opening conditional, presumably `#if MIME_DECODE_DEFAULT`
 * (the macro that initialises mime_f) -- confirm against the real file. */
995  "ON"
996 #else
997  "OFF"
998 #endif
999  "\n"
1000  " Convert JIS X 0201 Katakana: "
1001 #if X0201_DEFAULT
1002  "ON"
1003 #else
1004  "OFF"
1005 #endif
1006  "\n"
1007  " --help, --version output: "
/* NOTE(review): HELP_OUTPUT_HELP_OUTPUT is not defined anywhere in this
 * listing; the name and the "HELP_OUTPUT" label look like a
 * search-and-replace artifact -- confirm the intended macro and label. */
1008 #if HELP_OUTPUT_HELP_OUTPUT
1009  "HELP_OUTPUT"
1010 #else
1011  "STDOUT"
1012 #endif
1013  "\n");
1014 }
1015 #endif /*PERL_XS*/
1016 
1017 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from `suffix`.
 *
 * If suffix contains no '*', the result is filename followed by suffix.
 * Otherwise every '*' in suffix is replaced by filename and the other
 * characters are copied unchanged.
 *
 * Returns a newly allocated string (via nkf_xmalloc, which exits on
 * failure); the caller owns it and releases it with nkf_xfree().
 *
 * Lengths are kept in size_t and pieces are copied with memcpy(), so long
 * names are neither truncated to int nor rescanned on every substitution.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
        memcpy(backup_filename, filename, filename_length);
        /* +1 copies the terminating NUL of suffix as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* each one-byte '*' is replaced by filename_length bytes */
    backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
                                  + asterisk_count * filename_length + 1);
    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1052 #endif
1053 
1054 #ifdef UTF8_INPUT_ENABLE
1055 static void
1056 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1057 {
1058  int shift = 20;
1059  c &= VALUE_MASK;
1060  while(shift >= 0){
1061  if(c >= NKF_INT32_C(1)<<shift){
1062  while(shift >= 0){
1063  (*f)(0, bin2hex(c>>shift));
1064  shift -= 4;
1065  }
1066  }else{
1067  shift -= 4;
1068  }
1069  }
1070  return;
1071 }
1072 
1073 static void
1074 encode_fallback_html(nkf_char c)
1075 {
1076  (*oconv)(0, '&');
1077  (*oconv)(0, '#');
1078  c &= VALUE_MASK;
1079  if(c >= NKF_INT32_C(1000000))
1080  (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081  if(c >= NKF_INT32_C(100000))
1082  (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083  if(c >= 10000)
1084  (*oconv)(0, 0x30+(c/10000 )%10);
1085  if(c >= 1000)
1086  (*oconv)(0, 0x30+(c/1000 )%10);
1087  if(c >= 100)
1088  (*oconv)(0, 0x30+(c/100 )%10);
1089  if(c >= 10)
1090  (*oconv)(0, 0x30+(c/10 )%10);
1091  if(c >= 0)
1092  (*oconv)(0, 0x30+ c %10);
1093  (*oconv)(0, ';');
1094  return;
1095 }
1096 
1097 static void
1098 encode_fallback_xml(nkf_char c)
1099 {
1100  (*oconv)(0, '&');
1101  (*oconv)(0, '#');
1102  (*oconv)(0, 'x');
1103  nkf_each_char_to_hex(oconv, c);
1104  (*oconv)(0, ';');
1105  return;
1106 }
1107 
1108 static void
1109 encode_fallback_java(nkf_char c)
1110 {
1111  (*oconv)(0, '\\');
1112  c &= VALUE_MASK;
1113  if(!nkf_char_unicode_bmp_p(c)){
1114  (*oconv)(0, 'U');
1115  (*oconv)(0, '0');
1116  (*oconv)(0, '0');
1117  (*oconv)(0, bin2hex(c>>20));
1118  (*oconv)(0, bin2hex(c>>16));
1119  }else{
1120  (*oconv)(0, 'u');
1121  }
1122  (*oconv)(0, bin2hex(c>>12));
1123  (*oconv)(0, bin2hex(c>> 8));
1124  (*oconv)(0, bin2hex(c>> 4));
1125  (*oconv)(0, bin2hex(c ));
1126  return;
1127 }
1128 
1129 static void
1130 encode_fallback_perl(nkf_char c)
1131 {
1132  (*oconv)(0, '\\');
1133  (*oconv)(0, 'x');
1134  (*oconv)(0, '{');
1135  nkf_each_char_to_hex(oconv, c);
1136  (*oconv)(0, '}');
1137  return;
1138 }
1139 
1140 static void
1141 encode_fallback_subchar(nkf_char c)
1142 {
1143  c = unicode_subchar;
1144  (*oconv)((c>>8)&0xFF, c&0xFF);
1145  return;
1146 }
1147 #endif
1148 
/*
 * Table of long ("--name") command-line options.
 *
 *   name  - the long option spelling; several entries end in '=',
 *           presumably marking options that take a value (confirm against
 *           the option parser, which is not part of this excerpt).
 *   alias - a string of short-option letters, or "" for entries that have
 *           no short equivalent in this table (presumably handled by
 *           dedicated code in the parser -- TODO confirm).
 *
 * Groups of entries are compiled in only when the matching feature macro
 * (X0212_ENABLE, UTF8_OUTPUT_ENABLE, UTF8_INPUT_ENABLE, ...) is defined.
 */
1149 static const struct {
1150  const char *name;
1151  const char *alias;
1152 } long_option[] = {
1153  {"ic=", ""},
1154  {"oc=", ""},
1155  {"base64","jMB"},
1156  {"euc","e"},
1157  {"euc-input","E"},
1158  {"fj","jm"},
1159  {"help",""},
1160  {"jis","j"},
1161  {"jis-input","J"},
1162  {"mac","sLm"},
1163  {"mime","jM"},
1164  {"mime-input","m"},
1165  {"msdos","sLw"},
1166  {"sjis","s"},
1167  {"sjis-input","S"},
1168  {"unix","eLu"},
1169  {"version","v"},
1170  {"windows","sLw"},
1171  {"hiragana","h1"},
1172  {"katakana","h2"},
1173  {"katakana-hiragana","h3"},
1174  {"guess=", ""},
1175  {"guess", "g2"},
1176  {"cp932", ""},
1177  {"no-cp932", ""},
1178 #ifdef X0212_ENABLE
1179  {"x0212", ""},
1180 #endif
1181 #ifdef UTF8_OUTPUT_ENABLE
1182  {"utf8", "w"},
1183  {"utf16", "w16"},
1184  {"ms-ucs-map", ""},
1185  {"fb-skip", ""},
1186  {"fb-html", ""},
1187  {"fb-xml", ""},
1188  {"fb-perl", ""},
1189  {"fb-java", ""},
1190  {"fb-subchar", ""},
1191  {"fb-subchar=", ""},
1192 #endif
1193 #ifdef UTF8_INPUT_ENABLE
1194  {"utf8-input", "W"},
1195  {"utf16-input", "W16"},
1196  {"no-cp932ext", ""},
1197  {"no-best-fit-chars",""},
1198 #endif
1199 #ifdef UNICODE_NORMALIZATION
1200  {"utf8mac-input", ""},
1201 #endif
1202 #ifdef OVERWRITE
1203  {"overwrite", ""},
1204  {"overwrite=", ""},
1205  {"in-place", ""},
1206  {"in-place=", ""},
1207 #endif
1208 #ifdef INPUT_OPTION
1209  {"cap-input", ""},
1210  {"url-input", ""},
1211 #endif
1212 #ifdef NUMCHAR_OPTION
1213  {"numchar-input", ""},
1214 #endif
1215 #ifdef CHECK_OPTION
1216  {"no-output", ""},
1217  {"debug", ""},
1218 #endif
1219 #ifdef SHIFTJIS_CP932
1220  {"cp932inv", ""},
1221 #endif
1222 #ifdef EXEC_IO
1223  {"exec-in", ""},
1224  {"exec-out", ""},
1225 #endif
1226  {"prefix=", ""},
1227 };
1228 
1229 static void
1230 set_input_encoding(nkf_encoding *enc)
1231 {
1232  switch (nkf_enc_to_index(enc)) {
1233  case ISO_8859_1:
1234  iso8859_f = TRUE;
1235  break;
1236  case CP50221:
1237  case CP50222:
1238  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1239  case CP50220:
1240 #ifdef SHIFTJIS_CP932
1241  cp51932_f = TRUE;
1242 #endif
1243 #ifdef UTF8_OUTPUT_ENABLE
1244  ms_ucs_map_f = UCS_MAP_CP932;
1245 #endif
1246  break;
1247  case ISO_2022_JP_1:
1248  x0212_f = TRUE;
1249  break;
1250  case ISO_2022_JP_3:
1251  x0212_f = TRUE;
1252  x0213_f = TRUE;
1253  break;
1254  case ISO_2022_JP_2004:
1255  x0212_f = TRUE;
1256  x0213_f = TRUE;
1257  break;
1258  case SHIFT_JIS:
1259  break;
1260  case WINDOWS_31J:
1261  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1262 #ifdef SHIFTJIS_CP932
1263  cp51932_f = TRUE;
1264 #endif
1265 #ifdef UTF8_OUTPUT_ENABLE
1266  ms_ucs_map_f = UCS_MAP_CP932;
1267 #endif
1268  break;
1269  break;
1270  case CP10001:
1271 #ifdef SHIFTJIS_CP932
1272  cp51932_f = TRUE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1275  ms_ucs_map_f = UCS_MAP_CP10001;
1276 #endif
1277  break;
1278  case EUC_JP:
1279  break;
1280  case EUCJP_NKF:
1281  break;
1282  case CP51932:
1283  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1284 #ifdef SHIFTJIS_CP932
1285  cp51932_f = TRUE;
1286 #endif
1287 #ifdef UTF8_OUTPUT_ENABLE
1288  ms_ucs_map_f = UCS_MAP_CP932;
1289 #endif
1290  break;
1291  case EUCJP_MS:
1292  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1293 #ifdef SHIFTJIS_CP932
1294  cp51932_f = FALSE;
1295 #endif
1296 #ifdef UTF8_OUTPUT_ENABLE
1297  ms_ucs_map_f = UCS_MAP_MS;
1298 #endif
1299  break;
1300  case EUCJP_ASCII:
1301  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1302 #ifdef SHIFTJIS_CP932
1303  cp51932_f = FALSE;
1304 #endif
1305 #ifdef UTF8_OUTPUT_ENABLE
1306  ms_ucs_map_f = UCS_MAP_ASCII;
1307 #endif
1308  break;
1309  case SHIFT_JISX0213:
1310  case SHIFT_JIS_2004:
1311  x0213_f = TRUE;
1312 #ifdef SHIFTJIS_CP932
1313  cp51932_f = FALSE;
1314  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1315 #endif
1316  break;
1317  case EUC_JISX0213:
1318  case EUC_JIS_2004:
1319  x0213_f = TRUE;
1320 #ifdef SHIFTJIS_CP932
1321  cp51932_f = FALSE;
1322 #endif
1323  break;
1324 #ifdef UTF8_INPUT_ENABLE
1325 #ifdef UNICODE_NORMALIZATION
1326  case UTF8_MAC:
1327  nfc_f = TRUE;
1328  break;
1329 #endif
1330  case UTF_16:
1331  case UTF_16BE:
1332  case UTF_16BE_BOM:
1333  input_endian = ENDIAN_BIG;
1334  break;
1335  case UTF_16LE:
1336  case UTF_16LE_BOM:
1337  input_endian = ENDIAN_LITTLE;
1338  break;
1339  case UTF_32:
1340  case UTF_32BE:
1341  case UTF_32BE_BOM:
1342  input_endian = ENDIAN_BIG;
1343  break;
1344  case UTF_32LE:
1345  case UTF_32LE_BOM:
1346  input_endian = ENDIAN_LITTLE;
1347  break;
1348 #endif
1349  }
1350 }
1351 
1352 static void
1353 set_output_encoding(nkf_encoding *enc)
1354 {
1355  switch (nkf_enc_to_index(enc)) {
1356  case CP50220:
1357 #ifdef SHIFTJIS_CP932
1358  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1359 #endif
1360 #ifdef UTF8_OUTPUT_ENABLE
1361  ms_ucs_map_f = UCS_MAP_CP932;
1362 #endif
1363  break;
1364  case CP50221:
1365  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1366 #ifdef SHIFTJIS_CP932
1367  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1368 #endif
1369 #ifdef UTF8_OUTPUT_ENABLE
1370  ms_ucs_map_f = UCS_MAP_CP932;
1371 #endif
1372  break;
1373  case ISO_2022_JP:
1374 #ifdef SHIFTJIS_CP932
1375  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376 #endif
1377  break;
1378  case ISO_2022_JP_1:
1379  x0212_f = TRUE;
1380 #ifdef SHIFTJIS_CP932
1381  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1382 #endif
1383  break;
1384  case ISO_2022_JP_3:
1385  case ISO_2022_JP_2004:
1386  x0212_f = TRUE;
1387  x0213_f = TRUE;
1388 #ifdef SHIFTJIS_CP932
1389  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390 #endif
1391  break;
1392  case SHIFT_JIS:
1393  break;
1394  case WINDOWS_31J:
1395  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1396 #ifdef UTF8_OUTPUT_ENABLE
1397  ms_ucs_map_f = UCS_MAP_CP932;
1398 #endif
1399  break;
1400  case CP10001:
1401 #ifdef UTF8_OUTPUT_ENABLE
1402  ms_ucs_map_f = UCS_MAP_CP10001;
1403 #endif
1404  break;
1405  case EUC_JP:
1406  x0212_f = TRUE;
1407 #ifdef SHIFTJIS_CP932
1408  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411  ms_ucs_map_f = UCS_MAP_ASCII;
1412 #endif
1413  break;
1414  case EUCJP_NKF:
1415  x0212_f = FALSE;
1416 #ifdef SHIFTJIS_CP932
1417  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1418 #endif
1419 #ifdef UTF8_OUTPUT_ENABLE
1420  ms_ucs_map_f = UCS_MAP_ASCII;
1421 #endif
1422  break;
1423  case CP51932:
1424  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1425 #ifdef SHIFTJIS_CP932
1426  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1427 #endif
1428 #ifdef UTF8_OUTPUT_ENABLE
1429  ms_ucs_map_f = UCS_MAP_CP932;
1430 #endif
1431  break;
1432  case EUCJP_MS:
1433  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1434  x0212_f = TRUE;
1435 #ifdef UTF8_OUTPUT_ENABLE
1436  ms_ucs_map_f = UCS_MAP_MS;
1437 #endif
1438  break;
1439  case EUCJP_ASCII:
1440  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1441  x0212_f = TRUE;
1442 #ifdef UTF8_OUTPUT_ENABLE
1443  ms_ucs_map_f = UCS_MAP_ASCII;
1444 #endif
1445  break;
1446  case SHIFT_JISX0213:
1447  case SHIFT_JIS_2004:
1448  x0213_f = TRUE;
1449 #ifdef SHIFTJIS_CP932
1450  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1451 #endif
1452  break;
1453  case EUC_JISX0213:
1454  case EUC_JIS_2004:
1455  x0212_f = TRUE;
1456  x0213_f = TRUE;
1457 #ifdef SHIFTJIS_CP932
1458  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459 #endif
1460  break;
1461 #ifdef UTF8_OUTPUT_ENABLE
1462  case UTF_8_BOM:
1463  output_bom_f = TRUE;
1464  break;
1465  case UTF_16:
1466  case UTF_16BE_BOM:
1467  output_bom_f = TRUE;
1468  break;
1469  case UTF_16LE:
1470  output_endian = ENDIAN_LITTLE;
1471  output_bom_f = FALSE;
1472  break;
1473  case UTF_16LE_BOM:
1474  output_endian = ENDIAN_LITTLE;
1475  output_bom_f = TRUE;
1476  break;
1477  case UTF_32:
1478  case UTF_32BE_BOM:
1479  output_bom_f = TRUE;
1480  break;
1481  case UTF_32LE:
1482  output_endian = ENDIAN_LITTLE;
1483  output_bom_f = FALSE;
1484  break;
1485  case UTF_32LE_BOM:
1486  output_endian = ENDIAN_LITTLE;
1487  output_bom_f = TRUE;
1488  break;
1489 #endif
1490  }
1491 }
1492 
1493 static struct input_code*
1494 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1495 {
1496  if (iconv_func){
1497  struct input_code *p = input_code_list;
1498  while (p->name){
1499  if (iconv_func == p->iconv_func){
1500  return p;
1501  }
1502  p++;
1503  }
1504  }
1505  return 0;
1506 }
1507 
1508 static void
1509 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1510 {
1511 #ifdef INPUT_CODE_FIX
1512  if (f || !input_encoding)
1513 #endif
1514  if (estab_f != f){
1515  estab_f = f;
1516  }
1517 
1518  if (iconv_func
1519 #ifdef INPUT_CODE_FIX
1520  && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1521 #endif
1522  ){
1523  iconv = iconv_func;
1524  }
1525 #ifdef CHECK_OPTION
1526  if (estab_f && iconv_for_check != iconv){
1527  struct input_code *p = find_inputcode_byfunc(iconv);
1528  if (p){
1529  set_input_codename(p->name);
1530  debug(p->name);
1531  }
1532  iconv_for_check = iconv;
1533  }
1534 #endif
1535 }
1536 
1537 #ifdef X0212_ENABLE
1538 static nkf_char
1539 x0212_shift(nkf_char c)
1540 {
1541  nkf_char ret = c;
1542  c &= 0x7f;
1543  if (is_eucg3(ret)){
1544  if (0x75 <= c && c <= 0x7f){
1545  ret = c + (0x109 - 0x75);
1546  }
1547  }else{
1548  if (0x75 <= c && c <= 0x7f){
1549  ret = c + (0x113 - 0x75);
1550  }
1551  }
1552  return ret;
1553 }
1554 
1555 
1556 static nkf_char
1557 x0212_unshift(nkf_char c)
1558 {
1559  nkf_char ret = c;
1560  if (0x7f <= c && c <= 0x88){
1561  ret = c + (0x75 - 0x7f);
1562  }else if (0x89 <= c && c <= 0x92){
1563  ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1564  }
1565  return ret;
1566 }
1567 #endif /* X0212_ENABLE */
1568 
1569 static int
1570 is_x0213_2_in_x0212(nkf_char c1)
1571 {
1572  static const char x0213_2_table[] =
1573  {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1574  int ku = c1 - 0x20;
1575  if (ku <= 15)
1576  return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1577  if (78 <= ku && ku <= 94)
1578  return 1;
1579  return 0;
1580 }
1581 
/*
 * Convert the code pair (c2, c1) to a two-byte result (going by the
 * function and table names: EUC/JIS -> Shift_JIS).
 *
 * The first result byte is stored in *p2 and the second in *p1; either
 * pointer may be null, in which case that byte is simply not stored.
 * Returns 0 on success and 1 when the pair cannot be converted.
 */
1582 static nkf_char
1583 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1584 {
1585  nkf_char ndx;
1586  if (is_eucg3(c2)){
1587  ndx = c2 & 0x7f;
 /* x0213 mode: rows accepted by is_x0213_2_in_x0212() are computed arithmetically */
1588  if (x0213_f && is_x0213_2_in_x0212(ndx)){
1589  if((0x21 <= ndx && ndx <= 0x2F)){
1590  if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1591  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1592  return 0;
1593  }else if(0x6E <= ndx && ndx <= 0x7E){
1594  if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1595  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1596  return 0;
1597  }
1598  return 1;
1599  }
1600 #ifdef X0212_ENABLE
 /* otherwise try the x0212_shiftjis table; a zero entry means "no mapping" */
1601  else if(nkf_isgraph(ndx)){
1602  nkf_char val = 0;
1603  const unsigned short *ptr;
1604  ptr = x0212_shiftjis[ndx - 0x21];
1605  if (ptr){
 /* NOTE(review): the index is negative when (c1 & 0x7f) < 0x21 -- confirm callers exclude that */
1606  val = ptr[(c1 & 0x7f) - 0x21];
1607  }
1608  if (val){
1609  c2 = val >> 8;
1610  c1 = val & 0xff;
1611  if (p2) *p2 = c2;
1612  if (p1) *p1 = c1;
1613  return 0;
1614  }
 /* no table entry: remap the lead code and continue with the generic formula below */
1615  c2 = x0212_shift(c2);
1616  }
1617 #endif /* X0212_ENABLE */
1618  }
 /* generic two-byte formula; lead codes above 0x7F are rejected */
1619  if(0x7F < c2) return 1;
1620  if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1621  if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1622  return 0;
1623 }
1624 
/*
 * Convert the byte pair (c2, c1) to a code pair (going by the function and
 * table names: Shift_JIS -> EUC/JIS), storing the result in *p2 / *p1 when
 * those pointers are non-null.
 *
 * Returns 1 when c1 is above 0xFC, otherwise 0.
 */
1625 static nkf_char
1626 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1627 {
1628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1629  nkf_char val;
1630 #endif
 /* indexed by [c2 - 0xF0][c1 > 0x9E]; OR-ed with 0x20 to form the result code below */
1631  static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1632  if (0xFC < c1) return 1;
1633 #ifdef SHIFTJIS_CP932
 /* table-driven substitution of (c2, c1); a zero table entry leaves the pair unchanged */
 /* NOTE(review): the tables are indexed with c1 - 0x40; presumably callers guarantee c1 >= 0x40 -- confirm */
1634  if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
1635  val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1636  if (val){
1637  c2 = val >> 8;
1638  c1 = val & 0xff;
1639  }
1640  }
1641  if (cp932inv_f
1642  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1643  val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1644  if (val){
1645  c2 = val >> 8;
1646  c1 = val & 0xff;
1647  }
1648  }
1649 #endif /* SHIFTJIS_CP932 */
1650 #ifdef X0212_ENABLE
 /* a non-zero shiftjis_x0212 entry is the final result; values above 0x7FFF get the PREFIX_EUCG3 marker */
1651  if (!x0213_f && is_ibmext_in_sjis(c2)){
1652  val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1653  if (val){
1654  if (val > 0x7FFF){
1655  c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1656  c1 = val & 0xff;
1657  }else{
1658  c2 = val >> 8;
1659  c1 = val & 0xff;
1660  }
1661  if (p2) *p2 = c2;
1662  if (p1) *p1 = c1;
1663  return 0;
1664  }
1665  }
1666 #endif
 /* arithmetic conversion for lead bytes >= 0x80 */
1667  if(c2 >= 0x80){
1668  if(x0213_f && c2 >= 0xF0){
1669  if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1670  c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1671  }else{ /* 78<=k<=94 */
1672  c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1673  if (0x9E < c1) c2++;
1674  }
1675  }else{
1676 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1677 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1678  c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1679  if (0x9E < c1) c2++;
1680  }
 /* trail byte: the offset depends on which half of the range c1 lies in */
1681  if (c1 < 0x9F)
1682  c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1683  else {
1684  c1 = c1 - 0x7E;
1685  }
1686  }
1687 
1688 #ifdef X0212_ENABLE
1689  c2 = x0212_unshift(c2);
1690 #endif
1691  if (p2) *p2 = c2;
1692  if (p1) *p1 = c1;
1693  return 0;
1694 }
1695 
1696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1697 static void
1698 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1699 {
1700  val &= VALUE_MASK;
1701  if (val < 0x80){
1702  *p1 = val;
1703  *p2 = 0;
1704  *p3 = 0;
1705  *p4 = 0;
1706  }else if (val < 0x800){
1707  *p1 = 0xc0 | (val >> 6);
1708  *p2 = 0x80 | (val & 0x3f);
1709  *p3 = 0;
1710  *p4 = 0;
1711  } else if (nkf_char_unicode_bmp_p(val)) {
1712  *p1 = 0xe0 | (val >> 12);
1713  *p2 = 0x80 | ((val >> 6) & 0x3f);
1714  *p3 = 0x80 | ( val & 0x3f);
1715  *p4 = 0;
1716  } else if (nkf_char_unicode_value_p(val)) {
1717  *p1 = 0xf0 | (val >> 18);
1718  *p2 = 0x80 | ((val >> 12) & 0x3f);
1719  *p3 = 0x80 | ((val >> 6) & 0x3f);
1720  *p4 = 0x80 | ( val & 0x3f);
1721  } else {
1722  *p1 = 0;
1723  *p2 = 0;
1724  *p3 = 0;
1725  *p4 = 0;
1726  }
1727 }
1728 
1729 static nkf_char
1730 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1731 {
1732  nkf_char wc;
1733  if (c1 <= 0x7F) {
1734  /* single byte */
1735  wc = c1;
1736  }
1737  else if (c1 <= 0xC1) {
1738  /* trail byte or invalid */
1739  return -1;
1740  }
1741  else if (c1 <= 0xDF) {
1742  /* 2 bytes */
1743  wc = (c1 & 0x1F) << 6;
1744  wc |= (c2 & 0x3F);
1745  }
1746  else if (c1 <= 0xEF) {
1747  /* 3 bytes */
1748  wc = (c1 & 0x0F) << 12;
1749  wc |= (c2 & 0x3F) << 6;
1750  wc |= (c3 & 0x3F);
1751  }
1752  else if (c2 <= 0xF4) {
1753  /* 4 bytes */
1754  wc = (c1 & 0x0F) << 18;
1755  wc |= (c2 & 0x3F) << 12;
1756  wc |= (c3 & 0x3F) << 6;
1757  wc |= (c4 & 0x3F);
1758  }
1759  else {
1760  return -1;
1761  }
1762  return wc;
1763 }
1764 #endif
1765 
1766 #ifdef UTF8_INPUT_ENABLE
1767 static int
1768 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1769  const unsigned short *const *pp, nkf_char psize,
1770  nkf_char *p2, nkf_char *p1)
1771 {
1772  nkf_char c2;
1773  const unsigned short *p;
1774  unsigned short val;
1775 
1776  if (pp == 0) return 1;
1777 
1778  c1 -= 0x80;
1779  if (c1 < 0 || psize <= c1) return 1;
1780  p = pp[c1];
1781  if (p == 0) return 1;
1782 
1783  c0 -= 0x80;
1784  if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1785  val = p[c0];
1786  if (val == 0) return 1;
1787  if (no_cp932ext_f && (
1788  (val>>8) == 0x2D || /* NEC special characters */
1789  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1790  )) return 1;
1791 
1792  c2 = val >> 8;
1793  if (val > 0x7FFF){
1794  c2 &= 0x7f;
1795  c2 |= PREFIX_EUCG3;
1796  }
1797  if (c2 == SO) c2 = JIS_X_0201_1976_K;
1798  c1 = val & 0xFF;
1799  if (p2) *p2 = c2;
1800  if (p1) *p1 = c1;
1801  return 0;
1802 }
1803 
1804 static int
1805 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1806 {
1807  const unsigned short *const *pp;
1808  const unsigned short *const *const *ppp;
1809  static const char no_best_fit_chars_table_C2[] =
1810  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812  1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813  0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814  static const char no_best_fit_chars_table_C2_ms[] =
1815  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817  1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818  0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819  static const char no_best_fit_chars_table_932_C2[] =
1820  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822  1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823  0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824  static const char no_best_fit_chars_table_932_C3[] =
1825  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1829  nkf_char ret = 0;
1830 
1831  if(c2 < 0x80){
1832  *p2 = 0;
1833  *p1 = c2;
1834  }else if(c2 < 0xe0){
1835  if(no_best_fit_chars_f){
1836  if(ms_ucs_map_f == UCS_MAP_CP932){
1837  switch(c2){
1838  case 0xC2:
1839  if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1840  break;
1841  case 0xC3:
1842  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1843  break;
1844  }
1845  }else if(!cp932inv_f){
1846  switch(c2){
1847  case 0xC2:
1848  if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1849  break;
1850  case 0xC3:
1851  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1852  break;
1853  }
1854  }else if(ms_ucs_map_f == UCS_MAP_MS){
1855  if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1856  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1857  switch(c2){
1858  case 0xC2:
1859  switch(c1){
1860  case 0xA2:
1861  case 0xA3:
1862  case 0xA5:
1863  case 0xA6:
1864  case 0xAC:
1865  case 0xAF:
1866  case 0xB8:
1867  return 1;
1868  }
1869  break;
1870  }
1871  }
1872  }
1873  pp =
1874  ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1875  ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1876  ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1877  x0213_f ? utf8_to_euc_2bytes_x0213 :
1879  ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1880  }else if(c0 < 0xF0){
1881  if(no_best_fit_chars_f){
1882  if(ms_ucs_map_f == UCS_MAP_CP932){
1883  if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1884  }else if(ms_ucs_map_f == UCS_MAP_MS){
1885  switch(c2){
1886  case 0xE2:
1887  switch(c1){
1888  case 0x80:
1889  if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1890  break;
1891  case 0x88:
1892  if(c0 == 0x92) return 1;
1893  break;
1894  }
1895  break;
1896  case 0xE3:
1897  if(c1 == 0x80 || c0 == 0x9C) return 1;
1898  break;
1899  }
1900  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1901  switch(c2){
1902  case 0xE3:
1903  switch(c1){
1904  case 0x82:
1905  if(c0 == 0x94) return 1;
1906  break;
1907  case 0x83:
1908  if(c0 == 0xBB) return 1;
1909  break;
1910  }
1911  break;
1912  }
1913  }else{
1914  switch(c2){
1915  case 0xE2:
1916  switch(c1){
1917  case 0x80:
1918  if(c0 == 0x95) return 1;
1919  break;
1920  case 0x88:
1921  if(c0 == 0xA5) return 1;
1922  break;
1923  }
1924  break;
1925  case 0xEF:
1926  switch(c1){
1927  case 0xBC:
1928  if(c0 == 0x8D) return 1;
1929  break;
1930  case 0xBD:
1931  if(c0 == 0x9E && !cp932inv_f) return 1;
1932  break;
1933  case 0xBF:
1934  if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1935  break;
1936  }
1937  break;
1938  }
1939  }
1940  }
1941  ppp =
1942  ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1943  ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1944  ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1945  x0213_f ? utf8_to_euc_3bytes_x0213 :
1947  ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1948  }else return -1;
1949 #ifdef SHIFTJIS_CP932
1950  if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1951  nkf_char s2, s1;
1952  if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1953  s2e_conv(s2, s1, p2, p1);
1954  }else{
1955  ret = 1;
1956  }
1957  }
1958 #endif
1959  return ret;
1960 }
1961 
1962 #ifdef UTF8_OUTPUT_ENABLE
1963 #define X0213_SURROGATE_FIND(tbl, size, euc) do { \
1964  int i; \
1965  for (i = 0; i < size; i++) \
1966  if (tbl[i][0] == euc) { \
1967  low = tbl[i][2]; \
1968  break; \
1969  } \
1970  } while (0)
1971 
1972 static nkf_char
1973 e2w_conv(nkf_char c2, nkf_char c1)
1974 {
1975  const unsigned short *p;
1976 
1977  if (c2 == JIS_X_0201_1976_K) {
1978  if (ms_ucs_map_f == UCS_MAP_CP10001) {
1979  switch (c1) {
1980  case 0x20:
1981  return 0xA0;
1982  case 0x7D:
1983  return 0xA9;
1984  }
1985  }
1986  p = euc_to_utf8_1byte;
1987 #ifdef X0212_ENABLE
1988  } else if (is_eucg3(c2)){
1989  if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1990  return 0xA6;
1991  }
1992  c2 = (c2&0x7f) - 0x21;
1993  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1994  p =
1995  x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
1997  else
1998  return 0;
1999 #endif
2000  } else {
2001  c2 &= 0x7f;
2002  c2 = (c2&0x7f) - 0x21;
2003  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2004  p =
2005  x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
2006  ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
2007  ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
2009  else
2010  return 0;
2011  }
2012  if (!p) return 0;
2013  c1 = (c1 & 0x7f) - 0x21;
2014  if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2015  nkf_char val = p[c1];
2016  if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2017  nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2018  nkf_char low = 0;
2019  if (p==x0212_to_utf8_2bytes_x0213[c2]) {
2020  X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
2021  } else {
2022  X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
2023  }
2024  if (!low) return 0;
2025  return UTF16_TO_UTF32(val, low);
2026  } else {
2027  return val;
2028  }
2029  }
2030  return 0;
2031 }
2032 
2033 static nkf_char
2034 e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
2035 {
2036  nkf_char euc;
2037  int i;
2038  for (i = 0; i < sizeof_x0213_combining_chars; i++)
2039  if (x0213_combining_chars[i] == comb)
2040  break;
2041  if (i >= sizeof_x0213_combining_chars)
2042  return 0;
2043  euc = (c2&0x7f)<<8 | (c1&0x7f);
2044  for (i = 0; i < sizeof_x0213_combining_table; i++)
2045  if (x0213_combining_table[i][0] == euc)
2046  return x0213_combining_table[i][1];
2047  return 0;
2048 }
2049 #endif
2050 
2051 static nkf_char
2052 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2053 {
2054  nkf_char ret = 0;
2055 
2056  if (!c1){
2057  *p2 = 0;
2058  *p1 = c2;
2059  }else if (0xc0 <= c2 && c2 <= 0xef) {
2060  ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2061 #ifdef NUMCHAR_OPTION
2062  if (ret > 0){
2063  if (p2) *p2 = 0;
2064  if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2065  ret = 0;
2066  }
2067 #endif
2068  }
2069  return ret;
2070 }
2071 
2072 #ifdef UTF8_INPUT_ENABLE
2073 static nkf_char
2074 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
2075 {
2076  nkf_char c1, c2, c3, c4;
2077  nkf_char ret = 0;
2078  val &= VALUE_MASK;
2079  if (val < 0x80) {
2080  *p2 = 0;
2081  *p1 = val;
2082  }
2083  else if (nkf_char_unicode_bmp_p(val)){
2084  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2085  ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2086  if (ret > 0){
2087  *p2 = 0;
2088  *p1 = nkf_char_unicode_new(val);
2089  ret = 0;
2090  }
2091  }
2092  else {
2093  int i;
2094  if (x0213_f) {
2095  c1 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2096  c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2097  for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2098  if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2099  val = x0213_1_surrogate_table[i][0];
2100  *p2 = val >> 8;
2101  *p1 = val & 0xFF;
2102  return 0;
2103  }
2104  for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2105  if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2106  val = x0213_2_surrogate_table[i][0];
2107  *p2 = PREFIX_EUCG3 | (val >> 8);
2108  *p1 = val & 0xFF;
2109  return 0;
2110  }
2111  }
2112  *p2 = 0;
2113  *p1 = nkf_char_unicode_new(val);
2114  }
2115  return ret;
2116 }
2117 #endif
2118 
/*
 * Input converter (going by its name, for EUC input): normalises the bytes
 * (c2, c1, c0) into a code pair and passes it to oconv.
 *
 * Returns -1 when c2 is 0x8f and c0 is 0, and 0 after calling oconv.
 * (Presumably -1 asks the caller for one more byte -- confirm against the
 * caller.)
 */
2119 static nkf_char
2120 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2121 {
2122  if (c2 == JIS_X_0201_1976_K || c2 == SS2){
 /* with iso2022jp_f set and x0201_f clear the character is replaced by GETA1/GETA2 */
2123  if (iso2022jp_f && !x0201_f) {
2124  c2 = GETA1; c1 = GETA2;
2125  } else {
2126  c2 = JIS_X_0201_1976_K;
2127  c1 &= 0x7f;
2128  }
2129 #ifdef X0212_ENABLE
2130  }else if (c2 == 0x8f){
 /* three-byte form: the sequence is incomplete without c0 */
2131  if (c0 == 0){
2132  return -1;
2133  }
2134  if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2135  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2136  c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2137  c2 = 0;
2138  } else {
 /* fold the 0x8f prefix into c2 and keep the low seven bits of the other two bytes */
2139  c2 = (c2 << 8) | (c1 & 0x7f);
2140  c1 = c0 & 0x7f;
2141 #ifdef SHIFTJIS_CP932
2142  if (cp51932_f){
 /* round-trip through e2s_conv/s2e_conv; the result is masked to 7 bits when c2 < 0x100 */
2143  nkf_char s2, s1;
2144  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2145  s2e_conv(s2, s1, &c2, &c1);
2146  if (c2 < 0x100){
2147  c1 &= 0x7f;
2148  c2 &= 0x7f;
2149  }
2150  }
2151  }
2152 #endif /* SHIFTJIS_CP932 */
2153  }
2154 #endif /* X0212_ENABLE */
2155  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2156  /* NOP */
2157  } else {
2158  if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2159  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2160  c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2161  c2 = 0;
2162  } else {
2163  c1 &= 0x7f;
2164  c2 &= 0x7f;
2165 #ifdef SHIFTJIS_CP932
 /* only lead codes 0x79..0x7c are round-tripped in this branch */
2166  if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2167  nkf_char s2, s1;
2168  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2169  s2e_conv(s2, s1, &c2, &c1);
2170  if (c2 < 0x100){
2171  c1 &= 0x7f;
2172  c2 &= 0x7f;
2173  }
2174  }
2175  }
2176 #endif /* SHIFTJIS_CP932 */
2177  }
2178  }
2179  (*oconv)(c2, c1);
2180  return 0;
2181 }
2182 
2183 static nkf_char
2184 s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2185 {
2186  if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2187  if (iso2022jp_f && !x0201_f) {
2188  c2 = GETA1; c1 = GETA2;
2189  } else {
2190  c1 &= 0x7f;
2191  }
2192  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2193  /* NOP */
2194  } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2195  /* CP932 UDC */
2196  if(c1 == 0x7F) return 0;
2197  c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2198  c2 = 0;
2199  } else {
2200  nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2201  if (ret) return ret;
2202  }
2203  (*oconv)(c2, c1);
2204  return 0;
2205 }
2206 
2207 static int
2208 x0213_wait_combining_p(nkf_char wc)
2209 {
2210  int i;
2211  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2212  if (x0213_combining_table[i][1] == wc) {
2213  return TRUE;
2214  }
2215  }
2216  return FALSE;
2217 }
2218 
2219 static int
2220 x0213_combining_p(nkf_char wc)
2221 {
2222  int i;
2223  for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2224  if (x0213_combining_chars[i] == wc) {
2225  return TRUE;
2226  }
2227  }
2228  return FALSE;
2229 }
2230 
/*
 * Input converter for UTF-8: validates the sequence (c1, c2, c3), converts
 * it and passes the result to oconv.  A c3 above 0xFF carries two bytes:
 * the third byte in its upper part and the fourth in its low 8 bits.
 *
 * Return values visible here:
 *    0  sequence consumed (converted, or silently dropped as invalid)
 *   -1  a three-byte lead was given without c3
 *   -2  a four-byte lead was given without c3
 *   -3  x0213 mode and the character appears in the combining table
 *   otherwise the result of w2e_conv()
 * (Presumably the negative values tell the caller to supply more input --
 * confirm against the caller.)
 */
2231 static nkf_char
2232 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2233 {
2234  nkf_char ret = 0, c4 = 0;
 /*
  * Class of each possible lead byte, indexed by (c1 - 0xC0):
  *   20: C0-C1   21: C2-DF   30: E0   31: E1-EC   32: ED   33: EE-EF
  *   40: F0      41: F1-F3   42: F4   43: F5-F7   50: F8-FB
  *   60: FC-FD   70: FE-FF
  * Only classes 21, 30-33 and 40-42 are accepted by the switch below.
  */
2235  static const char w_iconv_utf8_1st_byte[] =
2236  { /* 0xC0 - 0xFF */
2237  20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239  30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240  40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2241 
 /* unpack the fourth byte from the low 8 bits of c3 */
2242  if (c3 > 0xFF) {
2243  c4 = c3 & 0xFF;
2244  c3 >>= 8;
2245  }
2246 
2247  if (c1 < 0 || 0xff < c1) {
2248  }else if (c1 == 0) { /* 0 : 1 byte*/
2249  c3 = 0;
2250  } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2251  return 0;
2252  } else{
 /* NOTE(review): a c1 in 0x01..0x7F would index the table negatively; presumably callers never pass one -- confirm */
2253  switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2254  case 21:
2255  if (c2 < 0x80 || 0xBF < c2) return 0;
2256  break;
2257  case 30:
2258  if (c3 == 0) return -1;
2259  if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2260  return 0;
2261  break;
2262  case 31:
2263  case 33:
2264  if (c3 == 0) return -1;
2265  if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2266  return 0;
2267  break;
2268  case 32:
2269  if (c3 == 0) return -1;
2270  if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2271  return 0;
2272  break;
2273  case 40:
2274  if (c3 == 0) return -2;
2275  if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2276  return 0;
2277  break;
2278  case 41:
2279  if (c3 == 0) return -2;
2280  if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2281  return 0;
2282  break;
2283  case 42:
2284  if (c3 == 0) return -2;
2285  if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2286  return 0;
2287  break;
2288  default:
2289  return 0;
2290  break;
2291  }
2292  }
 /* conversion: c1 == 0 and EOF pass through unchanged */
2293  if (c1 == 0 || c1 == EOF){
2294  } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
 /* four-byte sequences are handed on as a tagged Unicode value */
2295  c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2296  c1 = 0;
2297  } else {
2298  if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2299  return -3;
2300  ret = w2e_conv(c1, c2, c3, &c1, &c2);
2301  }
2302  if (ret == 0){
2303  (*oconv)(c1, c2);
2304  }
2305  return ret;
2306 }
2307 
2308 static nkf_char
2309 w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
2310 {
2311  /* continue from the line below 'return -3;' in w_iconv() */
2312  nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2313  if (ret == 0){
2314  (*oconv)(c1, c2);
2315  }
2316  return ret;
2317 }
2318 
2319 #define NKF_ICONV_INVALID_CODE_RANGE -13
2320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14
2321 #define NKF_ICONV_NOT_COMBINED -15
2322 static size_t
2323 unicode_iconv(nkf_char wc, int nocombine)
2324 {
2325  nkf_char c1, c2;
2326  int ret = 0;
2327 
2328  if (wc < 0x80) {
2329  c2 = 0;
2330  c1 = wc;
2331  }else if ((wc>>11) == 27) {
2332  /* unpaired surrogate */
2334  }else if (wc < 0xFFFF) {
2335  if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2337  ret = w16e_conv(wc, &c2, &c1);
2338  if (ret) return ret;
2339  }else if (wc < 0x10FFFF) {
2340  c2 = 0;
2341  c1 = nkf_char_unicode_new(wc);
2342  } else {
2344  }
2345  (*oconv)(c2, c1);
2346  return 0;
2347 }
2348 
2349 static nkf_char
2350 unicode_iconv_combine(nkf_char wc, nkf_char wc2)
2351 {
2352  nkf_char c1, c2;
2353  int i;
2354 
2355  if (wc2 < 0x80) {
2356  return NKF_ICONV_NOT_COMBINED;
2357  }else if ((wc2>>11) == 27) {
2358  /* unpaired surrogate */
2360  }else if (wc2 < 0xFFFF) {
2361  if (!x0213_combining_p(wc2))
2362  return NKF_ICONV_NOT_COMBINED;
2363  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2364  if (x0213_combining_table[i][1] == wc &&
2365  x0213_combining_table[i][2] == wc2) {
2366  c2 = x0213_combining_table[i][0] >> 8;
2367  c1 = x0213_combining_table[i][0] & 0x7f;
2368  (*oconv)(c2, c1);
2369  return 0;
2370  }
2371  }
2372  }else if (wc2 < 0x10FFFF) {
2373  return NKF_ICONV_NOT_COMBINED;
2374  } else {
2376  }
2377  return NKF_ICONV_NOT_COMBINED;
2378 }
2379 
2380 static nkf_char
2381 w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
2382 {
2383  nkf_char wc, wc2;
2384  wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2385  wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2386  if (wc2 < 0)
2387  return wc2;
2388  return unicode_iconv_combine(wc, wc2);
2389 }
2390 
2391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
2393 static size_t
2394 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2395 {
2396  nkf_char wc;
2397 
2398  if (c1 == EOF) {
2399  (*oconv)(EOF, 0);
2400  return 0;
2401  }
2402 
2403  if (input_endian == ENDIAN_BIG) {
2404  if (0xD8 <= c1 && c1 <= 0xDB) {
2405  if (0xDC <= c3 && c3 <= 0xDF) {
2406  wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2407  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2408  } else {
2409  wc = c1 << 8 | c2;
2410  }
2411  } else {
2412  if (0xD8 <= c2 && c2 <= 0xDB) {
2413  if (0xDC <= c4 && c4 <= 0xDF) {
2414  wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2415  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2416  } else {
2417  wc = c2 << 8 | c1;
2418  }
2419  }
2420 
2421  return (*unicode_iconv)(wc, FALSE);
2422 }
2423 
2424 static size_t
2425 nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2426 {
2427  nkf_char wc, wc2;
2428 
2429  if (input_endian == ENDIAN_BIG) {
2430  if (0xD8 <= c3 && c3 <= 0xDB) {
2431  return NKF_ICONV_NOT_COMBINED;
2432  } else {
2433  wc = c1 << 8 | c2;
2434  wc2 = c3 << 8 | c4;
2435  }
2436  } else {
2437  if (0xD8 <= c2 && c2 <= 0xDB) {
2438  return NKF_ICONV_NOT_COMBINED;
2439  } else {
2440  wc = c2 << 8 | c1;
2441  wc2 = c4 << 8 | c3;
2442  }
2443  }
2444 
2445  return unicode_iconv_combine(wc, wc2);
2446 }
2447 
2448 static size_t
2449 nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
2450 {
2451  nkf_char wc;
2452  if (input_endian == ENDIAN_BIG)
2453  wc = c1 << 8 | c2;
2454  else
2455  wc = c2 << 8 | c1;
2456  return (*unicode_iconv)(wc, TRUE);
2457 }
2458 
2459 static nkf_char
2460 w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2461 {
2462  (*oconv)(c2, c1);
2463  return 16; /* different from w_iconv32 */
2464 }
2465 
2466 static nkf_char
2467 w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2468 {
2469  (*oconv)(c2, c1);
2470  return 32; /* different from w_iconv16 */
2471 }
2472 
2473 static nkf_char
2474 utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2475 {
2476  nkf_char wc;
2477 
2478  switch(input_endian){
2479  case ENDIAN_BIG:
2480  wc = c2 << 16 | c3 << 8 | c4;
2481  break;
2482  case ENDIAN_LITTLE:
2483  wc = c3 << 16 | c2 << 8 | c1;
2484  break;
2485  case ENDIAN_2143:
2486  wc = c1 << 16 | c4 << 8 | c3;
2487  break;
2488  case ENDIAN_3412:
2489  wc = c4 << 16 | c1 << 8 | c2;
2490  break;
2491  default:
2493  }
2494  return wc;
2495 }
2496 
2497 static size_t
2498 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2499 {
2500  nkf_char wc;
2501 
2502  if (c1 == EOF) {
2503  (*oconv)(EOF, 0);
2504  return 0;
2505  }
2506 
2507  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2508  if (wc < 0)
2509  return wc;
2510 
2511  return (*unicode_iconv)(wc, FALSE);
2512 }
2513 
2514 static nkf_char
2515 nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
2516 {
2517  nkf_char wc, wc2;
2518 
2519  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2520  if (wc < 0)
2521  return wc;
2522  wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2523  if (wc2 < 0)
2524  return wc2;
2525 
2526  return unicode_iconv_combine(wc, wc2);
2527 }
2528 
2529 static size_t
2530 nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2531 {
2532  nkf_char wc;
2533 
2534  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2535  return (*unicode_iconv)(wc, TRUE);
2536 }
2537 #endif
2538 
/*
 * Emit ESC ( <ascii_intro> and record `mode` in output_mode, but only when
 * output_mode is currently neither ASCII nor ISO_8859_1.  `mode` is used
 * only inside that conditional, so output_mode is left untouched when no
 * escape sequence is written.
 */
2539 #define output_ascii_escape_sequence(mode) do { \
2540  if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2541  (*o_putc)(ESC); \
2542  (*o_putc)('('); \
2543  (*o_putc)(ascii_intro); \
2544  output_mode = mode; \
2545  } \
2546  } while (0)
2547 
2548 static void
2549 output_escape_sequence(int mode)
2550 {
2551  if (output_mode == mode)
2552  return;
2553  switch(mode) {
2554  case ISO_8859_1:
2555  (*o_putc)(ESC);
2556  (*o_putc)('.');
2557  (*o_putc)('A');
2558  break;
2559  case JIS_X_0201_1976_K:
2560  (*o_putc)(ESC);
2561  (*o_putc)('(');
2562  (*o_putc)('I');
2563  break;
2564  case JIS_X_0208:
2565  (*o_putc)(ESC);
2566  (*o_putc)('$');
2567  (*o_putc)(kanji_intro);
2568  break;
2569  case JIS_X_0212:
2570  (*o_putc)(ESC);
2571  (*o_putc)('$');
2572  (*o_putc)('(');
2573  (*o_putc)('D');
2574  break;
2575  case JIS_X_0213_1:
2576  (*o_putc)(ESC);
2577  (*o_putc)('$');
2578  (*o_putc)('(');
2579  (*o_putc)('Q');
2580  break;
2581  case JIS_X_0213_2:
2582  (*o_putc)(ESC);
2583  (*o_putc)('$');
2584  (*o_putc)('(');
2585  (*o_putc)('P');
2586  break;
2587  }
2588  output_mode = mode;
2589 }
2590 
2591 static void
2592 j_oconv(nkf_char c2, nkf_char c1)
2593 {
2594 #ifdef NUMCHAR_OPTION
2595  if (c2 == 0 && nkf_char_unicode_p(c1)){
2596  w16e_conv(c1, &c2, &c1);
2597  if (c2 == 0 && nkf_char_unicode_p(c1)){
2598  c2 = c1 & VALUE_MASK;
2599  if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2600  /* CP5022x UDC */
2601  c1 &= 0xFFF;
2602  c2 = 0x7F + c1 / 94;
2603  c1 = 0x21 + c1 % 94;
2604  } else {
2605  if (encode_fallback) (*encode_fallback)(c1);
2606  return;
2607  }
2608  }
2609  }
2610 #endif
2611  if (c2 == 0) {
2613  (*o_putc)(c1);
2614  }
2615  else if (c2 == EOF) {
2617  (*o_putc)(EOF);
2618  }
2619  else if (c2 == ISO_8859_1) {
2621  (*o_putc)(c1|0x80);
2622  }
2623  else if (c2 == JIS_X_0201_1976_K) {
2624  output_escape_sequence(JIS_X_0201_1976_K);
2625  (*o_putc)(c1);
2626 #ifdef X0212_ENABLE
2627  } else if (is_eucg3(c2)){
2628  output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2629  (*o_putc)(c2 & 0x7f);
2630  (*o_putc)(c1);
2631 #endif
2632  } else {
2633  if(ms_ucs_map_f
2634  ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635  : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2636  output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2637  (*o_putc)(c2);
2638  (*o_putc)(c1);
2639  }
2640 }
2641 
/*
 * Output converter producing EUC-JP bytes through o_putc.
 *
 *   c2  selects the kind of character: 0 for a single byte, EOF,
 *       JIS_X_0201_1976_K, ISO_8859_1, an is_eucg3() value for a
 *       0x8f-prefixed three-byte code, otherwise the first byte of a
 *       two-byte code
 *   c1  the (second) byte, or a Unicode-tagged value when c2 == 0
 *
 * output_mode is updated to reflect what was written.
 */
2642 static void
2643 e_oconv(nkf_char c2, nkf_char c1)
2644 {
 /* Unicode-tagged input: try the table conversion first. */
2645  if (c2 == 0 && nkf_char_unicode_p(c1)){
2646  w16e_conv(c1, &c2, &c1);
 /* still unconverted: user-defined range when x0212_f, else fallback */
2647  if (c2 == 0 && nkf_char_unicode_p(c1)){
2648  c2 = c1 & VALUE_MASK;
2649  if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2650  /* eucJP-ms UDC */
2651  c1 &= 0xFFF;
2652  c2 = c1 / 94;
2653  c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654  c1 = 0x21 + c1 % 94;
2655  if (is_eucg3(c2)){
2656  (*o_putc)(0x8f);
2657  (*o_putc)((c2 & 0x7f) | 0x080);
2658  (*o_putc)(c1 | 0x080);
2659  }else{
2660  (*o_putc)((c2 & 0x7f) | 0x080);
2661  (*o_putc)(c1 | 0x080);
2662  }
2663  return;
2664  } else {
2665  if (encode_fallback) (*encode_fallback)(c1);
2666  return;
2667  }
2668  }
2669  }
2670 
2671  if (c2 == EOF) {
2672  (*o_putc)(EOF);
2673  } else if (c2 == 0) {
2674  output_mode = ASCII;
2675  (*o_putc)(c1);
2676  } else if (c2 == JIS_X_0201_1976_K) {
 /* written as SS2 followed by the byte with its high bit set */
2677  output_mode = EUC_JP;
2678  (*o_putc)(SS2); (*o_putc)(c1|0x80);
2679  } else if (c2 == ISO_8859_1) {
2680  output_mode = ISO_8859_1;
2681  (*o_putc)(c1 | 0x080);
2682 #ifdef X0212_ENABLE
2683  } else if (is_eucg3(c2)){
2684  output_mode = EUC_JP;
2685 #ifdef SHIFTJIS_CP932
 /* unless cp932inv_f: round-trip through e2s_conv/s2e_conv, which may rewrite c2/c1 */
2686  if (!cp932inv_f){
2687  nkf_char s2, s1;
2688  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2689  s2e_conv(s2, s1, &c2, &c1);
2690  }
2691  }
2692 #endif
2693  if (c2 == 0) {
2694  output_mode = ASCII;
2695  (*o_putc)(c1);
2696  }else if (is_eucg3(c2)){
 /* three-byte form is written only when x0212_f is set; otherwise nothing is output */
2697  if (x0212_f){
2698  (*o_putc)(0x8f);
2699  (*o_putc)((c2 & 0x7f) | 0x080);
2700  (*o_putc)(c1 | 0x080);
2701  }
2702  }else{
2703  (*o_putc)((c2 & 0x7f) | 0x080);
2704  (*o_putc)(c1 | 0x080);
2705  }
2706 #endif
2707  } else {
 /* ordinary two-byte code: both bytes must satisfy nkf_isgraph() */
2708  if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2709  set_iconv(FALSE, 0);
2710  return; /* too late to rescue this char */
2711  }
2712  output_mode = EUC_JP;
2713  (*o_putc)(c2 | 0x080);
2714  (*o_putc)(c1 | 0x080);
2715  }
2716 }
2717 
/*
 * Output converter producing Shift_JIS bytes through o_putc.
 *
 *   c2  selects the kind of character: 0 for a single byte, EOF,
 *       JIS_X_0201_1976_K, ISO_8859_1, an is_eucg3() value, otherwise the
 *       first byte of a two-byte code that e2s_conv() converts
 *   c1  the (second) byte, or a Unicode-tagged value when c2 == 0
 *
 * output_mode is updated to reflect what was written.
 */
2718 static void
2719 s_oconv(nkf_char c2, nkf_char c1)
2720 {
2721 #ifdef NUMCHAR_OPTION
 /* Unicode-tagged input: try the table conversion first. */
2722  if (c2 == 0 && nkf_char_unicode_p(c1)){
2723  w16e_conv(c1, &c2, &c1);
 /* still unconverted: user-defined range unless x0213_f, else fallback */
2724  if (c2 == 0 && nkf_char_unicode_p(c1)){
2725  c2 = c1 & VALUE_MASK;
2726  if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2727  /* CP932 UDC */
2728  c1 &= 0xFFF;
2729  c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2730  c1 = c1 % 188;
 /* offsets above 0x3e get one extra, so trail byte 0x7f is never produced */
2731  c1 += 0x40 + (c1 > 0x3e);
2732  (*o_putc)(c2);
2733  (*o_putc)(c1);
2734  return;
2735  } else {
2736  if(encode_fallback)(*encode_fallback)(c1);
2737  return;
2738  }
2739  }
2740  }
2741 #endif
2742  if (c2 == EOF) {
2743  (*o_putc)(EOF);
2744  return;
2745  } else if (c2 == 0) {
2746  output_mode = ASCII;
2747  (*o_putc)(c1);
2748  } else if (c2 == JIS_X_0201_1976_K) {
2749  output_mode = SHIFT_JIS;
2750  (*o_putc)(c1|0x80);
2751  } else if (c2 == ISO_8859_1) {
2752  output_mode = ISO_8859_1;
2753  (*o_putc)(c1 | 0x080);
2754 #ifdef X0212_ENABLE
2755  } else if (is_eucg3(c2)){
 /* written only when e2s_conv() reports success (returns 0) */
2756  output_mode = SHIFT_JIS;
2757  if (e2s_conv(c2, c1, &c2, &c1) == 0){
2758  (*o_putc)(c2);
2759  (*o_putc)(c1);
2760  }
2761 #endif
2762  } else {
 /* ordinary two-byte code: both bytes must satisfy nkf_isprint() */
2763  if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2764  set_iconv(FALSE, 0);
2765  return; /* too late to rescue this char */
2766  }
2767  output_mode = SHIFT_JIS;
2768  e2s_conv(c2, c1, &c2, &c1);
2769 
2770 #ifdef SHIFTJIS_CP932
 /* with cp932inv_f, a non-zero cp932inv[][] entry replaces the converted pair */
2771  if (cp932inv_f
2772  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2773  nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2774  if (c){
2775  c2 = c >> 8;
2776  c1 = c & 0xff;
2777  }
2778  }
2779 #endif /* SHIFTJIS_CP932 */
2780 
2781  (*o_putc)(c2);
 /* a non-zero prefix_table entry for the trail byte is written before it */
2782  if (prefix_table[(unsigned char)c1]){
2783  (*o_putc)(prefix_table[(unsigned char)c1]);
2784  }
2785  (*o_putc)(c1);
2786  }
2787 }
2788 
2789 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Encode `val` with nkf_unicode_to_utf8() and write the resulting bytes.
 * The first byte is always written; the second to fourth only when
 * non-zero.  Uses (and overwrites) variables named c1, c2, c3 and c4 that
 * must exist in the scope where the macro is expanded.
 */
2790 #define OUTPUT_UTF8(val) do { \
2791  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2792  (*o_putc)(c1); \
2793  if (c2) (*o_putc)(c2); \
2794  if (c3) (*o_putc)(c3); \
2795  if (c4) (*o_putc)(c4); \
2796  } while (0)
2797 
2798 static void
2799 w_oconv(nkf_char c2, nkf_char c1)
2800 {
2801  nkf_char c3, c4;
2802  nkf_char val, val2;
2803 
2804  if (output_bom_f) {
2805  output_bom_f = FALSE;
2806  (*o_putc)('\357');
2807  (*o_putc)('\273');
2808  (*o_putc)('\277');
2809  }
2810 
2811  if (c2 == EOF) {
2812  (*o_putc)(EOF);
2813  return;
2814  }
2815 
2816  if (c2 == 0 && nkf_char_unicode_p(c1)){
2817  val = c1 & VALUE_MASK;
2818  OUTPUT_UTF8(val);
2819  return;
2820  }
2821 
2822  if (c2 == 0) {
2823  (*o_putc)(c1);
2824  } else {
2825  val = e2w_conv(c2, c1);
2826  if (val){
2827  val2 = e2w_combining(val, c2, c1);
2828  if (val2)
2829  OUTPUT_UTF8(val2);
2830  OUTPUT_UTF8(val);
2831  }
2832  }
2833 }
2834 
/*
 * Write the two bytes of one UTF-16 unit.  The first argument is the low
 * byte and the second the high byte: with ENDIAN_LITTLE output the first
 * argument is written first, otherwise the second.
 */
2835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \
2836  if (output_endian == ENDIAN_LITTLE){ \
2837  (*o_putc)(c1); \
2838  (*o_putc)(c2); \
2839  }else{ \
2840  (*o_putc)(c2); \
2841  (*o_putc)(c1); \
2842  } \
2843  } while (0)
2844 
/*
 * Write `val` as UTF-16.  Values accepted by nkf_char_unicode_bmp_p() are
 * written as one unit; other values are masked with VALUE_MASK and written
 * as a surrogate pair when they do not exceed UNICODE_MAX, and are
 * silently dropped otherwise.  The macro assigns to variables named c1 and
 * c2 in the expanding scope and, in the non-BMP branch, to `val` itself.
 */
2845 #define OUTPUT_UTF16(val) do { \
2846  if (nkf_char_unicode_bmp_p(val)) { \
2847  c2 = (val >> 8) & 0xff; \
2848  c1 = val & 0xff; \
2849  OUTPUT_UTF16_BYTES(c1, c2); \
2850  } else { \
2851  val &= VALUE_MASK; \
2852  if (val <= UNICODE_MAX) { \
2853  c2 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ \
2854  c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
2855  OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2856  OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2857  } \
2858  } \
2859  } while (0)
2860 
2861 static void
2862 w_oconv16(nkf_char c2, nkf_char c1)
2863 {
2864  if (output_bom_f) {
2865  output_bom_f = FALSE;
2866  OUTPUT_UTF16_BYTES(0xFF, 0xFE);
2867  }
2868 
2869  if (c2 == EOF) {
2870  (*o_putc)(EOF);
2871  return;
2872  }
2873 
2874  if (c2 == 0 && nkf_char_unicode_p(c1)) {
2875  OUTPUT_UTF16(c1);
2876  } else if (c2) {
2877  nkf_char val, val2;
2878  val = e2w_conv(c2, c1);
2879  if (!val) return;
2880  val2 = e2w_combining(val, c2, c1);
2881  if (val2)
2882  OUTPUT_UTF16(val2);
2883  OUTPUT_UTF16(val);
2884  } else {
2885  OUTPUT_UTF16_BYTES(c1, c2);
2886  }
2887 }
2888 
/*
 * Write `c` as four UTF-32 bytes: the three low-order bytes of `c` plus a
 * zero top byte, least significant byte first for ENDIAN_LITTLE output and
 * most significant byte first otherwise.
 */
2889 #define OUTPUT_UTF32(c) do { \
2890  if (output_endian == ENDIAN_LITTLE){ \
2891  (*o_putc)( (c) & 0xFF); \
2892  (*o_putc)(((c) >> 8) & 0xFF); \
2893  (*o_putc)(((c) >> 16) & 0xFF); \
2894  (*o_putc)(0); \
2895  }else{ \
2896  (*o_putc)(0); \
2897  (*o_putc)(((c) >> 16) & 0xFF); \
2898  (*o_putc)(((c) >> 8) & 0xFF); \
2899  (*o_putc)( (c) & 0xFF); \
2900  } \
2901  } while (0)
2902 
2903 static void
2904 w_oconv32(nkf_char c2, nkf_char c1)
2905 {
2906  if (output_bom_f) {
2907  output_bom_f = FALSE;
2908  if (output_endian == ENDIAN_LITTLE){
2909  (*o_putc)(0xFF);
2910  (*o_putc)(0xFE);
2911  (*o_putc)(0);
2912  (*o_putc)(0);
2913  }else{
2914  (*o_putc)(0);
2915  (*o_putc)(0);
2916  (*o_putc)(0xFE);
2917  (*o_putc)(0xFF);
2918  }
2919  }
2920 
2921  if (c2 == EOF) {
2922  (*o_putc)(EOF);
2923  return;
2924  }
2925 
2926  if (c2 == ISO_8859_1) {
2927  c1 |= 0x80;
2928  } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2929  c1 &= VALUE_MASK;
2930  } else if (c2) {
2931  nkf_char val, val2;
2932  val = e2w_conv(c2, c1);
2933  if (!val) return;
2934  val2 = e2w_combining(val, c2, c1);
2935  if (val2)
2936  OUTPUT_UTF32(val2);
2937  c1 = val;
2938  }
2939  OUTPUT_UTF32(c1);
2940 }
2941 #endif
2942 
/*
 * Bit flags accumulated in input_code.score by set_code_score().  Each
 * flag is the previous one shifted left by one bit; h_conv() picks the
 * candidate encoding with the numerically lowest score.
 */
2943 #define SCORE_L2 (1) /* Kanji Level 2 */
2944 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2945 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2946 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2947 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2948 #define SCORE_X0213 (SCORE_X0212 << 1) /* JIS X 0213 */
2949 #define SCORE_NO_EXIST (SCORE_X0213 << 1) /* Undefined Characters */
2950 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2951 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2952 
/* Score every detector starts from (see status_reset()). */
2953 #define SCORE_INIT (SCORE_iMIME)
2954 
/*
 * Score tables indexed by the low nibble of a byte in code_score().
 * NOTE(review): most initializer rows are absent from this listing, so
 * the tables below are incomplete as shown - restore them from the
 * original source before building.
 */
2955 static const nkf_char score_table_A0[] = {
2956  0, 0, 0, 0,
2957  0, 0, 0, 0,
2960 };
2961 
2962 static const nkf_char score_table_F0[] = {
2967 };
2968 
2969 static const nkf_char score_table_8FA0[] = {
2974 };
2975 
2976 static const nkf_char score_table_8FE0[] = {
2981 };
2982 
2983 static const nkf_char score_table_8FF0[] = {
2988 };
2989 
2990 static void
2991 set_code_score(struct input_code *ptr, nkf_char score)
2992 {
2993  if (ptr){
2994  ptr->score |= score;
2995  }
2996 }
2997 
2998 static void
2999 clr_code_score(struct input_code *ptr, nkf_char score)
3000 {
3001  if (ptr){
3002  ptr->score &= ~score;
3003  }
3004 }
3005 
3006 static void
3007 code_score(struct input_code *ptr)
3008 {
3009  nkf_char c2 = ptr->buf[0];
3010  nkf_char c1 = ptr->buf[1];
3011  if (c2 < 0){
3012  set_code_score(ptr, SCORE_ERROR);
3013  }else if (c2 == SS2){
3014  set_code_score(ptr, SCORE_KANA);
3015  }else if (c2 == 0x8f){
3016  if ((c1 & 0x70) == 0x20){
3017  set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3018  }else if ((c1 & 0x70) == 0x60){
3019  set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3020  }else if ((c1 & 0x70) == 0x70){
3021  set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3022  }else{
3023  set_code_score(ptr, SCORE_X0212);
3024  }
3025 #ifdef UTF8_OUTPUT_ENABLE
3026  }else if (!e2w_conv(c2, c1)){
3027  set_code_score(ptr, SCORE_NO_EXIST);
3028 #endif
3029  }else if ((c2 & 0x70) == 0x20){
3030  set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3031  }else if ((c2 & 0x70) == 0x70){
3032  set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3033  }else if ((c2 & 0x70) >= 0x50){
3034  set_code_score(ptr, SCORE_L2);
3035  }
3036 }
3037 
3038 static void
3039 status_disable(struct input_code *ptr)
3040 {
3041  ptr->stat = -1;
3042  ptr->buf[0] = -1;
3043  code_score(ptr);
3044  if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
3045 }
3046 
3047 static void
3048 status_push_ch(struct input_code *ptr, nkf_char c)
3049 {
3050  ptr->buf[ptr->index++] = c;
3051 }
3052 
3053 static void
3054 status_clear(struct input_code *ptr)
3055 {
3056  ptr->stat = 0;
3057  ptr->index = 0;
3058 }
3059 
3060 static void
3061 status_reset(struct input_code *ptr)
3062 {
3063  status_clear(ptr);
3064  ptr->score = SCORE_INIT;
3065 }
3066 
3067 static void
3068 status_reinit(struct input_code *ptr)
3069 {
3070  status_reset(ptr);
3071  ptr->_file_stat = 0;
3072 }
3073 
3074 static void
3075 status_check(struct input_code *ptr, nkf_char c)
3076 {
3077  if (c <= DEL && estab_f){
3078  status_reset(ptr);
3079  }
3080 }
3081 
/*
 * Shift_JIS detector: feed one input byte `c` to the state in `ptr`.
 *
 * ptr->stat values:
 *   -1  disabled (only status_check() is applied)
 *    0  idle, waiting for a lead byte
 *    1  lead byte stored, waiting for the trail byte
 *    2  lead byte accepted by is_ibmext_in_sjis() stored
 *    3  lead byte 0xed/0xee stored
 * A complete character is converted with s2e_conv() and scored; a byte
 * that does not fit disables the detector via status_disable().
 */
3082 static void
3083 s_status(struct input_code *ptr, nkf_char c)
3084 {
3085  switch(ptr->stat){
3086  case -1:
3087  status_check(ptr, c);
3088  break;
3089  case 0:
3090  if (c <= DEL){
3091  break;
3092  }else if (nkf_char_unicode_p(c)){
3093  break;
3094  }else if (0xa1 <= c && c <= 0xdf){
 /* single byte in 0xa1..0xdf: stored behind an SS2 marker and scored at once */
3095  status_push_ch(ptr, SS2);
3096  status_push_ch(ptr, c);
3097  code_score(ptr);
3098  status_clear(ptr);
3099  }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3100  ptr->stat = 1;
3101  status_push_ch(ptr, c);
3102  }else if (0xed <= c && c <= 0xee){
3103  ptr->stat = 3;
3104  status_push_ch(ptr, c);
3105 #ifdef SHIFTJIS_CP932
3106  }else if (is_ibmext_in_sjis(c)){
3107  ptr->stat = 2;
3108  status_push_ch(ptr, c);
3109 #endif /* SHIFTJIS_CP932 */
3110 #ifdef X0212_ENABLE
3111  }else if (0xf0 <= c && c <= 0xfc){
3112  ptr->stat = 1;
3113  status_push_ch(ptr, c);
3114 #endif /* X0212_ENABLE */
3115  }else{
3116  status_disable(ptr);
3117  }
3118  break;
3119  case 1:
3120  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3121  status_push_ch(ptr, c);
3122  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3123  code_score(ptr);
3124  status_clear(ptr);
3125  }else{
3126  status_disable(ptr);
3127  }
3128  break;
3129  case 2:
3130 #ifdef SHIFTJIS_CP932
3131  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3132  status_push_ch(ptr, c);
3133  if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
3134  set_code_score(ptr, SCORE_CP932);
3135  status_clear(ptr);
3136  break;
3137  }
3138  }
3139 #endif /* SHIFTJIS_CP932 */
 /* reached when the trail byte is invalid or the conversion failed */
3140  status_disable(ptr);
3141  break;
3142  case 3:
3143  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3144  status_push_ch(ptr, c);
3145  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3146  set_code_score(ptr, SCORE_CP932);
3147  status_clear(ptr);
3148  }else{
3149  status_disable(ptr);
3150  }
3151  break;
3152  }
3153 }
3154 
/*
 * EUC-JP detector: feed one input byte `c` to the state in `ptr`.
 *
 * ptr->stat values:
 *   -1  disabled (only status_check() is applied)
 *    0  idle, waiting for a lead byte
 *    1  waiting for the final byte of the character
 *    2  0x8f seen, waiting for the byte that follows it
 * A complete character is scored with code_score(); a byte that does not
 * fit disables the detector via status_disable().
 */
3155 static void
3156 e_status(struct input_code *ptr, nkf_char c)
3157 {
3158  switch (ptr->stat){
3159  case -1:
3160  status_check(ptr, c);
3161  break;
3162  case 0:
3163  if (c <= DEL){
3164  break;
3165  }else if (nkf_char_unicode_p(c)){
3166  break;
3167  }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
3168  ptr->stat = 1;
3169  status_push_ch(ptr, c);
3170 #ifdef X0212_ENABLE
3171  }else if (0x8f == c){
3172  ptr->stat = 2;
3173  status_push_ch(ptr, c);
3174 #endif /* X0212_ENABLE */
3175  }else{
3176  status_disable(ptr);
3177  }
3178  break;
3179  case 1:
3180  if (0xa1 <= c && c <= 0xfe){
3181  status_push_ch(ptr, c);
3182  code_score(ptr);
3183  status_clear(ptr);
3184  }else{
3185  status_disable(ptr);
3186  }
3187  break;
3188 #ifdef X0212_ENABLE
3189  case 2:
3190  if (0xa1 <= c && c <= 0xfe){
3191  ptr->stat = 1;
3192  status_push_ch(ptr, c);
3193  }else{
3194  status_disable(ptr);
3195  }
 /* last case of the switch: no break is needed here */
3196 #endif /* X0212_ENABLE */
3197  }
3198 }
3199 
3200 #ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: feed one input byte `c` to the state in `ptr`.
 *
 * ptr->stat is -1 when disabled, 0 when idle, and otherwise the number of
 * trail bytes the stored lead byte calls for (1, 2 or 3).  Two- and
 * three-byte sequences are converted with w2e_conv() and scored, except
 * that the sequence EF BB BF is not scored.  Four-byte sequences are only
 * consumed.  A byte that does not fit disables the detector via
 * status_disable().
 */
3201 static void
3202 w_status(struct input_code *ptr, nkf_char c)
3203 {
3204  switch (ptr->stat){
3205  case -1:
3206  status_check(ptr, c);
3207  break;
3208  case 0:
3209  if (c <= DEL){
3210  break;
3211  }else if (nkf_char_unicode_p(c)){
3212  break;
3213  }else if (0xc0 <= c && c <= 0xdf){
3214  ptr->stat = 1;
3215  status_push_ch(ptr, c);
3216  }else if (0xe0 <= c && c <= 0xef){
3217  ptr->stat = 2;
3218  status_push_ch(ptr, c);
3219  }else if (0xf0 <= c && c <= 0xf4){
3220  ptr->stat = 3;
3221  status_push_ch(ptr, c);
3222  }else{
3223  status_disable(ptr);
3224  }
3225  break;
3226  case 1:
3227  case 2:
3228  if (0x80 <= c && c <= 0xbf){
3229  status_push_ch(ptr, c);
 /* index counts the lead byte too, so index > stat means the sequence is complete */
3230  if (ptr->index > ptr->stat){
3231  int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
3232  && ptr->buf[2] == 0xbf);
3233  w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
3234  &ptr->buf[0], &ptr->buf[1]);
3235  if (!bom){
3236  code_score(ptr);
3237  }
3238  status_clear(ptr);
3239  }
3240  }else{
3241  status_disable(ptr);
3242  }
3243  break;
3244  case 3:
3245  if (0x80 <= c && c <= 0xbf){
 /* the first two trail bytes are stored; the third just ends the sequence */
3246  if (ptr->index < ptr->stat){
3247  status_push_ch(ptr, c);
3248  } else {
3249  status_clear(ptr);
3250  }
3251  }else{
3252  status_disable(ptr);
3253  }
3254  break;
3255  }
3256 }
3257 #endif
3258 
3259 static void
3260 code_status(nkf_char c)
3261 {
3262  int action_flag = 1;
3263  struct input_code *result = 0;
3264  struct input_code *p = input_code_list;
3265  while (p->name){
3266  if (!p->status_func) {
3267  ++p;
3268  continue;
3269  }
3270  if (!p->status_func)
3271  continue;
3272  (p->status_func)(p, c);
3273  if (p->stat > 0){
3274  action_flag = 0;
3275  }else if(p->stat == 0){
3276  if (result){
3277  action_flag = 0;
3278  }else{
3279  result = p;
3280  }
3281  }
3282  ++p;
3283  }
3284 
3285  if (action_flag){
3286  if (result && !estab_f){
3287  set_iconv(TRUE, result->iconv_func);
3288  }else if (c <= DEL){
3289  struct input_code *ptr = input_code_list;
3290  while (ptr->name){
3291  status_reset(ptr);
3292  ++ptr;
3293  }
3294  }
3295  }
3296 }
3297 
/*
 * Converter state shared through the nkf_state pointer.
 * NOTE(review): the member declarations are absent from this listing;
 * nkf_state_init() accesses std_gc_buf, broken_buf, nfc_buf, broken_state
 * and mimeout_state - restore them from the original source.
 */
3298 typedef struct {
3304 } nkf_state_t;
3305 
/* Allocated on the first call to nkf_state_init() and reused afterwards. */
3306 static nkf_state_t *nkf_state = NULL;
3307 
/* Size passed to nkf_buf_new() for the std_gc_buf push-back buffer. */
3308 #define STD_GC_BUFSIZE (256)
3309 
3310 static void
3311 nkf_state_init(void)
3312 {
3313  if (nkf_state) {
3314  nkf_buf_clear(nkf_state->std_gc_buf);
3315  nkf_buf_clear(nkf_state->broken_buf);
3316  nkf_buf_clear(nkf_state->nfc_buf);
3317  }
3318  else {
3319  nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3320  nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3321  nkf_state->broken_buf = nkf_buf_new(3);
3322  nkf_state->nfc_buf = nkf_buf_new(9);
3323  }
3324  nkf_state->broken_state = 0;
3325  nkf_state->mimeout_state = 0;
3326 }
3327 
3328 #ifndef WIN32DLL
3329 static nkf_char
3330 std_getc(FILE *f)
3331 {
3332  if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3333  return nkf_buf_pop(nkf_state->std_gc_buf);
3334  }
3335  return getc(f);
3336 }
3337 #endif /*WIN32DLL*/
3338 
3339 static nkf_char
3340 std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3341 {
3342  nkf_buf_push(nkf_state->std_gc_buf, c);
3343  return c;
3344 }
3345 
3346 #ifndef WIN32DLL
3347 static void
3348 std_putc(nkf_char c)
3349 {
3350  if(c!=EOF)
3351  putchar(c);
3352 }
3353 #endif /*WIN32DLL*/
3354 
3355 static nkf_char hold_buf[HOLD_SIZE*2];
3356 static int hold_count = 0;
3357 static nkf_char
3358 push_hold_buf(nkf_char c2)
3359 {
3360  if (hold_count >= HOLD_SIZE*2)
3361  return (EOF);
3362  hold_buf[hold_count++] = c2;
3363  return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3364 }
3365 
/*
 * Hold input back until the encoding is decided, then replay it.
 *
 * c1 and c2 are the first two undecided bytes.  Further bytes are read
 * from `f` into hold_buf, each one being fed to code_status(), until an
 * encoding is established (estab_f), an ESC is seen (it is pushed back),
 * the buffer is full, or the input ends.  If nothing was established, the
 * candidate with the lowest score is chosen.  The held bytes are then run
 * through iconv, reading more input where a multi-byte sequence extends
 * beyond the buffer.
 *
 * Returns the last value read while buffering (possibly EOF), or EOF when
 * the input ends inside a sequence in the -1 / -2 cases below.
 */
3366 static int
3367 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3368 {
3369  int ret;
3370  int hold_index;
3371  int fromhold_count;
3372  nkf_char c3, c4;
3373 
3378  hold_count = 0;
3379  push_hold_buf(c1);
3380  push_hold_buf(c2);
3381 
 /* Phase 1: buffer input while the detectors are still undecided. */
3382  while ((c2 = (*i_getc)(f)) != EOF) {
3383  if (c2 == ESC){
3384  (*i_ungetc)(c2,f);
3385  break;
3386  }
3387  code_status(c2);
3388  if (push_hold_buf(c2) == EOF || estab_f) {
3389  break;
3390  }
3391  }
3392 
 /* Nothing established: pick the detector with the lowest score. */
3393  if (!estab_f) {
3394  struct input_code *p = input_code_list;
3395  struct input_code *result = p;
3396  if (c2 == EOF) {
3397  code_status(c2);
3398  }
3399  while (p->name) {
3400  if (p->status_func && p->score < result->score) {
3401  result = p;
3402  }
3403  p++;
3404  }
3405  set_iconv(TRUE, result->iconv_func);
3406  }
3407 
3408 
 /* Phase 2: replay the held bytes through the chosen input converter. */
3418  ret = c2;
3419  hold_index = 0;
3420  while (hold_index < hold_count){
3421  c1 = hold_buf[hold_index++];
3422  if (nkf_char_unicode_p(c1)) {
3423  (*oconv)(0, c1);
3424  continue;
3425  }
3426  else if (c1 <= DEL){
3427  (*iconv)(0, c1, 0);
3428  continue;
3429  }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3430  (*iconv)(JIS_X_0201_1976_K, c1, 0);
3431  continue;
3432  }
 /* fromhold_count counts how many bytes of this character came from
  * hold_buf; it decides below whether a look-ahead byte is returned by
  * rewinding hold_index or by i_ungetc(). */
3433  fromhold_count = 1;
3434  if (hold_index < hold_count){
3435  c2 = hold_buf[hold_index++];
3436  fromhold_count++;
3437  }else{
3438  c2 = (*i_getc)(f);
3439  if (c2 == EOF){
3440  c4 = EOF;
3441  break;
3442  }
3443  code_status(c2);
3444  }
3445  c3 = 0;
3446  switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3447  case -2:
3448  /* 4 bytes UTF-8 */
3449  if (hold_index < hold_count){
3450  c3 = hold_buf[hold_index++];
3451  } else if ((c3 = (*i_getc)(f)) == EOF) {
3452  ret = EOF;
3453  break;
3454  }
3455  code_status(c3);
3456  if (hold_index < hold_count){
3457  c4 = hold_buf[hold_index++];
3458  } else if ((c4 = (*i_getc)(f)) == EOF) {
3459  c3 = ret = EOF;
3460  break;
3461  }
3462  code_status(c4);
 /* third and fourth byte are passed packed into one argument */
3463  (*iconv)(c1, c2, (c3<<8)|c4);
3464  break;
3465  case -3:
3466  /* 4 bytes UTF-8 (check combining character) */
3467  if (hold_index < hold_count){
3468  c3 = hold_buf[hold_index++];
3469  fromhold_count++;
3470  } else if ((c3 = (*i_getc)(f)) == EOF) {
3471  w_iconv_nocombine(c1, c2, 0);
3472  break;
3473  }
3474  if (hold_index < hold_count){
3475  c4 = hold_buf[hold_index++];
3476  fromhold_count++;
3477  } else if ((c4 = (*i_getc)(f)) == EOF) {
3478  w_iconv_nocombine(c1, c2, 0);
3479  if (fromhold_count <= 2)
3480  (*i_ungetc)(c3,f);
3481  else
3482  hold_index--;
3483  continue;
3484  }
 /* non-zero: no combination; output the base alone and give back the look-ahead */
3485  if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3486  w_iconv_nocombine(c1, c2, 0);
3487  if (fromhold_count <= 2) {
3488  (*i_ungetc)(c4,f);
3489  (*i_ungetc)(c3,f);
3490  } else if (fromhold_count == 3) {
3491  (*i_ungetc)(c4,f);
3492  hold_index--;
3493  } else {
3494  hold_index -= 2;
3495  }
3496  }
3497  break;
3498  case -1:
3499  /* 3 bytes EUC or UTF-8 */
3500  if (hold_index < hold_count){
3501  c3 = hold_buf[hold_index++];
3502  fromhold_count++;
3503  } else if ((c3 = (*i_getc)(f)) == EOF) {
3504  ret = EOF;
3505  break;
3506  } else {
3507  code_status(c3);
3508  }
3509  if ((*iconv)(c1, c2, c3) == -3) {
3510  /* 6 bytes UTF-8 (check combining character) */
3511  nkf_char c5, c6;
3512  if (hold_index < hold_count){
3513  c4 = hold_buf[hold_index++];
3514  fromhold_count++;
3515  } else if ((c4 = (*i_getc)(f)) == EOF) {
3516  w_iconv_nocombine(c1, c2, c3);
3517  continue;
3518  }
3519  if (hold_index < hold_count){
3520  c5 = hold_buf[hold_index++];
3521  fromhold_count++;
3522  } else if ((c5 = (*i_getc)(f)) == EOF) {
3523  w_iconv_nocombine(c1, c2, c3);
3524  if (fromhold_count == 4)
3525  hold_index--;
3526  else
3527  (*i_ungetc)(c4,f);
3528  continue;
3529  }
3530  if (hold_index < hold_count){
3531  c6 = hold_buf[hold_index++];
3532  fromhold_count++;
3533  } else if ((c6 = (*i_getc)(f)) == EOF) {
3534  w_iconv_nocombine(c1, c2, c3);
3535  if (fromhold_count == 5) {
3536  hold_index -= 2;
3537  } else if (fromhold_count == 4) {
3538  hold_index--;
3539  (*i_ungetc)(c5,f);
3540  } else {
3541  (*i_ungetc)(c5,f);
3542  (*i_ungetc)(c4,f);
3543  }
3544  continue;
3545  }
 /* non-zero: no combination; output the base alone and give back c4..c6 */
3546  if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3547  w_iconv_nocombine(c1, c2, c3);
3548  if (fromhold_count == 6) {
3549  hold_index -= 3;
3550  } else if (fromhold_count == 5) {
3551  hold_index -= 2;
3552  (*i_ungetc)(c6,f);
3553  } else if (fromhold_count == 4) {
3554  hold_index--;
3555  (*i_ungetc)(c6,f);
3556  (*i_ungetc)(c5,f);
3557  } else {
3558  (*i_ungetc)(c6,f);
3559  (*i_ungetc)(c5,f);
3560  (*i_ungetc)(c4,f);
3561  }
3562  }
3563  }
3564  break;
3565  }
3566  if (c3 == EOF) break;
3567  }
3568  return ret;
3569 }
3570 
3571 /*
3572  * Check and Ignore BOM
3573  */
3574 static void
3575 check_bom(FILE *f)
3576 {
3577  int c2;
3578  input_bom_f = FALSE;
3579  switch(c2 = (*i_getc)(f)){
3580  case 0x00:
3581  if((c2 = (*i_getc)(f)) == 0x00){
3582  if((c2 = (*i_getc)(f)) == 0xFE){
3583  if((c2 = (*i_getc)(f)) == 0xFF){
3584  if(!input_encoding){
3585  set_iconv(TRUE, w_iconv32);
3586  }
3587  if (iconv == w_iconv32) {
3588  input_bom_f = TRUE;
3589  input_endian = ENDIAN_BIG;
3590  return;
3591  }
3592  (*i_ungetc)(0xFF,f);
3593  }else (*i_ungetc)(c2,f);
3594  (*i_ungetc)(0xFE,f);
3595  }else if(c2 == 0xFF){
3596  if((c2 = (*i_getc)(f)) == 0xFE){
3597  if(!input_encoding){
3598  set_iconv(TRUE, w_iconv32);
3599  }
3600  if (iconv == w_iconv32) {
3601  input_endian = ENDIAN_2143;
3602  return;
3603  }
3604  (*i_ungetc)(0xFF,f);
3605  }else (*i_ungetc)(c2,f);
3606  (*i_ungetc)(0xFF,f);
3607  }else (*i_ungetc)(c2,f);
3608  (*i_ungetc)(0x00,f);
3609  }else (*i_ungetc)(c2,f);
3610  (*i_ungetc)(0x00,f);
3611  break;
3612  case 0xEF:
3613  if((c2 = (*i_getc)(f)) == 0xBB){
3614  if((c2 = (*i_getc)(f)) == 0xBF){
3615  if(!input_encoding){
3616  set_iconv(TRUE, w_iconv);
3617  }
3618  if (iconv == w_iconv) {
3619  input_bom_f = TRUE;
3620  return;
3621  }
3622  (*i_ungetc)(0xBF,f);
3623  }else (*i_ungetc)(c2,f);
3624  (*i_ungetc)(0xBB,f);
3625  }else (*i_ungetc)(c2,f);
3626  (*i_ungetc)(0xEF,f);
3627  break;
3628  case 0xFE:
3629  if((c2 = (*i_getc)(f)) == 0xFF){
3630  if((c2 = (*i_getc)(f)) == 0x00){
3631  if((c2 = (*i_getc)(f)) == 0x00){
3632  if(!input_encoding){
3633  set_iconv(TRUE, w_iconv32);
3634  }
3635  if (iconv == w_iconv32) {
3636  input_endian = ENDIAN_3412;
3637  return;
3638  }
3639  (*i_ungetc)(0x00,f);
3640  }else (*i_ungetc)(c2,f);
3641  (*i_ungetc)(0x00,f);
3642  }else (*i_ungetc)(c2,f);
3643  if(!input_encoding){
3644  set_iconv(TRUE, w_iconv16);
3645  }
3646  if (iconv == w_iconv16) {
3647  input_endian = ENDIAN_BIG;
3648  input_bom_f = TRUE;
3649  return;
3650  }
3651  (*i_ungetc)(0xFF,f);
3652  }else (*i_ungetc)(c2,f);
3653  (*i_ungetc)(0xFE,f);
3654  break;
3655  case 0xFF:
3656  if((c2 = (*i_getc)(f)) == 0xFE){
3657  if((c2 = (*i_getc)(f)) == 0x00){
3658  if((c2 = (*i_getc)(f)) == 0x00){
3659  if(!input_encoding){
3660  set_iconv(TRUE, w_iconv32);
3661  }
3662  if (iconv == w_iconv32) {
3663  input_endian = ENDIAN_LITTLE;
3664  input_bom_f = TRUE;
3665  return;
3666  }
3667  (*i_ungetc)(0x00,f);
3668  }else (*i_ungetc)(c2,f);
3669  (*i_ungetc)(0x00,f);
3670  }else (*i_ungetc)(c2,f);
3671  if(!input_encoding){
3672  set_iconv(TRUE, w_iconv16);
3673  }
3674  if (iconv == w_iconv16) {
3675  input_endian = ENDIAN_LITTLE;
3676  input_bom_f = TRUE;
3677  return;
3678  }
3679  (*i_ungetc)(0xFE,f);
3680  }else (*i_ungetc)(c2,f);
3681  (*i_ungetc)(0xFF,f);
3682  break;
3683  default:
3684  (*i_ungetc)(c2,f);
3685  break;
3686  }
3687 }
3688 
3689 static nkf_char
3690 broken_getc(FILE *f)
3691 {
3692  nkf_char c, c1;
3693 
3694  if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3695  return nkf_buf_pop(nkf_state->broken_buf);
3696  }
3697  c = (*i_bgetc)(f);
3698  if (c=='$' && nkf_state->broken_state != ESC
3699  && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3700  c1= (*i_bgetc)(f);
3701  nkf_state->broken_state = 0;
3702  if (c1=='@'|| c1=='B') {
3703  nkf_buf_push(nkf_state->broken_buf, c1);
3704  nkf_buf_push(nkf_state->broken_buf, c);
3705  return ESC;
3706  } else {
3707  (*i_bungetc)(c1,f);
3708  return c;
3709  }
3710  } else if (c=='(' && nkf_state->broken_state != ESC
3711  && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3712  c1= (*i_bgetc)(f);
3713  nkf_state->broken_state = 0;
3714  if (c1=='J'|| c1=='B') {
3715  nkf_buf_push(nkf_state->broken_buf, c1);
3716  nkf_buf_push(nkf_state->broken_buf, c);
3717  return ESC;
3718  } else {
3719  (*i_bungetc)(c1,f);
3720  return c;
3721  }
3722  } else {
3723  nkf_state->broken_state = c;
3724  return c;
3725  }
3726 }
3727 
3728 static nkf_char
3729 broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3730 {
3731  if (nkf_buf_length(nkf_state->broken_buf) < 2)
3732  nkf_buf_push(nkf_state->broken_buf, c);
3733  return c;
3734 }
3735 
/*
 * End-of-line conversion stage of the output chain.
 *
 * Tracks the line-ending style of the input in input_eol (only while guess_f
 * is set) and rewrites CR, LF and CRLF to the style selected by eolmode_f
 * before passing characters on to o_eol_conv.
 *
 * input_eol is 0 until a line ending has been seen, then CR, LF or CRLF, and
 * becomes EOF once two different styles have been observed.  prev_cr records
 * that the previous character was a CR whose output is still pending.
 */
static void
eol_conv(nkf_char c2, nkf_char c1)
{
    if (guess_f && input_eol != EOF) {
        if (c2 == 0 && c1 == LF) {
            /* LF closes either a CRLF pair or a bare LF line ending */
            if (!input_eol) input_eol = prev_cr ? CRLF : LF;
            else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
        } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
        else if (!prev_cr);     /* no pending CR: nothing to record */
        else if (!input_eol) input_eol = CR;    /* pending CR not followed by LF */
        else if (input_eol != CR) input_eol = EOF;
    }
    /* A pending CR or an incoming LF completes one input line ending:
       emit it in the requested output style. */
    if (prev_cr || (c2 == 0 && c1 == LF)) {
        prev_cr = 0;
        if (eolmode_f != LF) (*o_eol_conv)(0, CR);
        if (eolmode_f != CR) (*o_eol_conv)(0, LF);
    }
    /* Hold back a CR until the next character shows whether it is a CRLF;
       LF was already handled above; everything else passes through. */
    if (c2 == 0 && c1 == CR) prev_cr = CR;
    else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
}
3756 
3757 static void
3758 put_newline(void (*func)(nkf_char))
3759 {
3760  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3761  case CRLF:
3762  (*func)(0x0D);
3763  (*func)(0x0A);
3764  break;
3765  case CR:
3766  (*func)(0x0D);
3767  break;
3768  case LF:
3769  (*func)(0x0A);
3770  break;
3771  }
3772 }
3773 
3774 static void
3775 oconv_newline(void (*func)(nkf_char, nkf_char))
3776 {
3777  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3778  case CRLF:
3779  (*func)(0, 0x0D);
3780  (*func)(0, 0x0A);
3781  break;
3782  case CR:
3783  (*func)(0, 0x0D);
3784  break;
3785  case LF:
3786  (*func)(0, 0x0A);
3787  break;
3788  }
3789 }
3790 
3791 /*
3792  Return value of fold_conv()
3793 
3794  LF add newline and output char
3795  CR add newline and output nothing
3796  SP space
3797  0 skip
3798  1 (or else) normal output
3799 
3800  fold state in prev (previous character)
3801 
3802  >0x80 Japanese (X0208/X0201)
3803  <0x80 ASCII
3804  LF new line
3805  SP space
3806 
3807  This fold algorithm does not preserve leading spaces in a line.
3808  This is the main difference from fmt.
3809  */
3810 
/* Display width used by fold_conv(): 2 columns when c2 is non-zero
   (double-byte character), 1 otherwise.  c1 is accepted but not used.
   The argument is parenthesized so that any expression can be passed. */
#define char_size(c2,c1) ((c2)?2:1)
3812 
/*
 * Line folding stage of the output chain.
 *
 * Decides, from the incoming character (c2, c1) and the state kept in
 * f_prev (previous character class) and f_line (current column), whether to
 * emit the character as is, replace it by a space, drop it, or insert a
 * line break.  The decision is stored in fold_state and acted upon in the
 * switch at the end of the function:
 *
 *   LF       newline, then the character
 *   CR       newline only
 *   SP/TAB   a single ASCII space
 *   0        nothing
 *   other    the character
 *
 * fold_len is the target line width; fold_margin is the extra width a line
 * may grow while waiting for a character that is allowed to start a line
 * (kinsoku processing).
 */
static void
fold_conv(nkf_char c2, nkf_char c1)
{
    nkf_char prev0;
    nkf_char fold_state;

    if (c1== CR && !fold_preserve_f) {
        fold_state=0; /* ignore cr */
    }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
        /* LF of a CRLF pair: the line break was already produced at the CR */
        f_prev = LF;
        fold_state=0;
    } else if (c1== BS) {
        /* backspace moves the column back but is still output */
        if (f_line>0) f_line--;
        fold_state = 1;
    } else if (c2==EOF && f_line != 0) { /* close open last line */
        fold_state = LF;
    } else if ((c1==LF && !fold_preserve_f)
               || ((c1==CR||(c1==LF&&f_prev!=CR))
                   && fold_preserve_f)) {
        /* new line */
        if (fold_preserve_f) {
            /* keep every input line break */
            f_prev = c1;
            f_line = 0;
            fold_state = CR;
        } else if ((f_prev == c1)
                   || (f_prev == LF)
                  ) { /* duplicate newline */
            if (f_line) {
                f_line = 0;
                fold_state = LF; /* output two newline */
            } else {
                f_line = 0;
                fold_state = 1;
            }
        } else {
            if (f_prev&0x80) { /* Japanese? */
                f_prev = c1;
                fold_state = 0; /* ignore given single newline */
            } else if (f_prev==SP) {
                /* a space was just output; the newline adds nothing */
                fold_state = 0;
            } else {
                /* single newline after ASCII text becomes a space */
                f_prev = c1;
                if (++f_line<=fold_len)
                    fold_state = SP;
                else {
                    f_line = 0;
                    fold_state = CR; /* fold and output nothing */
                }
            }
        }
    } else if (c1=='\f') {
        f_prev = LF;
        f_line = 0;
        fold_state = LF; /* output newline and clear */
    } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
        /* X0208 kankaku or ascii space */
        if (f_prev == SP) {
            fold_state = 0; /* remove duplicate spaces */
        } else {
            f_prev = SP;
            if (++f_line<=fold_len)
                fold_state = SP; /* output ASCII space only */
            else {
                f_prev = SP; f_line = 0;
                fold_state = CR; /* fold and output nothing */
            }
        }
    } else {
        prev0 = f_prev; /* we still need this one... , but almost done */
        f_prev = c1;
        if (c2 || c2 == JIS_X_0201_1976_K)
            f_prev |= 0x80; /* this is Japanese */
        /* JIS X 0201 kana occupies one column although c2 is non-zero */
        f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
        if (f_line<=fold_len) { /* normal case */
            fold_state = 1;
        } else {
            if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
                f_line = char_size(c2,c1);
                fold_state = LF; /* We can't wait, do fold now */
            } else if (c2 == JIS_X_0201_1976_K) {
                /* simple kinsoku rules return 1 means no folding */
                if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE halfwidth voiced sound mark */
                else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF halfwidth semi-voiced sound mark */
                else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4 halfwidth ideographic comma */
                else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3 halfwidth right corner bracket */
                else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1 halfwidth ideographic full stop */
                else if (c1==(0xb0&0x7f)) fold_state = 1; /* 0xB0 halfwidth prolonged sound mark */
                else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
                    f_line = 1;
                    fold_state = LF;/* add one new f_line before this character */
                } else {
                    /* same action as the branch above */
                    f_line = 1;
                    fold_state = LF;/* add one new f_line before this character */
                }
            } else if (c2==0) {
                /* kinsoku point in ASCII */
                if ( c1==')'|| /* closing brackets and punctuation must not start a line */
                    c1==']'||
                    c1=='}'||
                    c1=='.'||
                    c1==','||
                    c1=='!'||
                    c1=='?'||
                    c1=='/'||
                    c1==':'||
                    c1==';') {
                    fold_state = 1;
                    /* just after special */
                } else if (!is_alnum(prev0)) {
                    f_line = char_size(c2,c1);
                    fold_state = LF;
                } else if ((prev0==SP) || /* ignored new f_line */
                           (prev0==LF)|| /* ignored new f_line */
                           (prev0&0x80)) { /* X0208 - ASCII */
                    f_line = char_size(c2,c1);
                    fold_state = LF;/* add one new f_line before this character */
                } else {
                    fold_state = 1; /* default no fold in ASCII */
                }
            } else {
                if (c2=='!') {
                    /* JIS X 0208 row 1 punctuation that must not start a line */
                    if (c1=='"') fold_state = 1; /* 0x2122 ideographic comma */
                    else if (c1=='#') fold_state = 1; /* 0x2123 ideographic full stop */
                    else if (c1=='W') fold_state = 1; /* 0x2157 right corner bracket */
                    else if (c1=='K') fold_state = 1; /* 0x214B fullwidth right parenthesis */
                    else if (c1=='$') fold_state = 1; /* 0x2124 fullwidth comma */
                    else if (c1=='%') fold_state = 1; /* 0x2125 fullwidth full stop */
                    else if (c1=='\'') fold_state = 1; /* 0x2127 fullwidth colon */
                    else if (c1=='(') fold_state = 1; /* 0x2128 fullwidth semicolon */
                    else if (c1==')') fold_state = 1; /* 0x2129 fullwidth question mark */
                    else if (c1=='*') fold_state = 1; /* 0x212A fullwidth exclamation mark */
                    else if (c1=='+') fold_state = 1; /* 0x212B voiced sound mark */
                    else if (c1==',') fold_state = 1; /* 0x212C semi-voiced sound mark */
                    /* default no fold in kinsoku */
                    else {
                        fold_state = LF;
                        f_line = char_size(c2,c1);
                        /* add one new f_line before this character */
                    }
                } else {
                    f_line = char_size(c2,c1);
                    fold_state = LF;
                    /* add one new f_line before this character */
                }
            }
        }
    }
    /* terminator process */
    switch(fold_state) {
    case LF:
        oconv_newline(o_fconv);
        (*o_fconv)(c2,c1);
        break;
    case 0:
        return;
    case CR:
        oconv_newline(o_fconv);
        break;
    case TAB:
    case SP:
        (*o_fconv)(0,SP);
        break;
    default:
        (*o_fconv)(c2,c1);
    }
}
3979 
3980 static nkf_char z_prev2=0,z_prev1=0;
3981 
3982 static void
3983 z_conv(nkf_char c2, nkf_char c1)
3984 {
3985 
3986  /* if (c2) c1 &= 0x7f; assertion */
3987 
3988  if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3989  (*o_zconv)(c2,c1);
3990  return;
3991  }
3992 
3993  if (x0201_f) {
3994  if (z_prev2 == JIS_X_0201_1976_K) {
3995  if (c2 == JIS_X_0201_1976_K) {
3996  if (c1 == (0xde&0x7f)) { /* $BByE@(B */
3997  z_prev2 = 0;
3998  (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3999  return;
4000  } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* $BH>ByE@(B */
4001  z_prev2 = 0;
4002  (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
4003  return;
4004  } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) { /* $BH>ByE@(B */
4005  z_prev2 = 0;
4006  (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
4007  return;
4008  }
4009  }
4010  z_prev2 = 0;
4011  (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
4012  }
4013  if (c2 == JIS_X_0201_1976_K) {
4014  if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
4015  /* wait for $BByE@(B or $BH>ByE@(B */
4016  z_prev1 = c1;
4017  z_prev2 = c2;
4018  return;
4019  } else {
4020  (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
4021  return;
4022  }
4023  }
4024  }
4025 
4026  if (c2 == EOF) {
4027  (*o_zconv)(c2, c1);
4028  return;
4029  }
4030 
4031  if (alpha_f&1 && c2 == 0x23) {
4032  /* JISX0208 Alphabet */
4033  c2 = 0;
4034  } else if (c2 == 0x21) {
4035  /* JISX0208 Kigou */
4036  if (0x21==c1) {
4037  if (alpha_f&2) {
4038  c2 = 0;
4039  c1 = SP;
4040  } else if (alpha_f&4) {
4041  (*o_zconv)(0, SP);
4042  (*o_zconv)(0, SP);
4043  return;
4044  }
4045  } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4046  c2 = 0;
4047  c1 = fv[c1-0x20];
4048  }
4049  }
4050 
4051  if (alpha_f&8 && c2 == 0) {
4052  /* HTML Entity */
4053  const char *entity = 0;
4054  switch (c1){
4055  case '>': entity = "&gt;"; break;
4056  case '<': entity = "&lt;"; break;
4057  case '\"': entity = "&quot;"; break;
4058  case '&': entity = "&amp;"; break;
4059  }
4060  if (entity){
4061  while (*entity) (*o_zconv)(0, *entity++);
4062  return;
4063  }
4064  }
4065 
4066  if (alpha_f & 16) {
4067  /* JIS X 0208 Katakana to JIS X 0201 Katakana */
4068  if (c2 == 0x21) {
4069  nkf_char c = 0;
4070  switch (c1) {
4071  case 0x23:
4072  /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4073  c = 0xA1;
4074  break;
4075  case 0x56:
4076  /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4077  c = 0xA2;
4078  break;
4079  case 0x57:
4080  /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4081  c = 0xA3;
4082  break;
4083  case 0x22:
4084  /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4085  c = 0xA4;
4086  break;
4087  case 0x26:
4088  /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4089  c = 0xA5;
4090  break;
4091  case 0x3C:
4092  /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4093  c = 0xB0;
4094  break;
4095  case 0x2B:
4096  /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4097  c = 0xDE;
4098  break;
4099  case 0x2C:
4100  /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4101  c = 0xDF;
4102  break;
4103  }
4104  if (c) {
4105  (*o_zconv)(JIS_X_0201_1976_K, c);
4106  return;
4107  }
4108  } else if (c2 == 0x25) {
4109  /* JISX0208 Katakana */
4110  static const int fullwidth_to_halfwidth[] =
4111  {
4112  0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4113  0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4114  0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4115  0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4116  0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4117  0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4118  0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4119  0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4120  0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4121  0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4122  0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4123  0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4124  };
4125  if (fullwidth_to_halfwidth[c1-0x20]){
4126  c2 = fullwidth_to_halfwidth[c1-0x20];
4127  (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
4128  if (c2 & 0xFF) {
4129  (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
4130  }
4131  return;
4132  }
4133  } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
4134  ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */
4135  (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
4136  return;
4137  }
4138  }
4139  (*o_zconv)(c2,c1);
4140 }
4141 
4142 
/* ROT13 on ASCII letters: A-M and a-m move forward by 13, N-Z and n-z move
   back by 13, everything else is returned unchanged.  The argument is
   evaluated more than once; pass an expression without side effects. */
#define rot13(c)  ( \
                   ((c) < 'A') ? (c): \
                   ((c) <= 'M') ? ((c) + 13): \
                   ((c) <= 'Z') ? ((c) - 13): \
                   ((c) < 'a') ? (c): \
                   ((c) <= 'm') ? ((c) + 13): \
                   ((c) <= 'z') ? ((c) - 13): \
                   (c) \
                  )

/* ROT47 on the printable range '!'..'~': '!'..'O' move forward by 47,
   'P'..'~' move back by 47, everything else is returned unchanged.  The
   argument is evaluated more than once. */
#define rot47(c) ( \
                  ((c) < '!') ? (c): \
                  ((c) <= 'O') ? ((c) + 47) : \
                  ((c) <= '~') ? ((c) - 47) : \
                  (c) \
                 )
4159 
4160 static void
4161 rot_conv(nkf_char c2, nkf_char c1)
4162 {
4163  if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
4164  c1 = rot13(c1);
4165  } else if (c2) {
4166  c1 = rot47(c1);
4167  c2 = rot47(c2);
4168  }
4169  (*o_rot_conv)(c2,c1);
4170 }
4171 
4172 static void
4173 hira_conv(nkf_char c2, nkf_char c1)
4174 {
4175  if (hira_f & 1) {
4176  if (c2 == 0x25) {
4177  if (0x20 < c1 && c1 < 0x74) {
4178  c2 = 0x24;
4179  (*o_hira_conv)(c2,c1);
4180  return;
4181  } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
4182  c2 = 0;
4183  c1 = nkf_char_unicode_new(0x3094);
4184  (*o_hira_conv)(c2,c1);
4185  return;
4186  }
4187  } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4188  c1 += 2;
4189  (*o_hira_conv)(c2,c1);
4190  return;
4191  }
4192  }
4193  if (hira_f & 2) {
4194  if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
4195  c2 = 0x25;
4196  c1 = 0x74;
4197  } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4198  c2 = 0x25;
4199  } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4200  c1 -= 2;
4201  }
4202  }
4203  (*o_hira_conv)(c2,c1);
4204 }
4205 
4206 
4207 static void
4208 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4209 {
4210 #define RANGE_NUM_MAX 18
4211  static const nkf_char range[RANGE_NUM_MAX][2] = {
4212  {0x222f, 0x2239,},
4213  {0x2242, 0x2249,},
4214  {0x2251, 0x225b,},
4215  {0x226b, 0x2271,},
4216  {0x227a, 0x227d,},
4217  {0x2321, 0x232f,},
4218  {0x233a, 0x2340,},
4219  {0x235b, 0x2360,},
4220  {0x237b, 0x237e,},
4221  {0x2474, 0x247e,},
4222  {0x2577, 0x257e,},
4223  {0x2639, 0x2640,},
4224  {0x2659, 0x267e,},
4225  {0x2742, 0x2750,},
4226  {0x2772, 0x277e,},
4227  {0x2841, 0x287e,},
4228  {0x4f54, 0x4f7e,},
4229  {0x7425, 0x747e},
4230  };
4231  nkf_char i;
4232  nkf_char start, end, c;
4233 
4234  if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4235  c2 = GETA1;
4236  c1 = GETA2;
4237  }
4238  if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4239  c2 = GETA1;
4240  c1 = GETA2;
4241  }
4242 
4243  for (i = 0; i < RANGE_NUM_MAX; i++) {
4244  start = range[i][0];
4245  end = range[i][1];
4246  c = (c2 << 8) + c1;
4247  if (c >= start && c <= end) {
4248  c2 = GETA1;
4249  c1 = GETA2;
4250  }
4251  }
4252  (*o_iso2022jp_check_conv)(c2,c1);
4253 }
4254 
4255 
4256 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4257 
/*
 * Encoded-word prefixes recognised by mime_begin_strict(), written in upper
 * case ("\075" is '=').  The list ends with NULL.  mime_begin_strict() uses
 * the index of the matching entry to look up mime_priority_func[], so the
 * parallel tables must list their entries in the same order as this one.
 */
static const unsigned char *mime_pattern[] = {
    (const unsigned char *)"\075?EUC-JP?B?",
    (const unsigned char *)"\075?SHIFT_JIS?B?",
    (const unsigned char *)"\075?ISO-8859-1?Q?",
    (const unsigned char *)"\075?ISO-8859-1?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?Q?",
#if defined(UTF8_INPUT_ENABLE)
    (const unsigned char *)"\075?UTF-8?B?",
    (const unsigned char *)"\075?UTF-8?Q?",
#endif
    (const unsigned char *)"\075?US-ASCII?Q?",
    NULL
};
4273 
4274 
4275 /* Markers used to raise the priority of the matching input code */
4277  e_iconv, s_iconv, 0, 0, 0, 0, 0,
4278 #if defined(UTF8_INPUT_ENABLE)
4279  w_iconv, w_iconv,
4280 #endif
4281  0,
4282 };
4283 
4284 static const nkf_char mime_encode[] = {
4286 #if defined(UTF8_INPUT_ENABLE)
4287  UTF_8, UTF_8,
4288 #endif
4289  ASCII,
4290  0
4291 };
4292 
/* Transfer encoding letter ('B' or 'Q') for each entry of mime_pattern[],
   in the same order; terminated by 0. */
static const nkf_char mime_encode_method[] = {
    'B', 'B','Q', 'B', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE)
    'B', 'Q',
#endif
    'Q',
    0
};
4301 
4302 
4303 /* MIME preprocessor fifo */
4304 
#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* Slot of the ring buffer for position n; n is reduced modulo the buffer
   size, so the position counters below may grow (or wrap) freely. */
#define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
static struct {
    unsigned char buf[MIME_BUF_SIZE];
    unsigned int top;   /* next position handed out by mime_getc() */
    unsigned int last; /* decoded */
    unsigned int input; /* undecoded */
} mime_input_state;
/* Converter that was active before a MIME section switched it; restored by
   unswitch_mime_getc().  NULL when nothing has to be restored. */
static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
4315 
4316 #define MAXRECOVER 20
4317 
4318 static void
4319 mime_input_buf_unshift(nkf_char c)
4320 {
4321  mime_input_buf(--mime_input_state.top) = (unsigned char)c;
4322 }
4323 
4324 static nkf_char
4325 mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
4326 {
4327  mime_input_buf_unshift(c);
4328  return c;
4329 }
4330 
4331 static nkf_char
4332 mime_ungetc_buf(nkf_char c, FILE *f)
4333 {
4334  if (mimebuf_f)
4335  (*i_mungetc_buf)(c,f);
4336  else
4337  mime_input_buf(--mime_input_state.input) = (unsigned char)c;
4338  return c;
4339 }
4340 
4341 static nkf_char
4342 mime_getc_buf(FILE *f)
4343 {
4344  /* we don't keep eof of mime_input_buf, because it contains ?= as
4345  a terminator. It was checked in mime_integrity. */
4346  return ((mimebuf_f)?
4347  (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
4348 }
4349 
/*
 * Insert the MIME layer into the input chain.
 *
 * The current i_getc/i_ungetc are saved in i_mgetc/i_mungetc and replaced
 * by mime_getc/mime_ungetc.  In STRICT_MIME mode a second level is added:
 * the saved functions move to i_mgetc_buf/i_mungetc_buf and i_mgetc/
 * i_mungetc are pointed at the FIFO-aware mime_getc_buf/mime_ungetc_buf.
 * Does nothing if the MIME layer is already installed.
 */
static void
switch_mime_getc(void)
{
    if (i_getc!=mime_getc) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
        if(mime_f==STRICT_MIME) {
            i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
            i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
        }
    }
}
4362 
/*
 * Remove the MIME layer from the input chain, undoing switch_mime_getc()
 * in reverse order, and restore the converter saved in mime_iconv_back
 * (if any).
 */
static void
unswitch_mime_getc(void)
{
    if(mime_f==STRICT_MIME) {
        /* undo the second level first */
        i_mgetc = i_mgetc_buf;
        i_mungetc = i_mungetc_buf;
    }
    i_getc = i_mgetc;
    i_ungetc = i_mungetc;
    if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
    mime_iconv_back = NULL;
}
4375 
4376 static nkf_char
4377 mime_integrity(FILE *f, const unsigned char *p)
4378 {
4379  nkf_char c,d;
4380  unsigned int q;
4381  /* In buffered mode, read until =? or NL or buffer full
4382  */
4383  mime_input_state.input = mime_input_state.top;
4384  mime_input_state.last = mime_input_state.top;
4385 
4386  while(*p) mime_input_buf(mime_input_state.input++) = *p++;
4387  d = 0;
4388  q = mime_input_state.input;
4389  while((c=(*i_getc)(f))!=EOF) {
4390  if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4391  break; /* buffer full */
4392  }
4393  if (c=='=' && d=='?') {
4394  /* checked. skip header, start decode */
4395  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4396  /* mime_last_input = mime_input_state.input; */
4397  mime_input_state.input = q;
4398  switch_mime_getc();
4399  return 1;
4400  }
4401  if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4402  break;
4403  /* Should we check length mod 4? */
4404  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4405  d=c;
4406  }
4407  /* In case of Incomplete MIME, no MIME decode */
4408  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4409  mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4410  mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4411  switch_mime_getc(); /* anyway we need buffered getc */
4412  return 1;
4413 }
4414 
/*
 * Strict recognition of an encoded-word prefix.  "=?" has already been read
 * by the caller.
 *
 * The following input is matched, ignoring case, against the entries of
 * mime_pattern[].  When the current candidate stops matching, the remaining
 * entries are searched for one that shares the bytes matched so far and
 * also accepts the new byte.  If no entry matches, the byte is pushed back,
 * the bytes consumed so far are sent to oconv as plain text and the
 * mismatching byte is returned.
 *
 * On a match, mime_decode_mode is set to the encoding letter of the pattern,
 * the current converter is saved in mime_iconv_back and replaced by
 * mime_priority_func[j], and the MIME layer is installed.  For 'B' in
 * buffered mode (unbuf_f not set) this is delegated to mime_integrity().
 */
static nkf_char
mime_begin_strict(FILE *f)
{
    nkf_char c1 = 0;
    int i,j,k;
    const unsigned char *p,*q;
    nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */

    mime_decode_mode = FALSE;
    /* =? has been checked */
    j = 0;
    p = mime_pattern[j];
    r[0]='='; r[1]='?';

    for(i=2;p[i]>SP;i++) { /* start at =? */
        if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
            /* pattern fails, try next one */
            q = p;
            while (mime_pattern[++j]) {
                p = mime_pattern[j];
                for(k=2;k<i;k++) /* assume length(p) > i */
                    if (p[k]!=q[k]) break;
                if (k==i && nkf_toupper(c1)==p[k]) break;
            }
            p = mime_pattern[j];
            if (p) continue; /* found next one, continue */
            /* all fails, output from recovery buffer */
            (*i_ungetc)(c1,f);
            for(j=0;j<i;j++) {
                (*oconv)(0,r[j]);
            }
            return c1;
        }
    }
    /* every pattern ends in "?X?", so p[i-2] is the encoding letter X */
    mime_decode_mode = p[i-2];

    mime_iconv_back = iconv;
    set_iconv(FALSE, mime_priority_func[j]);
    clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);

    if (mime_decode_mode=='B') {
        mimebuf_f = unbuf_f;
        if (!unbuf_f) {
            /* do MIME integrity check */
            return mime_integrity(f,mime_pattern[j]);
        }
    }
    switch_mime_getc();
    mimebuf_f = TRUE;
    return c1;
}
4466 
/*
 * Non-strict recognition of an encoded-word prefix.  "=?" has already been
 * read by the caller.
 *
 * Up to MAXRECOVER bytes are read and copied to the MIME FIFO while looking
 * for "?B?" or "?Q?" (either case) after a charset made of alphanumerics,
 * '-', '_', spaces and line breaks.  On success mime_decode_mode is set to
 * 'B' or 'Q' and the copied prefix is discarded from the FIFO.  On failure
 * mime_decode_mode is set to 1, so the copied bytes are read back undecoded
 * through mime_getc().
 *
 * Returns the last byte read; per the comment on the final return, the
 * caller uses it only to detect EOF.
 *
 * NOTE(review): a byte read as EOF is also stored in the FIFO as
 * (unsigned char)EOF -- confirm that callers stop on the returned EOF before
 * reading it back.
 */
static nkf_char
mime_begin(FILE *f)
{
    nkf_char c1 = 0;
    int i,k;

    /* In NONSTRICT mode, only =? is checked. In case of failure, we */
    /* re-read and convert again from mime_buffer. */

    /* =? has been checked */
    k = mime_input_state.last;      /* where the prefix starts in the FIFO */
    mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
    for(i=2;i<MAXRECOVER;i++) { /* start at =? */
        /* We accept any character type even if it is broken by new lines */
        c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
        if (c1==LF||c1==SP||c1==CR||
            c1=='-'||c1=='_'||is_alnum(c1)) continue;
        if (c1=='=') {
            /* Failed. But this could be another MIME preamble */
            (*i_ungetc)(c1,f);
            mime_input_state.last--;
            break;
        }
        if (c1!='?') break;
        else {
            /* c1=='?' */
            c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
            if (!(++i<MAXRECOVER) || c1==EOF) break;
            if (c1=='b'||c1=='B') {
                mime_decode_mode = 'B';
            } else if (c1=='q'||c1=='Q') {
                mime_decode_mode = 'Q';
            } else {
                break;
            }
            c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
            if (!(++i<MAXRECOVER) || c1==EOF) break;
            if (c1!='?') {
                /* the encoding letter must be followed by '?' */
                mime_decode_mode = FALSE;
            }
            break;
        }
    }
    switch_mime_getc();
    if (!mime_decode_mode) {
        /* false MIME preamble, restart from mime_buffer */
        mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
        /* Since we are in MIME mode until buffer becomes empty, */
        /* we never go into mime_begin again for a while. */
        return c1;
    }
    /* discard mime preamble, and goto MIME mode */
    mime_input_state.last = k;
    /* do no MIME integrity check */
    return c1; /* used only for checking EOF */
}
4523 
4524 #ifdef CHECK_OPTION
/* Output callback that discards its argument. */
static void
no_putc(ARG_UNUSED nkf_char c)
{
    ;
}
4530 
4531 static void
4532 debug(const char *str)
4533 {
4534  if (debug_f){
4535  fprintf(stderr, "%s\n", str ? str : "NULL");
4536  }
4537 }
4538 #endif
4539 
4540 static void
4541 set_input_codename(const char *codename)
4542 {
4543  if (!input_codename) {
4544  input_codename = codename;
4545  } else if (strcmp(codename, input_codename) != 0) {
4546  input_codename = "";
4547  }
4548 }
4549 
4550 static const char*
4551 get_guessed_code(void)
4552 {
4553  if (input_codename && !*input_codename) {
4554  input_codename = "BINARY";
4555  } else {
4556  struct input_code *p = find_inputcode_byfunc(iconv);
4557  if (!input_codename) {
4558  input_codename = "ASCII";
4559  } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4560  if (p->score & (SCORE_DEPEND|SCORE_CP932))
4561  input_codename = "CP932";
4562  } else if (strcmp(input_codename, "EUC-JP") == 0) {
4563  if (p->score & SCORE_X0213)
4564  input_codename = "EUC-JIS-2004";
4565  else if (p->score & (SCORE_X0212))
4566  input_codename = "EUCJP-MS";
4567  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4568  input_codename = "CP51932";
4569  } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4570  if (p->score & (SCORE_KANA))
4571  input_codename = "CP50221";
4572  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4573  input_codename = "CP50220";
4574  }
4575  }
4576  return input_codename;
4577 }
4578 
4579 #if !defined(PERL_XS) && !defined(WIN32DLL)
4580 static void
4581 print_guessed_code(char *filename)
4582 {
4583  if (filename != NULL) printf("%s: ", filename);
4584  if (input_codename && !*input_codename) {
4585  printf("BINARY\n");
4586  } else {
4587  input_codename = get_guessed_code();
4588  if (guess_f == 1) {
4589  printf("%s\n", input_codename);
4590  } else {
4591  printf("%s%s%s%s\n",
4592  input_codename,
4593  iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4594  input_endian == ENDIAN_LITTLE ? " LE" :
4595  input_endian == ENDIAN_BIG ? " BE" :
4596  "[BUG]",
4597  input_bom_f ? " (BOM)" : "",
4598  input_eol == CR ? " (CR)" :
4599  input_eol == LF ? " (LF)" :
4600  input_eol == CRLF ? " (CRLF)" :
4601  input_eol == EOF ? " (MIXED NL)" :
4602  "");
4603  }
4604  }
4605 }
4606 #endif /*WIN32DLL*/
4607 
4608 #ifdef INPUT_OPTION
4609 
4610 static nkf_char
4611 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4612 {
4613  nkf_char c1, c2, c3;
4614  c1 = (*g)(f);
4615  if (c1 != ch){
4616  return c1;
4617  }
4618  c2 = (*g)(f);
4619  if (!nkf_isxdigit(c2)){
4620  (*u)(c2, f);
4621  return c1;
4622  }
4623  c3 = (*g)(f);
4624  if (!nkf_isxdigit(c3)){
4625  (*u)(c2, f);
4626  (*u)(c3, f);
4627  return c1;
4628  }
4629  return (hex2bin(c2) << 4) | hex2bin(c3);
4630 }
4631 
/* getc layer that decodes ":XX" hexadecimal escapes, reading through
   i_cgetc/i_cungetc. */
static nkf_char
cap_getc(FILE *f)
{
    return hex_getc(':', f, i_cgetc, i_cungetc);
}
4637 
/* ungetc counterpart of cap_getc(): forwards to i_cungetc. */
static nkf_char
cap_ungetc(nkf_char c, FILE *f)
{
    return (*i_cungetc)(c, f);
}
4643 
/* getc layer that decodes "%XX" hexadecimal escapes, reading through
   i_ugetc/i_uungetc. */
static nkf_char
url_getc(FILE *f)
{
    return hex_getc('%', f, i_ugetc, i_uungetc);
}
4649 
/* ungetc counterpart of url_getc(): forwards to i_uungetc. */
static nkf_char
url_ungetc(nkf_char c, FILE *f)
{
    return (*i_uungetc)(c, f);
}
4655 #endif
4656 
4657 #ifdef NUMCHAR_OPTION
/*
 * getc layer that decodes numeric character references: "&#x" followed by
 * up to 7 hexadecimal digits, or "&#" followed by up to 8 decimal digits.
 *
 * Bytes are read through i_ngetc and remembered in buf.  When a reference is
 * recognised its value is returned wrapped by nkf_char_unicode_new().
 * Otherwise all bytes after the first are pushed back through i_nungetc,
 * last byte first, and the first byte is returned.
 *
 * c is -1 while no valid reference has been recognised.  A digit run that
 * stops at a byte other than ';' is rejected.  A run that reaches the digit
 * limit is accepted without looking at the byte that follows.
 */
static nkf_char
numchar_getc(FILE *f)
{
    nkf_char (*g)(FILE *) = i_ngetc;
    nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
    int i = 0, j;
    nkf_char buf[12];
    nkf_char c = -1;

    buf[i] = (*g)(f);
    if (buf[i] == '&'){
        buf[++i] = (*g)(f);
        if (buf[i] == '#'){
            c = 0;
            buf[++i] = (*g)(f);
            if (buf[i] == 'x' || buf[i] == 'X'){
                /* hexadecimal form */
                for (j = 0; j < 7; j++){
                    buf[++i] = (*g)(f);
                    if (!nkf_isxdigit(buf[i])){
                        if (buf[i] != ';'){
                            c = -1;
                        }
                        break;
                    }
                    c <<= 4;
                    c |= hex2bin(buf[i]);
                }
            }else{
                /* decimal form; the first digit has already been read */
                for (j = 0; j < 8; j++){
                    if (j){
                        buf[++i] = (*g)(f);
                    }
                    if (!nkf_isdigit(buf[i])){
                        if (buf[i] != ';'){
                            c = -1;
                        }
                        break;
                    }
                    c *= 10;
                    c += hex2bin(buf[i]);
                }
            }
        }
    }
    if (c != -1){
        return nkf_char_unicode_new(c);
    }
    /* not a reference: restore everything but the first byte */
    while (i > 0){
        (*u)(buf[i], f);
        --i;
    }
    return buf[0];
}
4711 
/* ungetc counterpart of numchar_getc(): forwards to i_nungetc. */
static nkf_char
numchar_ungetc(nkf_char c, FILE *f)
{
    return (*i_nungetc)(c, f);
}
4717 #endif
4718 
4719 #ifdef UNICODE_NORMALIZATION
4720 
/*
 * getc layer that replaces decomposed byte sequences by their composed
 * form, using normalization_table[] (pairs of nfd and nfc byte strings).
 *
 * One byte is read through i_nfc_getc.  EOF, values above 0xFF and bytes of
 * the form 10xxxxxx are returned unchanged.  Otherwise the table is
 * searched by bisection on the nfd strings, reading more input into
 * nfc_buf as needed for the comparison.  On a full match the buffer content
 * is replaced by the nfc string and the search is repeated on the result.
 *
 * Finally all buffered bytes except the first are pushed back through
 * i_nfc_ungetc (last byte first) and the first byte is returned.
 *
 * NOTE(review): the bisection presumably relies on normalization_table being
 * sorted by its nfd byte strings -- confirm against the table definition.
 */
static nkf_char
nfc_getc(FILE *f)
{
    nkf_char (*g)(FILE *f) = i_nfc_getc;
    nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
    nkf_buf_t *buf = nkf_state->nfc_buf;
    const unsigned char *array;
    int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
    nkf_char c = (*g)(f);

    if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;

    nkf_buf_push(buf, c);
    do {
        while (lower <= upper) {
            int mid = (lower+upper) / 2;
            int len;
            array = normalization_table[mid].nfd;
            /* compare the candidate with the buffered input byte by byte;
               len stays > 0 only if the whole nfd string matched */
            for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
                if (len >= nkf_buf_length(buf)) {
                    c = (*g)(f);
                    if (c == EOF) {
                        /* out of input: make both loops terminate */
                        len = 0;
                        lower = 1, upper = 0;
                        break;
                    }
                    nkf_buf_push(buf, c);
                }
                if (array[len] != nkf_buf_at(buf, len)) {
                    if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
                    else upper = mid - 1;
                    len = 0;
                    break;
                }
            }
            if (len > 0) {
                /* match: replace the buffered input by the composed form */
                int i;
                array = normalization_table[mid].nfc;
                nkf_buf_clear(buf);
                for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
                    nkf_buf_push(buf, array[i]);
                break;
            }
        }
    } while (lower <= upper);

    /* hand back one byte now, the rest on the following reads */
    while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
    c = nkf_buf_pop(buf);

    return c;
}
4772 
/* ungetc counterpart of nfc_getc(): forwards to i_nfc_ungetc. */
static nkf_char
nfc_ungetc(nkf_char c, FILE *f)
{
    return (*i_nfc_ungetc)(c, f);
}
4778 #endif /* UNICODE_NORMALIZATION */
4779 
4780 
4781 static nkf_char
4782 base64decode(nkf_char c)
4783 {
4784  int i;
4785  if (c > '@') {
4786  if (c < '[') {
4787  i = c - 'A'; /* A..Z 0-25 */
4788  } else if (c == '_') {
4789  i = '?' /* 63 */ ; /* _ 63 */
4790  } else {
4791  i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4792  }
4793  } else if (c > '/') {
4794  i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4795  } else if (c == '+' || c == '-') {
4796  i = '>' /* 62 */ ; /* + and - 62 */
4797  } else {
4798  i = '?' /* 63 */ ; /* / 63 */
4799  }
4800  return (i);
4801 }
4802 
4803 static nkf_char
4804 mime_getc(FILE *f)
4805 {
4806  nkf_char c1, c2, c3, c4, cc;
4807  nkf_char t1, t2, t3, t4, mode, exit_mode;
4808  nkf_char lwsp_count;
4809  char *lwsp_buf;
4810  char *lwsp_buf_new;
4811  nkf_char lwsp_size = 128;
4812 
4813  if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4814  return mime_input_buf(mime_input_state.top++);
4815  }
4816  if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4817  mime_decode_mode=FALSE;
4818  unswitch_mime_getc();
4819  return (*i_getc)(f);
4820  }
4821 
4822  if (mimebuf_f == FIXED_MIME)
4823  exit_mode = mime_decode_mode;
4824  else
4825  exit_mode = FALSE;
4826  if (mime_decode_mode == 'Q') {
4827  if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4828  restart_mime_q:
4829  if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4830  if (c1<=SP || DEL<=c1) {
4831  mime_decode_mode = exit_mode; /* prepare for quit */
4832  return c1;
4833  }
4834  if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4835  return c1;
4836  }
4837 
4838  mime_decode_mode = exit_mode; /* prepare for quit */
4839  if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4840  if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4841  /* end Q encoding */
4842  input_mode = exit_mode;
4843  lwsp_count = 0;
4844  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4845  while ((c1=(*i_getc)(f))!=EOF) {
4846  switch (c1) {
4847  case LF:
4848  case CR:
4849  if (c1==LF) {
4850  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4851  i_ungetc(SP,f);
4852  continue;
4853  } else {
4854  i_ungetc(c1,f);
4855  }
4856  c1 = LF;
4857  } else {
4858  if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4859  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4860  i_ungetc(SP,f);
4861  continue;
4862  } else {
4863  i_ungetc(c1,f);
4864  }
4865  i_ungetc(LF,f);
4866  } else {
4867  i_ungetc(c1,f);
4868  }
4869  c1 = CR;
4870  }
4871  break;
4872  case SP:
4873  case TAB:
4874  lwsp_buf[lwsp_count] = (unsigned char)c1;
4875  if (lwsp_count++>lwsp_size){
4876  lwsp_size <<= 1;
4877  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4878  lwsp_buf = lwsp_buf_new;
4879  }
4880  continue;
4881  }
4882  break;
4883  }
4884  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4885  i_ungetc(c1,f);
4886  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4887  i_ungetc(lwsp_buf[lwsp_count],f);
4888  c1 = lwsp_buf[0];
4889  }
4890  nkf_xfree(lwsp_buf);
4891  return c1;
4892  }
4893  if (c1=='='&&c2<SP) { /* this is soft wrap */
4894  while((c1 = (*i_mgetc)(f)) <=SP) {
4895  if (c1 == EOF) return (EOF);
4896  }
4897  mime_decode_mode = 'Q'; /* still in MIME */
4898  goto restart_mime_q;
4899  }
4900  if (c1=='?') {
4901  mime_decode_mode = 'Q'; /* still in MIME */
4902  (*i_mungetc)(c2,f);
4903  return c1;
4904  }
4905  if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4906  if (c2<=SP) return c2;
4907  mime_decode_mode = 'Q'; /* still in MIME */
4908  return ((hex2bin(c2)<<4) + hex2bin(c3));
4909  }
4910 
4911  if (mime_decode_mode != 'B') {
4912  mime_decode_mode = FALSE;
4913  return (*i_mgetc)(f);
4914  }
4915 
4916 
4917  /* Base64 encoding */
4918  /*
4919  MIME allows line break in the middle of
4920  Base64, but we are very pessimistic in decoding
4921  in unbuf mode because MIME encoded code may broken by
4922  less or editor's control sequence (such as ESC-[-K in unbuffered
4923  mode. ignore incomplete MIME.
4924  */
4925  mode = mime_decode_mode;
4926  mime_decode_mode = exit_mode; /* prepare for quit */
4927 
4928  while ((c1 = (*i_mgetc)(f))<=SP) {
4929  if (c1==EOF)
4930  return (EOF);
4931  }
4932  mime_c2_retry:
4933  if ((c2 = (*i_mgetc)(f))<=SP) {
4934  if (c2==EOF)
4935  return (EOF);
4936  if (mime_f != STRICT_MIME) goto mime_c2_retry;
4937  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4938  return c2;
4939  }
4940  if ((c1 == '?') && (c2 == '=')) {
4941  input_mode = ASCII;
4942  lwsp_count = 0;
4943  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4944  while ((c1=(*i_getc)(f))!=EOF) {
4945  switch (c1) {
4946  case LF:
4947  case CR:
4948  if (c1==LF) {
4949  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4950  i_ungetc(SP,f);
4951  continue;
4952  } else {
4953  i_ungetc(c1,f);
4954  }
4955  c1 = LF;
4956  } else {
4957  if ((c1=(*i_getc)(f))!=EOF) {
4958  if (c1==SP) {
4959  i_ungetc(SP,f);
4960  continue;
4961  } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4962  i_ungetc(SP,f);
4963  continue;
4964  } else {
4965  i_ungetc(c1,f);
4966  }
4967  i_ungetc(LF,f);
4968  } else {
4969  i_ungetc(c1,f);
4970  }
4971  c1 = CR;
4972  }
4973  break;
4974  case SP:
4975  case TAB:
4976  lwsp_buf[lwsp_count] = (unsigned char)c1;
4977  if (lwsp_count++>lwsp_size){
4978  lwsp_size <<= 1;
4979  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4980  lwsp_buf = lwsp_buf_new;
4981  }
4982  continue;
4983  }
4984  break;
4985  }
4986  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4987  i_ungetc(c1,f);
4988  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4989  i_ungetc(lwsp_buf[lwsp_count],f);
4990  c1 = lwsp_buf[0];
4991  }
4992  nkf_xfree(lwsp_buf);
4993  return c1;
4994  }
4995  mime_c3_retry:
4996  if ((c3 = (*i_mgetc)(f))<=SP) {
4997  if (c3==EOF)
4998  return (EOF);
4999  if (mime_f != STRICT_MIME) goto mime_c3_retry;
5000  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5001  return c3;
5002  }
5003  mime_c4_retry:
5004  if ((c4 = (*i_mgetc)(f))<=SP) {
5005  if (c4==EOF)
5006  return (EOF);
5007  if (mime_f != STRICT_MIME) goto mime_c4_retry;
5008  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5009  return c4;
5010  }
5011 
5012  mime_decode_mode = mode; /* still in MIME sigh... */
5013 
5014  /* BASE 64 decoding */
5015 
5016  t1 = 0x3f & base64decode(c1);
5017  t2 = 0x3f & base64decode(c2);
5018  t3 = 0x3f & base64decode(c3);
5019  t4 = 0x3f & base64decode(c4);
5020  cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5021  if (c2 != '=') {
5022  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5023  cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5024  if (c3 != '=') {
5025  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5026  cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5027  if (c4 != '=')
5028  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5029  }
5030  } else {
5031  return c1;
5032  }
5033  return mime_input_buf(mime_input_state.top++);
5034 }
5035 
/* Base64 alphabet, indexed by 6-bit value. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

/* Capacity (in bytes, excluding one spare slot) of the MIME output buffer. */
#define MIMEOUT_BUF_LENGTH 74

/* Bytes held back by the MIME encoder before they are written out. */
static struct {
    unsigned char buf[MIMEOUT_BUF_LENGTH+1];    /* pending bytes */
    int count;                                  /* number of bytes in buf */
} mimeout_state;
5044 
5045 /*nkf_char mime_lastchar2, mime_lastchar1;*/
5046 
5047 static void
5048 open_mime(nkf_char mode)
5049 {
5050  const unsigned char *p;
5051  int i;
5052  int j;
5053  p = mime_pattern[0];
5054  for(i=0;mime_pattern[i];i++) {
5055  if (mode == mime_encode[i]) {
5056  p = mime_pattern[i];
5057  break;
5058  }
5059  }
5060  mimeout_mode = mime_encode_method[i];
5061  i = 0;
5062  if (base64_count>45) {
5063  if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
5064  (*o_mputc)(mimeout_state.buf[i]);
5065  i++;
5066  }
5067  put_newline(o_mputc);
5068  (*o_mputc)(SP);
5069  base64_count = 1;
5070  if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
5071  i++;
5072  }
5073  }
5074  for (;i<mimeout_state.count;i++) {
5075  if (nkf_isspace(mimeout_state.buf[i])) {
5076  (*o_mputc)(mimeout_state.buf[i]);
5077  base64_count ++;
5078  } else {
5079  break;
5080  }
5081  }
5082  while(*p) {
5083  (*o_mputc)(*p++);
5084  base64_count ++;
5085  }
5086  j = mimeout_state.count;
5087  mimeout_state.count = 0;
5088  for (;i<j;i++) {
5089  mime_putc(mimeout_state.buf[i]);
5090  }
5091 }
5092 
5093 static void
5094 mime_prechar(nkf_char c2, nkf_char c1)
5095 {
5096  if (mimeout_mode > 0){
5097  if (c2 == EOF){
5098  if (base64_count + mimeout_state.count/3*4> 73){
5099  (*o_base64conv)(EOF,0);
5100  oconv_newline(o_base64conv);
5101  (*o_base64conv)(0,SP);
5102  base64_count = 1;
5103  }
5104  } else {
5105  if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
5106  (*o_base64conv)(EOF,0);
5107  oconv_newline(o_base64conv);
5108  (*o_base64conv)(0,SP);
5109  base64_count = 1;
5110  mimeout_mode = -1;
5111  }
5112  }
5113  } else if (c2) {
5114  if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
5115  mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
5116  open_mime(output_mode);
5117  (*o_base64conv)(EOF,0);
5118  oconv_newline(o_base64conv);
5119  (*o_base64conv)(0,SP);
5120  base64_count = 1;
5121  mimeout_mode = -1;
5122  }
5123  }
5124 }
5125 
5126 static void
5127 close_mime(void)
5128 {
5129  (*o_mputc)('?');
5130  (*o_mputc)('=');
5131  base64_count += 2;
5132  mimeout_mode = 0;
5133 }
5134 
5135 static void
5136 eof_mime(void)
5137 {
5138  switch(mimeout_mode) {
5139  case 'Q':
5140  case 'B':
5141  break;
5142  case 2:
5143  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
5144  (*o_mputc)('=');
5145  (*o_mputc)('=');
5146  base64_count += 3;
5147  break;
5148  case 1:
5149  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
5150  (*o_mputc)('=');
5151  base64_count += 2;
5152  break;
5153  }
5154  if (mimeout_mode > 0) {
5155  if (mimeout_f!=FIXED_MIME) {
5156  close_mime();
5157  } else if (mimeout_mode != 'Q')
5158  mimeout_mode = 'B';
5159  }
5160 }
5161 
5162 static void
5163 mimeout_addchar(nkf_char c)
5164 {
5165  switch(mimeout_mode) {
5166  case 'Q':
5167  if (c==CR||c==LF) {
5168  (*o_mputc)(c);
5169  base64_count = 0;
5170  } else if(!nkf_isalnum(c)) {
5171  (*o_mputc)('=');
5172  (*o_mputc)(bin2hex(((c>>4)&0xf)));
5173  (*o_mputc)(bin2hex((c&0xf)));
5174  base64_count += 3;
5175  } else {
5176  (*o_mputc)(c);
5177  base64_count++;
5178  }
5179  break;
5180  case 'B':
5181  nkf_state->mimeout_state=c;
5182  (*o_mputc)(basis_64[c>>2]);
5183  mimeout_mode=2;
5184  base64_count ++;
5185  break;
5186  case 2:
5187  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5188  nkf_state->mimeout_state=c;
5189  mimeout_mode=1;
5190  base64_count ++;
5191  break;
5192  case 1:
5193  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5194  (*o_mputc)(basis_64[c & 0x3F]);
5195  mimeout_mode='B';
5196  base64_count += 2;
5197  break;
5198  default:
5199  (*o_mputc)(c);
5200  base64_count++;
5201  break;
5202  }
5203 }
5204 
5205 static void
5206 mime_putc(nkf_char c)
5207 {
5208  int i, j;
5209  nkf_char lastchar;
5210 
5211  if (mimeout_f == FIXED_MIME){
5212  if (mimeout_mode == 'Q'){
5213  if (base64_count > 71){
5214  if (c!=CR && c!=LF) {
5215  (*o_mputc)('=');
5216  put_newline(o_mputc);
5217  }
5218  base64_count = 0;
5219  }
5220  }else{
5221  if (base64_count > 71){
5222  eof_mime();
5223  put_newline(o_mputc);
5224  base64_count = 0;
5225  }
5226  if (c == EOF) { /* c==EOF */
5227  eof_mime();
5228  }
5229  }
5230  if (c != EOF) { /* c==EOF */
5231  mimeout_addchar(c);
5232  }
5233  return;
5234  }
5235 
5236  /* mimeout_f != FIXED_MIME */
5237 
5238  if (c == EOF) { /* c==EOF */
5239  if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5240  j = mimeout_state.count;
5241  mimeout_state.count = 0;
5242  i = 0;
5243  if (mimeout_mode > 0) {
5244  if (!nkf_isblank(mimeout_state.buf[j-1])) {
5245  for (;i<j;i++) {
5246  if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5247  break;
5248  }
5249  mimeout_addchar(mimeout_state.buf[i]);
5250  }
5251  eof_mime();
5252  for (;i<j;i++) {
5253  mimeout_addchar(mimeout_state.buf[i]);
5254  }
5255  } else {
5256  for (;i<j;i++) {
5257  mimeout_addchar(mimeout_state.buf[i]);
5258  }
5259  eof_mime();
5260  }
5261  } else {
5262  for (;i<j;i++) {
5263  mimeout_addchar(mimeout_state.buf[i]);
5264  }
5265  }
5266  return;
5267  }
5268 
5269  if (mimeout_state.count > 0){
5270  lastchar = mimeout_state.buf[mimeout_state.count - 1];
5271  }else{
5272  lastchar = -1;
5273  }
5274 
5275  if (mimeout_mode=='Q') {
5276  if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
5277  if (c == CR || c == LF) {
5278  close_mime();
5279  (*o_mputc)(c);
5280  base64_count = 0;
5281  return;
5282  } else if (c <= SP) {
5283  close_mime();
5284  if (base64_count > 70) {
5285  put_newline(o_mputc);
5286  base64_count = 0;
5287  }
5288  if (!nkf_isblank(c)) {
5289  (*o_mputc)(SP);
5290  base64_count++;
5291  }
5292  } else {
5293  if (base64_count > 70) {
5294  close_mime();
5295  put_newline(o_mputc);
5296  (*o_mputc)(SP);
5297  base64_count = 1;
5298  open_mime(output_mode);
5299  }
5300  if (!nkf_noescape_mime(c)) {
5301  mimeout_addchar(c);
5302  return;
5303  }
5304  }
5305  if (c != 0x1B) {
5306  (*o_mputc)(c);
5307  base64_count++;
5308  return;
5309  }
5310  }
5311  }
5312 
5313  if (mimeout_mode <= 0) {
5314  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5315  output_mode == UTF_8)) {
5316  if (nkf_isspace(c)) {
5317  int flag = 0;
5318  if (mimeout_mode == -1) {
5319  flag = 1;
5320  }
5321  if (c==CR || c==LF) {
5322  if (flag) {
5323  open_mime(output_mode);
5324  output_mode = 0;
5325  } else {
5326  base64_count = 0;
5327  }
5328  }
5329  for (i=0;i<mimeout_state.count;i++) {
5330  (*o_mputc)(mimeout_state.buf[i]);
5331  if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
5332  base64_count = 0;
5333  }else{
5334  base64_count++;
5335  }
5336  }
5337  if (flag) {
5338  eof_mime();
5339  base64_count = 0;
5340  mimeout_mode = 0;
5341  }
5342  mimeout_state.buf[0] = (char)c;
5343  mimeout_state.count = 1;
5344  }else{
5345  if (base64_count > 1
5346  && base64_count + mimeout_state.count > 76
5347  && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
5348  static const char *str = "boundary=\"";
5349  static int len = 10;
5350  i = 0;
5351 
5352  for (; i < mimeout_state.count - len; ++i) {
5353  if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
5354  i += len - 2;
5355  break;
5356  }
5357  }
5358 
5359  if (i == 0 || i == mimeout_state.count - len) {
5360  put_newline(o_mputc);
5361  base64_count = 0;
5362  if (!nkf_isspace(mimeout_state.buf[0])){
5363  (*o_mputc)(SP);
5364  base64_count++;
5365  }
5366  }
5367  else {
5368  int j;
5369  for (j = 0; j <= i; ++j) {
5370  (*o_mputc)(mimeout_state.buf[j]);
5371  }
5372  put_newline(o_mputc);
5373  base64_count = 1;
5374  for (; j <= mimeout_state.count; ++j) {
5375  mimeout_state.buf[j - i] = mimeout_state.buf[j];
5376  }
5377  mimeout_state.count -= i;
5378  }
5379  }
5380  mimeout_state.buf[mimeout_state.count++] = (char)c;
5381  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5382  open_mime(output_mode);
5383  }
5384  }
5385  return;
5386  }else{
5387  if (lastchar==CR || lastchar == LF){
5388  for (i=0;i<mimeout_state.count;i++) {
5389  (*o_mputc)(mimeout_state.buf[i]);
5390  }
5391  base64_count = 0;
5392  mimeout_state.count = 0;
5393  }
5394  if (lastchar==SP) {
5395  for (i=0;i<mimeout_state.count-1;i++) {
5396  (*o_mputc)(mimeout_state.buf[i]);
5397  base64_count++;
5398  }
5399  mimeout_state.buf[0] = SP;
5400  mimeout_state.count = 1;
5401  }
5402  open_mime(output_mode);
5403  }
5404  }else{
5405  /* mimeout_mode == 'B', 1, 2 */
5406  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5407  output_mode == UTF_8)) {
5408  if (lastchar == CR || lastchar == LF){
5409  if (nkf_isblank(c)) {
5410  for (i=0;i<mimeout_state.count;i++) {
5411  mimeout_addchar(mimeout_state.buf[i]);
5412  }
5413  mimeout_state.count = 0;
5414  } else {
5415  eof_mime();
5416  for (i=0;i<mimeout_state.count;i++) {
5417  (*o_mputc)(mimeout_state.buf[i]);
5418  }
5419  base64_count = 0;
5420  mimeout_state.count = 0;
5421  }
5422  mimeout_state.buf[mimeout_state.count++] = (char)c;
5423  return;
5424  }
5425  if (nkf_isspace(c)) {
5426  for (i=0;i<mimeout_state.count;i++) {
5427  if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5428  eof_mime();
5429  for (i=0;i<mimeout_state.count;i++) {
5430  (*o_mputc)(mimeout_state.buf[i]);
5431  base64_count++;
5432  }
5433  mimeout_state.count = 0;
5434  }
5435  }
5436  mimeout_state.buf[mimeout_state.count++] = (char)c;
5437  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5438  eof_mime();
5439  for (j=0;j<mimeout_state.count;j++) {
5440  (*o_mputc)(mimeout_state.buf[j]);
5441  base64_count++;
5442  }
5443  mimeout_state.count = 0;
5444  }
5445  return;
5446  }
5447  if (mimeout_state.count>0 && SP<c && c!='=') {
5448  mimeout_state.buf[mimeout_state.count++] = (char)c;
5449  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5450  j = mimeout_state.count;
5451  mimeout_state.count = 0;
5452  for (i=0;i<j;i++) {
5453  mimeout_addchar(mimeout_state.buf[i]);
5454  }
5455  }
5456  return;
5457  }
5458  }
5459  }
5460  if (mimeout_state.count>0) {
5461  j = mimeout_state.count;
5462  mimeout_state.count = 0;
5463  for (i=0;i<j;i++) {
5464  if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5465  break;
5466  mimeout_addchar(mimeout_state.buf[i]);
5467  }
5468  if (i<j) {
5469  eof_mime();
5470  base64_count=0;
5471  for (;i<j;i++) {
5472  (*o_mputc)(mimeout_state.buf[i]);
5473  }
5474  open_mime(output_mode);
5475  }
5476  }
5477  mimeout_addchar(c);
5478 }
5479 
5480 static void
5481 base64_conv(nkf_char c2, nkf_char c1)
5482 {
5483  mime_prechar(c2, c1);
5484  (*o_base64conv)(c2,c1);
5485 }
5486 
#ifdef HAVE_ICONV_H
/*
 * Conversion through the system iconv(3).
 *
 * The previous text of this section could not compile when HAVE_ICONV_H
 * was defined: the typedef declared no name, nkf_iconv_new() dereferenced
 * an object that was never allocated and had no success return,
 * nkf_iconv_convert() used undeclared names (f, outbuf), read an
 * uninitialised variable and never left its loop, and nkf_iconv_close()
 * referred to a parameter and members that do not exist.
 * nkf_iconv_new() now returns a pointer, which is what its body always
 * assumed.
 */
typedef struct nkf_iconv_t {
    iconv_t cd;                 /* conversion descriptor */
    char *input_buffer;         /* bytes read but not yet converted */
    size_t input_buffer_size;
    char *output_buffer;        /* scratch space for converted bytes */
    size_t output_buffer_size;
} nkf_iconv_t;

/*
 * Create a converter from fromcode to tocode.
 *
 * Returns a converter to be released with nkf_iconv_close(), or NULL after
 * printing a diagnostic when iconv_open() fails.
 */
static nkf_iconv_t *
nkf_iconv_new(char *tocode, char *fromcode)
{
    nkf_iconv_t *converter = nkf_xmalloc(sizeof(nkf_iconv_t));

    converter->input_buffer_size = IOBUF_SIZE;
    converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
    converter->output_buffer_size = IOBUF_SIZE * 2;
    converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
    converter->cd = iconv_open(tocode, fromcode);
    if (converter->cd == (iconv_t)-1) {
        if (errno == EINVAL) {
            fprintf(stderr, "iconv doesn't support %s to %s conversion.\n", fromcode, tocode);
        } else {
            perror("can't iconv_open");
        }
        /* cd is not valid, so release the pieces by hand */
        nkf_xfree(converter->input_buffer);
        nkf_xfree(converter->output_buffer);
        nkf_xfree(converter);
        return NULL;
    }
    return converter;
}

/*
 * Convert everything readable through i_getc from input and write the
 * result through o_putc.
 *
 * Returns the accumulated count reported by iconv() (irreversible
 * conversions), or (size_t)-1 after printing a diagnostic on failure.
 */
static size_t
nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
{
    size_t invalid = 0;
    size_t pending = 0;         /* unconverted bytes at the start of input_buffer */
    int at_eof = FALSE;

    for (;;) {
        char *in_ptr;
        char *out_ptr;
        size_t in_left;
        size_t out_left;
        size_t produced;
        size_t ret;
        size_t i;
        int flushing;
        int saved_errno;

        /* top up the input buffer */
        while (!at_eof && pending < converter->input_buffer_size) {
            nkf_char c = (*i_getc)(input);
            if (c == EOF) {
                at_eof = TRUE;
            } else {
                converter->input_buffer[pending++] = (char)c;
            }
        }

        in_ptr = converter->input_buffer;
        in_left = pending;
        out_ptr = converter->output_buffer;
        out_left = converter->output_buffer_size;
        flushing = (pending == 0);      /* only possible once at_eof is set */

        errno = 0;
        if (flushing) {
            /* no input left: let iconv emit any final shift sequence */
            ret = iconv(converter->cd, NULL, NULL, &out_ptr, &out_left);
        } else {
            ret = iconv(converter->cd, &in_ptr, &in_left, &out_ptr, &out_left);
        }
        saved_errno = errno;            /* o_putc below may change errno */

        produced = converter->output_buffer_size - out_left;
        for (i = 0; i < produced; i++) {
            (*o_putc)((unsigned char)converter->output_buffer[i]);
        }

        /* keep whatever iconv did not consume for the next round */
        if (in_left > 0 && in_ptr != converter->input_buffer) {
            memmove(converter->input_buffer, in_ptr, in_left);
        }

        if (ret != (size_t)-1) {
            invalid += ret;
            if (flushing) {
                return invalid;
            }
        } else if (saved_errno == EINVAL && !at_eof
                   && in_left < converter->input_buffer_size) {
            /* incomplete sequence at the end of the buffer: read more */
        } else if (saved_errno == E2BIG) {
            if (produced == 0) {
                /* not even one character fits: enlarge the output buffer */
                converter->output_buffer_size *= 2;
                converter->output_buffer = nkf_xrealloc(converter->output_buffer,
                                                        converter->output_buffer_size);
            }
            /* otherwise the buffer was just drained; simply go round again */
        } else {
            errno = saved_errno;
            perror("can't iconv");
            return (size_t)-1;
        }
        pending = in_left;
    }
}

/*
 * Release a converter obtained from nkf_iconv_new().  NULL is ignored.
 */
static void
nkf_iconv_close(nkf_iconv_t *convert)
{
    if (convert == NULL) {
        return;
    }
    nkf_xfree(convert->input_buffer);
    nkf_xfree(convert->output_buffer);
    iconv_close(convert->cd);
    nkf_xfree(convert);
}
#endif
5576 
5577 
5578 static void
5579 reinit(void)
5580 {
5581  {
5582  struct input_code *p = input_code_list;
5583  while (p->name){
5584  status_reinit(p++);
5585  }
5586  }
5587  unbuf_f = FALSE;
5588  estab_f = FALSE;
5589  nop_f = FALSE;
5590  binmode_f = TRUE;
5591  rot_f = FALSE;
5592  hira_f = FALSE;
5593  alpha_f = FALSE;
5594  mime_f = MIME_DECODE_DEFAULT;
5595  mime_decode_f = FALSE;
5596  mimebuf_f = FALSE;
5597  broken_f = FALSE;
5598  iso8859_f = FALSE;
5599  mimeout_f = FALSE;
5600  x0201_f = NKF_UNSPECIFIED;
5601  iso2022jp_f = FALSE;
5602 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5603  ms_ucs_map_f = UCS_MAP_ASCII;
5604 #endif
5605 #ifdef UTF8_INPUT_ENABLE
5606  no_cp932ext_f = FALSE;
5607  no_best_fit_chars_f = FALSE;
5608  encode_fallback = NULL;
5609  unicode_subchar = '?';
5610  input_endian = ENDIAN_BIG;
5611 #endif
5612 #ifdef UTF8_OUTPUT_ENABLE
5613  output_bom_f = FALSE;
5614  output_endian = ENDIAN_BIG;
5615 #endif
5616 #ifdef UNICODE_NORMALIZATION
5617  nfc_f = FALSE;
5618 #endif
5619 #ifdef INPUT_OPTION
5620  cap_f = FALSE;
5621  url_f = FALSE;
5622  numchar_f = FALSE;
5623 #endif
5624 #ifdef CHECK_OPTION
5625  noout_f = FALSE;
5626  debug_f = FALSE;
5627 #endif
5628  guess_f = 0;
5629 #ifdef EXEC_IO
5630  exec_f = 0;
5631 #endif
5632 #ifdef SHIFTJIS_CP932
5633  cp51932_f = TRUE;
5634  cp932inv_f = TRUE;
5635 #endif
5636 #ifdef X0212_ENABLE
5637  x0212_f = FALSE;
5638  x0213_f = FALSE;
5639 #endif
5640  {
5641  int i;
5642  for (i = 0; i < 256; i++){
5643  prefix_table[i] = 0;
5644  }
5645  }
5646  hold_count = 0;
5647  mimeout_state.count = 0;
5648  mimeout_mode = 0;
5649  base64_count = 0;
5650  f_line = 0;
5651  f_prev = 0;
5652  fold_preserve_f = FALSE;
5653  fold_f = FALSE;
5654  fold_len = 0;
5655  kanji_intro = DEFAULT_J;
5656  ascii_intro = DEFAULT_R;
5657  fold_margin = FOLD_MARGIN;
5658  o_zconv = no_connection;
5659  o_fconv = no_connection;
5660  o_eol_conv = no_connection;
5661  o_rot_conv = no_connection;
5662  o_hira_conv = no_connection;
5663  o_base64conv = no_connection;
5664  o_iso2022jp_check_conv = no_connection;
5665  o_putc = std_putc;
5666  i_getc = std_getc;
5667  i_ungetc = std_ungetc;
5668  i_bgetc = std_getc;
5669  i_bungetc = std_ungetc;
5670  o_mputc = std_putc;
5671  i_mgetc = std_getc;
5672  i_mungetc = std_ungetc;
5673  i_mgetc_buf = std_getc;
5674  i_mungetc_buf = std_ungetc;
5675  output_mode = ASCII;
5676  input_mode = ASCII;
5677  mime_decode_mode = FALSE;
5678  file_out_f = FALSE;
5679  eolmode_f = 0;
5680  input_eol = 0;
5681  prev_cr = 0;
5682  option_mode = 0;
5683  z_prev2=0,z_prev1=0;
5684 #ifdef CHECK_OPTION
5685  iconv_for_check = 0;
5686 #endif
5687  input_codename = NULL;
5688  input_encoding = NULL;
5689  output_encoding = NULL;
5690  nkf_state_init();
5691 #ifdef WIN32DLL
5692  reinitdll();
5693 #endif /*WIN32DLL*/
5694 }
5695 
5696 static int
5697 module_connection(void)
5698 {
5699  if (input_encoding) set_input_encoding(input_encoding);
5700  if (!output_encoding) {
5701  output_encoding = nkf_default_encoding();
5702  }
5703  if (!output_encoding) {
5704  if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5705  else return -1;
5706  }
5707  set_output_encoding(output_encoding);
5708  oconv = nkf_enc_to_oconv(output_encoding);
5709  o_putc = std_putc;
5710  if (nkf_enc_unicode_p(output_encoding))
5711  output_mode = UTF_8;
5712 
5713  if (x0201_f == NKF_UNSPECIFIED) {
5714  x0201_f = X0201_DEFAULT;
5715  }
5716 
5717  /* replace continuation module, from output side */
5718 
5719  /* output redirection */
5720 #ifdef CHECK_OPTION
5721  if (noout_f || guess_f){
5722  o_putc = no_putc;
5723  }
5724 #endif
5725  if (mimeout_f) {
5726  o_mputc = o_putc;
5727  o_putc = mime_putc;
5728  if (mimeout_f == TRUE) {
5729  o_base64conv = oconv; oconv = base64_conv;
5730  }
5731  /* base64_count = 0; */
5732  }
5733 
5734  if (eolmode_f || guess_f) {
5735  o_eol_conv = oconv; oconv = eol_conv;
5736  }
5737  if (rot_f) {
5738  o_rot_conv = oconv; oconv = rot_conv;
5739  }
5740  if (iso2022jp_f) {
5741  o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5742  }
5743  if (hira_f) {
5744  o_hira_conv = oconv; oconv = hira_conv;
5745  }
5746  if (fold_f) {
5747  o_fconv = oconv; oconv = fold_conv;
5748  f_line = 0;
5749  }
5750  if (alpha_f || x0201_f) {
5751  o_zconv = oconv; oconv = z_conv;
5752  }
5753 
5754  i_getc = std_getc;
5755  i_ungetc = std_ungetc;
5756  /* input redirection */
5757 #ifdef INPUT_OPTION
5758  if (cap_f){
5759  i_cgetc = i_getc; i_getc = cap_getc;
5760  i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5761  }
5762  if (url_f){
5763  i_ugetc = i_getc; i_getc = url_getc;
5764  i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5765  }
5766 #endif
5767 #ifdef NUMCHAR_OPTION
5768  if (numchar_f){
5769  i_ngetc = i_getc; i_getc = numchar_getc;
5770  i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5771  }
5772 #endif
5773 #ifdef UNICODE_NORMALIZATION
5774  if (nfc_f){
5775  i_nfc_getc = i_getc; i_getc = nfc_getc;
5776  i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5777  }
5778 #endif
5779  if (mime_f && mimebuf_f==FIXED_MIME) {
5780  i_mgetc = i_getc; i_getc = mime_getc;
5781  i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5782  }
5783  if (broken_f & 1) {
5784  i_bgetc = i_getc; i_getc = broken_getc;
5785  i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5786  }
5787  if (input_encoding) {
5788  set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5789  } else {
5790  set_iconv(FALSE, e_iconv);
5791  }
5792 
5793  {
5794  struct input_code *p = input_code_list;
5795  while (p->name){
5796  status_reinit(p++);
5797  }
5798  }
5799  return 0;
5800 }
5801 
5802 /*
5803  Conversion main loop. Code detection only.
5804  */
5805 
5806 #if !defined(PERL_XS) && !defined(WIN32DLL)
5807 static nkf_char
5808 noconvert(FILE *f)
5809 {
5810  nkf_char c;
5811 
5812  if (nop_f == 2)
5813  module_connection();
5814  while ((c = (*i_getc)(f)) != EOF)
5815  (*o_putc)(c);
5816  (*o_putc)(EOF);
5817  return 1;
5818 }
5819 #endif
5820 
/*
 * Control-flow shorthands for the byte loop in kanji_convert().
 *
 * NEXT, SKIP, MORE and LAST expand to bare continue/break statements, so
 * they only make sense directly inside that loop.  SKIP and MORE expand to
 * TWO statements (an assignment to c2 followed by continue): used as the
 * body of an unbraced if, only the assignment would be conditional.  They
 * cannot be wrapped in do { } while (0), because the continue would then
 * apply to that wrapper instead of the enclosing loop.
 */
5821 #define NEXT continue /* no output, get next */
5822 #define SKIP c2=0;continue /* no output, get next */
5823 #define MORE c2=c1;continue /* need one more byte */
5824 #define SEND (void)0 /* output c1 and c2, get next */
5825 #define LAST break /* end of loop, go closing */
/*
 * Switch the input side to the given mode: stores it in input_mode, clears
 * shift_mode, and records "ISO-2022-JP" via set_input_codename() and
 * debug().  shift_mode is not a parameter, so the macro can only be used
 * where a variable of that name is in scope (kanji_convert() declares one).
 */
5826 #define set_input_mode(mode) do { \
5827  input_mode = mode; \
5828  shift_mode = 0; \
5829  set_input_codename("ISO-2022-JP"); \
5830  debug("ISO-2022-JP"); \
5831 } while (0)
5832 
5833 static int
5834 kanji_convert(FILE *f)
5835 {
5836  nkf_char c1=0, c2=0, c3=0, c4=0;
5837  int shift_mode = 0; /* 0, 1, 2, 3 */
5838  int g2 = 0;
5839  int is_8bit = FALSE;
5840 
5841  if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5842  is_8bit = TRUE;
5843  }
5844 
5845  input_mode = ASCII;
5846  output_mode = ASCII;
5847 
5848  if (module_connection() < 0) {
5849 #if !defined(PERL_XS) && !defined(WIN32DLL)
5850  fprintf(stderr, "no output encoding given\n");
5851 #endif
5852  return -1;
5853  }
5854  check_bom(f);
5855 
5856 #ifdef UTF8_INPUT_ENABLE
5857  if(iconv == w_iconv32){
5858  while ((c1 = (*i_getc)(f)) != EOF &&
5859  (c2 = (*i_getc)(f)) != EOF &&
5860  (c3 = (*i_getc)(f)) != EOF &&
5861  (c4 = (*i_getc)(f)) != EOF) {
5862  nkf_char c5, c6, c7, c8;
5863  if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5864  if ((c5 = (*i_getc)(f)) != EOF &&
5865  (c6 = (*i_getc)(f)) != EOF &&
5866  (c7 = (*i_getc)(f)) != EOF &&
5867  (c8 = (*i_getc)(f)) != EOF) {
5868  if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5869  (*i_ungetc)(c8, f);
5870  (*i_ungetc)(c7, f);
5871  (*i_ungetc)(c6, f);
5872  (*i_ungetc)(c5, f);
5873  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5874  }
5875  } else {
5876  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5877  }
5878  }
5879  }
5880  goto finished;
5881  }
5882  else if (iconv == w_iconv16) {
5883  while ((c1 = (*i_getc)(f)) != EOF &&
5884  (c2 = (*i_getc)(f)) != EOF) {
5885  size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5886  if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5887  (c3 = (*i_getc)(f)) != EOF &&
5888  (c4 = (*i_getc)(f)) != EOF) {
5889  nkf_iconv_utf_16(c1, c2, c3, c4);
5890  } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5891  if ((c3 = (*i_getc)(f)) != EOF &&
5892  (c4 = (*i_getc)(f)) != EOF) {
5893  if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5894  (*i_ungetc)(c4, f);
5895  (*i_ungetc)(c3, f);
5896  nkf_iconv_utf_16_nocombine(c1, c2);
5897  }
5898  } else {
5899  nkf_iconv_utf_16_nocombine(c1, c2);
5900  }
5901  }
5902  }
5903  goto finished;
5904  }
5905 #endif
5906 
5907  while ((c1 = (*i_getc)(f)) != EOF) {
5908 #ifdef INPUT_CODE_FIX
5909  if (!input_encoding)
5910 #endif
5911  code_status(c1);
5912  if (c2) {
5913  /* second byte */
5914  if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
5915  /* in case of 8th bit is on */
5916  if (!estab_f&&!mime_decode_mode) {
5917  /* in case of not established yet */
5918  /* It is still ambiguous */
5919  if (h_conv(f, c2, c1)==EOF) {
5920  LAST;
5921  }
5922  else {
5923  SKIP;
5924  }
5925  }
5926  else {
5927  /* in case of already established */
5928  if (c1 < 0x40) {
5929  /* ignore bogus code */
5930  SKIP;
5931  } else {
5932  SEND;
5933  }
5934  }
5935  }
5936  else {
5937  /* 2nd byte of 7 bit code or SJIS */
5938  SEND;
5939  }
5940  }
5941  else if (nkf_char_unicode_p(c1)) {
5942  (*oconv)(0, c1);
5943  NEXT;
5944  }
5945  else {
5946  /* first byte */
5947  if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5948  /* CP5022x */
5949  MORE;
5950  }else if (input_codename && input_codename[0] == 'I' &&
5951  0xA1 <= c1 && c1 <= 0xDF) {
5952  /* JIS X 0201 Katakana in 8bit JIS */
5953  c2 = JIS_X_0201_1976_K;
5954  c1 &= 0x7f;
5955  SEND;
5956  } else if (c1 > DEL) {
5957  /* 8 bit code */
5958  if (!estab_f && !iso8859_f) {
5959  /* not established yet */
5960  MORE;
5961  } else { /* estab_f==TRUE */
5962  if (iso8859_f) {
5963  c2 = ISO_8859_1;
5964  c1 &= 0x7f;
5965  SEND;
5966  }
5967  else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5968  (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5969  /* JIS X 0201 */
5970  c2 = JIS_X_0201_1976_K;
5971  c1 &= 0x7f;
5972  SEND;
5973  }
5974  else {
5975  /* already established */
5976  MORE;
5977  }
5978  }
5979  } else if (SP < c1 && c1 < DEL) {
5980  /* in case of Roman characters */
5981  if (shift_mode) {
5982  /* output 1 shifted byte */
5983  if (iso8859_f) {
5984  c2 = ISO_8859_1;
5985  SEND;
5986  } else if (nkf_byte_jisx0201_katakana_p(c1)){
5987  /* output 1 shifted byte */
5988  c2 = JIS_X_0201_1976_K;
5989  SEND;
5990  } else {
5991  /* look like bogus code */
5992  SKIP;
5993  }
5994  } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5995  input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5996  /* in case of Kanji shifted */
5997  MORE;
5998  } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5999  /* Check MIME code */
6000  if ((c1 = (*i_getc)(f)) == EOF) {
6001  (*oconv)(0, '=');
6002  LAST;
6003  } else if (c1 == '?') {
6004  /* =? is mime conversion start sequence */
6005  if(mime_f == STRICT_MIME) {
6006  /* check in real detail */
6007  if (mime_begin_strict(f) == EOF)
6008  LAST;
6009  SKIP;
6010  } else if (mime_begin(f) == EOF)
6011  LAST;
6012  SKIP;
6013  } else {
6014  (*oconv)(0, '=');
6015  (*i_ungetc)(c1,f);
6016  SKIP;
6017  }
6018  } else {
6019  /* normal ASCII code */
6020  SEND;
6021  }
6022  } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
6023  shift_mode = 0;
6024  SKIP;
6025  } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
6026  shift_mode = 1;
6027  SKIP;
6028  } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
6029  if ((c1 = (*i_getc)(f)) == EOF) {
6030  (*oconv)(0, ESC);
6031  LAST;
6032  }
6033  else if (c1 == '&') {
6034  /* IRR */
6035  if ((c1 = (*i_getc)(f)) == EOF) {
6036  LAST;
6037  } else {
6038  SKIP;
6039  }
6040  }
6041  else if (c1 == '$') {
6042  /* GZDMx */
6043  if ((c1 = (*i_getc)(f)) == EOF) {
6044  /* don't send bogus code
6045  (*oconv)(0, ESC);
6046  (*oconv)(0, '$'); */
6047  LAST;
6048  } else if (c1 == '@' || c1 == 'B') {
6049  /* JIS X 0208 */
6051  SKIP;
6052  } else if (c1 == '(') {
6053  /* GZDM4 */
6054  if ((c1 = (*i_getc)(f)) == EOF) {
6055  /* don't send bogus code
6056  (*oconv)(0, ESC);
6057  (*oconv)(0, '$');
6058  (*oconv)(0, '(');
6059  */
6060  LAST;
6061  } else if (c1 == '@'|| c1 == 'B') {
6062  /* JIS X 0208 */
6064  SKIP;
6065 #ifdef X0212_ENABLE
6066  } else if (c1 == 'D'){
6068  SKIP;
6069 #endif /* X0212_ENABLE */
6070  } else if (c1 == 'O' || c1 == 'Q'){
6072  SKIP;
6073  } else if (c1 == 'P'){
6075  SKIP;
6076  } else {
6077  /* could be some special code */
6078  (*oconv)(0, ESC);
6079  (*oconv)(0, '$');
6080  (*oconv)(0, '(');
6081  (*oconv)(0, c1);
6082  SKIP;
6083  }
6084  } else if (broken_f&0x2) {
6085  /* accept any ESC-(-x as broken code ... */
6086  input_mode = JIS_X_0208;
6087  shift_mode = 0;
6088  SKIP;
6089  } else {
6090  (*oconv)(0, ESC);
6091  (*oconv)(0, '$');
6092  (*oconv)(0, c1);
6093  SKIP;
6094  }
6095  } else if (c1 == '(') {
6096  /* GZD4 */
6097  if ((c1 = (*i_getc)(f)) == EOF) {
6098  /* don't send bogus code
6099  (*oconv)(0, ESC);
6100  (*oconv)(0, '('); */
6101  LAST;
6102  }
6103  else if (c1 == 'I') {
6104  /* JIS X 0201 Katakana */
6106  shift_mode = 1;
6107  SKIP;
6108  }
6109  else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
6110  /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
6112  SKIP;
6113  }
6114  else if (broken_f&0x2) {
6116  SKIP;
6117  }
6118  else {
6119  (*oconv)(0, ESC);
6120  (*oconv)(0, '(');
6121  SEND;
6122  }
6123  }
6124  else if (c1 == '.') {
6125  /* G2D6 */
6126  if ((c1 = (*i_getc)(f)) == EOF) {
6127  LAST;
6128  }
6129  else if (c1 == 'A') {
6130  /* ISO-8859-1 */
6131  g2 = ISO_8859_1;
6132  SKIP;
6133  }
6134  else {
6135  (*oconv)(0, ESC);
6136  (*oconv)(0, '.');
6137  SEND;
6138  }
6139  }
6140  else if (c1 == 'N') {
6141  /* SS2 */
6142  c1 = (*i_getc)(f);
6143  if (g2 == ISO_8859_1) {
6144  c2 = ISO_8859_1;
6145  SEND;
6146  }else{
6147  (*i_ungetc)(c1, f);
6148  /* lonely ESC */
6149  (*oconv)(0, ESC);
6150  SEND;
6151  }
6152  }
6153  else {
6154  i_ungetc(c1,f);
6155  /* lonely ESC */
6156  (*oconv)(0, ESC);
6157  SKIP;
6158  }
6159  } else if (c1 == ESC && iconv == s_iconv) {
6160  /* ESC in Shift_JIS */
6161  if ((c1 = (*i_getc)(f)) == EOF) {
6162  (*oconv)(0, ESC);
6163  LAST;
6164  } else if (c1 == '$') {
6165  /* J-PHONE emoji */
6166  if ((c1 = (*i_getc)(f)) == EOF) {
6167  LAST;
6168  } else if (('E' <= c1 && c1 <= 'G') ||
6169  ('O' <= c1 && c1 <= 'Q')) {
6170  /*
6171  NUM : 0 1 2 3 4 5
6172  BYTE: G E F O P Q
6173  C%7 : 1 6 0 2 3 4
6174  C%7 : 0 1 2 3 4 5 6
6175  NUM : 2 0 3 4 5 X 1
6176  */
6177  static const nkf_char jphone_emoji_first_table[7] =
6178  {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6179  c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
6180  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6181  while (SP <= c1 && c1 <= 'z') {
6182  (*oconv)(0, c1 + c3);
6183  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6184  }
6185  SKIP;
6186  }
6187  else {
6188  (*oconv)(0, ESC);
6189  (*oconv)(0, '$');
6190  SEND;
6191  }
6192  }
6193  else {
6194  i_ungetc(c1,f);
6195  /* lonely ESC */
6196  (*oconv)(0, ESC);
6197  SKIP;
6198  }
6199  } else if (c1 == LF || c1 == CR) {
6200  if (broken_f&4) {
6201  input_mode = ASCII; set_iconv(FALSE, 0);
6202  SEND;
6203  } else if (mime_decode_f && !mime_decode_mode){
6204  if (c1 == LF) {
6205  if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
6206  i_ungetc(SP,f);
6207  continue;
6208  } else {
6209  i_ungetc(c1,f);
6210  }
6211  c1 = LF;
6212  SEND;
6213  } else { /* if (c1 == CR)*/
6214  if ((c1=(*i_getc)(f))!=EOF) {
6215  if (c1==SP) {
6216  i_ungetc(SP,f);
6217  continue;
6218  } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
6219  i_ungetc(SP,f);
6220  continue;
6221  } else {
6222  i_ungetc(c1,f);
6223  }
6224  i_ungetc(LF,f);
6225  } else {
6226  i_ungetc(c1,f);
6227  }
6228  c1 = CR;
6229  SEND;
6230  }
6231  }
6232  } else
6233  SEND;
6234  }
6235  /* send: */
6236  switch(input_mode){
6237  case ASCII:
6238  switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
6239  case -2:
6240  /* 4 bytes UTF-8 */
6241  if ((c3 = (*i_getc)(f)) != EOF) {
6242  code_status(c3);
6243  c3 <<= 8;
6244  if ((c4 = (*i_getc)(f)) != EOF) {
6245  code_status(c4);
6246  (*iconv)(c2, c1, c3|c4);
6247  }
6248  }
6249  break;
6250  case -3:
6251  /* 4 bytes UTF-8 (check combining character) */
6252  if ((c3 = (*i_getc)(f)) != EOF) {
6253  if ((c4 = (*i_getc)(f)) != EOF) {
6254  if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6255  (*i_ungetc)(c4, f);
6256  (*i_ungetc)(c3, f);
6257  w_iconv_nocombine(c2, c1, 0);
6258  }
6259  } else {
6260  (*i_ungetc)(c3, f);
6261  w_iconv_nocombine(c2, c1, 0);
6262  }
6263  } else {
6264  w_iconv_nocombine(c2, c1, 0);
6265  }
6266  break;
6267  case -1:
6268  /* 3 bytes EUC or UTF-8 */
6269  if ((c3 = (*i_getc)(f)) != EOF) {
6270  code_status(c3);
6271  if ((*iconv)(c2, c1, c3) == -3) {
6272  /* 6 bytes UTF-8 (check combining character) */
6273  nkf_char c5, c6;
6274  if ((c4 = (*i_getc)(f)) != EOF) {
6275  if ((c5 = (*i_getc)(f)) != EOF) {
6276  if ((c6 = (*i_getc)(f)) != EOF) {
6277  if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6278  (*i_ungetc)(c6, f);
6279  (*i_ungetc)(c5, f);
6280  (*i_ungetc)(c4, f);
6281  w_iconv_nocombine(c2, c1, c3);
6282  }
6283  } else {
6284  (*i_ungetc)(c5, f);
6285  (*i_ungetc)(c4, f);
6286  w_iconv_nocombine(c2, c1, c3);
6287  }
6288  } else {
6289  (*i_ungetc)(c4, f);
6290  w_iconv_nocombine(c2, c1, c3);
6291  }
6292  } else {
6293  w_iconv_nocombine(c2, c1, c3);
6294  }
6295  }
6296  }
6297  break;
6298  }
6299  break;
6300  case JIS_X_0208:
6301  case JIS_X_0213_1:
6302  if (ms_ucs_map_f &&
6303  0x7F <= c2 && c2 <= 0x92 &&
6304  0x21 <= c1 && c1 <= 0x7E) {
6305  /* CP932 UDC */
6306  c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
6307  c2 = 0;
6308  }
6309  (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
6310  break;
6311 #ifdef X0212_ENABLE
6312  case JIS_X_0212:
6313  (*oconv)(PREFIX_EUCG3 | c2, c1);
6314  break;
6315 #endif /* X0212_ENABLE */
6316  case JIS_X_0213_2:
6317  (*oconv)(PREFIX_EUCG3 | c2, c1);
6318  break;
6319  default:
6320  (*oconv)(input_mode, c1); /* other special case */
6321  }
6322 
6323  c2 = 0;
6324  c3 = 0;
6325  continue;
6326  /* goto next_word */
6327  }
6328 
6329 finished:
6330  /* epilogue */
6331  (*iconv)(EOF, 0, 0);
6332  if (!input_codename)
6333  {
6334  if (is_8bit) {
6335  struct input_code *p = input_code_list;
6336  struct input_code *result = p;
6337  while (p->name){
6338  if (p->score < result->score) result = p;
6339  ++p;
6340  }
6341  set_input_codename(result->name);
6342 #ifdef CHECK_OPTION
6343  debug(result->name);
6344 #endif
6345  }
6346  }
6347  return 0;
6348 }
6349 
6350 /*
6351  * int options(unsigned char *cp)
6352  *
6353  * return values:
6354  * 0: success
6355  * -1: ArgumentError
6356  */
6357 static int
6358 options(unsigned char *cp)
6359 {
6360  nkf_char i, j;
6361  unsigned char *p;
6362  unsigned char *cp_back = NULL;
6363  nkf_encoding *enc;
6364 
6365  if (option_mode==1)
6366  return 0;
6367  while(*cp && *cp++!='-');
6368  while (*cp || cp_back) {
6369  if(!*cp){
6370  cp = cp_back;
6371  cp_back = NULL;
6372  continue;
6373  }
6374  p = 0;
6375  switch (*cp++) {
6376  case '-': /* literal options */
6377  if (!*cp || *cp == SP) { /* ignore the rest of arguments */
6378  option_mode = 1;
6379  return 0;
6380  }
6381  for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
6382  p = (unsigned char *)long_option[i].name;
6383  for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
6384  if (*p == cp[j] || cp[j] == SP){
6385  p = &cp[j] + 1;
6386  break;
6387  }
6388  p = 0;
6389  }
6390  if (p == 0) {
6391 #if !defined(PERL_XS) && !defined(WIN32DLL)
6392  fprintf(stderr, "unknown long option: --%s\n", cp);
6393 #endif
6394  return -1;
6395  }
6396  while(*cp && *cp != SP && cp++);
6397  if (long_option[i].alias[0]){
6398  cp_back = cp;
6399  cp = (unsigned char *)long_option[i].alias;
6400  }else{
6401 #ifndef PERL_XS
6402  if (strcmp(long_option[i].name, "help") == 0){
6403  usage();
6404  exit(EXIT_SUCCESS);
6405  }
6406 #endif
6407  if (strcmp(long_option[i].name, "ic=") == 0){
6408  enc = nkf_enc_find((char *)p);
6409  if (!enc) continue;
6410  input_encoding = enc;
6411  continue;
6412  }
6413  if (strcmp(long_option[i].name, "oc=") == 0){
6414  enc = nkf_enc_find((char *)p);
6415  /* if (enc <= 0) continue; */
6416  if (!enc) continue;
6417  output_encoding = enc;
6418  continue;
6419  }
6420  if (strcmp(long_option[i].name, "guess=") == 0){
6421  if (p[0] == '0' || p[0] == '1') {
6422  guess_f = 1;
6423  } else {
6424  guess_f = 2;
6425  }
6426  continue;
6427  }
6428 #ifdef OVERWRITE
6429  if (strcmp(long_option[i].name, "overwrite") == 0){
6430  file_out_f = TRUE;
6431  overwrite_f = TRUE;
6432  preserve_time_f = TRUE;
6433  continue;
6434  }
6435  if (strcmp(long_option[i].name, "overwrite=") == 0){
6436  file_out_f = TRUE;
6437  overwrite_f = TRUE;
6438  preserve_time_f = TRUE;
6439  backup_f = TRUE;
6440  backup_suffix = (char *)p;
6441  continue;
6442  }
6443  if (strcmp(long_option[i].name, "in-place") == 0){
6444  file_out_f = TRUE;
6445  overwrite_f = TRUE;
6446  preserve_time_f = FALSE;
6447  continue;
6448  }
6449  if (strcmp(long_option[i].name, "in-place=") == 0){
6450  file_out_f = TRUE;
6451  overwrite_f = TRUE;
6452  preserve_time_f = FALSE;
6453  backup_f = TRUE;
6454  backup_suffix = (char *)p;
6455  continue;
6456  }
6457 #endif
6458 #ifdef INPUT_OPTION
6459  if (strcmp(long_option[i].name, "cap-input") == 0){
6460  cap_f = TRUE;
6461  continue;
6462  }
6463  if (strcmp(long_option[i].name, "url-input") == 0){
6464  url_f = TRUE;
6465  continue;
6466  }
6467 #endif
6468 #ifdef NUMCHAR_OPTION
6469  if (strcmp(long_option[i].name, "numchar-input") == 0){
6470  numchar_f = TRUE;
6471  continue;
6472  }
6473 #endif
6474 #ifdef CHECK_OPTION
6475  if (strcmp(long_option[i].name, "no-output") == 0){
6476  noout_f = TRUE;
6477  continue;
6478  }
6479  if (strcmp(long_option[i].name, "debug") == 0){
6480  debug_f = TRUE;
6481  continue;
6482  }
6483 #endif
6484  if (strcmp(long_option[i].name, "cp932") == 0){
6485 #ifdef SHIFTJIS_CP932
6486  cp51932_f = TRUE;
6487  cp932inv_f = -TRUE;
6488 #endif
6489 #ifdef UTF8_OUTPUT_ENABLE
6490  ms_ucs_map_f = UCS_MAP_CP932;
6491 #endif
6492  continue;
6493  }
6494  if (strcmp(long_option[i].name, "no-cp932") == 0){
6495 #ifdef SHIFTJIS_CP932
6496  cp51932_f = FALSE;
6497  cp932inv_f = FALSE;
6498 #endif
6499 #ifdef UTF8_OUTPUT_ENABLE
6500  ms_ucs_map_f = UCS_MAP_ASCII;
6501 #endif
6502  continue;
6503  }
6504 #ifdef SHIFTJIS_CP932
6505  if (strcmp(long_option[i].name, "cp932inv") == 0){
6506  cp932inv_f = -TRUE;
6507  continue;
6508  }
6509 #endif
6510 
6511 #ifdef X0212_ENABLE
6512  if (strcmp(long_option[i].name, "x0212") == 0){
6513  x0212_f = TRUE;
6514  continue;
6515  }
6516 #endif
6517 
6518 #ifdef EXEC_IO
6519  if (strcmp(long_option[i].name, "exec-in") == 0){
6520  exec_f = 1;
6521  return 0;
6522  }
6523  if (strcmp(long_option[i].name, "exec-out") == 0){
6524  exec_f = -1;
6525  return 0;
6526  }
6527 #endif
6528 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6529  if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6530  no_cp932ext_f = TRUE;
6531  continue;
6532  }
6533  if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6534  no_best_fit_chars_f = TRUE;
6535  continue;
6536  }
6537  if (strcmp(long_option[i].name, "fb-skip") == 0){
6538  encode_fallback = NULL;
6539  continue;
6540  }
6541  if (strcmp(long_option[i].name, "fb-html") == 0){
6542  encode_fallback = encode_fallback_html;
6543  continue;
6544  }
6545  if (strcmp(long_option[i].name, "fb-xml") == 0){
6546  encode_fallback = encode_fallback_xml;
6547  continue;
6548  }
6549  if (strcmp(long_option[i].name, "fb-java") == 0){
6550  encode_fallback = encode_fallback_java;
6551  continue;
6552  }
6553  if (strcmp(long_option[i].name, "fb-perl") == 0){
6554  encode_fallback = encode_fallback_perl;
6555  continue;
6556  }
6557  if (strcmp(long_option[i].name, "fb-subchar") == 0){
6558  encode_fallback = encode_fallback_subchar;
6559  continue;
6560  }
6561  if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6562  encode_fallback = encode_fallback_subchar;
6563  unicode_subchar = 0;
6564  if (p[0] != '0'){
6565  /* decimal number */
6566  for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6567  unicode_subchar *= 10;
6568  unicode_subchar += hex2bin(p[i]);
6569  }
6570  }else if(p[1] == 'x' || p[1] == 'X'){
6571  /* hexadecimal number */
6572  for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6573  unicode_subchar <<= 4;
6574  unicode_subchar |= hex2bin(p[i]);
6575  }
6576  }else{
6577  /* octal number */
6578  for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6579  unicode_subchar *= 8;
6580  unicode_subchar += hex2bin(p[i]);
6581  }
6582  }
6583  w16e_conv(unicode_subchar, &i, &j);
6584  unicode_subchar = i<<8 | j;
6585  continue;
6586  }
6587 #endif
6588 #ifdef UTF8_OUTPUT_ENABLE
6589  if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6590  ms_ucs_map_f = UCS_MAP_MS;
6591  continue;
6592  }
6593 #endif
6594 #ifdef UNICODE_NORMALIZATION
6595  if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6596  nfc_f = TRUE;
6597  continue;
6598  }
6599 #endif
6600  if (strcmp(long_option[i].name, "prefix=") == 0){
6601  if (nkf_isgraph(p[0])){
6602  for (i = 1; nkf_isgraph(p[i]); i++){
6603  prefix_table[p[i]] = p[0];
6604  }
6605  }
6606  continue;
6607  }
6608 #if !defined(PERL_XS) && !defined(WIN32DLL)
6609  fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6610 #endif
6611  return -1;
6612  }
6613  continue;
6614  case 'b': /* buffered mode */
6615  unbuf_f = FALSE;
6616  continue;
6617  case 'u': /* non bufferd mode */
6618  unbuf_f = TRUE;
6619  continue;
6620  case 't': /* transparent mode */
6621  if (*cp=='1') {
6622  /* alias of -t */
6623  cp++;
6624  nop_f = TRUE;
6625  } else if (*cp=='2') {
6626  /*
6627  * -t with put/get
6628  *
6629  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6630  *
6631  */
6632  cp++;
6633  nop_f = 2;
6634  } else
6635  nop_f = TRUE;
6636  continue;
6637  case 'j': /* JIS output */
6638  case 'n':
6639  output_encoding = nkf_enc_from_index(ISO_2022_JP);
6640  continue;
6641  case 'e': /* AT&T EUC output */
6642  output_encoding = nkf_enc_from_index(EUCJP_NKF);
6643  continue;
6644  case 's': /* SJIS output */
6645  output_encoding = nkf_enc_from_index(SHIFT_JIS);
6646  continue;
6647  case 'l': /* ISO8859 Latin-1 support, no conversion */
6648  iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6649  input_encoding = nkf_enc_from_index(ISO_8859_1);
6650  continue;
6651  case 'i': /* Kanji IN ESC-$-@/B */
6652  if (*cp=='@'||*cp=='B')
6653  kanji_intro = *cp++;
6654  continue;
6655  case 'o': /* ASCII IN ESC-(-J/B/H */
6656  /* ESC ( H was used in initial JUNET messages */
6657  if (*cp=='J'||*cp=='B'||*cp=='H')
6658  ascii_intro = *cp++;
6659  continue;
6660  case 'h':
6661  /*
6662  bit:1 katakana->hiragana
6663  bit:2 hiragana->katakana
6664  */
6665  if ('9'>= *cp && *cp>='0')
6666  hira_f |= (*cp++ -'0');
6667  else
6668  hira_f |= 1;
6669  continue;
6670  case 'r':
6671  rot_f = TRUE;
6672  continue;
6673 #if defined(MSDOS) || defined(__OS2__)
6674  case 'T':
6675  binmode_f = FALSE;
6676  continue;
6677 #endif
6678 #ifndef PERL_XS
6679  case 'V':
6680  show_configuration();
6681  exit(EXIT_SUCCESS);
6682  break;
6683  case 'v':
6684  version();
6685  exit(EXIT_SUCCESS);
6686  break;
6687 #endif
6688 #ifdef UTF8_OUTPUT_ENABLE
6689  case 'w': /* UTF-{8,16,32} output */
6690  if (cp[0] == '8') {
6691  cp++;
6692  if (cp[0] == '0'){
6693  cp++;
6694  output_encoding = nkf_enc_from_index(UTF_8N);
6695  } else {
6696  output_bom_f = TRUE;
6697  output_encoding = nkf_enc_from_index(UTF_8_BOM);
6698  }
6699  } else {
6700  int enc_idx;
6701  if ('1'== cp[0] && '6'==cp[1]) {
6702  cp += 2;
6703  enc_idx = UTF_16;
6704  } else if ('3'== cp[0] && '2'==cp[1]) {
6705  cp += 2;
6706  enc_idx = UTF_32;
6707  } else {
6708  output_encoding = nkf_enc_from_index(UTF_8);
6709  continue;
6710  }
6711  if (cp[0]=='L') {
6712  cp++;
6713  output_endian = ENDIAN_LITTLE;
6714  output_bom_f = TRUE;
6715  } else if (cp[0] == 'B') {
6716  cp++;
6717  output_bom_f = TRUE;
6718  }
6719  if (cp[0] == '0'){
6720  output_bom_f = FALSE;
6721  cp++;
6722  enc_idx = enc_idx == UTF_16
6723  ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6724  : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6725  } else {
6726  enc_idx = enc_idx == UTF_16
6727  ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6728  : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6729  }
6730  output_encoding = nkf_enc_from_index(enc_idx);
6731  }
6732  continue;
6733 #endif
6734 #ifdef UTF8_INPUT_ENABLE
6735  case 'W': /* UTF input */
6736  if (cp[0] == '8') {
6737  cp++;
6738  input_encoding = nkf_enc_from_index(UTF_8);
6739  }else{
6740  int enc_idx;
6741  if ('1'== cp[0] && '6'==cp[1]) {
6742  cp += 2;
6743  input_endian = ENDIAN_BIG;
6744  enc_idx = UTF_16;
6745  } else if ('3'== cp[0] && '2'==cp[1]) {
6746  cp += 2;
6747  input_endian = ENDIAN_BIG;
6748  enc_idx = UTF_32;
6749  } else {
6750  input_encoding = nkf_enc_from_index(UTF_8);
6751  continue;
6752  }
6753  if (cp[0]=='L') {
6754  cp++;
6755  input_endian = ENDIAN_LITTLE;
6756  } else if (cp[0] == 'B') {
6757  cp++;
6758  input_endian = ENDIAN_BIG;
6759  }
6760  enc_idx = (enc_idx == UTF_16
6761  ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6762  : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6763  input_encoding = nkf_enc_from_index(enc_idx);
6764  }
6765  continue;
6766 #endif
6767  /* Input code assumption */
6768  case 'J': /* ISO-2022-JP input */
6769  input_encoding = nkf_enc_from_index(ISO_2022_JP);
6770  continue;
6771  case 'E': /* EUC-JP input */
6772  input_encoding = nkf_enc_from_index(EUCJP_NKF);
6773  continue;
6774  case 'S': /* Shift_JIS input */
6775  input_encoding = nkf_enc_from_index(SHIFT_JIS);
6776  continue;
6777  case 'Z': /* Convert X0208 alphabet to asii */
6778  /* alpha_f
6779  bit:0 Convert JIS X 0208 Alphabet to ASCII
6780  bit:1 Convert Kankaku to one space
6781  bit:2 Convert Kankaku to two spaces
6782  bit:3 Convert HTML Entity
6783  bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6784  */
6785  while ('0'<= *cp && *cp <='4') {
6786  alpha_f |= 1 << (*cp++ - '0');
6787  }
6788  alpha_f |= 1;
6789  continue;
6790  case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6791  x0201_f = FALSE; /* No X0201->X0208 conversion */
6792  /* accept X0201
6793  ESC-(-I in JIS, EUC, MS Kanji
6794  SI/SO in JIS, EUC, MS Kanji
6795  SS2 in EUC, JIS, not in MS Kanji
6796  MS Kanji (0xa0-0xdf)
6797  output X0201
6798  ESC-(-I in JIS (0x20-0x5f)
6799  SS2 in EUC (0xa0-0xdf)
6800  0xa0-0xd in MS Kanji (0xa0-0xdf)
6801  */
6802  continue;
6803  case 'X': /* Convert X0201 kana to X0208 */
6804  x0201_f = TRUE;
6805  continue;
6806  case 'F': /* prserve new lines */
6807  fold_preserve_f = TRUE;
6808  case 'f': /* folding -f60 or -f */
6809  fold_f = TRUE;
6810  fold_len = 0;
6811  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6812  fold_len *= 10;
6813  fold_len += *cp++ - '0';
6814  }
6815  if (!(0<fold_len && fold_len<BUFSIZ))
6816  fold_len = DEFAULT_FOLD;
6817  if (*cp=='-') {
6818  fold_margin = 0;
6819  cp++;
6820  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6821  fold_margin *= 10;
6822  fold_margin += *cp++ - '0';
6823  }
6824  }
6825  continue;
6826  case 'm': /* MIME support */
6827  /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6828  if (*cp=='B'||*cp=='Q') {
6829  mime_decode_mode = *cp++;
6830  mimebuf_f = FIXED_MIME;
6831  } else if (*cp=='N') {
6832  mime_f = TRUE; cp++;
6833  } else if (*cp=='S') {
6834  mime_f = STRICT_MIME; cp++;
6835  } else if (*cp=='0') {
6836  mime_decode_f = FALSE;
6837  mime_f = FALSE; cp++;
6838  } else {
6839  mime_f = STRICT_MIME;
6840  }
6841  continue;
6842  case 'M': /* MIME output */
6843  if (*cp=='B') {
6844  mimeout_mode = 'B';
6845  mimeout_f = FIXED_MIME; cp++;
6846  } else if (*cp=='Q') {
6847  mimeout_mode = 'Q';
6848  mimeout_f = FIXED_MIME; cp++;
6849  } else {
6850  mimeout_f = TRUE;
6851  }
6852  continue;
6853  case 'B': /* Broken JIS support */
6854  /* bit:0 no ESC JIS
6855  bit:1 allow any x on ESC-(-x or ESC-$-x
6856  bit:2 reset to ascii on NL
6857  */
6858  if ('9'>= *cp && *cp>='0')
6859  broken_f |= 1<<(*cp++ -'0');
6860  else
6861  broken_f |= TRUE;
6862  continue;
6863 #ifndef PERL_XS
6864  case 'O':/* for Output file */
6865  file_out_f = TRUE;
6866  continue;
6867 #endif
6868  case 'c':/* add cr code */
6869  eolmode_f = CRLF;
6870  continue;
6871  case 'd':/* delete cr code */
6872  eolmode_f = LF;
6873  continue;
6874  case 'I': /* ISO-2022-JP output */
6875  iso2022jp_f = TRUE;
6876  continue;
6877  case 'L': /* line mode */
6878  if (*cp=='u') { /* unix */
6879  eolmode_f = LF; cp++;
6880  } else if (*cp=='m') { /* mac */
6881  eolmode_f = CR; cp++;
6882  } else if (*cp=='w') { /* windows */
6883  eolmode_f = CRLF; cp++;
6884  } else if (*cp=='0') { /* no conversion */
6885  eolmode_f = 0; cp++;
6886  }
6887  continue;
6888 #ifndef PERL_XS
6889  case 'g':
6890  if ('2' <= *cp && *cp <= '9') {
6891  guess_f = 2;
6892  cp++;
6893  } else if (*cp == '0' || *cp == '1') {
6894  guess_f = 1;
6895  cp++;
6896  } else {
6897  guess_f = 1;
6898  }
6899  continue;
6900 #endif
6901  case SP:
6902  /* module multiple options in a string are allowed for Perl module */
6903  while(*cp && *cp++!='-');
6904  continue;
6905  default:
6906 #if !defined(PERL_XS) && !defined(WIN32DLL)
6907  fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6908 #endif
6909  /* bogus option but ignored */
6910  return -1;
6911  }
6912  }
6913  return 0;
6914 }
6915 
6916 #ifdef WIN32DLL
6917 #include "nkf32dll.c"
6918 #elif defined(PERL_XS)
6919 #else /* WIN32DLL */
6920 int
6921 main(int argc, char **argv)
6922 {
6923  FILE *fin;
6924  unsigned char *cp;
6925 
6926  char *outfname = NULL;
6927  char *origfname;
6928 
6929 #ifdef EASYWIN /*Easy Win */
6930  _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6931 #endif
6932 #ifdef DEFAULT_CODE_LOCALE
6933  setlocale(LC_CTYPE, "");
6934 #endif
6935  nkf_state_init();
6936 
6937  for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6938  cp = (unsigned char *)*argv;
6939  options(cp);
6940 #ifdef EXEC_IO
6941  if (exec_f){
6942  int fds[2], pid;
6943  if (pipe(fds) < 0 || (pid = fork()) < 0){
6944  abort();
6945  }
6946  if (pid == 0){
6947  if (exec_f > 0){
6948  close(fds[0]);
6949  dup2(fds[1], 1);
6950  }else{
6951  close(fds[1]);
6952  dup2(fds[0], 0);
6953  }
6954  execvp(argv[1], &argv[1]);
6955  }
6956  if (exec_f > 0){
6957  close(fds[1]);
6958  dup2(fds[0], 0);
6959  }else{
6960  close(fds[0]);
6961  dup2(fds[1], 1);
6962  }
6963  argc = 0;
6964  break;
6965  }
6966 #endif
6967  }
6968 
6969  if (guess_f) {
6970 #ifdef CHECK_OPTION
6971  int debug_f_back = debug_f;
6972 #endif
6973 #ifdef EXEC_IO
6974  int exec_f_back = exec_f;
6975 #endif
6976 #ifdef X0212_ENABLE
6977  int x0212_f_back = x0212_f;
6978 #endif
6979  int x0213_f_back = x0213_f;
6980  int guess_f_back = guess_f;
6981  reinit();
6982  guess_f = guess_f_back;
6983  mime_f = FALSE;
6984 #ifdef CHECK_OPTION
6985  debug_f = debug_f_back;
6986 #endif
6987 #ifdef EXEC_IO
6988  exec_f = exec_f_back;
6989 #endif
6990  x0212_f = x0212_f_back;
6991  x0213_f = x0213_f_back;
6992  }
6993 
6994  if (binmode_f == TRUE)
6995 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6996  if (freopen("","wb",stdout) == NULL)
6997  return (-1);
6998 #else
6999  setbinmode(stdout);
7000 #endif
7001 
7002  if (unbuf_f)
7003  setbuf(stdout, (char *) NULL);
7004  else
7005  setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
7006 
7007  if (argc == 0) {
7008  if (binmode_f == TRUE)
7009 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7010  if (freopen("","rb",stdin) == NULL) return (-1);
7011 #else
7012  setbinmode(stdin);
7013 #endif
7014  setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
7015  if (nop_f)
7016  noconvert(stdin);
7017  else {
7018  kanji_convert(stdin);
7019  if (guess_f) print_guessed_code(NULL);
7020  }
7021  } else {
7022  int nfiles = argc;
7023  int is_argument_error = FALSE;
7024  while (argc--) {
7025  input_codename = NULL;
7026  input_eol = 0;
7027 #ifdef CHECK_OPTION
7028  iconv_for_check = 0;
7029 #endif
7030  if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
7031  perror(*(argv-1));
7032  is_argument_error = TRUE;
7033  continue;
7034  } else {
7035 #ifdef OVERWRITE
7036  int fd = 0;
7037  int fd_backup = 0;
7038 #endif
7039 
7040  /* reopen file for stdout */
7041  if (file_out_f == TRUE) {
7042 #ifdef OVERWRITE
7043  if (overwrite_f){
7044  outfname = nkf_xmalloc(strlen(origfname)
7045  + strlen(".nkftmpXXXXXX")
7046  + 1);
7047  strcpy(outfname, origfname);
7048 #ifdef MSDOS
7049  {
7050  int i;
7051  for (i = strlen(outfname); i; --i){
7052  if (outfname[i - 1] == '/'
7053  || outfname[i - 1] == '\\'){
7054  break;
7055  }
7056  }
7057  outfname[i] = '\0';
7058  }
7059  strcat(outfname, "ntXXXXXX");
7060  mktemp(outfname);
7061  fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7062  S_IREAD | S_IWRITE);
7063 #else
7064  strcat(outfname, ".nkftmpXXXXXX");
7065  fd = mkstemp(outfname);
7066 #endif
7067  if (fd < 0
7068  || (fd_backup = dup(fileno(stdout))) < 0
7069  || dup2(fd, fileno(stdout)) < 0
7070  ){
7071  perror(origfname);
7072  return -1;
7073  }
7074  }else
7075 #endif
7076  if(argc == 1) {
7077  outfname = *argv++;
7078  argc--;
7079  } else {
7080  outfname = "nkf.out";
7081  }
7082 
7083  if(freopen(outfname, "w", stdout) == NULL) {
7084  perror (outfname);
7085  return (-1);
7086  }
7087  if (binmode_f == TRUE) {
7088 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7089  if (freopen("","wb",stdout) == NULL)
7090  return (-1);
7091 #else
7092  setbinmode(stdout);
7093 #endif
7094  }
7095  }
7096  if (binmode_f == TRUE)
7097 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7098  if (freopen("","rb",fin) == NULL)
7099  return (-1);
7100 #else
7101  setbinmode(fin);
7102 #endif
7103  setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
7104  if (nop_f)
7105  noconvert(fin);
7106  else {
7107  char *filename = NULL;
7108  kanji_convert(fin);
7109  if (nfiles > 1) filename = origfname;
7110  if (guess_f) print_guessed_code(filename);
7111  }
7112  fclose(fin);
7113 #ifdef OVERWRITE
7114  if (overwrite_f) {
7115  struct stat sb;
7116 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7117  time_t tb[2];
7118 #else
7119  struct utimbuf tb;
7120 #endif
7121 
7122  fflush(stdout);
7123  close(fd);
7124  if (dup2(fd_backup, fileno(stdout)) < 0){
7125  perror("dup2");
7126  }
7127  if (stat(origfname, &sb)) {
7128  fprintf(stderr, "Can't stat %s\n", origfname);
7129  }
7130  /* $B%Q!<%_%C%7%g%s$rI|85(B */
7131  if (chmod(outfname, sb.st_mode)) {
7132  fprintf(stderr, "Can't set permission %s\n", outfname);
7133  }
7134 
7135  /* $B%?%$%`%9%?%s%W$rI|85(B */
7136  if(preserve_time_f){
7137 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7138  tb[0] = tb[1] = sb.st_mtime;
7139  if (utime(outfname, tb)) {
7140  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7141  }
7142 #else
7143  tb.actime = sb.st_atime;
7144  tb.modtime = sb.st_mtime;
7145  if (utime(outfname, &tb)) {
7146  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7147  }
7148 #endif
7149  }
7150  if(backup_f){
7151  char *backup_filename = get_backup_filename(backup_suffix, origfname);
7152 #ifdef MSDOS
7153  unlink(backup_filename);
7154 #endif
7155  if (rename(origfname, backup_filename)) {
7156  perror(backup_filename);
7157  fprintf(stderr, "Can't rename %s to %s\n",
7158  origfname, backup_filename);
7159  }
7160  nkf_xfree(backup_filename);
7161  }else{
7162 #ifdef MSDOS
7163  if (unlink(origfname)){
7164  perror(origfname);
7165  }
7166 #endif
7167  }
7168  if (rename(outfname, origfname)) {
7169  perror(origfname);
7170  fprintf(stderr, "Can't rename %s to %s\n",
7171  outfname, origfname);
7172  }
7173  nkf_xfree(outfname);
7174  }
7175 #endif
7176  }
7177  }
7178  if (is_argument_error)
7179  return(-1);
7180  }
7181 #ifdef EASYWIN /*Easy Win */
7182  if (file_out_f == FALSE)
7183  scanf("%d",&end_check);
7184  else
7185  fclose(stdout);
7186 #else /* for Other OS */
7187  if (file_out_f == TRUE)
7188  fclose(stdout);
7189 #endif /*Easy Win */
7190  return (0);
7191 }
7192 #endif /* WIN32DLL */
#define nkf_char_unicode_new(c)
Definition: nkf.c:429
#define SP
Definition: nkf.c:75
Definition: nkf.c:98
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
Definition: utf8tbl.c:3220
#define OUTPUT_UTF16(val)
Definition: nkf.c:2845
#define output_ascii_escape_sequence(mode)
Definition: nkf.c:2539
#define OUTPUT_UTF16_BYTES(c1, c2)
Definition: nkf.c:2835
nkf_native_encoding NkfEncodingUTF_32
Definition: nkf.c:159
#define NKF_ICONV_INVALID_CODE_RANGE
Definition: nkf.c:2319
const unsigned short *const x0212_shiftjis[]
Definition: utf8tbl.c:14602
#define SCORE_iMIME
Definition: nkf.c:2950
#define FALSE
Definition: nkf.h:174
#define BS
Definition: nkf.c:70
size_t strlen(const char *)
nkf_char(* iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:332
#define NKF_ICONV_WAIT_COMBINING_CHAR
Definition: nkf.c:2320
Definition: nkf.c:96
const unsigned short *const *const utf8_to_euc_3bytes_932[]
Definition: utf8tbl.c:12572
nkf_char stat
Definition: nkf.c:327
#define NKF_RELEASE_DATE
Definition: nkf.c:24
#define SCORE_INIT
Definition: nkf.c:2953
#define INPUT_OPTION
Definition: config.h:19
#define nkf_enc_asciicompat(enc)
Definition: nkf.c:763
#define INPUT_CODE_FIX
Definition: config.h:12
#define NKF_UNSPECIFIED
Definition: nkf.c:387
#define SCORE_CP932
Definition: nkf.c:2946
#define nkf_enc_name(enc)
Definition: nkf.c:758
nkf_native_encoding NkfEncodingASCII
Definition: nkf.c:153
const int id
Definition: nkf.c:209
#define MIME_BUF_MASK
Definition: nkf.c:4306
#define nkf_isalnum(c)
Definition: nkf.c:289
Definition: nkf.c:62
nkf_buf_t * broken_buf
Definition: nkf.c:3301
const unsigned short *const utf8_to_euc_2bytes_932[]
Definition: utf8tbl.c:12470
#define LF
Definition: nkf.c:72
#define EXIT_SUCCESS
Definition: error.c:37
#define PREFIX_EUCG3
Definition: nkf.c:422
#define STD_GC_BUFSIZE
Definition: nkf.c:3308
#define nkf_enc_to_iconv(enc)
Definition: nkf.c:761
#define TAB
Definition: nkf.c:71
#define SCORE_L2
Definition: nkf.c:2943
#define UTF8_INPUT_ENABLE
Definition: config.h:5
nkf_encoding nkf_encoding_table[]
Definition: nkf.c:167
Definition: nkf.c:115
const unsigned short cp932inv[2][189]
Definition: utf8tbl.c:13624
Definition: nkf.c:90
#define nkf_char_unicode_p(c)
Definition: nkf.c:430
#define setbinmode(fp)
Definition: nkf.h:85
#define HOLD_SIZE
Definition: nkf.c:304
#define UTF16_TO_UTF32(lead, trail)
Definition: nkf.c:434
#define nkf_char_unicode_value_p(c)
Definition: nkf.c:432
const char * alias
Definition: nkf.c:1151
byte_order
Definition: nkf.c:61
#define COPY_RIGHT
Definition: nkf.c:25
#define nkf_buf_length(buf)
Definition: nkf.c:859
unsigned int last
Definition: nkf.c:4311
const unsigned short *const euc_to_utf8_2bytes[]
Definition: utf8tbl.c:3059
const int id
Definition: nkf.c:162
#define nkf_noescape_mime(c)
Definition: nkf.c:297
#define nkf_char_unicode_bmp_p(c)
Definition: nkf.c:431
Definition: nkf.c:111
const nkf_native_encoding * base_encoding
Definition: nkf.c:164
#define VALUE_MASK
Definition: nkf.c:425
#define DEFAULT_CODE_LOCALE
Definition: nkf.h:137
#define MIME_DECODE_DEFAULT
Definition: nkf.h:13
#define MORE
Definition: nkf.c:5823
#define NKF_ICONV_NOT_COMBINED
Definition: nkf.c:2321
#define OVERWRITE
Definition: config.h:16
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
Definition: nkf.c:87
#define nkf_isxdigit(c)
Definition: nkf.c:285
#define SCORE_NO_EXIST
Definition: nkf.c:2949
#define rot47(c)
Definition: nkf.c:4153
Definition: file.c:2673
#define NKF_VERSION
Definition: nkf.c:23
nkf_buf_t * std_gc_buf
Definition: nkf.c:3299
Definition: nkf.c:91
#define STRICT_MIME
Definition: nkf.c:58
nkf_char buf[3]
Definition: nkf.c:330
#define getc(f)
Definition: nkf.c:21
#define SO
Definition: nkf.c:78
#define UCS_MAP_CP932
Definition: nkf.c:349
#define SJ6394
unsigned int input
Definition: nkf.c:4312
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
Definition: utf8tbl.c:3278
const unsigned short euc_to_utf8_1byte[]
Definition: utf8tbl.c:3045
#define val
nkf_buf_t * nfc_buf
Definition: nkf.c:3303
#define SI
Definition: nkf.c:77
Definition: nkf.c:99
#define UTF8_OUTPUT_ENABLE
Definition: config.h:6
#define ARG_UNUSED
Definition: nkf.h:181
#define nkf_isoctal(c)
Definition: nkf.c:283
struct input_code input_code_list[]
Definition: nkf.c:475
#define NKF_ICONV_NEED_TWO_MORE_BYTES
Definition: nkf.c:2392
#define nkf_enc_unicode_p(enc)
Definition: nkf.c:766
#define CR
Definition: nkf.c:73
#define nkf_buf_empty_p(buf)
Definition: nkf.c:860
nkf_native_encoding NkfEncodingISO_2022_JP
Definition: nkf.c:154
#define SCORE_ERROR
Definition: nkf.c:2951
#define DEFAULT_J
Definition: nkf.c:311
#define MAXRECOVER
Definition: nkf.c:4316
#define SJ0162
#define rot13(c)
Definition: nkf.c:4143
int argc
Definition: ruby.c:187
#define set_input_mode(mode)
Definition: nkf.c:5826
#define realloc
Definition: ripper.c:359
long modtime
Definition: file.c:2675
const unsigned short *const euc_to_utf8_2bytes_ms[]
Definition: utf8tbl.c:3086
#define range(low, item, hi)
Definition: date_strftime.c:21
#define GETA2
Definition: nkf.c:316
#define MIME_BUF_SIZE
Definition: nkf.c:4305
#define EXIT_FAILURE
Definition: eval_intern.h:33
#define is_ibmext_in_sjis(c2)
Definition: nkf.c:301
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
Definition: utf8tbl.c:3250
#define nkf_isblank(c)
Definition: nkf.c:286
#define hex2bin(c)
Definition: nkf.c:292
#define EOF
Definition: vsnprintf.c:203
nkf_char * ptr
Definition: nkf.c:837
const char * name
Definition: nkf.c:163
#define nkf_isprint(c)
Definition: nkf.c:290
#define SCORE_KANA
Definition: nkf.c:2944
int errno
#define TRUE
Definition: nkf.h:175
struct @39 encoding_name_to_id_table[]
Definition: nkf.c:101
Definition: nkf.c:92
#define NUMCHAR_OPTION
Definition: config.h:22
#define malloc
Definition: ripper.c:358
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
Definition: utf8tbl.c:12584
const char * name
Definition: nkf.c:326
Definition: nkf.c:102
#define is_eucg3(c2)
Definition: nkf.c:296
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
Definition: utf8tbl.c:3223
#define mime_input_buf(n)
Definition: nkf.c:4307
#define nkf_enc_to_index(enc)
Definition: nkf.c:759
Definition: nkf.c:834
#define UCS_MAP_CP10001
Definition: nkf.c:350
#define debug(x)
Definition: _sdbm.c:51
#define CRLF
Definition: nkf.c:81
int _file_stat
Definition: nkf.c:333
#define SEND
Definition: nkf.c:5824
#define DEFAULT_NEWLINE
Definition: nkf.h:22
#define LAST
Definition: nkf.c:5825
nkf_native_encoding NkfEncodingUTF_16
Definition: nkf.c:158
#define bin2hex(c)
Definition: nkf.c:295
#define UCS_MAP_MS
Definition: nkf.c:348
const unsigned short *const utf8_to_euc_2bytes_ms[]
Definition: utf8tbl.c:12440
#define nkf_byte_jisx0201_katakana_p(c)
Definition: nkf.c:302
register unsigned int len
Definition: zonetab.h:51
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
Definition: utf8tbl.c:12578
long len
Definition: nkf.c:836
const unsigned short *const x0212_to_utf8_2bytes[]
Definition: utf8tbl.c:3167
const unsigned short *const euc_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3139
#define X0213_SURROGATE_FIND(tbl, size, euc)
Definition: nkf.c:1963
int count
Definition: nkf.c:5042
unsigned int top
Definition: nkf.c:4310
const unsigned short shiftjis_x0212[3][189]
Definition: utf8tbl.c:13681
#define SCORE_DEPEND
Definition: nkf.c:2945
#define setvbuffer(fp, buf, size)
Definition: nkf.h:91
int size
Definition: encoding.c:57
nkf_char mimeout_state
Definition: nkf.c:3302
#define f
int utime(const char *filename, const struct utimbuf *times)
const struct normalization_pair normalization_table[]
Definition: utf8tbl.c:12596
const unsigned short *const euc_to_utf8_2bytes_mac[]
Definition: utf8tbl.c:3113
const unsigned short *const utf8_to_euc_2bytes[]
Definition: utf8tbl.c:12410
#define SCORE_X0213
Definition: nkf.c:2948
#define UCS_MAP_ASCII
Definition: nkf.c:347
#define putchar(c)
Definition: nkf.c:28
#define nkf_toupper(c)
Definition: nkf.c:282
#define FIXED_MIME
Definition: nkf.c:57
#define GETA1
Definition: nkf.c:315
const unsigned short shiftjis_cp932[3][189]
Definition: utf8tbl.c:13544
#define HELP_OUTPUT
Definition: nkf.h:27
#define char_size(c2, c1)
Definition: nkf.c:3811
#define FOLD_MARGIN
Definition: nkf.c:504
#define SCORE_X0212
Definition: nkf.c:2947
#define SS2
Definition: nkf.c:79
#define nkf_isspace(c)
Definition: nkf.c:287
#define nkf_isgraph(c)
Definition: nkf.c:291
nkf_native_encoding NkfEncodingShift_JIS
Definition: nkf.c:155
long capa
Definition: nkf.c:835
Definition: nkf.c:112
#define nkf_isdigit(c)
Definition: nkf.c:284
#define assert
Definition: ruby_assert.h:37
#define RANGE_NUM_MAX
const char * name
Definition: nkf.c:148
Definition: nkf.c:110
#define nkf_enc_cp5022x_p(enc)
Definition: nkf.c:770
Definition: nkf.c:113
int main(int argc, char **argv)
Definition: nkf.c:6921
const char * name
Definition: nkf.c:208
#define nkf_xfree(ptr)
Definition: nkf.c:714
RUBY_EXTERN int dup2(int, int)
Definition: dup2.c:27
#define ESC
Definition: nkf.c:74
const unsigned short *const *const utf8_to_euc_3bytes[]
Definition: utf8tbl.c:12560
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3193
#define OUTPUT_UTF8(val)
Definition: nkf.c:2790
Definition: nkf.c:122
#define IOBUF_SIZE
Definition: nkf.c:308
#define fileno(p)
Definition: vsnprintf.c:219
#define is_alnum(c)
Definition: nkf.c:278
const unsigned short *const utf8_to_euc_2bytes_mac[]
Definition: utf8tbl.c:12500
#define NEXT
Definition: nkf.c:5821
#define nkf_enc_to_oconv(enc)
Definition: nkf.c:762
#define MIMEOUT_BUF_LENGTH
Definition: nkf.c:5039
Definition: nkf.c:108
#define DEFAULT_FOLD
Definition: nkf.c:505
#define DEL
Definition: nkf.c:76
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
Definition: utf8tbl.c:12566
nkf_native_encoding NkfEncodingUTF_8
Definition: nkf.c:157
#define DEFAULT_R
Definition: nkf.c:312
Definition: nkf.c:100
nkf_char broken_state
Definition: nkf.c:3300
#define NULL
Definition: _sdbm.c:102
int nkf_char
Definition: nkf.h:38
nkf_char index
Definition: nkf.c:329
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:4276
nkf_encodings
Definition: nkf.c:86
nkf_char score
Definition: nkf.c:328
Definition: nkf.c:118
#define OUTPUT_UTF32(c)
Definition: nkf.c:2889
#define NKF_INT32_C(n)
Definition: nkf.h:39
const unsigned short *const utf8_to_euc_2bytes_x0213[]
Definition: utf8tbl.c:12530
Definition: nkf.c:109
void(* status_func)(struct input_code *, nkf_char)
Definition: nkf.c:331
long actime
Definition: file.c:2674
char ** argv
Definition: ruby.c:188
#define X0201_DEFAULT
Definition: nkf.h:16
Definition: nkf.c:88
nkf_native_encoding NkfEncodingEUC_JP
Definition: nkf.c:156
Definition: nkf.c:117
Definition: nkf.c:120
#define SKIP
Definition: nkf.c:5822