23 #define NKF_VERSION "2.1.4" 24 #define NKF_RELEASE_DATE "2015-12-12" 26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \ 27 "Copyright (C) 1996-2015, The nkf Project." 38 # define INCL_DOSERRORS 168 {
ASCII,
"US-ASCII", &NkfEncodingASCII},
169 {
ISO_8859_1,
"ISO-8859-1", &NkfEncodingASCII},
170 {
ISO_2022_JP,
"ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {
CP50220,
"CP50220", &NkfEncodingISO_2022_JP},
172 {
CP50221,
"CP50221", &NkfEncodingISO_2022_JP},
173 {
CP50222,
"CP50222", &NkfEncodingISO_2022_JP},
177 {
SHIFT_JIS,
"Shift_JIS", &NkfEncodingShift_JIS},
178 {
WINDOWS_31J,
"Windows-31J", &NkfEncodingShift_JIS},
179 {
CP10001,
"CP10001", &NkfEncodingShift_JIS},
180 {
EUC_JP,
"EUC-JP", &NkfEncodingEUC_JP},
181 {
EUCJP_NKF,
"eucJP-nkf", &NkfEncodingEUC_JP},
182 {
CP51932,
"CP51932", &NkfEncodingEUC_JP},
183 {
EUCJP_MS,
"eucJP-MS", &NkfEncodingEUC_JP},
189 {
UTF_8,
"UTF-8", &NkfEncodingUTF_8},
190 {
UTF_8N,
"UTF-8N", &NkfEncodingUTF_8},
191 {
UTF_8_BOM,
"UTF-8-BOM", &NkfEncodingUTF_8},
192 {
UTF8_MAC,
"UTF8-MAC", &NkfEncodingUTF_8},
193 {
UTF_16,
"UTF-16", &NkfEncodingUTF_16},
194 {
UTF_16BE,
"UTF-16BE", &NkfEncodingUTF_16},
196 {
UTF_16LE,
"UTF-16LE", &NkfEncodingUTF_16},
198 {
UTF_32,
"UTF-32", &NkfEncodingUTF_32},
199 {
UTF_32BE,
"UTF-32BE", &NkfEncodingUTF_32},
201 {
UTF_32LE,
"UTF-32LE", &NkfEncodingUTF_32},
203 {
BINARY,
"BINARY", &NkfEncodingASCII},
265 #if defined(DEFAULT_CODE_JIS) 266 #define DEFAULT_ENCIDX ISO_2022_JP 267 #elif defined(DEFAULT_CODE_SJIS) 268 #define DEFAULT_ENCIDX SHIFT_JIS 269 #elif defined(DEFAULT_CODE_WINDOWS_31J) 270 #define DEFAULT_ENCIDX WINDOWS_31J 271 #elif defined(DEFAULT_CODE_EUC) 272 #define DEFAULT_ENCIDX EUC_JP 273 #elif defined(DEFAULT_CODE_UTF8) 274 #define DEFAULT_ENCIDX UTF_8 278 #define is_alnum(c) \ 279 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) 282 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) 283 #define nkf_isoctal(c) ('0'<=c && c<='7') 284 #define nkf_isdigit(c) ('0'<=c && c<='9') 285 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F')) 286 #define nkf_isblank(c) (c == SP || c == TAB) 287 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF) 288 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) 289 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c)) 290 #define nkf_isprint(c) (SP<=c && c<='~') 291 #define nkf_isgraph(c) ('!'<=c && c<='~') 292 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \ 293 ('A'<=c&&c<='F') ? (c-'A'+10) : \ 294 ('a'<=c&&c<='f') ? (c-'a'+10) : 0) 295 #define bin2hex(c) ("0123456789ABCDEF"[c&15]) 296 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) 297 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \ 298 ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \ 299 && (c != '(') && (c != ')') && (c != '.') && (c != 0x22))) 301 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END) 302 #define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c <= 0x5F) 304 #define HOLD_SIZE 1024 305 #if defined(INT_IS_SHORT) 306 #define IOBUF_SIZE 2048 308 #define IOBUF_SIZE 16384 311 #define DEFAULT_J 'B' 312 #define DEFAULT_R 'B' 322 extern POINT _BufferSize;
336 static const char *input_codename =
NULL;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 347 #define UCS_MAP_ASCII 0 349 #define UCS_MAP_CP932 2 350 #define UCS_MAP_CP10001 3 353 #ifdef UTF8_INPUT_ENABLE 355 static int no_cp932ext_f =
FALSE;
357 static int no_best_fit_chars_f =
FALSE;
359 static int input_bom_f =
FALSE;
360 static nkf_char unicode_subchar =
'?';
364 #ifdef UTF8_OUTPUT_ENABLE 365 static int output_bom_f =
FALSE;
382 #if !defined(PERL_XS) && !defined(WIN32DLL) 387 #define NKF_UNSPECIFIED (-TRUE) 390 static int unbuf_f =
FALSE;
391 static int estab_f =
FALSE;
392 static int nop_f =
FALSE;
393 static int binmode_f =
TRUE;
394 static int rot_f =
FALSE;
395 static int hira_f =
FALSE;
396 static int alpha_f =
FALSE;
398 static int mime_decode_f =
FALSE;
399 static int mimebuf_f =
FALSE;
400 static int broken_f =
FALSE;
401 static int iso8859_f =
FALSE;
402 static int mimeout_f =
FALSE;
404 static int iso2022jp_f =
FALSE;
406 #ifdef UNICODE_NORMALIZATION 407 static int nfc_f =
FALSE;
413 static int cap_f =
FALSE;
417 static int url_f =
FALSE;
422 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00) 423 #define CLASS_MASK NKF_INT32_C(0xFF000000) 424 #define CLASS_UNICODE NKF_INT32_C(0x01000000) 425 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF) 426 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF) 427 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF) 428 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3) 429 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE) 430 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE) 431 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX) 432 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX) 434 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) 436 #ifdef NUMCHAR_OPTION 437 static int numchar_f =
FALSE;
443 static int noout_f =
FALSE;
445 static int debug_f =
FALSE;
446 static void debug(
const char *str);
450 static int guess_f = 0;
451 static void set_input_codename(
const char *codename);
454 static int exec_f = 0;
457 #ifdef SHIFTJIS_CP932 459 static int cp51932_f =
FALSE;
462 static int cp932inv_f =
TRUE;
467 static int x0212_f =
FALSE;
468 static int x0213_f =
FALSE;
470 static unsigned char prefix_table[256];
476 {
"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477 {
"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE 479 {
"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480 {
"UTF-16", 0, 0, 0, {0, 0, 0},
NULL, w_iconv16, 0},
481 {
"UTF-32", 0, 0, 0, {0, 0, 0},
NULL, w_iconv32, 0},
486 static int mimeout_mode = 0;
487 static int base64_count = 0;
492 static int f_line = 0;
493 static int f_prev = 0;
494 static int fold_preserve_f =
FALSE;
495 static int fold_f =
FALSE;
496 static int fold_len = 0;
499 static unsigned char kanji_intro =
DEFAULT_J;
500 static unsigned char ascii_intro =
DEFAULT_R;
504 #define FOLD_MARGIN 10 505 #define DEFAULT_FOLD 60 514 fprintf(stderr,
"nkf internal module connection failure.\n");
522 no_connection2(c2,c1,0);
534 static void (*o_iso2022jp_check_conv)(
nkf_char c2,
nkf_char c1) = no_connection;
538 static void (*o_putc)(
nkf_char c) = std_putc;
546 static void (*o_mputc)(
nkf_char c) = std_putc ;
556 static int output_mode =
ASCII;
557 static int input_mode =
ASCII;
558 static int mime_decode_mode =
FALSE;
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
628 static const unsigned char ev_x0213[]= {
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650 static const unsigned char fv[] = {
652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
668 static int option_mode = 0;
669 static int file_out_f =
FALSE;
671 static int overwrite_f =
FALSE;
672 static int preserve_time_f =
FALSE;
673 static int backup_f =
FALSE;
674 static char *backup_suffix =
"";
677 static int eolmode_f = 0;
678 static int input_eol = 0;
681 static int end_check;
685 nkf_xmalloc(
size_t size)
689 if (size == 0) size = 1;
693 perror(
"can't malloc");
701 nkf_xrealloc(
void *ptr,
size_t size)
703 if (size == 0) size = 1;
707 perror(
"can't realloc");
714 #define nkf_xfree(ptr) free(ptr) 717 nkf_str_caseeql(
const char *src,
const char *target)
720 for (i = 0; src[i] && target[i]; i++) {
723 if (src[i] || target[i])
return FALSE;
728 nkf_enc_from_index(
int idx)
733 return &nkf_encoding_table[idx];
737 nkf_enc_find_index(
const char *
name)
740 if (name[0] ==
'X' && *(name+1) ==
'-') name += 2;
750 nkf_enc_find(
const char *
name)
753 idx = nkf_enc_find_index(name);
754 if (idx < 0)
return 0;
755 return nkf_enc_from_index(idx);
758 #define nkf_enc_name(enc) (enc)->name 759 #define nkf_enc_to_index(enc) (enc)->id 760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding 761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv 762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv 763 #define nkf_enc_asciicompat(enc) (\ 764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\ 765 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP) 766 #define nkf_enc_unicode_p(enc) (\ 767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\ 768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\ 769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32) 770 #define nkf_enc_cp5022x_p(enc) (\ 771 nkf_enc_to_index(enc) == CP50220 ||\ 772 nkf_enc_to_index(enc) == CP50221 ||\ 773 nkf_enc_to_index(enc) == CP50222) 775 #ifdef DEFAULT_CODE_LOCALE 777 nkf_locale_charmap(
void)
779 #ifdef HAVE_LANGINFO_H 780 return nl_langinfo(CODESET);
781 #elif defined(__WIN32__) 783 sprintf(buf,
"CP%d", GetACP());
785 #elif defined(__OS2__) 786 # if defined(INT_IS_SHORT) 792 ULONG ulCP[1], ulncp;
793 DosQueryCp(
sizeof(ulCP), ulCP, &ulncp);
794 if (ulCP[0] == 932 || ulCP[0] == 943)
795 strcpy(buf,
"Shift_JIS");
797 sprintf(buf,
"CP%lu", ulCP[0]);
805 nkf_locale_encoding(
void)
808 const char *encname = nkf_locale_charmap();
810 enc = nkf_enc_find(encname);
816 nkf_utf8_encoding(
void)
818 return &nkf_encoding_table[
UTF_8];
822 nkf_default_encoding(
void)
825 #ifdef DEFAULT_CODE_LOCALE 826 enc = nkf_locale_encoding();
827 #elif defined(DEFAULT_ENCIDX) 828 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
830 if (!enc) enc = nkf_utf8_encoding();
841 nkf_buf_new(
int length)
859 #define nkf_buf_length(buf) ((buf)->len) 860 #define nkf_buf_empty_p(buf) ((buf)->len == 0) 888 return buf->
ptr[--buf->
len];
894 #define fprintf dllprintf 907 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n" 909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n" 914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 915 " UTF option is -W[8,[16,32][B,L]]\n" 917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n" 922 " M[BQ] MIME encode [B:base64 Q:quoted]\n" 923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n" 926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n" 927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n" 928 " 4: JISX0208 Katakana to JISX0201 Katakana\n" 929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n" 932 " O Output to File (DEFAULT 'nkf.out')\n" 933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n" 936 " --ic=<encoding> Specify the input encoding\n" 937 " --oc=<encoding> Specify the output encoding\n" 938 " --hiragana --katakana Hiragana/Katakana Conversion\n" 939 " --katakana-hiragana Converts each other\n" 943 " --{cap, url}-input Convert hex after ':' or '%%'\n" 946 " --numchar-input Convert Unicode Character Reference\n" 949 " --fb-{skip, html, xml, perl, java, subchar}\n" 950 " Specify unassigned character's replacement\n" 955 " --in-place[=SUF] Overwrite original files\n" 956 " --overwrite[=SUF] Preserve timestamp of original files\n" 958 " -g --guess Guess the input code\n" 959 " -v --version Print the version\n" 960 " --help/-V Print this help / configuration\n" 966 show_configuration(
void)
970 " Compile-time options:\n" 971 " Compiled at: " __DATE__
" " __TIME__
"\n" 974 " Default output encoding: " 977 #elif defined(DEFAULT_ENCIDX)
984 " Default output end of line: " 993 " Decode MIME encoded string: " 1000 " Convert JIS X 0201 Katakana: " 1007 " --help, --version output: " 1008 #
if HELP_OUTPUT_HELP_OUTPUT
1019 get_backup_filename(
const char *suffix,
const char *filename)
1021 char *backup_filename;
1022 int asterisk_count = 0;
1024 int filename_length =
strlen(filename);
1026 for(i = 0; suffix[i]; i++){
1027 if(suffix[i] ==
'*') asterisk_count++;
1031 backup_filename = nkf_xmalloc(
strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1032 for(i = 0, j = 0; suffix[i];){
1033 if(suffix[i] ==
'*'){
1034 backup_filename[j] =
'\0';
1035 strncat(backup_filename, filename, filename_length);
1037 j += filename_length;
1039 backup_filename[j++] = suffix[i++];
1042 backup_filename[j] =
'\0';
1044 j = filename_length +
strlen(suffix);
1045 backup_filename = nkf_xmalloc(j + 1);
1046 strcpy(backup_filename, filename);
1047 strcat(backup_filename, suffix);
1048 backup_filename[j] =
'\0';
1050 return backup_filename;
1054 #ifdef UTF8_INPUT_ENABLE 1084 (*oconv)(0, 0x30+(c/10000 )%10);
1086 (*oconv)(0, 0x30+(c/1000 )%10);
1088 (*oconv)(0, 0x30+(c/100 )%10);
1090 (*oconv)(0, 0x30+(c/10 )%10);
1092 (*oconv)(0, 0x30+ c %10);
1103 nkf_each_char_to_hex(oconv, c);
1135 nkf_each_char_to_hex(oconv, c);
1141 encode_fallback_subchar(
nkf_char c)
1143 c = unicode_subchar;
1144 (*oconv)((c>>8)&0xFF, c&0xFF);
1149 static const struct {
1173 {
"katakana-hiragana",
"h3"},
1181 #ifdef UTF8_OUTPUT_ENABLE 1191 {
"fb-subchar=",
""},
1193 #ifdef UTF8_INPUT_ENABLE 1194 {
"utf8-input",
"W"},
1195 {
"utf16-input",
"W16"},
1196 {
"no-cp932ext",
""},
1197 {
"no-best-fit-chars",
""},
1199 #ifdef UNICODE_NORMALIZATION 1200 {
"utf8mac-input",
""},
1212 #ifdef NUMCHAR_OPTION 1213 {
"numchar-input",
""},
1219 #ifdef SHIFTJIS_CP932 1240 #ifdef SHIFTJIS_CP932 1243 #ifdef UTF8_OUTPUT_ENABLE 1262 #ifdef SHIFTJIS_CP932 1265 #ifdef UTF8_OUTPUT_ENABLE 1271 #ifdef SHIFTJIS_CP932 1274 #ifdef UTF8_OUTPUT_ENABLE 1284 #ifdef SHIFTJIS_CP932 1287 #ifdef UTF8_OUTPUT_ENABLE 1293 #ifdef SHIFTJIS_CP932 1296 #ifdef UTF8_OUTPUT_ENABLE 1302 #ifdef SHIFTJIS_CP932 1305 #ifdef UTF8_OUTPUT_ENABLE 1312 #ifdef SHIFTJIS_CP932 1314 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1320 #ifdef SHIFTJIS_CP932 1324 #ifdef UTF8_INPUT_ENABLE 1325 #ifdef UNICODE_NORMALIZATION 1357 #ifdef SHIFTJIS_CP932 1358 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1360 #ifdef UTF8_OUTPUT_ENABLE 1366 #ifdef SHIFTJIS_CP932 1367 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1369 #ifdef UTF8_OUTPUT_ENABLE 1374 #ifdef SHIFTJIS_CP932 1375 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1380 #ifdef SHIFTJIS_CP932 1381 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1388 #ifdef SHIFTJIS_CP932 1389 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1396 #ifdef UTF8_OUTPUT_ENABLE 1401 #ifdef UTF8_OUTPUT_ENABLE 1407 #ifdef SHIFTJIS_CP932 1408 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1410 #ifdef UTF8_OUTPUT_ENABLE 1416 #ifdef SHIFTJIS_CP932 1417 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1419 #ifdef UTF8_OUTPUT_ENABLE 1425 #ifdef SHIFTJIS_CP932 1426 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1428 #ifdef UTF8_OUTPUT_ENABLE 1435 #ifdef UTF8_OUTPUT_ENABLE 1442 #ifdef UTF8_OUTPUT_ENABLE 1449 #ifdef SHIFTJIS_CP932 1450 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1457 #ifdef SHIFTJIS_CP932 1458 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1461 #ifdef UTF8_OUTPUT_ENABLE 1463 output_bom_f =
TRUE;
1467 output_bom_f =
TRUE;
1471 output_bom_f =
FALSE;
1475 output_bom_f =
TRUE;
1479 output_bom_f =
TRUE;
1483 output_bom_f =
FALSE;
1487 output_bom_f =
TRUE;
1511 #ifdef INPUT_CODE_FIX 1512 if (
f || !input_encoding)
1520 && (
f == -
TRUE || !input_encoding)
1526 if (estab_f && iconv_for_check != iconv){
1527 struct input_code *p = find_inputcode_byfunc(iconv);
1529 set_input_codename(p->
name);
1532 iconv_for_check = iconv;
1544 if (0x75 <= c && c <= 0x7f){
1545 ret = c + (0x109 - 0x75);
1548 if (0x75 <= c && c <= 0x7f){
1549 ret = c + (0x113 - 0x75);
1560 if (0x7f <= c && c <= 0x88){
1561 ret = c + (0x75 - 0x7f);
1562 }
else if (0x89 <= c && c <= 0x92){
1572 static const char x0213_2_table[] =
1573 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1576 return x0213_2_table[ku];
1577 if (78 <= ku && ku <= 94)
1588 if (x0213_f && is_x0213_2_in_x0212(ndx)){
1589 if((0x21 <= ndx && ndx <= 0x2F)){
1590 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1591 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1593 }
else if(0x6E <= ndx && ndx <= 0x7E){
1594 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1595 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1603 const unsigned short *ptr;
1606 val = ptr[(c1 & 0x7f) - 0x21];
1615 c2 = x0212_shift(c2);
1619 if(0x7F < c2)
return 1;
1620 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1621 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE) 1631 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1632 if (0xFC < c1)
return 1;
1633 #ifdef SHIFTJIS_CP932 1642 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1643 val =
cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1668 if(x0213_f && c2 >= 0xF0){
1669 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
1670 c2 =
PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1673 if (0x9E < c1) c2++;
1676 #define SJ0162 0x00e1 1677 #define SJ6394 0x0161 1679 if (0x9E < c1) c2++;
1682 c1 = c1 - ((c1 >
DEL) ?
SP : 0x1F);
1689 c2 = x0212_unshift(c2);
1696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 1706 }
else if (
val < 0x800){
1707 *p1 = 0xc0 | (
val >> 6);
1708 *p2 = 0x80 | (
val & 0x3f);
1712 *p1 = 0xe0 | (
val >> 12);
1713 *p2 = 0x80 | ((
val >> 6) & 0x3f);
1714 *p3 = 0x80 | (
val & 0x3f);
1717 *p1 = 0xf0 | (
val >> 18);
1718 *p2 = 0x80 | ((
val >> 12) & 0x3f);
1719 *p3 = 0x80 | ((
val >> 6) & 0x3f);
1720 *p4 = 0x80 | (
val & 0x3f);
1737 else if (c1 <= 0xC1) {
1741 else if (c1 <= 0xDF) {
1743 wc = (c1 & 0x1F) << 6;
1746 else if (c1 <= 0xEF) {
1748 wc = (c1 & 0x0F) << 12;
1749 wc |= (c2 & 0x3F) << 6;
1752 else if (c2 <= 0xF4) {
1754 wc = (c1 & 0x0F) << 18;
1755 wc |= (c2 & 0x3F) << 12;
1756 wc |= (c3 & 0x3F) << 6;
1766 #ifdef UTF8_INPUT_ENABLE 1769 const unsigned short *
const *pp,
nkf_char psize,
1773 const unsigned short *p;
1776 if (pp == 0)
return 1;
1779 if (c1 < 0 || psize <= c1)
return 1;
1781 if (p == 0)
return 1;
1784 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0)
return 1;
1786 if (val == 0)
return 1;
1787 if (no_cp932ext_f && (
1807 const unsigned short *
const *pp;
1808 const unsigned short *
const *
const *ppp;
1809 static const char no_best_fit_chars_table_C2[] =
1810 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814 static const char no_best_fit_chars_table_C2_ms[] =
1815 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819 static const char no_best_fit_chars_table_932_C2[] =
1820 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824 static const char no_best_fit_chars_table_932_C3[] =
1825 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1834 }
else if(c2 < 0xe0){
1835 if(no_best_fit_chars_f){
1839 if(no_best_fit_chars_table_932_C2[c1&0x3F])
return 1;
1842 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1845 }
else if(!cp932inv_f){
1848 if(no_best_fit_chars_table_C2[c1&0x3F])
return 1;
1851 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1855 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F])
return 1;
1879 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1880 }
else if(c0 < 0xF0){
1881 if(no_best_fit_chars_f){
1883 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
return 1;
1889 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE)
return 1;
1892 if(c0 == 0x92)
return 1;
1897 if(c1 == 0x80 || c0 == 0x9C)
return 1;
1905 if(c0 == 0x94)
return 1;
1908 if(c0 == 0xBB)
return 1;
1918 if(c0 == 0x95)
return 1;
1921 if(c0 == 0xA5)
return 1;
1928 if(c0 == 0x8D)
return 1;
1931 if(c0 == 0x9E && !cp932inv_f)
return 1;
1934 if(0xA0 <= c0 && c0 <= 0xA5)
return 1;
1947 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1949 #ifdef SHIFTJIS_CP932 1950 if (!ret && !cp932inv_f &&
is_eucg3(*p2)) {
1952 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1953 s2e_conv(s2, s1, p2, p1);
1962 #ifdef UTF8_OUTPUT_ENABLE 1963 #define X0213_SURROGATE_FIND(tbl, size, euc) do { \ 1965 for (i = 0; i < size; i++) \ 1966 if (tbl[i][0] == euc) { \ 1975 const unsigned short *p;
1992 c2 = (c2&0x7f) - 0x21;
1993 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2002 c2 = (c2&0x7f) - 0x21;
2003 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2013 c1 = (c1 & 0x7f) - 0x21;
2014 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2016 if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2017 nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2038 for (i = 0; i < sizeof_x0213_combining_chars; i++)
2041 if (i >= sizeof_x0213_combining_chars)
2043 euc = (c2&0x7f)<<8 | (c1&0x7f);
2044 for (i = 0; i < sizeof_x0213_combining_table; i++)
2059 }
else if (0xc0 <= c2 && c2 <= 0xef) {
2060 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2061 #ifdef NUMCHAR_OPTION 2072 #ifdef UTF8_INPUT_ENABLE 2084 nkf_unicode_to_utf8(
val, &c1, &c2, &c3, &c4);
2085 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2097 for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2104 for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2123 if (iso2022jp_f && !x0201_f) {
2130 }
else if (c2 == 0x8f){
2134 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2139 c2 = (c2 << 8) | (c1 & 0x7f);
2141 #ifdef SHIFTJIS_CP932 2144 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2145 s2e_conv(s2, s1, &c2, &c1);
2158 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2165 #ifdef SHIFTJIS_CP932 2166 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2168 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2169 s2e_conv(s2, s1, &c2, &c1);
2187 if (iso2022jp_f && !x0201_f) {
2192 }
else if ((c2 ==
EOF) || (c2 == 0) || c2 <
SP) {
2194 }
else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2196 if(c1 == 0x7F)
return 0;
2200 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2201 if (ret)
return ret;
2208 x0213_wait_combining_p(
nkf_char wc)
2211 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2223 for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2235 static const char w_iconv_utf8_1st_byte[] =
2237 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2247 if (c1 < 0 || 0xff < c1) {
2248 }
else if (c1 == 0) {
2250 }
else if ((c1 & 0xC0) == 0x80) {
2253 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2255 if (c2 < 0x80 || 0xBF < c2)
return 0;
2258 if (c3 == 0)
return -1;
2259 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2264 if (c3 == 0)
return -1;
2265 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2269 if (c3 == 0)
return -1;
2270 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2274 if (c3 == 0)
return -2;
2275 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2279 if (c3 == 0)
return -2;
2280 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2284 if (c3 == 0)
return -2;
2285 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2293 if (c1 == 0 || c1 ==
EOF){
2294 }
else if ((c1 & 0xf8) == 0xf0) {
2298 if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2300 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2312 nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2319 #define NKF_ICONV_INVALID_CODE_RANGE -13 2320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14 2321 #define NKF_ICONV_NOT_COMBINED -15 2323 unicode_iconv(
nkf_char wc,
int nocombine)
2331 }
else if ((wc>>11) == 27) {
2334 }
else if (wc < 0xFFFF) {
2335 if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2337 ret = w16e_conv(wc, &c2, &c1);
2338 if (ret)
return ret;
2339 }
else if (wc < 0x10FFFF) {
2357 }
else if ((wc2>>11) == 27) {
2360 }
else if (wc2 < 0xFFFF) {
2361 if (!x0213_combining_p(wc2))
2363 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2372 }
else if (wc2 < 0x10FFFF) {
2384 wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2385 wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2388 return unicode_iconv_combine(wc, wc2);
2391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1 2392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2 2404 if (0xD8 <= c1 && c1 <= 0xDB) {
2405 if (0xDC <= c3 && c3 <= 0xDF) {
2412 if (0xD8 <= c2 && c2 <= 0xDB) {
2413 if (0xDC <= c4 && c4 <= 0xDF) {
2421 return (*unicode_iconv)(wc,
FALSE);
2430 if (0xD8 <= c3 && c3 <= 0xDB) {
2437 if (0xD8 <= c2 && c2 <= 0xDB) {
2445 return unicode_iconv_combine(wc, wc2);
2456 return (*unicode_iconv)(wc,
TRUE);
2478 switch(input_endian){
2480 wc = c2 << 16 | c3 << 8 | c4;
2483 wc = c3 << 16 | c2 << 8 | c1;
2486 wc = c1 << 16 | c4 << 8 | c3;
2489 wc = c4 << 16 | c1 << 8 | c2;
2507 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2511 return (*unicode_iconv)(wc,
FALSE);
2519 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2522 wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2526 return unicode_iconv_combine(wc, wc2);
2534 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2535 return (*unicode_iconv)(wc,
TRUE);
2539 #define output_ascii_escape_sequence(mode) do { \ 2540 if (output_mode != ASCII && output_mode != ISO_8859_1) { \ 2543 (*o_putc)(ascii_intro); \ 2544 output_mode = mode; \ 2549 output_escape_sequence(
int mode)
2551 if (output_mode == mode)
2567 (*o_putc)(kanji_intro);
2594 #ifdef NUMCHAR_OPTION 2596 w16e_conv(c1, &c2, &c1);
2599 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2602 c2 = 0x7F + c1 / 94;
2603 c1 = 0x21 + c1 % 94;
2605 if (encode_fallback) (*encode_fallback)(c1);
2615 else if (c2 ==
EOF) {
2629 (*o_putc)(c2 & 0x7f);
2634 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1)
return;
2646 w16e_conv(c1, &c2, &c1);
2649 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2653 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654 c1 = 0x21 + c1 % 94;
2657 (*o_putc)((c2 & 0x7f) | 0x080);
2658 (*o_putc)(c1 | 0x080);
2660 (*o_putc)((c2 & 0x7f) | 0x080);
2661 (*o_putc)(c1 | 0x080);
2665 if (encode_fallback) (*encode_fallback)(c1);
2673 }
else if (c2 == 0) {
2674 output_mode =
ASCII;
2678 (*o_putc)(
SS2); (*o_putc)(c1|0x80);
2681 (*o_putc)(c1 | 0x080);
2685 #ifdef SHIFTJIS_CP932 2688 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2689 s2e_conv(s2, s1, &c2, &c1);
2694 output_mode =
ASCII;
2699 (*o_putc)((c2 & 0x7f) | 0x080);
2700 (*o_putc)(c1 | 0x080);
2703 (*o_putc)((c2 & 0x7f) | 0x080);
2704 (*o_putc)(c1 | 0x080);
2709 set_iconv(
FALSE, 0);
2713 (*o_putc)(c2 | 0x080);
2714 (*o_putc)(c1 | 0x080);
2721 #ifdef NUMCHAR_OPTION 2723 w16e_conv(c1, &c2, &c1);
2726 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2729 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2731 c1 += 0x40 + (c1 > 0x3e);
2736 if(encode_fallback)(*encode_fallback)(c1);
2745 }
else if (c2 == 0) {
2746 output_mode =
ASCII;
2753 (*o_putc)(c1 | 0x080);
2757 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2764 set_iconv(
FALSE, 0);
2768 e2s_conv(c2, c1, &c2, &c1);
2770 #ifdef SHIFTJIS_CP932 2772 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2782 if (prefix_table[(
unsigned char)c1]){
2783 (*o_putc)(prefix_table[(
unsigned char)c1]);
2789 #ifdef UTF8_OUTPUT_ENABLE 2790 #define OUTPUT_UTF8(val) do { \ 2791 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \ 2793 if (c2) (*o_putc)(c2); \ 2794 if (c3) (*o_putc)(c3); \ 2795 if (c4) (*o_putc)(c4); \ 2805 output_bom_f =
FALSE;
2825 val = e2w_conv(c2, c1);
2827 val2 = e2w_combining(val, c2, c1);
2835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \ 2836 if (output_endian == ENDIAN_LITTLE){ \ 2845 #define OUTPUT_UTF16(val) do { \ 2846 if (nkf_char_unicode_bmp_p(val)) { \ 2847 c2 = (val >> 8) & 0xff; \ 2849 OUTPUT_UTF16_BYTES(c1, c2); \ 2851 val &= VALUE_MASK; \ 2852 if (val <= UNICODE_MAX) { \ 2853 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); \ 2854 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); \ 2855 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \ 2856 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \ 2865 output_bom_f =
FALSE;
2878 val = e2w_conv(c2, c1);
2880 val2 = e2w_combining(val, c2, c1);
2889 #define OUTPUT_UTF32(c) do { \ 2890 if (output_endian == ENDIAN_LITTLE){ \ 2891 (*o_putc)( (c) & 0xFF); \ 2892 (*o_putc)(((c) >> 8) & 0xFF); \ 2893 (*o_putc)(((c) >> 16) & 0xFF); \ 2897 (*o_putc)(((c) >> 16) & 0xFF); \ 2898 (*o_putc)(((c) >> 8) & 0xFF); \ 2899 (*o_putc)( (c) & 0xFF); \ 2907 output_bom_f =
FALSE;
2932 val = e2w_conv(c2, c1);
2934 val2 = e2w_combining(val, c2, c1);
2943 #define SCORE_L2 (1) 2944 #define SCORE_KANA (SCORE_L2 << 1) 2945 #define SCORE_DEPEND (SCORE_KANA << 1) 2946 #define SCORE_CP932 (SCORE_DEPEND << 1) 2947 #define SCORE_X0212 (SCORE_CP932 << 1) 2948 #define SCORE_X0213 (SCORE_X0212 << 1) 2949 #define SCORE_NO_EXIST (SCORE_X0213 << 1) 2950 #define SCORE_iMIME (SCORE_NO_EXIST << 1) 2951 #define SCORE_ERROR (SCORE_iMIME << 1) 2953 #define SCORE_INIT (SCORE_iMIME) 2955 static const nkf_char score_table_A0[] = {
2962 static const nkf_char score_table_F0[] = {
2969 static const nkf_char score_table_8FA0[] = {
2976 static const nkf_char score_table_8FE0[] = {
2983 static const nkf_char score_table_8FF0[] = {
3002 ptr->
score &= ~score;
3013 }
else if (c2 ==
SS2){
3015 }
else if (c2 == 0x8f){
3016 if ((c1 & 0x70) == 0x20){
3017 set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3018 }
else if ((c1 & 0x70) == 0x60){
3019 set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3020 }
else if ((c1 & 0x70) == 0x70){
3021 set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3025 #ifdef UTF8_OUTPUT_ENABLE 3026 }
else if (!e2w_conv(c2, c1)){
3029 }
else if ((c2 & 0x70) == 0x20){
3030 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3031 }
else if ((c2 & 0x70) == 0x70){
3032 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3033 }
else if ((c2 & 0x70) >= 0x50){
3077 if (c <=
DEL && estab_f){
3087 status_check(ptr, c);
3094 }
else if (0xa1 <= c && c <= 0xdf){
3095 status_push_ch(ptr,
SS2);
3096 status_push_ch(ptr, c);
3099 }
else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3101 status_push_ch(ptr, c);
3102 }
else if (0xed <= c && c <= 0xee){
3104 status_push_ch(ptr, c);
3105 #ifdef SHIFTJIS_CP932 3108 status_push_ch(ptr, c);
3111 }
else if (0xf0 <= c && c <= 0xfc){
3113 status_push_ch(ptr, c);
3116 status_disable(ptr);
3120 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3121 status_push_ch(ptr, c);
3122 s2e_conv(ptr->
buf[0], ptr->
buf[1], &ptr->
buf[0], &ptr->
buf[1]);
3126 status_disable(ptr);
3130 #ifdef SHIFTJIS_CP932 3131 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3132 status_push_ch(ptr, c);
3133 if (s2e_conv(ptr->
buf[0], ptr->
buf[1], &ptr->
buf[0], &ptr->
buf[1]) == 0) {
3140 status_disable(ptr);
3143 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3144 status_push_ch(ptr, c);
3145 s2e_conv(ptr->
buf[0], ptr->
buf[1], &ptr->
buf[0], &ptr->
buf[1]);
3149 status_disable(ptr);
3160 status_check(ptr, c);
3167 }
else if (
SS2 == c || (0xa1 <= c && c <= 0xfe)){
3169 status_push_ch(ptr, c);
3171 }
else if (0x8f == c){
3173 status_push_ch(ptr, c);
3176 status_disable(ptr);
3180 if (0xa1 <= c && c <= 0xfe){
3181 status_push_ch(ptr, c);
3185 status_disable(ptr);
3190 if (0xa1 <= c && c <= 0xfe){
3192 status_push_ch(ptr, c);
3194 status_disable(ptr);
3200 #ifdef UTF8_INPUT_ENABLE 3206 status_check(ptr, c);
3213 }
else if (0xc0 <= c && c <= 0xdf){
3215 status_push_ch(ptr, c);
3216 }
else if (0xe0 <= c && c <= 0xef){
3218 status_push_ch(ptr, c);
3219 }
else if (0xf0 <= c && c <= 0xf4){
3221 status_push_ch(ptr, c);
3223 status_disable(ptr);
3228 if (0x80 <= c && c <= 0xbf){
3229 status_push_ch(ptr, c);
3231 int bom = (ptr->
buf[0] == 0xef && ptr->
buf[1] == 0xbb
3232 && ptr->
buf[2] == 0xbf);
3233 w2e_conv(ptr->
buf[0], ptr->
buf[1], ptr->
buf[2],
3234 &ptr->
buf[0], &ptr->
buf[1]);
3241 status_disable(ptr);
3245 if (0x80 <= c && c <= 0xbf){
3247 status_push_ch(ptr, c);
3252 status_disable(ptr);
3262 int action_flag = 1;
3275 }
else if(p->
stat == 0){
3286 if (result && !estab_f){
3288 }
else if (c <=
DEL){
3308 #define STD_GC_BUFSIZE (256) 3311 nkf_state_init(
void)
3316 nkf_buf_clear(nkf_state->
nfc_buf);
3322 nkf_state->
nfc_buf = nkf_buf_new(9);
3356 static int hold_count = 0;
3362 hold_buf[hold_count++] = c2;
3363 return ((hold_count >=
HOLD_SIZE*2) ?
EOF : hold_count);
3382 while ((c2 = (*i_getc)(f)) !=
EOF) {
3388 if (push_hold_buf(c2) ==
EOF || estab_f) {
3420 while (hold_index < hold_count){
3421 c1 = hold_buf[hold_index++];
3426 else if (c1 <=
DEL){
3429 }
else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3434 if (hold_index < hold_count){
3435 c2 = hold_buf[hold_index++];
3446 switch ((*iconv)(c1, c2, 0)) {
3449 if (hold_index < hold_count){
3450 c3 = hold_buf[hold_index++];
3451 }
else if ((c3 = (*i_getc)(f)) ==
EOF) {
3456 if (hold_index < hold_count){
3457 c4 = hold_buf[hold_index++];
3458 }
else if ((c4 = (*i_getc)(f)) ==
EOF) {
3463 (*iconv)(c1, c2, (c3<<8)|c4);
3467 if (hold_index < hold_count){
3468 c3 = hold_buf[hold_index++];
3470 }
else if ((c3 = (*i_getc)(f)) ==
EOF) {
3471 w_iconv_nocombine(c1, c2, 0);
3474 if (hold_index < hold_count){
3475 c4 = hold_buf[hold_index++];
3477 }
else if ((c4 = (*i_getc)(f)) ==
EOF) {
3478 w_iconv_nocombine(c1, c2, 0);
3479 if (fromhold_count <= 2)
3485 if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3486 w_iconv_nocombine(c1, c2, 0);
3487 if (fromhold_count <= 2) {
3490 }
else if (fromhold_count == 3) {
3500 if (hold_index < hold_count){
3501 c3 = hold_buf[hold_index++];
3503 }
else if ((c3 = (*i_getc)(
f)) ==
EOF) {
3509 if ((*iconv)(c1, c2, c3) == -3) {
3512 if (hold_index < hold_count){
3513 c4 = hold_buf[hold_index++];
3515 }
else if ((c4 = (*i_getc)(f)) ==
EOF) {
3516 w_iconv_nocombine(c1, c2, c3);
3519 if (hold_index < hold_count){
3520 c5 = hold_buf[hold_index++];
3522 }
else if ((c5 = (*i_getc)(f)) ==
EOF) {
3523 w_iconv_nocombine(c1, c2, c3);
3524 if (fromhold_count == 4)
3530 if (hold_index < hold_count){
3531 c6 = hold_buf[hold_index++];
3533 }
else if ((c6 = (*i_getc)(f)) ==
EOF) {
3534 w_iconv_nocombine(c1, c2, c3);
3535 if (fromhold_count == 5) {
3537 }
else if (fromhold_count == 4) {
3546 if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3547 w_iconv_nocombine(c1, c2, c3);
3548 if (fromhold_count == 6) {
3550 }
else if (fromhold_count == 5) {
3553 }
else if (fromhold_count == 4) {
3566 if (c3 ==
EOF)
break;
3578 input_bom_f =
FALSE;
3579 switch(c2 = (*i_getc)(f)){
3581 if((c2 = (*i_getc)(f)) == 0x00){
3582 if((c2 = (*i_getc)(f)) == 0xFE){
3583 if((c2 = (*i_getc)(f)) == 0xFF){
3584 if(!input_encoding){
3585 set_iconv(
TRUE, w_iconv32);
3587 if (iconv == w_iconv32) {
3592 (*i_ungetc)(0xFF,
f);
3593 }
else (*i_ungetc)(c2,
f);
3594 (*i_ungetc)(0xFE,
f);
3595 }
else if(c2 == 0xFF){
3596 if((c2 = (*i_getc)(
f)) == 0xFE){
3597 if(!input_encoding){
3598 set_iconv(
TRUE, w_iconv32);
3600 if (iconv == w_iconv32) {
3604 (*i_ungetc)(0xFF,
f);
3605 }
else (*i_ungetc)(c2,
f);
3606 (*i_ungetc)(0xFF,
f);
3607 }
else (*i_ungetc)(c2,
f);
3608 (*i_ungetc)(0x00,
f);
3609 }
else (*i_ungetc)(c2,
f);
3610 (*i_ungetc)(0x00,
f);
3613 if((c2 = (*i_getc)(
f)) == 0xBB){
3614 if((c2 = (*i_getc)(
f)) == 0xBF){
3615 if(!input_encoding){
3616 set_iconv(
TRUE, w_iconv);
3618 if (iconv == w_iconv) {
3622 (*i_ungetc)(0xBF,
f);
3623 }
else (*i_ungetc)(c2,
f);
3624 (*i_ungetc)(0xBB,
f);
3625 }
else (*i_ungetc)(c2,
f);
3626 (*i_ungetc)(0xEF,
f);
3629 if((c2 = (*i_getc)(
f)) == 0xFF){
3630 if((c2 = (*i_getc)(
f)) == 0x00){
3631 if((c2 = (*i_getc)(
f)) == 0x00){
3632 if(!input_encoding){
3633 set_iconv(
TRUE, w_iconv32);
3635 if (iconv == w_iconv32) {
3639 (*i_ungetc)(0x00,
f);
3640 }
else (*i_ungetc)(c2,
f);
3641 (*i_ungetc)(0x00,
f);
3642 }
else (*i_ungetc)(c2,
f);
3643 if(!input_encoding){
3644 set_iconv(
TRUE, w_iconv16);
3646 if (iconv == w_iconv16) {
3651 (*i_ungetc)(0xFF,
f);
3652 }
else (*i_ungetc)(c2,
f);
3653 (*i_ungetc)(0xFE,
f);
3656 if((c2 = (*i_getc)(
f)) == 0xFE){
3657 if((c2 = (*i_getc)(
f)) == 0x00){
3658 if((c2 = (*i_getc)(
f)) == 0x00){
3659 if(!input_encoding){
3660 set_iconv(
TRUE, w_iconv32);
3662 if (iconv == w_iconv32) {
3667 (*i_ungetc)(0x00,
f);
3668 }
else (*i_ungetc)(c2,
f);
3669 (*i_ungetc)(0x00,
f);
3670 }
else (*i_ungetc)(c2,
f);
3671 if(!input_encoding){
3672 set_iconv(
TRUE, w_iconv16);
3674 if (iconv == w_iconv16) {
3679 (*i_ungetc)(0xFE,
f);
3680 }
else (*i_ungetc)(c2,
f);
3681 (*i_ungetc)(0xFF,
f);
3690 broken_getc(
FILE *f)
3702 if (c1==
'@'|| c1==
'B') {
3714 if (c1==
'J'|| c1==
'B') {
3739 if (guess_f && input_eol !=
EOF) {
3740 if (c2 == 0 && c1 ==
LF) {
3741 if (!input_eol) input_eol = prev_cr ?
CRLF :
LF;
3742 else if (input_eol != (prev_cr ?
CRLF :
LF)) input_eol =
EOF;
3743 }
else if (c2 == 0 && c1 ==
CR && input_eol ==
LF) input_eol =
EOF;
3745 else if (!input_eol) input_eol =
CR;
3746 else if (input_eol !=
CR) input_eol =
EOF;
3748 if (prev_cr || (c2 == 0 && c1 ==
LF)) {
3750 if (eolmode_f !=
LF) (*o_eol_conv)(0,
CR);
3751 if (eolmode_f !=
CR) (*o_eol_conv)(0,
LF);
3753 if (c2 == 0 && c1 ==
CR) prev_cr =
CR;
3754 else if (c2 != 0 || c1 !=
LF) (*o_eol_conv)(c2, c1);
3758 put_newline(
void (*func)(
nkf_char))
3811 #define char_size(c2,c1) (c2?2:1) 3819 if (c1==
CR && !fold_preserve_f) {
3821 }
else if (c1==
LF&&f_prev==
CR && fold_preserve_f) {
3824 }
else if (c1==
BS) {
3825 if (f_line>0) f_line--;
3827 }
else if (c2==
EOF && f_line != 0) {
3829 }
else if ((c1==
LF && !fold_preserve_f)
3830 || ((c1==
CR||(c1==
LF&&f_prev!=
CR))
3831 && fold_preserve_f)) {
3833 if (fold_preserve_f) {
3837 }
else if ((f_prev == c1)
3851 }
else if (f_prev==
SP) {
3855 if (++f_line<=fold_len)
3863 }
else if (c1==
'\f') {
3867 }
else if ((c2==0 &&
nkf_isblank(c1)) || (c2 ==
'!' && c1 ==
'!')) {
3873 if (++f_line<=fold_len)
3876 f_prev =
SP; f_line = 0;
3886 if (f_line<=fold_len) {
3889 if (f_line>fold_len+fold_margin) {
3894 if (c1==(0xde&0x7f)) fold_state = 1;
3895 else if (c1==(0xdf&0x7f)) fold_state = 1;
3896 else if (c1==(0xa4&0x7f)) fold_state = 1;
3897 else if (c1==(0xa3&0x7f)) fold_state = 1;
3898 else if (c1==(0xa1&0x7f)) fold_state = 1;
3899 else if (c1==(0xb0&0x7f)) fold_state = 1;
3900 else if (
SP<=c1 && c1<=(0xdf&0x7f)) {
3924 }
else if ((prev0==
SP) ||
3934 if (c1==
'"') fold_state = 1;
3935 else if (c1==
'#') fold_state = 1;
3936 else if (c1==
'W') fold_state = 1;
3937 else if (c1==
'K') fold_state = 1;
3938 else if (c1==
'$') fold_state = 1;
3939 else if (c1==
'%') fold_state = 1;
3940 else if (c1==
'\'') fold_state = 1;
3941 else if (c1==
'(') fold_state = 1;
3942 else if (c1==
')') fold_state = 1;
3943 else if (c1==
'*') fold_state = 1;
3944 else if (c1==
'+') fold_state = 1;
3945 else if (c1==
',') fold_state = 1;
3961 switch(fold_state) {
3963 oconv_newline(o_fconv);
3969 oconv_newline(o_fconv);
3980 static nkf_char z_prev2=0,z_prev1=0;
3996 if (c1 == (0xde&0x7f)) {
3998 (*o_zconv)(dv[(z_prev1-
SP)*2], dv[(z_prev1-
SP)*2+1]);
4000 }
else if (c1 == (0xdf&0x7f) && ev[(z_prev1-
SP)*2]) {
4002 (*o_zconv)(ev[(z_prev1-
SP)*2], ev[(z_prev1-
SP)*2+1]);
4004 }
else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-
SP)*2]) {
4006 (*o_zconv)(ev_x0213[(z_prev1-
SP)*2], ev_x0213[(z_prev1-
SP)*2+1]);
4011 (*o_zconv)(cv[(z_prev1-
SP)*2], cv[(z_prev1-
SP)*2+1]);
4014 if (dv[(c1-
SP)*2] || ev[(c1-
SP)*2] || (x0213_f && ev_x0213[(c1-
SP)*2])) {
4020 (*o_zconv)(cv[(c1-
SP)*2], cv[(c1-
SP)*2+1]);
4031 if (alpha_f&1 && c2 == 0x23) {
4034 }
else if (c2 == 0x21) {
4040 }
else if (alpha_f&4) {
4045 }
else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4051 if (alpha_f&8 && c2 == 0) {
4053 const char *entity = 0;
/* Map each HTML-special ASCII character to its named character reference;
 * the selected string is emitted byte by byte through o_zconv afterwards. */
4055 case '>': entity =
"&gt;";
break;
4056 case '<': entity =
"&lt;";
break;
4057 case '\"': entity =
"&quot;";
break;
4058 case '&': entity =
"&amp;";
break;
4061 while (*entity) (*o_zconv)(0, *entity++);
4108 }
else if (c2 == 0x25) {
/*
 * Lookup table consulted on the c2 == 0x25 path and indexed by (c1 - 0x20).
 * A zero entry is skipped by the caller's truth test, i.e. it means
 * "no replacement"; a non-zero entry is assigned to c2.
 * NOTE(review): each non-zero entry looks like a halfwidth code in the high
 * byte with an optional trailing mark (0x5E / 0x5F) in the low byte --
 * confirm against the code that unpacks c2 after the lookup.
 */
4110 static const int fullwidth_to_halfwidth[] =
4112 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4113 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4114 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4115 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4116 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4117 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4118 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4119 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4120 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4121 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4122 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4123 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4125 if (fullwidth_to_halfwidth[c1-0x20]){
4126 c2 = fullwidth_to_halfwidth[c1-0x20];
4143 #define rot13(c) ( \ 4145 (c <= 'M') ? (c + 13): \ 4146 (c <= 'Z') ? (c - 13): \ 4148 (c <= 'm') ? (c + 13): \ 4149 (c <= 'z') ? (c - 13): \ 4153 #define rot47(c) ( \ 4155 ( c <= 'O') ? (c + 47) : \ 4156 ( c <= '~') ? (c - 47) : \ 4169 (*o_rot_conv)(c2,c1);
4177 if (0x20 < c1 && c1 < 0x74) {
4179 (*o_hira_conv)(c2,c1);
4184 (*o_hira_conv)(c2,c1);
4187 }
else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4189 (*o_hira_conv)(c2,c1);
4197 }
else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4199 }
else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4203 (*o_hira_conv)(c2,c1);
4210 #define RANGE_NUM_MAX 18 4234 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4238 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4244 start = range[i][0];
4247 if (c >= start && c <= end) {
4252 (*o_iso2022jp_check_conv)(c2,c1);
/*
 * MIME encoded-word prefixes of the form "=?charset?method?".
 * "\075" is the octal escape for '='.
 * The table is index-aligned with mime_encode[] and mime_encode_method[]:
 * open_mime() finds the entry whose mime_encode[i] matches the output mode
 * and then uses mime_pattern[i] and mime_encode_method[i] together.
 * Scanning loops stop at the first null entry.
 * mime_begin_strict() compares input against these strings after
 * upper-casing each input byte, and takes the decode method from the
 * character two positions before the end of the matched prefix.
 */
4258 static const unsigned char *mime_pattern[] = {
4259 (
const unsigned char *)
"\075?EUC-JP?B?",
4260 (
const unsigned char *)
"\075?SHIFT_JIS?B?",
4261 (
const unsigned char *)
"\075?ISO-8859-1?Q?",
4262 (
const unsigned char *)
"\075?ISO-8859-1?B?",
4263 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4264 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4265 (
const unsigned char *)
"\075?ISO-2022-JP?Q?",
4267 (
const unsigned char *)
"\075?UTF-8?B?",
4268 (
const unsigned char *)
"\075?UTF-8?Q?",
4270 (
const unsigned char *)
"\075?US-ASCII?Q?",
4277 e_iconv, s_iconv, 0, 0, 0, 0, 0,
4278 #if defined(UTF8_INPUT_ENABLE) 4284 static const nkf_char mime_encode[] = {
4286 #if defined(UTF8_INPUT_ENABLE) 4293 static const nkf_char mime_encode_method[] = {
4294 'B',
'B',
'Q',
'B',
'B',
'B',
'Q',
4295 #if defined(UTF8_INPUT_ENABLE) 4305 #define MIME_BUF_SIZE (1024) 4306 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) 4307 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK] 4316 #define MAXRECOVER 20 4327 mime_input_buf_unshift(c);
4335 (*i_mungetc_buf)(c,
f);
4342 mime_getc_buf(
FILE *f)
4346 return ((mimebuf_f)?
4351 switch_mime_getc(
void)
4353 if (i_getc!=mime_getc) {
4354 i_mgetc = i_getc; i_getc = mime_getc;
4355 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4357 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4358 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4364 unswitch_mime_getc(
void)
4367 i_mgetc = i_mgetc_buf;
4368 i_mungetc = i_mungetc_buf;
4371 i_ungetc = i_mungetc;
4372 if(mime_iconv_back)set_iconv(
FALSE, mime_iconv_back);
4373 mime_iconv_back =
NULL;
4377 mime_integrity(
FILE *f,
const unsigned char *p)
4383 mime_input_state.input = mime_input_state.top;
4384 mime_input_state.last = mime_input_state.top;
4388 q = mime_input_state.input;
4389 while((c=(*i_getc)(f))!=
EOF) {
4390 if (((mime_input_state.input-mime_input_state.top)&
MIME_BUF_MASK)==0) {
4393 if (c==
'=' && d==
'?') {
4397 mime_input_state.input = q;
4401 if (!( (c==
'+'||c==
'/'|| c==
'=' || c==
'?' ||
is_alnum(c))))
4409 mime_input_state.last = mime_input_state.input;
4410 mime_decode_mode = 1;
4416 mime_begin_strict(
FILE *f)
4420 const unsigned char *p,*q;
4423 mime_decode_mode =
FALSE;
4426 p = mime_pattern[j];
4429 for(i=2;p[i]>
SP;i++) {
4430 if (((r[i] = c1 = (*i_getc)(f))==
EOF) ||
nkf_toupper(c1) != p[i]) {
4433 while (mime_pattern[++j]) {
4434 p = mime_pattern[j];
4436 if (p[k]!=q[k])
break;
4439 p = mime_pattern[j];
4449 mime_decode_mode = p[i-2];
4451 mime_iconv_back = iconv;
4455 if (mime_decode_mode==
'B') {
4456 mimebuf_f = unbuf_f;
4459 return mime_integrity(f,mime_pattern[j]);
4477 k = mime_input_state.last;
4481 c1 = (*i_getc)(
f);
mime_input_buf(mime_input_state.last++) = (
unsigned char)c1;
4482 if (c1==
LF||c1==
SP||c1==
CR||
4483 c1==
'-'||c1==
'_'||
is_alnum(c1))
continue;
4487 mime_input_state.last--;
4493 c1 = (*i_getc)(
f);
mime_input_buf(mime_input_state.last++) = (
unsigned char)c1;
4494 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4495 if (c1==
'b'||c1==
'B') {
4496 mime_decode_mode =
'B';
4497 }
else if (c1==
'q'||c1==
'Q') {
4498 mime_decode_mode =
'Q';
4502 c1 = (*i_getc)(
f);
mime_input_buf(mime_input_state.last++) = (
unsigned char)c1;
4503 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4505 mime_decode_mode =
FALSE;
4511 if (!mime_decode_mode) {
4513 mime_decode_mode = 1;
4519 mime_input_state.last = k;
4532 debug(
const char *str)
4535 fprintf(stderr,
"%s\n", str ? str :
"NULL");
4541 set_input_codename(
const char *codename)
4543 if (!input_codename) {
4544 input_codename = codename;
4545 }
else if (strcmp(codename, input_codename) != 0) {
4546 input_codename =
"";
4551 get_guessed_code(
void)
4553 if (input_codename && !*input_codename) {
4554 input_codename =
"BINARY";
4556 struct input_code *p = find_inputcode_byfunc(iconv);
4557 if (!input_codename) {
4558 input_codename =
"ASCII";
4559 }
else if (strcmp(input_codename,
"Shift_JIS") == 0) {
4561 input_codename =
"CP932";
4562 }
else if (strcmp(input_codename,
"EUC-JP") == 0) {
4564 input_codename =
"EUC-JIS-2004";
4566 input_codename =
"EUCJP-MS";
4568 input_codename =
"CP51932";
4569 }
else if (strcmp(input_codename,
"ISO-2022-JP") == 0) {
4571 input_codename =
"CP50221";
4573 input_codename =
"CP50220";
4576 return input_codename;
4579 #if !defined(PERL_XS) && !defined(WIN32DLL) 4581 print_guessed_code(
char *filename)
4583 if (filename !=
NULL) printf(
"%s: ", filename);
4584 if (input_codename && !*input_codename) {
4587 input_codename = get_guessed_code();
4589 printf(
"%s\n", input_codename);
4591 printf(
"%s%s%s%s\n",
4593 iconv != w_iconv16 && iconv != w_iconv32 ?
"" :
4597 input_bom_f ?
" (BOM)" :
"",
4598 input_eol ==
CR ?
" (CR)" :
4599 input_eol ==
LF ?
" (LF)" :
4600 input_eol ==
CRLF ?
" (CRLF)" :
4601 input_eol ==
EOF ?
" (MIXED NL)" :
4635 return hex_getc(
':', f, i_cgetc, i_cungetc);
4641 return (*i_cungetc)(c,
f);
4647 return hex_getc(
'%', f, i_ugetc, i_uungetc);
4653 return (*i_uungetc)(c,
f);
4657 #ifdef NUMCHAR_OPTION 4659 numchar_getc(
FILE *f)
4673 if (buf[i] ==
'x' || buf[i] ==
'X'){
4674 for (j = 0; j < 7; j++){
4686 for (j = 0; j < 8; j++){
4715 return (*i_nungetc)(c,
f);
4719 #ifdef UNICODE_NORMALIZATION 4727 const unsigned char *array;
4728 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4731 if (c ==
EOF || c > 0xFF || (c & 0xc0) == 0x80)
return c;
4733 nkf_buf_push(buf, c);
4735 while (lower <= upper) {
4736 int mid = (lower+upper) / 2;
4739 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[
len]; len++) {
4744 lower = 1, upper = 0;
4747 nkf_buf_push(buf, c);
4749 if (array[len] != nkf_buf_at(buf, len)) {
4750 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4751 else upper = mid - 1;
4760 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4761 nkf_buf_push(buf, array[i]);
4765 }
while (lower <= upper);
4768 c = nkf_buf_pop(buf);
4776 return (*i_nfc_ungetc)(c,
f);
4788 }
else if (c ==
'_') {
4793 }
else if (c >
'/') {
4795 }
else if (c ==
'+' || c ==
'-') {
4807 nkf_char t1, t2, t3, t4, mode, exit_mode;
4813 if (mime_input_state.top != mime_input_state.last) {
4816 if (mime_decode_mode==1 ||mime_decode_mode==
FALSE) {
4817 mime_decode_mode=
FALSE;
4818 unswitch_mime_getc();
4819 return (*i_getc)(
f);
4823 exit_mode = mime_decode_mode;
4826 if (mime_decode_mode ==
'Q') {
4827 if ((c1 = (*i_mgetc)(f)) ==
EOF)
return (
EOF);
4830 if (c1<=
SP ||
DEL<=c1) {
4831 mime_decode_mode = exit_mode;
4834 if (c1!=
'=' && (c1!=
'?' || mimebuf_f ==
FIXED_MIME)) {
4838 mime_decode_mode = exit_mode;
4839 if ((c2 = (*i_mgetc)(f)) ==
EOF)
return (
EOF);
4840 if (c1==
'?'&&c2==
'=' && mimebuf_f !=
FIXED_MIME) {
4842 input_mode = exit_mode;
4844 lwsp_buf = nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4845 while ((c1=(*i_getc)(f))!=
EOF) {
4858 if ((c1=(*i_getc)(f))!=
EOF && c1 ==
LF) {
4874 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4875 if (lwsp_count++>lwsp_size){
4877 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4878 lwsp_buf = lwsp_buf_new;
4884 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4886 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4887 i_ungetc(lwsp_buf[lwsp_count],f);
4893 if (c1==
'='&&c2<
SP) {
4894 while((c1 = (*i_mgetc)(f)) <=
SP) {
4895 if (c1 ==
EOF)
return (
EOF);
4897 mime_decode_mode =
'Q';
4898 goto restart_mime_q;
4901 mime_decode_mode =
'Q';
4905 if ((c3 = (*i_mgetc)(f)) ==
EOF)
return (
EOF);
4906 if (c2<=
SP)
return c2;
4907 mime_decode_mode =
'Q';
4911 if (mime_decode_mode !=
'B') {
4912 mime_decode_mode =
FALSE;
4913 return (*i_mgetc)(
f);
4925 mode = mime_decode_mode;
4926 mime_decode_mode = exit_mode;
4928 while ((c1 = (*i_mgetc)(
f))<=
SP) {
4933 if ((c2 = (*i_mgetc)(
f))<=
SP) {
4940 if ((c1 ==
'?') && (c2 ==
'=')) {
4943 lwsp_buf = nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4944 while ((c1=(*i_getc)(f))!=
EOF) {
4957 if ((c1=(*i_getc)(f))!=
EOF) {
4976 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4977 if (lwsp_count++>lwsp_size){
4979 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4980 lwsp_buf = lwsp_buf_new;
4986 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4988 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4989 i_ungetc(lwsp_buf[lwsp_count],f);
4996 if ((c3 = (*i_mgetc)(f))<=
SP) {
5004 if ((c4 = (*i_mgetc)(f))<=
SP) {
5012 mime_decode_mode = mode;
5016 t1 = 0x3f & base64decode(c1);
5017 t2 = 0x3f & base64decode(c2);
5018 t3 = 0x3f & base64decode(c3);
5019 t4 = 0x3f & base64decode(c4);
5020 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5023 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5026 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit
 * value v. Callers index it with values shifted/masked into 0..63. */
5036 static const char basis_64[] =
5037 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5039 #define MIMEOUT_BUF_LENGTH 74 5050 const unsigned char *p;
5053 p = mime_pattern[0];
5054 for(i=0;mime_pattern[i];i++) {
5055 if (mode == mime_encode[i]) {
5056 p = mime_pattern[i];
5060 mimeout_mode = mime_encode_method[i];
5062 if (base64_count>45) {
5063 if (mimeout_state.count>0 &&
nkf_isblank(mimeout_state.buf[i])){
5064 (*o_mputc)(mimeout_state.buf[i]);
5067 put_newline(o_mputc);
5070 if (mimeout_state.count>0 &&
nkf_isspace(mimeout_state.buf[i])) {
5074 for (;i<mimeout_state.count;i++) {
5076 (*o_mputc)(mimeout_state.buf[i]);
5086 j = mimeout_state.count;
5087 mimeout_state.count = 0;
5089 mime_putc(mimeout_state.buf[i]);
5096 if (mimeout_mode > 0){
5098 if (base64_count + mimeout_state.count/3*4> 73){
5099 (*o_base64conv)(
EOF,0);
5100 oconv_newline(o_base64conv);
5101 (*o_base64conv)(0,
SP);
5105 if ((c2 != 0 || c1 >
DEL) && base64_count + mimeout_state.count/3*4> 66) {
5106 (*o_base64conv)(
EOF,0);
5107 oconv_newline(o_base64conv);
5108 (*o_base64conv)(0,
SP);
5114 if (c2 !=
EOF && base64_count + mimeout_state.count/3*4> 60) {
5115 mimeout_mode = (output_mode==
ASCII ||output_mode ==
ISO_8859_1) ?
'Q' :
'B';
5116 open_mime(output_mode);
5117 (*o_base64conv)(
EOF,0);
5118 oconv_newline(o_base64conv);
5119 (*o_base64conv)(0,
SP);
5138 switch(mimeout_mode) {
5143 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4)]);
5149 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2)]);
5154 if (mimeout_mode > 0) {
5157 }
else if (mimeout_mode !=
'Q')
5165 switch(mimeout_mode) {
5172 (*o_mputc)(
bin2hex(((c>>4)&0xf)));
5182 (*o_mputc)(basis_64[c>>2]);
5187 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5193 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5194 (*o_mputc)(basis_64[c & 0x3F]);
5212 if (mimeout_mode ==
'Q'){
5213 if (base64_count > 71){
5214 if (c!=
CR && c!=
LF) {
5216 put_newline(o_mputc);
5221 if (base64_count > 71){
5223 put_newline(o_mputc);
5239 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5240 j = mimeout_state.count;
5241 mimeout_state.count = 0;
5243 if (mimeout_mode > 0) {
5246 if (
nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5249 mimeout_addchar(mimeout_state.buf[i]);
5253 mimeout_addchar(mimeout_state.buf[i]);
5257 mimeout_addchar(mimeout_state.buf[i]);
5263 mimeout_addchar(mimeout_state.buf[i]);
5269 if (mimeout_state.count > 0){
5270 lastchar = mimeout_state.buf[mimeout_state.count - 1];
5275 if (mimeout_mode==
'Q') {
5277 if (c ==
CR || c ==
LF) {
5282 }
else if (c <=
SP) {
5284 if (base64_count > 70) {
5285 put_newline(o_mputc);
5293 if (base64_count > 70) {
5295 put_newline(o_mputc);
5298 open_mime(output_mode);
5313 if (mimeout_mode <= 0) {
5315 output_mode ==
UTF_8)) {
5318 if (mimeout_mode == -1) {
5321 if (c==
CR || c==
LF) {
5323 open_mime(output_mode);
5329 for (i=0;i<mimeout_state.count;i++) {
5330 (*o_mputc)(mimeout_state.buf[i]);
5331 if (mimeout_state.buf[i] ==
CR || mimeout_state.buf[i] ==
LF){
5342 mimeout_state.buf[0] = (char)c;
5343 mimeout_state.count = 1;
5345 if (base64_count > 1
5346 && base64_count + mimeout_state.count > 76
5347 && mimeout_state.buf[0] !=
CR && mimeout_state.buf[0] !=
LF){
5348 static const char *str =
"boundary=\"";
5349 static int len = 10;
5352 for (; i < mimeout_state.count -
len; ++i) {
5353 if (!strncmp((
char *)(mimeout_state.buf+i), str, len)) {
5359 if (i == 0 || i == mimeout_state.count - len) {
5360 put_newline(o_mputc);
5369 for (j = 0; j <= i; ++j) {
5370 (*o_mputc)(mimeout_state.buf[j]);
5372 put_newline(o_mputc);
5374 for (; j <= mimeout_state.count; ++j) {
5375 mimeout_state.buf[j - i] = mimeout_state.buf[j];
5377 mimeout_state.count -= i;
5380 mimeout_state.buf[mimeout_state.count++] = (char)c;
5382 open_mime(output_mode);
5387 if (lastchar==
CR || lastchar ==
LF){
5388 for (i=0;i<mimeout_state.count;i++) {
5389 (*o_mputc)(mimeout_state.buf[i]);
5392 mimeout_state.count = 0;
5395 for (i=0;i<mimeout_state.count-1;i++) {
5396 (*o_mputc)(mimeout_state.buf[i]);
5399 mimeout_state.buf[0] =
SP;
5400 mimeout_state.count = 1;
5402 open_mime(output_mode);
5407 output_mode ==
UTF_8)) {
5408 if (lastchar ==
CR || lastchar ==
LF){
5410 for (i=0;i<mimeout_state.count;i++) {
5411 mimeout_addchar(mimeout_state.buf[i]);
5413 mimeout_state.count = 0;
5416 for (i=0;i<mimeout_state.count;i++) {
5417 (*o_mputc)(mimeout_state.buf[i]);
5420 mimeout_state.count = 0;
5422 mimeout_state.buf[mimeout_state.count++] = (char)c;
5426 for (i=0;i<mimeout_state.count;i++) {
5427 if (
SP<mimeout_state.buf[i] && mimeout_state.buf[i]<
DEL) {
5429 for (i=0;i<mimeout_state.count;i++) {
5430 (*o_mputc)(mimeout_state.buf[i]);
5433 mimeout_state.count = 0;
5436 mimeout_state.buf[mimeout_state.count++] = (char)c;
5439 for (j=0;j<mimeout_state.count;j++) {
5440 (*o_mputc)(mimeout_state.buf[j]);
5443 mimeout_state.count = 0;
5447 if (mimeout_state.count>0 &&
SP<c && c!=
'=') {
5448 mimeout_state.buf[mimeout_state.count++] = (char)c;
5450 j = mimeout_state.count;
5451 mimeout_state.count = 0;
5453 mimeout_addchar(mimeout_state.buf[i]);
5460 if (mimeout_state.count>0) {
5461 j = mimeout_state.count;
5462 mimeout_state.count = 0;
5464 if (mimeout_state.buf[i]==
CR || mimeout_state.buf[i]==
LF)
5466 mimeout_addchar(mimeout_state.buf[i]);
5472 (*o_mputc)(mimeout_state.buf[i]);
5474 open_mime(output_mode);
5483 mime_prechar(c2, c1);
5484 (*o_base64conv)(c2,c1);
5488 typedef struct nkf_iconv_t {
5491 size_t input_buffer_size;
5492 char *output_buffer;
5493 size_t output_buffer_size;
5497 nkf_iconv_new(
char *tocode,
char *fromcode)
5499 nkf_iconv_t converter;
5502 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5503 converter->output_buffer_size =
IOBUF_SIZE * 2;
5504 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5505 converter->cd = iconv_open(tocode, fromcode);
5506 if (converter->cd == (iconv_t)-1)
5510 perror(fprintf(
"iconv doesn't support %s to %s conversion.", fromcode, tocode));
5513 perror(
"can't iconv_open");
5519 nkf_iconv_convert(nkf_iconv_t *converter,
FILE *
input)
5521 size_t invalid = (size_t)0;
5522 char *input_buffer = converter->input_buffer;
5523 size_t input_length = (size_t)0;
5524 char *output_buffer = converter->output_buffer;
5525 size_t output_length = converter->output_buffer_size;
5530 while ((c = (*i_getc)(
f)) !=
EOF) {
5531 input_buffer[input_length++] = c;
5532 if (input_length < converter->input_buffer_size)
break;
5536 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5537 while (output_length-- > 0) {
5538 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5540 if (ret == (
size_t) - 1) {
5543 if (input_buffer != converter->input_buffer)
5544 memmove(converter->input_buffer, input_buffer, input_length);
5547 converter->output_buffer_size *= 2;
5548 output_buffer =
realloc(converter->outbuf, converter->output_buffer_size);
5549 if (output_buffer ==
NULL) {
5550 perror(
"can't realloc");
5553 converter->output_buffer = output_buffer;
5556 perror(
"can't iconv");
5569 nkf_iconv_close(nkf_iconv_t *convert)
5573 iconv_close(converter->cd);
5595 mime_decode_f =
FALSE;
5601 iso2022jp_f =
FALSE;
5602 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 5605 #ifdef UTF8_INPUT_ENABLE 5606 no_cp932ext_f =
FALSE;
5607 no_best_fit_chars_f =
FALSE;
5608 encode_fallback =
NULL;
5609 unicode_subchar =
'?';
5612 #ifdef UTF8_OUTPUT_ENABLE 5613 output_bom_f =
FALSE;
5616 #ifdef UNICODE_NORMALIZATION 5632 #ifdef SHIFTJIS_CP932 5642 for (i = 0; i < 256; i++){
5643 prefix_table[i] = 0;
5647 mimeout_state.count = 0;
5652 fold_preserve_f =
FALSE;
5658 o_zconv = no_connection;
5659 o_fconv = no_connection;
5660 o_eol_conv = no_connection;
5661 o_rot_conv = no_connection;
5662 o_hira_conv = no_connection;
5663 o_base64conv = no_connection;
5664 o_iso2022jp_check_conv = no_connection;
5667 i_ungetc = std_ungetc;
5669 i_bungetc = std_ungetc;
5672 i_mungetc = std_ungetc;
5673 i_mgetc_buf = std_getc;
5674 i_mungetc_buf = std_ungetc;
5675 output_mode =
ASCII;
5677 mime_decode_mode =
FALSE;
5683 z_prev2=0,z_prev1=0;
5685 iconv_for_check = 0;
5687 input_codename =
NULL;
5688 input_encoding =
NULL;
5689 output_encoding =
NULL;
5697 module_connection(
void)
5699 if (input_encoding) set_input_encoding(input_encoding);
5700 if (!output_encoding) {
5701 output_encoding = nkf_default_encoding();
5703 if (!output_encoding) {
5704 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(
ISO_2022_JP);
5707 set_output_encoding(output_encoding);
5711 output_mode =
UTF_8;
5721 if (noout_f || guess_f){
5728 if (mimeout_f ==
TRUE) {
5729 o_base64conv = oconv; oconv = base64_conv;
5734 if (eolmode_f || guess_f) {
5735 o_eol_conv = oconv; oconv = eol_conv;
5738 o_rot_conv = oconv; oconv = rot_conv;
5741 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5744 o_hira_conv = oconv; oconv = hira_conv;
5747 o_fconv = oconv; oconv = fold_conv;
5750 if (alpha_f || x0201_f) {
5751 o_zconv = oconv; oconv = z_conv;
5755 i_ungetc = std_ungetc;
5759 i_cgetc = i_getc; i_getc = cap_getc;
5760 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5763 i_ugetc = i_getc; i_getc = url_getc;
5764 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5767 #ifdef NUMCHAR_OPTION 5769 i_ngetc = i_getc; i_getc = numchar_getc;
5770 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5773 #ifdef UNICODE_NORMALIZATION 5775 i_nfc_getc = i_getc; i_getc = nfc_getc;
5776 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5780 i_mgetc = i_getc; i_getc = mime_getc;
5781 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5784 i_bgetc = i_getc; i_getc = broken_getc;
5785 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5787 if (input_encoding) {
5790 set_iconv(
FALSE, e_iconv);
5806 #if !defined(PERL_XS) && !defined(WIN32DLL) 5813 module_connection();
5814 while ((c = (*i_getc)(f)) !=
EOF)
5821 #define NEXT continue 5822 #define SKIP c2=0;continue 5823 #define MORE c2=c1;continue 5824 #define SEND (void)0 5826 #define set_input_mode(mode) do { \ 5827 input_mode = mode; \ 5829 set_input_codename("ISO-2022-JP"); \ 5830 debug("ISO-2022-JP"); \ 5834 kanji_convert(
FILE *f)
5839 int is_8bit =
FALSE;
5846 output_mode =
ASCII;
5848 if (module_connection() < 0) {
5849 #if !defined(PERL_XS) && !defined(WIN32DLL) 5850 fprintf(stderr,
"no output encoding given\n");
5856 #ifdef UTF8_INPUT_ENABLE 5857 if(iconv == w_iconv32){
5858 while ((c1 = (*i_getc)(f)) !=
EOF &&
5859 (c2 = (*i_getc)(f)) !=
EOF &&
5860 (c3 = (*i_getc)(f)) !=
EOF &&
5861 (c4 = (*i_getc)(f)) !=
EOF) {
5864 if ((c5 = (*i_getc)(f)) !=
EOF &&
5865 (c6 = (*i_getc)(f)) !=
EOF &&
5866 (c7 = (*i_getc)(f)) !=
EOF &&
5867 (c8 = (*i_getc)(f)) !=
EOF) {
5868 if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5873 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5876 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5882 else if (iconv == w_iconv16) {
5883 while ((c1 = (*i_getc)(f)) !=
EOF &&
5884 (c2 = (*i_getc)(f)) !=
EOF) {
5885 size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5887 (c3 = (*i_getc)(f)) !=
EOF &&
5888 (c4 = (*i_getc)(f)) !=
EOF) {
5889 nkf_iconv_utf_16(c1, c2, c3, c4);
5891 if ((c3 = (*i_getc)(f)) !=
EOF &&
5892 (c4 = (*i_getc)(f)) !=
EOF) {
5893 if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5896 nkf_iconv_utf_16_nocombine(c1, c2);
5899 nkf_iconv_utf_16_nocombine(c1, c2);
5907 while ((c1 = (*i_getc)(f)) !=
EOF) {
5908 #ifdef INPUT_CODE_FIX 5909 if (!input_encoding)
5916 if (!estab_f&&!mime_decode_mode) {
5919 if (h_conv(f, c2, c1)==
EOF) {
5950 }
else if (input_codename && input_codename[0] ==
'I' &&
5951 0xA1 <= c1 && c1 <= 0xDF) {
5956 }
else if (c1 >
DEL) {
5958 if (!estab_f && !iso8859_f) {
5967 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5979 }
else if (
SP < c1 && c1 <
DEL) {
5998 }
else if (c1 ==
'=' && mime_f && !mime_decode_mode) {
6000 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6003 }
else if (c1 ==
'?') {
6007 if (mime_begin_strict(f) ==
EOF)
6010 }
else if (mime_begin(f) ==
EOF)
6022 }
else if (c1 ==
SI && (!is_8bit || mime_decode_mode)) {
6025 }
else if (c1 ==
SO && (!is_8bit || mime_decode_mode)) {
6028 }
else if (c1 ==
ESC && (!is_8bit || mime_decode_mode)) {
6029 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6033 else if (c1 ==
'&') {
6035 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6041 else if (c1 ==
'$') {
6043 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6048 }
else if (c1 ==
'@' || c1 ==
'B') {
6052 }
else if (c1 ==
'(') {
6054 if ((c1 = (*i_getc)(f)) ==
EOF) {
6061 }
else if (c1 ==
'@'|| c1 ==
'B') {
6066 }
else if (c1 ==
'D'){
6070 }
else if (c1 ==
'O' || c1 ==
'Q'){
6073 }
else if (c1 ==
'P'){
6084 }
else if (broken_f&0x2) {
6095 }
else if (c1 ==
'(') {
6097 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6103 else if (c1 ==
'I') {
6109 else if (c1 ==
'B' || c1 ==
'J' || c1 ==
'H') {
6114 else if (broken_f&0x2) {
6124 else if (c1 ==
'.') {
6126 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6129 else if (c1 ==
'A') {
6140 else if (c1 ==
'N') {
6159 }
else if (c1 ==
ESC && iconv == s_iconv) {
6161 if ((c1 = (*i_getc)(f)) ==
EOF) {
6164 }
else if (c1 ==
'$') {
6166 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6168 }
else if ((
'E' <= c1 && c1 <=
'G') ||
6169 (
'O' <= c1 && c1 <=
'Q')) {
6177 static const nkf_char jphone_emoji_first_table[7] =
6178 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6180 if ((c1 = (*i_getc)(f)) ==
EOF)
LAST;
6181 while (
SP <= c1 && c1 <=
'z') {
6182 (*oconv)(0, c1 + c3);
6183 if ((c1 = (*i_getc)(f)) ==
EOF)
LAST;
6199 }
else if (c1 ==
LF || c1 ==
CR) {
6203 }
else if (mime_decode_f && !mime_decode_mode){
6205 if ((c1=(*i_getc)(f))!=
EOF && c1 ==
SP) {
6214 if ((c1=(*i_getc)(f))!=
EOF) {
6218 }
else if (c1 ==
LF && (c1=(*i_getc)(f))!=
EOF && c1 ==
SP) {
6238 switch ((*iconv)(c2, c1, 0)) {
6241 if ((c3 = (*i_getc)(f)) !=
EOF) {
6244 if ((c4 = (*i_getc)(f)) !=
EOF) {
6246 (*iconv)(c2, c1, c3|c4);
6252 if ((c3 = (*i_getc)(f)) !=
EOF) {
6253 if ((c4 = (*i_getc)(f)) !=
EOF) {
6254 if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6257 w_iconv_nocombine(c2, c1, 0);
6261 w_iconv_nocombine(c2, c1, 0);
6264 w_iconv_nocombine(c2, c1, 0);
6269 if ((c3 = (*i_getc)(f)) !=
EOF) {
6271 if ((*iconv)(c2, c1, c3) == -3) {
6274 if ((c4 = (*i_getc)(f)) !=
EOF) {
6275 if ((c5 = (*i_getc)(f)) !=
EOF) {
6276 if ((c6 = (*i_getc)(f)) !=
EOF) {
6277 if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6281 w_iconv_nocombine(c2, c1, c3);
6286 w_iconv_nocombine(c2, c1, c3);
6290 w_iconv_nocombine(c2, c1, c3);
6293 w_iconv_nocombine(c2, c1, c3);
6303 0x7F <= c2 && c2 <= 0x92 &&
6304 0x21 <= c1 && c1 <= 0x7E) {
6320 (*oconv)(input_mode, c1);
6331 (*iconv)(
EOF, 0, 0);
6332 if (!input_codename)
6341 set_input_codename(result->
name);
6358 options(
unsigned char *cp)
6362 unsigned char *cp_back =
NULL;
6367 while(*cp && *cp++!=
'-');
6368 while (*cp || cp_back) {
6377 if (!*cp || *cp ==
SP) {
6381 for (i=0;i<(int)(
sizeof(long_option)/
sizeof(long_option[0]));i++) {
6382 p = (
unsigned char *)long_option[i].
name;
6383 for (j=0;*p && *p !=
'=' && *p == cp[j];p++, j++);
6384 if (*p == cp[j] || cp[j] ==
SP){
6391 #if !defined(PERL_XS) && !defined(WIN32DLL) 6392 fprintf(stderr,
"unknown long option: --%s\n", cp);
6396 while(*cp && *cp !=
SP && cp++);
6397 if (long_option[i].
alias[0]){
6399 cp = (
unsigned char *)long_option[i].alias;
6402 if (strcmp(long_option[i].
name,
"help") == 0){
6407 if (strcmp(long_option[i].name,
"ic=") == 0){
6408 enc = nkf_enc_find((
char *)p);
6410 input_encoding = enc;
6413 if (strcmp(long_option[i].name,
"oc=") == 0){
6414 enc = nkf_enc_find((
char *)p);
6417 output_encoding = enc;
6420 if (strcmp(long_option[i].name,
"guess=") == 0){
6421 if (p[0] ==
'0' || p[0] ==
'1') {
6429 if (strcmp(long_option[i].name,
"overwrite") == 0){
6432 preserve_time_f =
TRUE;
6435 if (strcmp(long_option[i].name,
"overwrite=") == 0){
6438 preserve_time_f =
TRUE;
6440 backup_suffix = (
char *)p;
6443 if (strcmp(long_option[i].name,
"in-place") == 0){
6446 preserve_time_f =
FALSE;
6449 if (strcmp(long_option[i].name,
"in-place=") == 0){
6452 preserve_time_f =
FALSE;
6454 backup_suffix = (
char *)p;
6459 if (strcmp(long_option[i].name,
"cap-input") == 0){
6463 if (strcmp(long_option[i].name,
"url-input") == 0){
6468 #ifdef NUMCHAR_OPTION 6469 if (strcmp(long_option[i].name,
"numchar-input") == 0){
6475 if (strcmp(long_option[i].name,
"no-output") == 0){
6479 if (strcmp(long_option[i].name,
"debug") == 0){
6484 if (strcmp(long_option[i].name,
"cp932") == 0){
6485 #ifdef SHIFTJIS_CP932 6489 #ifdef UTF8_OUTPUT_ENABLE 6494 if (strcmp(long_option[i].name,
"no-cp932") == 0){
6495 #ifdef SHIFTJIS_CP932 6499 #ifdef UTF8_OUTPUT_ENABLE 6504 #ifdef SHIFTJIS_CP932 6505 if (strcmp(long_option[i].name,
"cp932inv") == 0){
6512 if (strcmp(long_option[i].name,
"x0212") == 0){
6519 if (strcmp(long_option[i].name,
"exec-in") == 0){
6523 if (strcmp(long_option[i].name,
"exec-out") == 0){
6528 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE) 6529 if (strcmp(long_option[i].name,
"no-cp932ext") == 0){
6530 no_cp932ext_f =
TRUE;
6533 if (strcmp(long_option[i].name,
"no-best-fit-chars") == 0){
6534 no_best_fit_chars_f =
TRUE;
6537 if (strcmp(long_option[i].name,
"fb-skip") == 0){
6538 encode_fallback =
NULL;
6541 if (strcmp(long_option[i].name,
"fb-html") == 0){
6542 encode_fallback = encode_fallback_html;
6545 if (strcmp(long_option[i].name,
"fb-xml") == 0){
6546 encode_fallback = encode_fallback_xml;
6549 if (strcmp(long_option[i].name,
"fb-java") == 0){
6550 encode_fallback = encode_fallback_java;
6553 if (strcmp(long_option[i].name,
"fb-perl") == 0){
6554 encode_fallback = encode_fallback_perl;
6557 if (strcmp(long_option[i].name,
"fb-subchar") == 0){
6558 encode_fallback = encode_fallback_subchar;
6561 if (strcmp(long_option[i].name,
"fb-subchar=") == 0){
6562 encode_fallback = encode_fallback_subchar;
6563 unicode_subchar = 0;
6567 unicode_subchar *= 10;
6568 unicode_subchar +=
hex2bin(p[i]);
6570 }
else if(p[1] ==
'x' || p[1] ==
'X'){
6573 unicode_subchar <<= 4;
6574 unicode_subchar |=
hex2bin(p[i]);
6579 unicode_subchar *= 8;
6580 unicode_subchar +=
hex2bin(p[i]);
6583 w16e_conv(unicode_subchar, &i, &j);
6584 unicode_subchar = i<<8 | j;
6588 #ifdef UTF8_OUTPUT_ENABLE 6589 if (strcmp(long_option[i].name,
"ms-ucs-map") == 0){
6594 #ifdef UNICODE_NORMALIZATION 6595 if (strcmp(long_option[i].name,
"utf8mac-input") == 0){
6600 if (strcmp(long_option[i].name,
"prefix=") == 0){
6603 prefix_table[p[i]] = p[0];
6608 #if !defined(PERL_XS) && !defined(WIN32DLL) 6609 fprintf(stderr,
"unsupported long option: --%s\n", long_option[i].name);
6625 }
else if (*cp==
'2') {
6639 output_encoding = nkf_enc_from_index(
ISO_2022_JP);
6642 output_encoding = nkf_enc_from_index(
EUCJP_NKF);
6645 output_encoding = nkf_enc_from_index(
SHIFT_JIS);
6649 input_encoding = nkf_enc_from_index(
ISO_8859_1);
6652 if (*cp==
'@'||*cp==
'B')
6653 kanji_intro = *cp++;
6657 if (*cp==
'J'||*cp==
'B'||*cp==
'H')
6658 ascii_intro = *cp++;
6665 if (
'9'>= *cp && *cp>=
'0')
6666 hira_f |= (*cp++ -
'0');
6673 #if defined(MSDOS) || defined(__OS2__) 6680 show_configuration();
6688 #ifdef UTF8_OUTPUT_ENABLE 6694 output_encoding = nkf_enc_from_index(
UTF_8N);
6696 output_bom_f =
TRUE;
6697 output_encoding = nkf_enc_from_index(
UTF_8_BOM);
6701 if (
'1'== cp[0] &&
'6'==cp[1]) {
6704 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6708 output_encoding = nkf_enc_from_index(
UTF_8);
6714 output_bom_f =
TRUE;
6715 }
else if (cp[0] ==
'B') {
6717 output_bom_f =
TRUE;
6720 output_bom_f =
FALSE;
6722 enc_idx = enc_idx ==
UTF_16 6726 enc_idx = enc_idx ==
UTF_16 6730 output_encoding = nkf_enc_from_index(enc_idx);
6734 #ifdef UTF8_INPUT_ENABLE 6738 input_encoding = nkf_enc_from_index(
UTF_8);
6741 if (
'1'== cp[0] &&
'6'==cp[1]) {
6745 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6750 input_encoding = nkf_enc_from_index(
UTF_8);
6756 }
else if (cp[0] ==
'B') {
6760 enc_idx = (enc_idx ==
UTF_16 6763 input_encoding = nkf_enc_from_index(enc_idx);
6772 input_encoding = nkf_enc_from_index(
EUCJP_NKF);
6775 input_encoding = nkf_enc_from_index(
SHIFT_JIS);
6785 while (
'0'<= *cp && *cp <=
'4') {
6786 alpha_f |= 1 << (*cp++ -
'0');
6807 fold_preserve_f =
TRUE;
6811 while(
'0'<= *cp && *cp <=
'9') {
6813 fold_len += *cp++ -
'0';
6815 if (!(0<fold_len && fold_len<BUFSIZ))
6820 while(
'0'<= *cp && *cp <=
'9') {
6822 fold_margin += *cp++ -
'0';
6828 if (*cp==
'B'||*cp==
'Q') {
6829 mime_decode_mode = *cp++;
6831 }
else if (*cp==
'N') {
6832 mime_f =
TRUE; cp++;
6833 }
else if (*cp==
'S') {
6835 }
else if (*cp==
'0') {
6836 mime_decode_f =
FALSE;
6837 mime_f =
FALSE; cp++;
6846 }
else if (*cp==
'Q') {
6858 if (
'9'>= *cp && *cp>=
'0')
6859 broken_f |= 1<<(*cp++ -
'0');
6879 eolmode_f =
LF; cp++;
6880 }
else if (*cp==
'm') {
6881 eolmode_f =
CR; cp++;
6882 }
else if (*cp==
'w') {
6883 eolmode_f =
CRLF; cp++;
6884 }
else if (*cp==
'0') {
6885 eolmode_f = 0; cp++;
6890 if (
'2' <= *cp && *cp <=
'9') {
6893 }
else if (*cp ==
'0' || *cp ==
'1') {
6903 while(*cp && *cp++!=
'-');
6906 #if !defined(PERL_XS) && !defined(WIN32DLL) 6907 fprintf(stderr,
"unknown option: -%c\n", *(cp-1));
6917 #include "nkf32dll.c" 6918 #elif defined(PERL_XS) 6926 char *outfname =
NULL;
6930 _BufferSize.y = 400;
6932 #ifdef DEFAULT_CODE_LOCALE 6933 setlocale(LC_CTYPE,
"");
6937 for (argc--,argv++; (argc > 0) && **argv ==
'-'; argc--, argv++) {
6938 cp = (
unsigned char *)*argv;
6943 if (pipe(fds) < 0 || (pid = fork()) < 0){
6954 execvp(argv[1], &argv[1]);
6971 int debug_f_back = debug_f;
6974 int exec_f_back = exec_f;
6977 int x0212_f_back = x0212_f;
6979 int x0213_f_back = x0213_f;
6980 int guess_f_back = guess_f;
6982 guess_f = guess_f_back;
6985 debug_f = debug_f_back;
6988 exec_f = exec_f_back;
6990 x0212_f = x0212_f_back;
6991 x0213_f = x0213_f_back;
6994 if (binmode_f ==
TRUE)
6995 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 6996 if (freopen(
"",
"wb",stdout) ==
NULL)
7003 setbuf(stdout, (
char *)
NULL);
7008 if (binmode_f ==
TRUE)
7009 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7010 if (freopen(
"",
"rb",stdin) == NULL)
return (-1);
7018 kanji_convert(stdin);
7019 if (guess_f) print_guessed_code(NULL);
7023 int is_argument_error =
FALSE;
7025 input_codename =
NULL;
7028 iconv_for_check = 0;
7030 if ((fin = fopen((origfname = *argv++),
"r")) == NULL) {
7032 is_argument_error =
TRUE;
7041 if (file_out_f ==
TRUE) {
7044 outfname = nkf_xmalloc(
strlen(origfname)
7045 +
strlen(
".nkftmpXXXXXX")
7047 strcpy(outfname, origfname);
7051 for (i =
strlen(outfname); i; --i){
7052 if (outfname[i - 1] ==
'/' 7053 || outfname[i - 1] ==
'\\'){
7059 strcat(outfname,
"ntXXXXXX");
7061 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7062 S_IREAD | S_IWRITE);
7064 strcat(outfname,
".nkftmpXXXXXX");
7065 fd = mkstemp(outfname);
7068 || (fd_backup = dup(
fileno(stdout))) < 0
7080 outfname =
"nkf.out";
7083 if(freopen(outfname,
"w", stdout) == NULL) {
7087 if (binmode_f ==
TRUE) {
7088 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7089 if (freopen(
"",
"wb",stdout) == NULL)
7096 if (binmode_f ==
TRUE)
7097 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7098 if (freopen(
"",
"rb",fin) == NULL)
7107 char *filename =
NULL;
7109 if (nfiles > 1) filename = origfname;
7110 if (guess_f) print_guessed_code(filename);
7116 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7127 if (
stat(origfname, &sb)) {
7128 fprintf(stderr,
"Can't stat %s\n", origfname);
7131 if (chmod(outfname, sb.st_mode)) {
7132 fprintf(stderr,
"Can't set permission %s\n", outfname);
7136 if(preserve_time_f){
7137 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7138 tb[0] = tb[1] = sb.st_mtime;
7139 if (
utime(outfname, tb)) {
7140 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7145 if (
utime(outfname, &tb)) {
7146 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7151 char *backup_filename = get_backup_filename(backup_suffix, origfname);
7153 unlink(backup_filename);
7155 if (rename(origfname, backup_filename)) {
7156 perror(backup_filename);
7157 fprintf(stderr,
"Can't rename %s to %s\n",
7158 origfname, backup_filename);
7163 if (unlink(origfname)){
7168 if (rename(outfname, origfname)) {
7170 fprintf(stderr,
"Can't rename %s to %s\n",
7171 outfname, origfname);
7178 if (is_argument_error)
7182 if (file_out_f ==
FALSE)
7183 scanf(
"%d",&end_check);
7187 if (file_out_f ==
TRUE)
#define nkf_char_unicode_new(c)
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
#define OUTPUT_UTF16(val)
#define output_ascii_escape_sequence(mode)
#define OUTPUT_UTF16_BYTES(c1, c2)
nkf_native_encoding NkfEncodingUTF_32
#define NKF_ICONV_INVALID_CODE_RANGE
const unsigned short *const x0212_shiftjis[]
size_t strlen(const char *)
#define NKF_ICONV_WAIT_COMBINING_CHAR
const unsigned short *const *const utf8_to_euc_3bytes_932[]
#define nkf_enc_asciicompat(enc)
#define nkf_enc_name(enc)
nkf_native_encoding NkfEncodingASCII
const unsigned short *const utf8_to_euc_2bytes_932[]
#define nkf_enc_to_iconv(enc)
#define UTF8_INPUT_ENABLE
nkf_encoding nkf_encoding_table[]
const unsigned short cp932inv[2][189]
#define nkf_char_unicode_p(c)
#define UTF16_TO_UTF32(lead, trail)
#define nkf_char_unicode_value_p(c)
#define nkf_buf_length(buf)
const unsigned short *const euc_to_utf8_2bytes[]
#define nkf_noescape_mime(c)
#define nkf_char_unicode_bmp_p(c)
const nkf_native_encoding * base_encoding
#define DEFAULT_CODE_LOCALE
#define MIME_DECODE_DEFAULT
#define NKF_ICONV_NOT_COMBINED
RUBY_EXTERN void * memmove(void *, const void *, size_t)
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
const unsigned short euc_to_utf8_1byte[]
#define UTF8_OUTPUT_ENABLE
struct input_code input_code_list[]
#define NKF_ICONV_NEED_TWO_MORE_BYTES
#define nkf_enc_unicode_p(enc)
#define nkf_buf_empty_p(buf)
nkf_native_encoding NkfEncodingISO_2022_JP
#define set_input_mode(mode)
const unsigned short *const euc_to_utf8_2bytes_ms[]
#define range(low, item, hi)
#define is_ibmext_in_sjis(c2)
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
struct @39 encoding_name_to_id_table[]
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
#define mime_input_buf(n)
#define nkf_enc_to_index(enc)
nkf_native_encoding NkfEncodingUTF_16
const unsigned short *const utf8_to_euc_2bytes_ms[]
#define nkf_byte_jisx0201_katakana_p(c)
register unsigned int len
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
const unsigned short *const x0212_to_utf8_2bytes[]
const unsigned short *const euc_to_utf8_2bytes_x0213[]
#define X0213_SURROGATE_FIND(tbl, size, euc)
const unsigned short shiftjis_x0212[3][189]
#define setvbuffer(fp, buf, size)
int utime(const char *filename, const struct utimbuf *times)
const struct normalization_pair normalization_table[]
const unsigned short *const euc_to_utf8_2bytes_mac[]
const unsigned short *const utf8_to_euc_2bytes[]
const unsigned short shiftjis_cp932[3][189]
#define char_size(c2, c1)
nkf_native_encoding NkfEncodingShift_JIS
#define nkf_enc_cp5022x_p(enc)
int main(int argc, char **argv)
RUBY_EXTERN int dup2(int, int)
const unsigned short *const *const utf8_to_euc_3bytes[]
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
const unsigned short *const utf8_to_euc_2bytes_mac[]
#define nkf_enc_to_oconv(enc)
#define MIMEOUT_BUF_LENGTH
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
nkf_native_encoding NkfEncodingUTF_8
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
const unsigned short *const utf8_to_euc_2bytes_x0213[]
nkf_native_encoding NkfEncodingEUC_JP