/* Version string of this nkf source. */
23 #define NKF_VERSION "2.1.2"
/* Release date that goes with NKF_VERSION, written as YYYY-MM-DD. */
24 #define NKF_RELEASE_DATE "2011-09-08"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2011, The nkf Project."
38 # define INCL_DOSERRORS
168 {
ASCII,
"US-ASCII", &NkfEncodingASCII},
169 {
ISO_8859_1,
"ISO-8859-1", &NkfEncodingASCII},
170 {
ISO_2022_JP,
"ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {
CP50220,
"CP50220", &NkfEncodingISO_2022_JP},
172 {
CP50221,
"CP50221", &NkfEncodingISO_2022_JP},
173 {
CP50222,
"CP50222", &NkfEncodingISO_2022_JP},
177 {
SHIFT_JIS,
"Shift_JIS", &NkfEncodingShift_JIS},
178 {
WINDOWS_31J,
"Windows-31J", &NkfEncodingShift_JIS},
179 {
CP10001,
"CP10001", &NkfEncodingShift_JIS},
180 {
EUC_JP,
"EUC-JP", &NkfEncodingEUC_JP},
181 {
EUCJP_NKF,
"eucJP-nkf", &NkfEncodingEUC_JP},
182 {
CP51932,
"CP51932", &NkfEncodingEUC_JP},
183 {
EUCJP_MS,
"eucJP-MS", &NkfEncodingEUC_JP},
189 {
UTF_8,
"UTF-8", &NkfEncodingUTF_8},
190 {
UTF_8N,
"UTF-8N", &NkfEncodingUTF_8},
191 {
UTF_8_BOM,
"UTF-8-BOM", &NkfEncodingUTF_8},
192 {
UTF8_MAC,
"UTF8-MAC", &NkfEncodingUTF_8},
193 {
UTF_16,
"UTF-16", &NkfEncodingUTF_16},
194 {
UTF_16BE,
"UTF-16BE", &NkfEncodingUTF_16},
196 {
UTF_16LE,
"UTF-16LE", &NkfEncodingUTF_16},
198 {
UTF_32,
"UTF-32", &NkfEncodingUTF_32},
199 {
UTF_32BE,
"UTF-32BE", &NkfEncodingUTF_32},
201 {
UTF_32LE,
"UTF-32LE", &NkfEncodingUTF_32},
203 {
BINARY,
"BINARY", &NkfEncodingASCII},
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
/*
 * is_alnum(c): non-zero when c lies in 'a'..'z', 'A'..'Z' or '0'..'9'.
 * The test compares against character literals only, so it does not
 * depend on the current locale.
 *
 * The argument is parenthesized at every use so that an expression such
 * as `cond ? a : b` can be passed safely.  It is still evaluated more
 * than once: do not pass an argument with side effects.
 */
#define is_alnum(c) \
    ((('a' <= (c)) && ((c) <= 'z')) || \
     (('A' <= (c)) && ((c) <= 'Z')) || \
     (('0' <= (c)) && ((c) <= '9')))
/*
 * Character classification helpers that compare against character
 * literals instead of consulting <ctype.h>, so their results do not
 * depend on the current locale.
 *
 * SP, TAB, CR and LF are not defined in this part of the file; they must
 * be in scope wherever nkf_isblank / nkf_isspace / nkf_isprint are used.
 *
 * Every use of the argument is parenthesized so expressions can be passed
 * safely.  The argument is evaluated more than once: do not pass an
 * argument with side effects.
 */
/* Maps 'a'..'z' to 'A'..'Z'; any other value is returned unchanged. */
#define nkf_toupper(c)  ((('a' <= (c)) && ((c) <= 'z')) ? ((c) - ('a' - 'A')) : (c))
/* Non-zero for '0'..'7'. */
#define nkf_isoctal(c)  (('0' <= (c)) && ((c) <= '7'))
/* Non-zero for '0'..'9'. */
#define nkf_isdigit(c)  (('0' <= (c)) && ((c) <= '9'))
/* Non-zero for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define nkf_isxdigit(c) (nkf_isdigit(c) || (('a' <= (c)) && ((c) <= 'f')) || (('A' <= (c)) && ((c) <= 'F')))
/* Non-zero for SP or TAB. */
#define nkf_isblank(c)  (((c) == SP) || ((c) == TAB))
/* Non-zero for SP, TAB, CR or LF. */
#define nkf_isspace(c)  (nkf_isblank(c) || ((c) == CR) || ((c) == LF))
/* Non-zero for 'a'..'z' and 'A'..'Z'. */
#define nkf_isalpha(c)  ((('a' <= (c)) && ((c) <= 'z')) || (('A' <= (c)) && ((c) <= 'Z')))
/* Non-zero for letters and decimal digits. */
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
/* Non-zero for SP..'~'. */
#define nkf_isprint(c)  ((SP <= (c)) && ((c) <= '~'))
/* Non-zero for '!'..'~'. */
#define nkf_isgraph(c)  (('!' <= (c)) && ((c) <= '~'))
/*
 * hex2bin(c): numeric value (0..15) of the hexadecimal digit c, accepting
 * both 'a'..'f' and 'A'..'F'.  Any other value yields 0.
 *
 * The argument is parenthesized at every use so expressions can be passed
 * safely; it is evaluated more than once, so avoid side effects.
 */
#define hex2bin(c) ((('0' <= (c)) && ((c) <= '9')) ? ((c) - '0') : \
                    (('A' <= (c)) && ((c) <= 'F')) ? ((c) - 'A' + 10) : \
                    (('a' <= (c)) && ((c) <= 'f')) ? ((c) - 'a' + 10) : 0)
/*
 * bin2hex(c): upper-case hexadecimal digit for the low four bits of c.
 * The argument is parenthesized before masking; without that, an argument
 * such as `a | b` would be masked only partially and could index past the
 * 16-digit string.
 */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])
/*
 * is_eucg3(c2): non-zero when bits 8..15 of c2 equal SS3 (SS3 is defined
 * outside this part of the file).  c2 is truncated to unsigned short
 * first, so higher bits are ignored.
 *
 * The argument is parenthesized so the cast applies to the whole
 * argument expression rather than to its first operand only.
 */
#define is_eucg3(c2) ((((unsigned short)(c2)) >> 8) == SS3)
/*
 * nkf_noescape_mime(c): non-zero when c is CR or LF, or lies strictly
 * between SP and DEL and is none of  ? = _ ( ) . "  (0x22 is the double
 * quote).  Judging by the name, these are the bytes that may be emitted
 * without escaping in MIME encoded output -- confirm against the callers.
 *
 * CR, LF, SP and DEL are defined outside this part of the file.  The
 * argument is parenthesized at every use; it is evaluated several times,
 * so avoid side effects.
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
/*
 * is_ibmext_in_sjis(c2): non-zero when c2 lies in the inclusive range
 * CP932_TABLE_BEGIN..CP932_TABLE_END (both defined outside this part of
 * the file).
 */
#define is_ibmext_in_sjis(c2) ((CP932_TABLE_BEGIN <= (c2)) && ((c2) <= CP932_TABLE_END))
/*
 * nkf_byte_jisx0201_katakana_p(c): non-zero when c lies in SP..0x5F.
 * Presumably the 7-bit form of a JIS X 0201 katakana byte -- confirm
 * against the callers.
 *
 * Both macros parenthesize their argument and evaluate it twice.
 */
#define nkf_byte_jisx0201_katakana_p(c) ((SP <= (c)) && ((c) <= 0x5F))
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
308 #define IOBUF_SIZE 16384
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
322 extern POINT _BufferSize;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
353 #ifdef UTF8_INPUT_ENABLE
363 #ifdef UTF8_OUTPUT_ENABLE
381 #if !defined(PERL_XS) && !defined(WIN32DLL)
386 #define NKF_UNSPECIFIED (-TRUE)
405 #ifdef UNICODE_NORMALIZATION
/*
 * Tagging of character values.
 *
 * A value whose top byte (CLASS_MASK) equals CLASS_UNICODE carries a
 * Unicode code point in its low 24 bits (VALUE_MASK).  PREFIX_EUCG3 is
 * OR-ed into a value by nkf_char_euc3_new().
 *
 * NKF_INT32_C is defined outside this part of the file.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
/* Returns c with the PREFIX_EUCG3 bits set. */
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
/* Returns c tagged as a Unicode value. */
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/*
 * Predicates.  The argument is parenthesized before masking so that an
 * expression argument is masked as a whole.
 */
/* Non-zero when c carries the CLASS_UNICODE tag. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* Non-zero when the low 24 bits of c do not exceed UNICODE_BMP_MAX. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* Non-zero when the low 24 bits of c do not exceed UNICODE_MAX. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
433 #ifdef NUMCHAR_OPTION
451 static int exec_f = 0;
454 #ifdef SHIFTJIS_CP932
473 {
"EUC-JP", 0, 0, 0, {0, 0, 0},
e_status,
e_iconv, 0},
474 {
"Shift_JIS", 0, 0, 0, {0, 0, 0},
s_status,
s_iconv, 0},
475 #ifdef UTF8_INPUT_ENABLE
476 {
"UTF-8", 0, 0, 0, {0, 0, 0},
w_status,
w_iconv, 0},
477 {
"UTF-16", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv16, 0},
478 {
"UTF-32", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv32, 0},
501 #define FOLD_MARGIN 10
502 #define DEFAULT_FOLD 60
511 fprintf(stderr,
"nkf internal module connection failure.\n");
561 static const unsigned char cv[]= {
562 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
563 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
564 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
565 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
566 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
567 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
568 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
569 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
570 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
571 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
572 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
573 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
574 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
575 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
576 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
577 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
583 static const unsigned char dv[]= {
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
589 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
590 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
591 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
592 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
593 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
595 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 static const unsigned char ev[]= {
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
616 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
626 static const unsigned char fv[] = {
628 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
629 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
630 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
632 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
633 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
634 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
636 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
657 static int end_check;
665 if (size == 0) size = 1;
669 perror(
"can't malloc");
679 if (size == 0) size = 1;
683 perror(
"can't realloc");
690 #define nkf_xfree(ptr) free(ptr)
696 for (i = 0; src[
i] && target[
i]; i++) {
699 if (src[i] || target[i])
return FALSE;
709 return &nkf_encoding_table[
idx];
716 if (name[0] ==
'X' && *(name+1) ==
'-') name += 2;
730 if (idx < 0)
return 0;
/*
 * Accessors and predicates for an encoding descriptor.  `enc` is a
 * pointer to a structure with `name`, `id` and `base_encoding` members.
 * `base_encoding` in turn points to a structure with `iconv` and `oconv`
 * members.
 */
/* The `name` member of the descriptor. */
734 #define nkf_enc_name(enc) (enc)->name
/* The `id` member of the descriptor (compared below against CP50220 etc.). */
735 #define nkf_enc_to_index(enc) (enc)->id
/* Pointer to the base encoding this descriptor refers to. */
736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* The `iconv` member of the base encoding. */
737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
/* The `oconv` member of the base encoding. */
738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* Non-zero when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
739 #define nkf_enc_asciicompat(enc) (\
740 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
741 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* Non-zero when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 bases. */
742 #define nkf_enc_unicode_p(enc) (\
743 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
745 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* Non-zero when the descriptor's id is CP50220, CP50221 or CP50222. */
746 #define nkf_enc_cp5022x_p(enc) (\
747 nkf_enc_to_index(enc) == CP50220 ||\
748 nkf_enc_to_index(enc) == CP50221 ||\
749 nkf_enc_to_index(enc) == CP50222)
751 #ifdef DEFAULT_CODE_LOCALE
755 #ifdef HAVE_LANGINFO_H
756 return nl_langinfo(CODESET);
757 #elif defined(__WIN32__)
759 sprintf(buf,
"CP%d", GetACP());
761 #elif defined(__OS2__)
762 # if defined(INT_IS_SHORT)
768 ULONG ulCP[1], ulncp;
769 DosQueryCp(
sizeof(ulCP), ulCP, &ulncp);
770 if (ulCP[0] == 932 || ulCP[0] == 943)
773 sprintf(buf,
"CP%lu", ulCP[0]);
794 return &nkf_encoding_table[
UTF_8];
801 #ifdef DEFAULT_CODE_LOCALE
803 #elif defined(DEFAULT_ENCIDX)
/* The `len` member of the buffer that buf points to. */
835 #define nkf_buf_length(buf) ((buf)->len)
/* Non-zero when the buffer's `len` member is 0. */
836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
864 return buf->
ptr[--buf->
len];
870 #define fprintf dllprintf
883 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
885 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
886 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
890 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
891 " UTF option is -W[8,[16,32][B,L]]\n"
893 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
897 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
898 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
899 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
902 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
903 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
904 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
905 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
908 " O Output to File (DEFAULT 'nkf.out')\n"
909 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
912 " --ic=<encoding> Specify the input encoding\n"
913 " --oc=<encoding> Specify the output encoding\n"
914 " --hiragana --katakana Hiragana/Katakana Conversion\n"
915 " --katakana-hiragana Converts each other\n"
919 " --{cap, url}-input Convert hex after ':' or '%%'\n"
922 " --numchar-input Convert Unicode Character Reference\n"
925 " --fb-{skip, html, xml, perl, java, subchar}\n"
926 " Specify unassigned character's replacement\n"
931 " --in-place[=SUF] Overwrite original files\n"
932 " --overwrite[=SUF] Preserve timestamp of original files\n"
934 " -g --guess Guess the input code\n"
935 " -v --version Print the version\n"
936 " --help/-V Print this help / configuration\n"
946 " Compile-time options:\n"
947 " Compiled at: " __DATE__
" " __TIME__
"\n"
950 " Default output encoding: "
953 #elif defined(DEFAULT_ENCIDX)
960 " Default output end of line: "
969 " Decode MIME encoded string: "
976 " Convert JIS X 0201 Katakana: "
983 " --help, --version output: "
984 #
if HELP_OUTPUT_HELP_OUTPUT
997 char *backup_filename;
998 int asterisk_count = 0;
1000 int filename_length =
strlen(filename);
1002 for(i = 0; suffix[
i]; i++){
1003 if(suffix[i] ==
'*') asterisk_count++;
1007 backup_filename =
nkf_xmalloc(
strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1008 for(i = 0, j = 0; suffix[
i];){
1009 if(suffix[i] ==
'*'){
1010 backup_filename[j] =
'\0';
1011 strncat(backup_filename, filename, filename_length);
1013 j += filename_length;
1015 backup_filename[j++] = suffix[i++];
1018 backup_filename[j] =
'\0';
1020 j = filename_length +
strlen(suffix);
1022 strcpy(backup_filename, filename);
1023 strcat(backup_filename, suffix);
1024 backup_filename[j] =
'\0';
1026 return backup_filename;
1030 #ifdef UTF8_INPUT_ENABLE
1060 (*oconv)(0, 0x30+(c/10000 )%10);
1062 (*oconv)(0, 0x30+(c/1000 )%10);
1064 (*oconv)(0, 0x30+(c/100 )%10);
1066 (*oconv)(0, 0x30+(c/10 )%10);
1068 (*oconv)(0, 0x30+ c %10);
1120 (*oconv)((c>>8)&0xFF, c&0xFF);
1125 static const struct {
1149 {
"katakana-hiragana",
"h3"},
1157 #ifdef UTF8_OUTPUT_ENABLE
1167 {
"fb-subchar=",
""},
1169 #ifdef UTF8_INPUT_ENABLE
1170 {
"utf8-input",
"W"},
1171 {
"utf16-input",
"W16"},
1172 {
"no-cp932ext",
""},
1173 {
"no-best-fit-chars",
""},
1175 #ifdef UNICODE_NORMALIZATION
1176 {
"utf8mac-input",
""},
1188 #ifdef NUMCHAR_OPTION
1189 {
"numchar-input",
""},
1195 #ifdef SHIFTJIS_CP932
1216 #ifdef SHIFTJIS_CP932
1219 #ifdef UTF8_OUTPUT_ENABLE
1238 #ifdef SHIFTJIS_CP932
1241 #ifdef UTF8_OUTPUT_ENABLE
1247 #ifdef SHIFTJIS_CP932
1250 #ifdef UTF8_OUTPUT_ENABLE
1260 #ifdef SHIFTJIS_CP932
1263 #ifdef UTF8_OUTPUT_ENABLE
1269 #ifdef SHIFTJIS_CP932
1272 #ifdef UTF8_OUTPUT_ENABLE
1278 #ifdef SHIFTJIS_CP932
1281 #ifdef UTF8_OUTPUT_ENABLE
1288 #ifdef SHIFTJIS_CP932
1295 #ifdef SHIFTJIS_CP932
1299 #ifdef UTF8_INPUT_ENABLE
1300 #ifdef UNICODE_NORMALIZATION
1332 #ifdef SHIFTJIS_CP932
1335 #ifdef UTF8_OUTPUT_ENABLE
1341 #ifdef SHIFTJIS_CP932
1344 #ifdef UTF8_OUTPUT_ENABLE
1349 #ifdef SHIFTJIS_CP932
1355 #ifdef SHIFTJIS_CP932
1362 #ifdef SHIFTJIS_CP932
1370 #ifdef UTF8_OUTPUT_ENABLE
1375 #ifdef UTF8_OUTPUT_ENABLE
1381 #ifdef SHIFTJIS_CP932
1384 #ifdef UTF8_OUTPUT_ENABLE
1390 #ifdef SHIFTJIS_CP932
1393 #ifdef UTF8_OUTPUT_ENABLE
1399 #ifdef SHIFTJIS_CP932
1402 #ifdef UTF8_OUTPUT_ENABLE
1409 #ifdef UTF8_OUTPUT_ENABLE
1416 #ifdef UTF8_OUTPUT_ENABLE
1423 #ifdef SHIFTJIS_CP932
1431 #ifdef SHIFTJIS_CP932
1435 #ifdef UTF8_OUTPUT_ENABLE
1485 #ifdef INPUT_CODE_FIX
1486 if (f || !input_encoding)
1494 && (f == -
TRUE || !input_encoding)
1518 if (0x75 <= c && c <= 0x7f){
1519 ret = c + (0x109 - 0x75);
1522 if (0x75 <= c && c <= 0x7f){
1523 ret = c + (0x113 - 0x75);
1534 if (0x7f <= c && c <= 0x88){
1535 ret = c + (0x75 - 0x7f);
1536 }
else if (0x89 <= c && c <= 0x92){
1550 if((0x21 <= ndx && ndx <= 0x2F)){
1551 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1552 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1554 }
else if(0x6E <= ndx && ndx <= 0x7E){
1555 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1556 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1564 const unsigned short *
ptr;
1567 val = ptr[(c1 & 0x7f) - 0x21];
1580 if(0x7F < c2)
return 1;
1581 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1582 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1589 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1592 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1593 if (0xFC < c1)
return 1;
1594 #ifdef SHIFTJIS_CP932
1603 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1604 val =
cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1630 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
1631 c2 =
PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1634 if (0x9E < c1) c2++;
1637 #define SJ0162 0x00e1
1638 #define SJ6394 0x0161
1640 if (0x9E < c1) c2++;
1643 c1 = c1 - ((c1 >
DEL) ?
SP : 0x1F);
1657 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1667 }
else if (val < 0x800){
1668 *p1 = 0xc0 | (val >> 6);
1669 *p2 = 0x80 | (val & 0x3f);
1673 *p1 = 0xe0 | (val >> 12);
1674 *p2 = 0x80 | ((val >> 6) & 0x3f);
1675 *p3 = 0x80 | ( val & 0x3f);
1678 *p1 = 0xf0 | (val >> 18);
1679 *p2 = 0x80 | ((val >> 12) & 0x3f);
1680 *p3 = 0x80 | ((val >> 6) & 0x3f);
1681 *p4 = 0x80 | ( val & 0x3f);
1698 else if (c1 <= 0xC3) {
1702 else if (c1 <= 0xDF) {
1704 wc = (c1 & 0x1F) << 6;
1707 else if (c1 <= 0xEF) {
1709 wc = (c1 & 0x0F) << 12;
1710 wc |= (c2 & 0x3F) << 6;
1713 else if (c2 <= 0xF4) {
1715 wc = (c1 & 0x0F) << 18;
1716 wc |= (c2 & 0x3F) << 12;
1717 wc |= (c3 & 0x3F) << 6;
1727 #ifdef UTF8_INPUT_ENABLE
1730 const unsigned short *
const *pp,
nkf_char psize,
1734 const unsigned short *
p;
1737 if (pp == 0)
return 1;
1740 if (c1 < 0 || psize <= c1)
return 1;
1742 if (p == 0)
return 1;
1745 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0)
return 1;
1747 if (val == 0)
return 1;
1768 const unsigned short *
const *pp;
1769 const unsigned short *
const *
const *ppp;
1770 static const char no_best_fit_chars_table_C2[] =
1771 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1772 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1774 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1775 static const char no_best_fit_chars_table_C2_ms[] =
1776 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1777 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1779 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1780 static const char no_best_fit_chars_table_932_C2[] =
1781 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1782 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1784 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1785 static const char no_best_fit_chars_table_932_C3[] =
1786 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1787 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1788 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1789 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1795 }
else if(c2 < 0xe0){
1800 if(no_best_fit_chars_table_932_C2[c1&0x3F])
return 1;
1803 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1809 if(no_best_fit_chars_table_C2[c1&0x3F])
return 1;
1812 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1816 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F])
return 1;
1840 }
else if(c0 < 0xF0){
1843 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
return 1;
1849 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE)
return 1;
1852 if(c0 == 0x92)
return 1;
1857 if(c1 == 0x80 || c0 == 0x9C)
return 1;
1865 if(c0 == 0x94)
return 1;
1868 if(c0 == 0xBB)
return 1;
1878 if(c0 == 0x95)
return 1;
1881 if(c0 == 0xA5)
return 1;
1888 if(c0 == 0x8D)
return 1;
1894 if(0xA0 <= c0 && c0 <= 0xA5)
return 1;
1908 #ifdef SHIFTJIS_CP932
1911 if (
e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1921 #ifdef UTF8_OUTPUT_ENABLE
1925 const unsigned short *
p;
1942 c2 = (c2&0x7f) - 0x21;
1943 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1950 c2 = (c2&0x7f) - 0x21;
1951 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1960 c1 = (c1 & 0x7f) - 0x21;
1961 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1975 }
else if (0xc0 <= c2 && c2 <= 0xef) {
1977 #ifdef NUMCHAR_OPTION
1988 #ifdef UTF8_INPUT_ENABLE
2027 }
else if (c2 == 0x8f){
2031 if (!
cp51932_f && !
x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2036 c2 = (c2 << 8) | (c1 & 0x7f);
2038 #ifdef SHIFTJIS_CP932
2041 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2062 #ifdef SHIFTJIS_CP932
2063 if (
cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2065 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2089 }
else if ((c2 ==
EOF) || (c2 == 0) || c2 <
SP) {
2091 }
else if (!
x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2093 if(c1 == 0x7F)
return 0;
2098 if (ret)
return ret;
2108 static const char w_iconv_utf8_1st_byte[] =
2110 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2111 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2112 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2113 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2120 if (c1 < 0 || 0xff < c1) {
2121 }
else if (c1 == 0) {
2123 }
else if ((c1 & 0xC0) == 0x80) {
2126 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2128 if (c2 < 0x80 || 0xBF < c2)
return 0;
2131 if (c3 == 0)
return -1;
2132 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2137 if (c3 == 0)
return -1;
2138 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2142 if (c3 == 0)
return -1;
2143 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2147 if (c3 == 0)
return -2;
2148 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2152 if (c3 == 0)
return -2;
2153 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2157 if (c3 == 0)
return -2;
2158 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2166 if (c1 == 0 || c1 ==
EOF){
2167 }
else if ((c1 & 0xf8) == 0xf0) {
2171 ret =
w2e_conv(c1, c2, c3, &c1, &c2);
/*
 * Negative status value.  Judging by the name, it reports a code outside
 * the valid range from an input converter -- confirm against the callers.
 * Parenthesized so the sign cannot combine with neighbouring operators.
 */
#define NKF_ICONV_INVALID_CODE_RANGE (-13)
2189 }
else if ((wc>>11) == 27) {
2192 }
else if (wc < 0xFFFF) {
2194 if (ret)
return ret;
2195 }
else if (wc < 0x10FFFF) {
/*
 * Sentinel values of type size_t.  Judging by the names, a converter
 * returns them when it needs one or two further input bytes -- confirm
 * against the callers.  The expansions are parenthesized so that they
 * behave as single operands (for example under `sizeof`).
 */
#define NKF_ICONV_NEED_ONE_MORE_BYTE ((size_t)-1)
#define NKF_ICONV_NEED_TWO_MORE_BYTES ((size_t)-2)
/*
 * Combines a UTF-16 surrogate pair into one code point:
 *   (lead << 10) + trail - 0x35FDC00
 * The constant equals (0xD800 << 10) + 0xDC00 - 0x10000, so the pair
 * D800/DC00 maps to U+10000.  NKF_INT32_C is defined outside this part
 * of the file.
 */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
2219 if (0xD8 <= c1 && c1 <= 0xDB) {
2220 if (0xDC <= c3 && c3 <= 0xDF) {
2227 if (0xD8 <= c2 && c2 <= 0xDB) {
2228 if (0xDC <= c4 && c4 <= 0xDF) {
2265 wc = c2 << 16 | c3 << 8 | c4;
2268 wc = c3 << 16 | c2 << 8 | c1;
2271 wc = c1 << 16 | c4 << 8 | c3;
2274 wc = c4 << 16 | c1 << 8 | c2;
2284 #define output_ascii_escape_sequence(mode) do { \
2285 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2288 (*o_putc)(ascii_intro); \
2289 output_mode = mode; \
2339 #ifdef NUMCHAR_OPTION
2347 c2 = 0x7F + c1 / 94;
2348 c1 = 0x21 + c1 % 94;
2360 else if (c2 ==
EOF) {
2374 (*o_putc)(c2 & 0x7f);
2379 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2380 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1)
return;
2394 if (
x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2398 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2399 c1 = 0x21 + c1 % 94;
2402 (*o_putc)((c2 & 0x7f) | 0x080);
2403 (*o_putc)(c1 | 0x080);
2405 (*o_putc)((c2 & 0x7f) | 0x080);
2406 (*o_putc)(c1 | 0x080);
2418 }
else if (c2 == 0) {
2423 (*o_putc)(
SS2); (*o_putc)(c1|0x80);
2426 (*o_putc)(c1 | 0x080);
2430 #ifdef SHIFTJIS_CP932
2433 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2444 (*o_putc)((c2 & 0x7f) | 0x080);
2445 (*o_putc)(c1 | 0x080);
2448 (*o_putc)((c2 & 0x7f) | 0x080);
2449 (*o_putc)(c1 | 0x080);
2458 (*o_putc)(c2 | 0x080);
2459 (*o_putc)(c1 | 0x080);
2466 #ifdef NUMCHAR_OPTION
2471 if (!
x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2476 c1 += 0x40 + (c1 > 0x3e);
2490 }
else if (c2 == 0) {
2498 (*o_putc)(c1 | 0x080);
2502 if (
e2s_conv(c2, c1, &c2, &c1) == 0){
2515 #ifdef SHIFTJIS_CP932
2517 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2534 #ifdef UTF8_OUTPUT_ENABLE
2557 if (c2) (*o_putc)(c2);
2558 if (c3) (*o_putc)(c3);
2559 if (c4) (*o_putc)(c4);
2570 if (c2) (*o_putc)(c2);
2571 if (c3) (*o_putc)(c3);
2572 if (c4) (*o_putc)(c4);
2598 c2 = (c1 >> 8) & 0xff;
2606 (*o_putc)(c2 & 0xff);
2607 (*o_putc)((c2 >> 8) & 0xff);
2608 (*o_putc)(c1 & 0xff);
2609 (*o_putc)((c1 >> 8) & 0xff);
2611 (*o_putc)((c2 >> 8) & 0xff);
2612 (*o_putc)(c2 & 0xff);
2613 (*o_putc)((c1 >> 8) & 0xff);
2614 (*o_putc)(c1 & 0xff);
2621 c2 = (val >> 8) & 0xff;
2667 (*o_putc)( c1 & 0xFF);
2668 (*o_putc)((c1 >> 8) & 0xFF);
2669 (*o_putc)((c1 >> 16) & 0xFF);
2673 (*o_putc)((c1 >> 16) & 0xFF);
2674 (*o_putc)((c1 >> 8) & 0xFF);
2675 (*o_putc)( c1 & 0xFF);
/*
 * Score flags.  Each flag is the previous one shifted left by one bit,
 * so the eight flags occupy distinct bits 0x01 .. 0x80 and can be
 * combined or cleared in a bit mask.
 * NOTE(review): the meaning of each flag is only suggested by its name
 * here -- confirm against the code that sets them.
 */
2680 #define SCORE_L2 (1)
2681 #define SCORE_KANA (SCORE_L2 << 1)
2682 #define SCORE_DEPEND (SCORE_KANA << 1)
2683 #define SCORE_CP932 (SCORE_DEPEND << 1)
2684 #define SCORE_X0212 (SCORE_CP932 << 1)
2685 #define SCORE_NO_EXIST (SCORE_X0212 << 1)
2686 #define SCORE_iMIME (SCORE_NO_EXIST << 1)
2687 #define SCORE_ERROR (SCORE_iMIME << 1)
/* Initial score value: only the SCORE_iMIME bit is set. */
2689 #define SCORE_INIT (SCORE_iMIME)
2717 ptr->
score &= ~score;
2725 #ifdef UTF8_OUTPUT_ENABLE
2730 }
else if (c2 ==
SS2){
2732 }
else if (c2 == 0x8f){
2734 #ifdef UTF8_OUTPUT_ENABLE
2738 }
else if ((c2 & 0x70) == 0x20){
2740 }
else if ((c2 & 0x70) == 0x70){
2742 }
else if ((c2 & 0x70) >= 0x50){
2803 }
else if (0xa1 <= c && c <= 0xdf){
2808 }
else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2811 }
else if (0xed <= c && c <= 0xee){
2814 #ifdef SHIFTJIS_CP932
2820 }
else if (0xf0 <= c && c <= 0xfc){
2829 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2839 #ifdef SHIFTJIS_CP932
2840 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2852 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2876 }
else if (
SS2 == c || (0xa1 <= c && c <= 0xfe)){
2880 }
else if (0x8f == c){
2889 if (0xa1 <= c && c <= 0xfe){
2899 if (0xa1 <= c && c <= 0xfe){
2909 #ifdef UTF8_INPUT_ENABLE
2922 }
else if (0xc0 <= c && c <= 0xdf){
2925 }
else if (0xe0 <= c && c <= 0xef){
2928 }
else if (0xf0 <= c && c <= 0xf4){
2937 if (0x80 <= c && c <= 0xbf){
2940 int bom = (ptr->
buf[0] == 0xef && ptr->
buf[1] == 0xbb
2941 && ptr->
buf[2] == 0xbf);
2943 &ptr->
buf[0], &ptr->
buf[1]);
2954 if (0x80 <= c && c <= 0xbf){
2971 int action_flag = 1;
2984 }
else if(p->
stat == 0){
2997 }
else if (c <=
DEL){
3017 #define STD_GC_BUFSIZE (256)
3071 hold_buf[hold_count++] = c2;
3072 return ((hold_count >=
HOLD_SIZE*2) ?
EOF : hold_count);
3128 while (hold_index < hold_count){
3129 c1 = hold_buf[hold_index++];
3134 else if (c1 <=
DEL){
3137 }
else if (
iconv ==
s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3141 if (hold_index < hold_count){
3142 c2 = hold_buf[hold_index++];
3152 switch ((*
iconv)(c1, c2, 0)) {
3155 if (hold_index < hold_count){
3156 c3 = hold_buf[hold_index++];
3157 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3162 if (hold_index < hold_count){
3163 c4 = hold_buf[hold_index++];
3164 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3169 (*iconv)(c1, c2, (c3<<8)|c4);
3173 if (hold_index < hold_count){
3174 c3 = hold_buf[hold_index++];
3175 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3181 (*iconv)(c1, c2, c3);
3184 if (c3 ==
EOF)
break;
3196 switch(c2 = (*
i_getc)(f)){
3198 if((c2 = (*
i_getc)(f)) == 0x00){
3199 if((c2 = (*
i_getc)(f)) == 0xFE){
3200 if((c2 = (*
i_getc)(f)) == 0xFF){
3201 if(!input_encoding){
3208 (*i_ungetc)(0xFF,f);
3210 (*i_ungetc)(0xFE,f);
3211 }
else if(c2 == 0xFF){
3212 if((c2 = (*
i_getc)(f)) == 0xFE){
3213 if(!input_encoding){
3220 (*i_ungetc)(0xFF,f);
3222 (*i_ungetc)(0xFF,f);
3224 (*i_ungetc)(0x00,f);
3226 (*i_ungetc)(0x00,f);
3229 if((c2 = (*
i_getc)(f)) == 0xBB){
3230 if((c2 = (*
i_getc)(f)) == 0xBF){
3231 if(!input_encoding){
3237 (*i_ungetc)(0xBF,f);
3239 (*i_ungetc)(0xBB,f);
3241 (*i_ungetc)(0xEF,f);
3244 if((c2 = (*
i_getc)(f)) == 0xFF){
3245 if((c2 = (*
i_getc)(f)) == 0x00){
3246 if((c2 = (*
i_getc)(f)) == 0x00){
3247 if(!input_encoding){
3254 (*i_ungetc)(0x00,f);
3256 (*i_ungetc)(0x00,f);
3258 if(!input_encoding){
3265 (*i_ungetc)(0xFF,f);
3267 (*i_ungetc)(0xFE,f);
3270 if((c2 = (*
i_getc)(f)) == 0xFE){
3271 if((c2 = (*
i_getc)(f)) == 0x00){
3272 if((c2 = (*
i_getc)(f)) == 0x00){
3273 if(!input_encoding){
3280 (*i_ungetc)(0x00,f);
3282 (*i_ungetc)(0x00,f);
3284 if(!input_encoding){
3291 (*i_ungetc)(0xFE,f);
3293 (*i_ungetc)(0xFF,f);
3314 if (c1==
'@'|| c1==
'B') {
3326 if (c1==
'J'|| c1==
'B') {
3352 if (c2 == 0 && c1 ==
LF) {
3366 else if (c2 != 0 || c1 !=
LF) (*o_eol_conv)(c2, c1);
/*
 * char_size(c2, c1): 2 when c2 is non-zero, otherwise 1.  c1 is accepted
 * but not used.  c2 is parenthesized so that an expression argument is
 * tested as a whole.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3436 }
else if (c1==
BS) {
3475 }
else if (c1==
'\f') {
3479 }
else if ((c2==0 &&
nkf_isblank(c1)) || (c2 ==
'!' && c1 ==
'!')) {
3506 if (c1==(0xde&0x7f)) fold_state = 1;
3507 else if (c1==(0xdf&0x7f)) fold_state = 1;
3508 else if (c1==(0xa4&0x7f)) fold_state = 1;
3509 else if (c1==(0xa3&0x7f)) fold_state = 1;
3510 else if (c1==(0xa1&0x7f)) fold_state = 1;
3511 else if (c1==(0xb0&0x7f)) fold_state = 1;
3512 else if (
SP<=c1 && c1<=(0xdf&0x7f)) {
3536 }
else if ((prev0==
SP) ||
3546 if (c1==
'"') fold_state = 1;
3547 else if (c1==
'#') fold_state = 1;
3548 else if (c1==
'W') fold_state = 1;
3549 else if (c1==
'K') fold_state = 1;
3550 else if (c1==
'$') fold_state = 1;
3551 else if (c1==
'%') fold_state = 1;
3552 else if (c1==
'\'') fold_state = 1;
3553 else if (c1==
'(') fold_state = 1;
3554 else if (c1==
')') fold_state = 1;
3555 else if (c1==
'*') fold_state = 1;
3556 else if (c1==
'+') fold_state = 1;
3557 else if (c1==
',') fold_state = 1;
3573 switch(fold_state) {
3608 if (c1 == (0xde&0x7f)) {
3612 }
else if (c1 == (0xdf&0x7f) &&
ev[(
z_prev1-
SP)*2]) {
3622 if (
dv[(c1-
SP)*2] ||
ev[(c1-
SP)*2]) {
3628 (*o_zconv)(
cv[(c1-
SP)*2],
cv[(c1-
SP)*2+1]);
3639 if (
alpha_f&1 && c2 == 0x23) {
3642 }
else if (c2 == 0x21) {
3653 }
else if (
alpha_f&1 && 0x20<c1 && c1<0x7f &&
fv[c1-0x20]) {
3661 const char *entity = 0;
3663 case '>': entity =
">";
break;
3664 case '<': entity =
"<";
break;
3665 case '\"': entity =
""";
break;
3666 case '&': entity =
"&";
break;
3669 while (*entity) (*o_zconv)(0, *entity++);
3716 }
else if (c2 == 0x25) {
3718 static const int fullwidth_to_halfwidth[] =
3720 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3721 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3722 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3723 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3724 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3725 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3726 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3727 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3728 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3729 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3730 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3731 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3733 if (fullwidth_to_halfwidth[c1-0x20]){
3734 c2 = fullwidth_to_halfwidth[c1-0x20];
3747 #define rot13(c) ( \
3749 (c <= 'M') ? (c + 13): \
3750 (c <= 'Z') ? (c - 13): \
3752 (c <= 'm') ? (c + 13): \
3753 (c <= 'z') ? (c - 13): \
3757 #define rot47(c) ( \
3759 ( c <= 'O') ? (c + 47) : \
3760 ( c <= '~') ? (c - 47) : \
3773 (*o_rot_conv)(c2,c1);
3781 if (0x20 < c1 && c1 < 0x74) {
3783 (*o_hira_conv)(c2,c1);
3788 (*o_hira_conv)(c2,c1);
3791 }
else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3793 (*o_hira_conv)(c2,c1);
3801 }
else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3803 }
else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3807 (*o_hira_conv)(c2,c1);
3814 #define RANGE_NUM_MAX 18
3838 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3842 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3848 start = range[
i][0];
3851 if (c >= start && c <= end) {
3856 (*o_iso2022jp_check_conv)(c2,c1);
3863 (
const unsigned char *)
"\075?EUC-JP?B?",
3864 (
const unsigned char *)
"\075?SHIFT_JIS?B?",
3865 (
const unsigned char *)
"\075?ISO-8859-1?Q?",
3866 (
const unsigned char *)
"\075?ISO-8859-1?B?",
3867 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
3868 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
3869 (
const unsigned char *)
"\075?ISO-2022-JP?Q?",
3871 (
const unsigned char *)
"\075?UTF-8?B?",
3872 (
const unsigned char *)
"\075?UTF-8?Q?",
3874 (
const unsigned char *)
"\075?US-ASCII?Q?",
3882 #if defined(UTF8_INPUT_ENABLE)
3890 #if defined(UTF8_INPUT_ENABLE)
3898 'B',
'B',
'Q',
'B',
'B',
'B',
'Q',
3899 #if defined(UTF8_INPUT_ENABLE)
/* Number of slots addressed in mime_input_state.buf; a power of two so it can be masked. */
3909 #define MIME_BUF_SIZE (1024)
/* Bit mask that reduces an index modulo MIME_BUF_SIZE. */
3910 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* Slot n of mime_input_state.buf, with n wrapped around by MIME_BUF_MASK. */
3911 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3920 #define MAXRECOVER 20
3939 (*i_mungetc_buf)(
c,f);
3997 if (c==
'=' && d==
'?') {
4005 if (!( (c==
'+'||c==
'/'|| c==
'=' || c==
'?' ||
is_alnum(c))))
4024 const unsigned char *
p,*
q;
4030 p = mime_pattern[j];
4033 for(i=2;p[
i]>
SP;i++) {
4037 while (mime_pattern[++j]) {
4038 p = mime_pattern[j];
4040 if (p[k]!=q[k])
break;
4043 p = mime_pattern[j];
4086 if (c1==
LF||c1==
SP||c1==
CR||
4087 c1==
'-'||c1==
'_'||
is_alnum(c1))
continue;
4098 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4099 if (c1==
'b'||c1==
'B') {
4101 }
else if (c1==
'q'||c1==
'Q') {
4107 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4139 fprintf(stderr,
"%s\n", str ? str :
"NULL");
4181 #if !defined(PERL_XS) && !defined(WIN32DLL)
4185 if (filename !=
NULL) printf(
"%s: ", filename);
4254 #ifdef NUMCHAR_OPTION
4270 if (buf[i] ==
'x' || buf[i] ==
'X'){
4271 for (j = 0; j < 7; j++){
4283 for (j = 0; j < 8; j++){
4316 #ifdef UNICODE_NORMALIZATION
4324 const unsigned char *array;
4325 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4328 if (c ==
EOF || c > 0xFF || (c & 0xc0) == 0x80)
return c;
4332 while (lower <= upper) {
4333 int mid = (lower+upper) / 2;
4336 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[
len]; len++) {
4341 lower = 1, upper = 0;
4347 if (array[len] <
nkf_buf_at(buf, len)) lower = mid + 1;
4348 else upper = mid - 1;
4357 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[
i]; i++)
4362 }
while (lower <= upper);
4385 }
else if (c ==
'_') {
4390 }
else if (c >
'/') {
4392 }
else if (c ==
'+' || c ==
'-') {
4427 if (c1<=
SP ||
DEL<=c1) {
4441 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4471 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4472 if (lwsp_count++>lwsp_size){
4474 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4475 lwsp_buf = lwsp_buf_new;
4481 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4483 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4490 if (c1==
'='&&c2<
SP) {
4492 if (c1 ==
EOF)
return (
EOF);
4495 goto restart_mime_q;
4503 if (c2<=
SP)
return c2;
4537 if ((c1 ==
'?') && (c2 ==
'=')) {
4540 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4573 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4574 if (lwsp_count++>lwsp_size){
4576 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4577 lwsp_buf = lwsp_buf_new;
4583 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4585 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4617 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4620 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4623 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4634 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4636 #define MIMEOUT_BUF_LENGTH 74
4647 const unsigned char *
p;
4650 p = mime_pattern[0];
4651 for(i=0;mime_pattern[
i];i++) {
4652 if (mode == mime_encode[i]) {
4653 p = mime_pattern[
i];
4696 (*o_base64conv)(
EOF,0);
4698 (*o_base64conv)(0,
SP);
4703 (*o_base64conv)(
EOF,0);
4705 (*o_base64conv)(0,
SP);
4714 (*o_base64conv)(
EOF,0);
4716 (*o_base64conv)(0,
SP);
4740 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4)]);
4746 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2)]);
4769 (*o_mputc)(
bin2hex(((c>>4)&0xf)));
4779 (*o_mputc)(basis_64[c>>2]);
4784 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4790 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4791 (*o_mputc)(basis_64[c & 0x3F]);
4811 if (c!=
CR && c!=
LF) {
4874 if (c ==
CR || c ==
LF) {
4879 }
else if (c <=
SP) {
4918 if (c==
CR || c==
LF) {
4945 static const char *
str =
"boundary=\"";
4946 static int len = 10;
4966 for (j = 0; j <=
i; ++j) {
4984 if (lastchar==
CR || lastchar ==
LF){
5005 if (lastchar ==
CR || lastchar ==
LF){
5081 (*o_base64conv)(c2,c1);
5085 typedef struct nkf_iconv_t {
5088 size_t input_buffer_size;
5089 char *output_buffer;
5090 size_t output_buffer_size;
5094 nkf_iconv_new(
char *tocode,
char *fromcode)
5096 nkf_iconv_t converter;
5099 converter->input_buffer =
nkf_xmalloc(converter->input_buffer_size);
5100 converter->output_buffer_size =
IOBUF_SIZE * 2;
5101 converter->output_buffer =
nkf_xmalloc(converter->output_buffer_size);
5102 converter->cd = iconv_open(tocode, fromcode);
5103 if (converter->cd == (iconv_t)-1)
5107 perror(fprintf(
"iconv doesn't support %s to %s conversion.", fromcode, tocode));
5110 perror(
"can't iconv_open");
5116 nkf_iconv_convert(nkf_iconv_t *converter,
FILE *
input)
5118 size_t invalid = (size_t)0;
5119 char *input_buffer = converter->input_buffer;
5120 size_t input_length = (size_t)0;
5121 char *output_buffer = converter->output_buffer;
5122 size_t output_length = converter->output_buffer_size;
5128 input_buffer[input_length++] =
c;
5129 if (input_length < converter->input_buffer_size)
break;
5133 size_t ret =
iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5134 while (output_length-- > 0) {
5135 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5137 if (ret == (
size_t) - 1) {
5140 if (input_buffer != converter->input_buffer)
5141 memmove(converter->input_buffer, input_buffer, input_length);
5144 converter->output_buffer_size *= 2;
5145 output_buffer =
realloc(converter->outbuf, converter->output_buffer_size);
5146 if (output_buffer ==
NULL) {
5147 perror(
"can't realloc");
5150 converter->output_buffer = output_buffer;
5153 perror(
"can't iconv");
5166 nkf_iconv_close(nkf_iconv_t *convert)
5170 iconv_close(converter->cd);
5199 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5202 #ifdef UTF8_INPUT_ENABLE
5209 #ifdef UTF8_OUTPUT_ENABLE
5213 #ifdef UNICODE_NORMALIZATION
5229 #ifdef SHIFTJIS_CP932
5239 for (i = 0; i < 256; i++){
5285 input_encoding =
NULL;
5286 output_encoding =
NULL;
5297 if (!output_encoding) {
5300 if (!output_encoding) {
5364 #ifdef NUMCHAR_OPTION
5370 #ifdef UNICODE_NORMALIZATION
5384 if (input_encoding) {
5403 #if !defined(PERL_XS) && !defined(WIN32DLL)
5418 #define NEXT continue
5419 #define SKIP c2=0;continue
5420 #define MORE c2=c1;continue
5421 #define SEND (void)0
5423 #define set_input_mode(mode) do { \
5424 input_mode = mode; \
5426 set_input_codename("ISO-2022-JP"); \
5427 debug("ISO-2022-JP"); \
5436 int is_8bit =
FALSE;
5446 #if !defined(PERL_XS) && !defined(WIN32DLL)
5447 fprintf(stderr,
"no output encoding given\n");
5453 #ifdef UTF8_INPUT_ENABLE
5477 #ifdef INPUT_CODE_FIX
5478 if (!input_encoding)
5520 0xA1 <= c1 && c1 <= 0xDF) {
5525 }
else if (c1 >
DEL) {
5536 else if ((
iconv ==
s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5548 }
else if (
SP < c1 && c1 <
DEL) {
5572 }
else if (c1 ==
'?') {
5602 else if (c1 ==
'&') {
5610 else if (c1 ==
'$') {
5617 }
else if (c1 ==
'@' || c1 ==
'B') {
5621 }
else if (c1 ==
'(') {
5630 }
else if (c1 ==
'@'|| c1 ==
'B') {
5635 }
else if (c1 ==
'D'){
5639 }
else if (c1 ==
'O' || c1 ==
'Q'){
5642 }
else if (c1 ==
'P'){
5664 }
else if (c1 ==
'(') {
5672 else if (c1 ==
'I') {
5677 else if (c1 ==
'B' || c1 ==
'J' || c1 ==
'H') {
5692 else if (c1 ==
'.') {
5697 else if (c1 ==
'A') {
5708 else if (c1 ==
'N') {
5731 }
else if (c1 ==
'$') {
5735 }
else if ((
'E' <= c1 && c1 <=
'G') ||
5736 (
'O' <= c1 && c1 <=
'Q')) {
5744 static const nkf_char jphone_emoji_first_table[7] =
5745 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5748 while (
SP <= c1 && c1 <=
'z') {
5749 (*oconv)(0, c1 + c3);
5765 }
else if (c1 ==
LF || c1 ==
CR) {
5784 }
else if (c1 ==
LF && (c1=(*
i_getc)(f))!=
EOF && c1 ==
SP) {
5804 switch ((*
iconv)(c2, c1, 0)) {
5812 (*iconv)(c2, c1, c3|c4);
5820 (*iconv)(c2, c1, c3);
5828 0x7F <= c2 && c2 <= 0x92 &&
5829 0x21 <= c1 && c1 <= 0x7E) {
5856 (*iconv)(
EOF, 0, 0);
5887 unsigned char *cp_back =
NULL;
5892 while(*cp && *cp++!=
'-');
5893 while (*cp || cp_back) {
5902 if (!*cp || *cp ==
SP) {
5908 for (j=0;*p && *p !=
'=' && *p == cp[j];p++, j++);
5909 if (*p == cp[j] || cp[j] ==
SP){
5916 #if !defined(PERL_XS) && !defined(WIN32DLL)
5917 fprintf(stderr,
"unknown long option: --%s\n", cp);
5921 while(*cp && *cp !=
SP && cp++);
5935 input_encoding =
enc;
5942 output_encoding =
enc;
5946 if (p[0] ==
'0' || p[0] ==
'1') {
5954 if (strcmp(
long_option[i].name,
"overwrite") == 0){
5960 if (strcmp(
long_option[i].name,
"overwrite=") == 0){
5968 if (strcmp(
long_option[i].name,
"in-place") == 0){
5974 if (strcmp(
long_option[i].name,
"in-place=") == 0){
5984 if (strcmp(
long_option[i].name,
"cap-input") == 0){
5988 if (strcmp(
long_option[i].name,
"url-input") == 0){
5993 #ifdef NUMCHAR_OPTION
5994 if (strcmp(
long_option[i].name,
"numchar-input") == 0){
6000 if (strcmp(
long_option[i].name,
"no-output") == 0){
6010 #ifdef SHIFTJIS_CP932
6014 #ifdef UTF8_OUTPUT_ENABLE
6019 if (strcmp(
long_option[i].name,
"no-cp932") == 0){
6020 #ifdef SHIFTJIS_CP932
6024 #ifdef UTF8_OUTPUT_ENABLE
6029 #ifdef SHIFTJIS_CP932
6030 if (strcmp(
long_option[i].name,
"cp932inv") == 0){
6048 if (strcmp(
long_option[i].name,
"exec-out") == 0){
6053 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6054 if (strcmp(
long_option[i].name,
"no-cp932ext") == 0){
6058 if (strcmp(
long_option[i].name,
"no-best-fit-chars") == 0){
6082 if (strcmp(
long_option[i].name,
"fb-subchar") == 0){
6086 if (strcmp(
long_option[i].name,
"fb-subchar=") == 0){
6095 }
else if(p[1] ==
'x' || p[1] ==
'X'){
6113 #ifdef UTF8_OUTPUT_ENABLE
6114 if (strcmp(
long_option[i].name,
"ms-ucs-map") == 0){
6119 #ifdef UNICODE_NORMALIZATION
6120 if (strcmp(
long_option[i].name,
"utf8mac-input") == 0){
6133 #if !defined(PERL_XS) && !defined(WIN32DLL)
6134 fprintf(stderr,
"unsupported long option: --%s\n",
long_option[i].name);
6150 }
else if (*cp==
'2') {
6177 if (*cp==
'@'||*cp==
'B')
6182 if (*cp==
'J'||*cp==
'B'||*cp==
'H')
6190 if (
'9'>= *cp && *cp>=
'0')
6198 #if defined(MSDOS) || defined(__OS2__)
6213 #ifdef UTF8_OUTPUT_ENABLE
6226 if (
'1'== cp[0] &&
'6'==cp[1]) {
6229 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6240 }
else if (cp[0] ==
'B') {
6247 enc_idx = enc_idx ==
UTF_16
6251 enc_idx = enc_idx ==
UTF_16
6259 #ifdef UTF8_INPUT_ENABLE
6266 if (
'1'== cp[0] &&
'6'==cp[1]) {
6270 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6281 }
else if (cp[0] ==
'B') {
6285 enc_idx = (enc_idx ==
UTF_16
6310 while (
'0'<= *cp && *cp <=
'4') {
6311 alpha_f |= 1 << (*cp++ -
'0');
6336 while(
'0'<= *cp && *cp <=
'9') {
6345 while(
'0'<= *cp && *cp <=
'9') {
6353 if (*cp==
'B'||*cp==
'Q') {
6356 }
else if (*cp==
'N') {
6358 }
else if (*cp==
'S') {
6360 }
else if (*cp==
'0') {
6371 }
else if (*cp==
'Q') {
6383 if (
'9'>= *cp && *cp>=
'0')
6405 }
else if (*cp==
'm') {
6407 }
else if (*cp==
'w') {
6409 }
else if (*cp==
'0') {
6415 if (
'2' <= *cp && *cp <=
'9') {
6418 }
else if (*cp ==
'0' || *cp ==
'1') {
6428 while(*cp && *cp++!=
'-');
6431 #if !defined(PERL_XS) && !defined(WIN32DLL)
6432 fprintf(stderr,
"unknown option: -%c\n", *(cp-1));
6442 #include "nkf32dll.c"
6443 #elif defined(PERL_XS)
6451 char *outfname =
NULL;
6455 _BufferSize.y = 400;
6457 #ifdef DEFAULT_CODE_LOCALE
6458 setlocale(LC_CTYPE,
"");
6462 for (argc--,argv++; (argc > 0) && **argv ==
'-'; argc--, argv++) {
6463 cp = (
unsigned char *)*argv;
6468 if (pipe(fds) < 0 || (pid = fork()) < 0){
6479 execvp(argv[1], &argv[1]);
6499 int exec_f_back = exec_f;
6513 exec_f = exec_f_back;
6520 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6521 if (freopen(
"",
"wb",stdout) ==
NULL)
6528 setbuf(stdout, (
char *)
NULL);
6534 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6535 if (freopen(
"",
"rb",stdin) ==
NULL)
return (-1);
6548 int is_argument_error =
FALSE;
6555 if ((fin = fopen((origfname = *argv++),
"r")) ==
NULL) {
6557 is_argument_error =
TRUE;
6570 +
strlen(
".nkftmpXXXXXX")
6572 strcpy(outfname, origfname);
6576 for (i =
strlen(outfname);
i; --
i){
6577 if (outfname[i - 1] ==
'/'
6578 || outfname[i - 1] ==
'\\'){
6584 strcat(outfname,
"ntXXXXXX");
6586 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6587 S_IREAD | S_IWRITE);
6589 strcat(outfname,
".nkftmpXXXXXX");
6590 fd = mkstemp(outfname);
6593 || (fd_backup = dup(
fileno(stdout))) < 0
6605 outfname =
"nkf.out";
6608 if(freopen(outfname,
"w", stdout) ==
NULL) {
6613 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6614 if (freopen(
"",
"wb",stdout) ==
NULL)
6622 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6623 if (freopen(
"",
"rb",fin) ==
NULL)
6632 char *filename =
NULL;
6634 if (nfiles > 1) filename = origfname;
6641 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6652 if (
stat(origfname, &sb)) {
6653 fprintf(stderr,
"Can't stat %s\n", origfname);
6656 if (chmod(outfname, sb.st_mode)) {
6657 fprintf(stderr,
"Can't set permission %s\n", outfname);
6662 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6663 tb[0] = tb[1] = sb.st_mtime;
6664 if (utime(outfname, tb)) {
6665 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
6670 if (utime(outfname, &tb)) {
6671 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
6678 unlink(backup_filename);
6680 if (rename(origfname, backup_filename)) {
6681 perror(backup_filename);
6682 fprintf(stderr,
"Can't rename %s to %s\n",
6683 origfname, backup_filename);
6688 if (unlink(origfname)){
6693 if (rename(outfname, origfname)) {
6695 fprintf(stderr,
"Can't rename %s to %s\n",
6696 outfname, origfname);
6703 if (is_argument_error)
6708 scanf(
"%d",&end_check);
#define nkf_char_unicode_new(c)
static nkf_char(* i_mungetc)(nkf_char c, FILE *f)
static void(* o_fconv)(nkf_char c2, nkf_char c1)
struct normalization_pair normalization_table[]
#define output_ascii_escape_sequence(mode)
static nkf_char nkf_buf_at(nkf_buf_t *buf, int index)
static void status_check(struct input_code *ptr, nkf_char c)
static nkf_char hold_buf[HOLD_SIZE *2]
static void output_escape_sequence(int mode)
nkf_native_encoding NkfEncodingUTF_32
static nkf_char mime_begin(FILE *f)
static void set_iconv(nkf_char f, nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static void encode_fallback_xml(nkf_char c)
#define NKF_ICONV_INVALID_CODE_RANGE
const unsigned short *const x0212_shiftjis[]
static void * nkf_xmalloc(size_t size)
static nkf_char url_ungetc(nkf_char c, FILE *f)
size_t strlen(const char *)
static nkf_encoding * nkf_utf8_encoding()
static nkf_char nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static nkf_char std_getc(FILE *f)
static nkf_char(* i_mungetc_buf)(nkf_char c, FILE *f)
const unsigned short *const *const utf8_to_euc_3bytes_932[]
static nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
static void(* o_base64conv)(nkf_char c2, nkf_char c1)
static void nkf_buf_push(nkf_buf_t *buf, nkf_char c)
static size_t nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static void(* o_eol_conv)(nkf_char c2, nkf_char c1)
SSL_METHOD *(* func)(void)
#define nkf_enc_asciicompat(enc)
#define nkf_enc_name(enc)
nkf_native_encoding NkfEncodingASCII
static void eof_mime(void)
static void s_status(struct input_code *, nkf_char)
static int h_conv(FILE *f, nkf_char c1, nkf_char c2)
static void e_oconv(nkf_char c2, nkf_char c1)
static nkf_char base64decode(nkf_char c)
static nkf_char(* iconv)(nkf_char c2, nkf_char c1, nkf_char c0)
static void j_oconv(nkf_char c2, nkf_char c1)
const unsigned short *const utf8_to_euc_2bytes_932[]
static void base64_conv(nkf_char c2, nkf_char c1)
static nkf_char(* i_nungetc)(nkf_char c, FILE *f)
#define nkf_enc_to_iconv(enc)
#define UTF8_INPUT_ENABLE
nkf_encoding nkf_encoding_table[]
const unsigned short cp932inv[2][189]
static char * backup_suffix
#define nkf_char_unicode_p(c)
static void nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
static void status_push_ch(struct input_code *ptr, nkf_char c)
static void(* encode_fallback)(nkf_char c)
static void status_reinit(struct input_code *ptr)
#define UTF16_TO_UTF32(lead, trail)
static nkf_char numchar_getc(FILE *f)
static nkf_char(* i_nfc_ungetc)(nkf_char c, FILE *f)
#define nkf_char_unicode_value_p(c)
static void w_oconv(nkf_char c2, nkf_char c1)
static const char * input_codename
static int nkf_enc_find_index(const char *name)
#define nkf_buf_length(buf)
static const char * nkf_locale_charmap()
static int kanji_convert(FILE *f)
static nkf_encoding * nkf_locale_encoding()
static void switch_mime_getc(void)
const unsigned short *const euc_to_utf8_2bytes[]
struct @8 encoding_name_to_id_table[]
#define nkf_noescape_mime(c)
#define nkf_char_unicode_bmp_p(c)
const nkf_native_encoding * base_encoding
static nkf_char mime_getc_buf(FILE *f)
static char * get_backup_filename(const char *suffix, const char *filename)
#define DEFAULT_CODE_LOCALE
static void no_connection(nkf_char c2, nkf_char c1)
static void status_clear(struct input_code *ptr)
static const unsigned char dv[]
static void(* o_zconv)(nkf_char c2, nkf_char c1)
#define MIME_DECODE_DEFAULT
static const nkf_char mime_encode_method[]
static void print_guessed_code(char *filename)
static unsigned char ascii_intro
RUBY_EXTERN void * memmove(void *, const void *, size_t)
static void encode_fallback_subchar(nkf_char c)
static nkf_char x0212_shift(nkf_char c)
static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_encoding * input_encoding
static nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
static unsigned char prefix_table[256]
static void s_oconv(nkf_char c2, nkf_char c1)
static void(* o_hira_conv)(nkf_char c2, nkf_char c1)
static nkf_char cap_getc(FILE *f)
static int mime_decode_mode
static nkf_char e2w_conv(nkf_char c2, nkf_char c1)
static nkf_char(* mime_iconv_back)(nkf_char c2, nkf_char c1, nkf_char c0)
static void * nkf_xrealloc(void *ptr, size_t size)
static unsigned char kanji_intro
const unsigned short euc_to_utf8_1byte[]
static void w_oconv32(nkf_char c2, nkf_char c1)
static void set_code_score(struct input_code *ptr, nkf_char score)
static nkf_char(* i_ngetc)(FILE *)
#define UTF8_OUTPUT_ENABLE
static void set_input_encoding(nkf_encoding *enc)
static int fold_preserve_f
static void nkf_state_init(void)
static const nkf_char mime_encode[]
struct input_code input_code_list[]
#define NKF_ICONV_NEED_TWO_MORE_BYTES
static void oconv_newline(void(*func)(nkf_char, nkf_char))
static nkf_char mime_integrity(FILE *f, const unsigned char *p)
#define nkf_enc_unicode_p(enc)
static const nkf_char score_table_F0[]
static void encode_fallback_java(nkf_char c)
#define nkf_buf_empty_p(buf)
nkf_native_encoding NkfEncodingISO_2022_JP
static nkf_char(* i_mgetc_buf)(FILE *)
static int unicode_to_jis_common2(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
static nkf_char mime_getc(FILE *f)
static void mimeout_addchar(nkf_char c)
static nkf_char push_hold_buf(nkf_char c2)
static nkf_char(* i_nfc_getc)(FILE *)
static void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
static void put_newline(void(*func)(nkf_char))
#define set_input_mode(mode)
static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
const unsigned short *const euc_to_utf8_2bytes_ms[]
#define range(low, item, hi)
static const char * get_guessed_code(void)
static unsigned char stdobuf[IOBUF_SIZE]
unsigned char buf[MIME_BUF_SIZE]
static void(* oconv)(nkf_char c2, nkf_char c1)
static nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
static nkf_char cap_ungetc(nkf_char c, FILE *f)
static nkf_char(* i_cungetc)(nkf_char c, FILE *f)
#define is_ibmext_in_sjis(c2)
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static void code_score(struct input_code *ptr)
static nkf_char std_ungetc(nkf_char c, FILE *f)
static struct input_code * find_inputcode_byfunc(nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void set_input_codename(const char *codename)
static int preserve_time_f
static void check_bom(FILE *f)
static void encode_fallback_html(nkf_char c)
static nkf_char(* i_cgetc)(FILE *)
static struct @10 mime_input_state
static nkf_char nfc_getc(FILE *f)
static nkf_char(* i_uungetc)(nkf_char c, FILE *f)
static nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static nkf_char broken_ungetc(nkf_char c, FILE *f)
static nkf_char(* i_ugetc)(FILE *)
static const char basis_64[]
static void hira_conv(nkf_char c2, nkf_char c1)
static nkf_char(* i_getc)(FILE *f)
static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
#define mime_input_buf(n)
static int nkf_str_caseeql(const char *src, const char *target)
static nkf_char url_getc(FILE *f)
static nkf_char(* i_mgetc)(FILE *)
#define nkf_enc_to_index(enc)
static nkf_char noconvert(FILE *f)
static void(* o_rot_conv)(nkf_char c2, nkf_char c1)
static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void mime_input_buf_unshift(nkf_char c)
static void show_configuration(void)
register unsigned int len
static void set_output_encoding(nkf_encoding *enc)
nkf_native_encoding NkfEncodingUTF_16
static nkf_char(* i_bungetc)(nkf_char c, FILE *f)
const unsigned short *const utf8_to_euc_2bytes_ms[]
static nkf_char(* i_ungetc)(nkf_char c, FILE *f)
#define nkf_byte_jisx0201_katakana_p(c)
static nkf_char(* i_bgetc)(FILE *)
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
static void(* o_putc)(nkf_char c)
const unsigned short *const x0212_to_utf8_2bytes[]
static void z_conv(nkf_char c2, nkf_char c1)
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static size_t unicode_iconv(nkf_char wc)
const unsigned short shiftjis_x0212[3][189]
static void w_status(struct input_code *, nkf_char)
static nkf_char(* iconv_for_check)(nkf_char c2, nkf_char c1, nkf_char c0)=0
#define setvbuffer(fp, buf, size)
static void eol_conv(nkf_char c2, nkf_char c1)
static void mime_putc(nkf_char c)
static const nkf_char score_table_A0[]
static nkf_encoding * nkf_enc_find(const char *name)
const unsigned short *const euc_to_utf8_2bytes_mac[]
const unsigned short *const utf8_to_euc_2bytes[]
static const unsigned char cv[]
static const unsigned char fv[]
static nkf_char unicode_subchar
static int module_connection(void)
static void rot_conv(nkf_char c2, nkf_char c1)
static void debug(const char *str)
static void(* o_iso2022jp_check_conv)(nkf_char c2, nkf_char c1)
static size_t nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
const unsigned short shiftjis_cp932[3][189]
static const unsigned char ev[]
#define char_size(c2, c1)
static struct @9 long_option[]
static nkf_char nfc_ungetc(nkf_char c, FILE *f)
nkf_native_encoding NkfEncodingShift_JIS
static nkf_buf_t * nkf_buf_new(int length)
static unsigned char stdibuf[IOBUF_SIZE]
static nkf_char x0212_unshift(nkf_char c)
int main(int argc, char **argv)
static void w_oconv16(nkf_char c2, nkf_char c1)
#define assert(condition)
static void e_status(struct input_code *, nkf_char)
static void unswitch_mime_getc(void)
RUBY_EXTERN int dup2(int, int)
static struct @11 mimeout_state
const unsigned short *const *const utf8_to_euc_3bytes[]
static void mime_prechar(nkf_char c2, nkf_char c1)
static nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char(*g)(FILE *f), nkf_char(*u)(nkf_char c, FILE *f))
static int unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static void clr_code_score(struct input_code *ptr, nkf_char score)
const unsigned short *const utf8_to_euc_2bytes_mac[]
static void version(void)
#define nkf_enc_to_oconv(enc)
#define MIMEOUT_BUF_LENGTH
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
nkf_native_encoding NkfEncodingUTF_8
static nkf_encoding * output_encoding
static nkf_char numchar_ungetc(nkf_char c, FILE *f)
static nkf_encoding * nkf_default_encoding()
static const unsigned char * mime_pattern[]
static void nkf_buf_clear(nkf_buf_t *buf)
static nkf_state_t * nkf_state
static void open_mime(nkf_char mode)
static int no_best_fit_chars_f
static void(* o_mputc)(nkf_char c)
static nkf_char nkf_buf_pop(nkf_buf_t *buf)
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
static ULONG(STDMETHODCALLTYPE AddRef)(IDispatch __RPC_FAR *This)
static void fold_conv(nkf_char c2, nkf_char c1)
static void encode_fallback_perl(nkf_char c)
static nkf_char mime_begin_strict(FILE *f)
static void no_putc(nkf_char c)
static void status_reset(struct input_code *ptr)
static void close_mime(void)
static nkf_char broken_getc(FILE *f)
static void code_status(nkf_char c)
static void std_putc(nkf_char c)
STATIC void unsigned char * cp
static void nkf_each_char_to_hex(void(*f)(nkf_char c2, nkf_char c1), nkf_char c)
static nkf_encoding * nkf_enc_from_index(int idx)
static void status_disable(struct input_code *ptr)
nkf_native_encoding NkfEncodingEUC_JP
static nkf_char mime_ungetc(nkf_char c, FILE *f)