18 #define ENABLE_ECONV_NEWLINE_OPTION 1
32 #ifdef ENABLE_ECONV_NEWLINE_OPTION
45 static unsigned char *
47 const unsigned char *
str,
size_t len,
48 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
80 char ary[
sizeof(double) >
sizeof(
void*) ?
sizeof(double) :
sizeof(
void*)];
84 #define TRANSCODING_READBUF(tc) \
85 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
88 #define TRANSCODING_WRITEBUF(tc) \
89 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
90 (tc)->writebuf.ary : \
92 #define TRANSCODING_WRITEBUF_SIZE(tc) \
93 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
94 sizeof((tc)->writebuf.ary) : \
95 (size_t)(tc)->transcoder->max_output)
96 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
97 #define TRANSCODING_STATE(tc) \
98 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
154 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
178 entry->
sname = sname;
179 entry->
dname = dname;
241 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
258 const char *dname = (
const char *)key;
278 void (*
callback)(
const char *sname,
const char *dname,
int depth,
void *
arg),
335 const char *
enc = dname;
343 enc = (
const char *)val;
351 callback((
const char *)val, enc, --depth, arg);
352 enc = (
const char *)val;
368 const char *
const lib = entry->
lib;
375 memcpy(path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
376 memcpy(path +
sizeof(transcoder_lib_prefix) - 1, lib, len);
394 *repl_encname_ptr =
"UTF-8";
395 return "\xEF\xBF\xBD";
399 *repl_encname_ptr =
"US-ASCII";
408 static const unsigned char *
410 const unsigned char *in_start,
411 const unsigned char *inchar_start,
412 const unsigned char *in_p,
413 size_t *char_len_ptr)
415 const unsigned char *
ptr;
416 if (inchar_start - in_start < tc->recognized_len) {
418 inchar_start,
unsigned char, in_p - inchar_start);
430 const unsigned char *in_stop,
unsigned char *out_stop,
436 ssize_t readagain_len = 0;
438 const unsigned char *inchar_start;
439 const unsigned char *in_p;
441 unsigned char *out_p;
443 in_p = inchar_start = *in_pos;
447 #define SUSPEND(ret, num) \
449 tc->resume_position = (num); \
450 if (0 < in_p - inchar_start) \
451 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
452 inchar_start, unsigned char, in_p - inchar_start); \
455 tc->recognized_len += in_p - inchar_start; \
456 if (readagain_len) { \
457 tc->recognized_len -= readagain_len; \
458 tc->readagain_len = readagain_len; \
461 resume_label ## num:; \
463 #define SUSPEND_OBUF(num) \
465 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
468 #define SUSPEND_AFTER_OUTPUT(num) \
469 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
470 SUSPEND(econv_after_output, num); \
473 #define next_table (tc->next_table)
474 #define next_info (tc->next_info)
475 #define next_byte (tc->next_byte)
476 #define writebuf_len (tc->writebuf_len)
477 #define writebuf_off (tc->writebuf_off)
481 case 1:
goto resume_label1;
482 case 2:
goto resume_label2;
483 case 3:
goto resume_label3;
484 case 4:
goto resume_label4;
485 case 5:
goto resume_label5;
486 case 6:
goto resume_label6;
487 case 7:
goto resume_label7;
488 case 8:
goto resume_label8;
489 case 9:
goto resume_label9;
490 case 10:
goto resume_label10;
491 case 11:
goto resume_label11;
492 case 12:
goto resume_label12;
493 case 13:
goto resume_label13;
494 case 14:
goto resume_label14;
495 case 15:
goto resume_label15;
496 case 16:
goto resume_label16;
497 case 17:
goto resume_label17;
498 case 18:
goto resume_label18;
499 case 19:
goto resume_label19;
500 case 20:
goto resume_label20;
501 case 21:
goto resume_label21;
502 case 22:
goto resume_label22;
503 case 23:
goto resume_label23;
504 case 24:
goto resume_label24;
505 case 25:
goto resume_label25;
506 case 26:
goto resume_label26;
507 case 27:
goto resume_label27;
508 case 28:
goto resume_label28;
509 case 29:
goto resume_label29;
510 case 30:
goto resume_label30;
511 case 31:
goto resume_label31;
512 case 32:
goto resume_label32;
513 case 33:
goto resume_label33;
514 case 34:
goto resume_label34;
524 if (in_stop <= in_p) {
531 #define BYTE_ADDR(index) (tr->byte_array + (index))
532 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
533 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
534 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
535 #define BL_MIN_BYTE (BL_BASE[0])
536 #define BL_MAX_BYTE (BL_BASE[1])
537 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
538 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
551 const unsigned char *
p = inchar_start;
564 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
565 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
567 while (in_p >= in_stop) {
613 const unsigned char *char_start;
637 const unsigned char *char_start;
643 char_start, (
size_t)char_len,
644 out_p, out_stop - out_p);
649 char_start, (
size_t)char_len,
661 const unsigned char *char_start;
668 out_p, out_stop - out_p);
702 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
703 readagain_len = invalid_len - discard_len;
731 out_p, out_stop - out_p);
755 const unsigned char *in_stop,
unsigned char *out_stop,
761 const unsigned char *readagain_pos = readagain_buf;
762 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
771 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
787 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
808 const unsigned char **input_ptr,
const unsigned char *input_stop,
809 unsigned char **output_ptr,
unsigned char *output_stop,
813 input_ptr, output_ptr,
814 input_stop, output_stop,
825 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
840 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
935 for (i = 0; i <
n; i++) {
944 for (i = 0; i <
n; i++) {
995 if (*sname ==
'\0' && *dname ==
'\0') {
1006 if (num_trans < 0) {
1024 #define MAX_ECFLAGS_DECORATORS 32
1047 if (ecflags & ECONV_XML_TEXT_DECORATOR)
1048 decorators_ret[num_decorators++] =
"xml_text_escape";
1049 if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
1050 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1052 decorators_ret[num_decorators++] =
"xml_attr_quote";
1055 decorators_ret[num_decorators++] =
"crlf_newline";
1057 decorators_ret[num_decorators++] =
"cr_newline";
1059 decorators_ret[num_decorators++] =
"universal_newline";
1061 return num_decorators;
1073 if (num_decorators == -1)
1080 for (i = 0; i < num_decorators; i++)
1086 ec->
flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1093 const unsigned char **input_ptr,
const unsigned char *input_stop,
1094 unsigned char **output_ptr,
unsigned char *output_stop,
1101 const unsigned char **ipp, *is, *iold;
1102 unsigned char **opp, *os, *oold;
1108 for (i = start; i < ec->
num_trans; i++) {
1142 flags &= ~ECONV_AFTER_OUTPUT;
1145 f &= ~ECONV_AFTER_OUTPUT;
1149 if (iold != *ipp || oold != *opp)
1174 const unsigned char **input_ptr,
const unsigned char *input_stop,
1175 unsigned char **output_ptr,
unsigned char *output_stop,
1177 int *result_position_ptr)
1180 int needreport_index;
1183 unsigned char empty_buf;
1184 unsigned char *empty_ptr = &empty_buf;
1187 input_ptr = (
const unsigned char **)&empty_ptr;
1188 input_stop = empty_ptr;
1192 output_ptr = &empty_ptr;
1193 output_stop = empty_ptr;
1199 needreport_index = -1;
1200 for (i = ec->
num_trans-1; 0 <= i; i--) {
1208 needreport_index =
i;
1209 goto found_needreport;
1216 rb_bug(
"unexpected transcode last result");
1228 result_position_ptr);
1240 needreport_index =
trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1241 sweep_start = needreport_index + 1;
1242 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1244 for (i = ec->
num_trans-1; 0 <= i; i--) {
1253 if (result_position_ptr)
1254 *result_position_ptr =
i;
1258 if (result_position_ptr)
1259 *result_position_ptr = -1;
1265 const unsigned char **input_ptr,
const unsigned char *input_stop,
1266 unsigned char **output_ptr,
unsigned char *output_stop,
1270 int result_position;
1278 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1279 len = output_stop - *output_ptr;
1281 *output_ptr = output_stop;
1295 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1296 len = output_stop - *output_ptr;
1299 len = input_stop - *input_ptr;
1302 *(*output_ptr)++ = *(*input_ptr)++;
1306 memcpy(*output_ptr, *input_ptr, len);
1309 if (*input_ptr != input_stop)
1321 if (data_start != data_end) {
1323 if (output_stop - *output_ptr < data_end - data_start) {
1324 len = output_stop - *output_ptr;
1325 memcpy(*output_ptr, data_start, len);
1326 *output_ptr = output_stop;
1331 len = data_end - data_start;
1332 memcpy(*output_ptr, data_start, len);
1351 *input_ptr != input_stop) {
1352 input_stop = *input_ptr;
1353 res =
rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1357 else if ((flags & ECONV_AFTER_OUTPUT) ||
1359 res =
rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1364 res =
rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1391 unsigned char utfbuf[1024];
1392 const unsigned char *utf;
1394 int utf_allocated = 0;
1395 char charef_buf[16];
1396 const unsigned char *
p;
1405 utfbuf,
sizeof(utfbuf),
1413 if (utf_len % 4 != 0)
1417 while (4 <= utf_len) {
1423 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1445 const unsigned char **input_ptr,
const unsigned char *input_stop,
1446 unsigned char **output_ptr,
unsigned char *output_stop,
1451 unsigned char empty_buf;
1452 unsigned char *empty_ptr = &empty_buf;
1457 input_ptr = (
const unsigned char **)&empty_ptr;
1458 input_stop = empty_ptr;
1462 output_ptr = &empty_ptr;
1463 output_stop = empty_ptr;
1467 ret =
rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1516 static unsigned char *
1518 const unsigned char *
str,
size_t len,
1519 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1520 size_t *dst_len_ptr)
1522 unsigned char *dst_str;
1529 const unsigned char *sp;
1533 dst_bufsize = caller_dst_bufsize;
1543 dst_str = caller_dst_buf;
1545 dst_str =
xmalloc(dst_bufsize);
1548 dp = dst_str+dst_len;
1550 dst_len = dp - dst_str;
1556 if (dst_str == caller_dst_buf) {
1559 memcpy(tmp, dst_str, dst_bufsize/2);
1563 dst_str =
xrealloc(dst_str, dst_bufsize);
1565 dp = dst_str+dst_len;
1567 dst_len = dp - dst_str;
1573 *dst_len_ptr = dst_len;
1577 if (dst_str != caller_dst_buf)
1586 const unsigned char *
str,
size_t len,
const char *str_encoding)
1589 unsigned char insert_buf[4096];
1590 const unsigned char *insert_str =
NULL;
1593 int last_trans_index;
1596 unsigned char **buf_start_p;
1597 unsigned char **data_start_p;
1598 unsigned char **data_end_p;
1599 unsigned char **buf_end_p;
1614 str, len, insert_buf,
sizeof(insert_buf), &insert_len);
1615 if (insert_str ==
NULL)
1630 tc = ec->
elems[last_trans_index].
tc;
1632 if (need < insert_len)
1634 if (last_trans_index == 0) {
1654 tc = ec->
elems[last_trans_index].
tc;
1657 if (*buf_start_p ==
NULL) {
1660 *data_start_p =
buf;
1662 *buf_end_p = buf+need;
1664 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1665 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1666 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1667 *data_start_p = *buf_start_p;
1668 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1670 size_t s = (*data_end_p - *buf_start_p) + need;
1674 *data_start_p =
buf;
1675 *data_end_p = buf + (*data_end_p - *buf_start_p);
1677 *buf_end_p = buf +
s;
1681 memcpy(*data_end_p, insert_str, insert_len);
1682 *data_end_p += insert_len;
1689 if (insert_str != str && insert_str != insert_buf)
1690 xfree((
void*)insert_str);
1694 if (insert_str != str && insert_str != insert_buf)
1695 xfree((
void*)insert_str);
1744 #if SIZEOF_SIZE_T > SIZEOF_INT
1807 return data.ascii_compat_name;
1813 unsigned const char *sp, *se;
1814 unsigned char *ds, *
dp, *de;
1832 unsigned long new_capa = (
unsigned long)dlen + len + max_output;
1838 sp = (
const unsigned char *)ss;
1844 len -= (
const char *)sp - ss;
1845 ss = (
const char *)sp;
1943 const char *dname = 0;
1947 dname =
"universal_newline";
1950 dname =
"crlf_newline";
1953 dname =
"cr_newline";
1962 for (i=0; i < num_trans; i++) {
1979 int has_description = 0;
1984 if (*sname !=
'\0' || *dname !=
'\0') {
1987 else if (*dname ==
'\0')
1991 has_description = 1;
1998 const char *pre =
"";
1999 if (has_description)
2025 has_description = 1;
2027 if (!has_description) {
2064 else if (readagain_len) {
2065 bytes2 =
rb_str_new(err+error_len, readagain_len);
2101 const char *start, *
end;
2125 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2149 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2151 unsigned char **out_start_ptr,
2152 unsigned char **out_pos,
2153 unsigned char **out_stop_ptr)
2155 size_t len = (*out_pos - *out_start_ptr);
2156 size_t new_len = (len + max_output) * 2;
2157 *out_start_ptr = resize_destination(destination, len, new_len);
2158 *out_pos = *out_start_ptr +
len;
2159 *out_stop_ptr = *out_start_ptr + new_len;
2167 const unsigned char *replacement;
2168 const char *repl_enc;
2169 const char *ins_enc;
2184 replacement = (
unsigned char *)
"?";
2200 unsigned char *str2;
2202 const char *encname2;
2208 MEMCPY(str2, str,
unsigned char, len);
2244 #define hash_fallback rb_hash_aref
2266 const unsigned char *in_stop,
unsigned char *out_stop,
2268 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2269 const char *src_encoding,
2270 const char *dst_encoding,
2277 unsigned char *out_start = *out_pos;
2313 rep = (*fallback_func)(fallback, rep);
2318 if ((
int)ret == -1) {
2334 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2344 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2345 const unsigned char *in_stop,
unsigned char *out_stop,
2347 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2348 const char *src_encoding,
2349 const char *dst_encoding,
2356 unsigned char *out_start = *out_pos;
2357 const unsigned char *
ptr;
2371 unsigned char input_byte;
2372 const unsigned char *
p = &input_byte;
2375 if (ptr < in_stop) {
2386 if (&input_byte != p)
2387 ptr += p - &input_byte;
2398 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2419 static unsigned char *
2472 #ifdef ENABLE_ECONV_NEWLINE_OPTION
2499 int setflags = 0, newlineflag = 0;
2504 newlineflag |= !
NIL_P(v);
2509 newlineflag |= !
NIL_P(v);
2514 newlineflag |= !
NIL_P(v);
2518 ecflags |= setflags;
2531 if (
NIL_P(opthash)) {
2563 if (!
NIL_P(newhash))
2582 if (
NIL_P(opthash)) {
2587 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2591 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2595 if (!
NIL_P(replacement)) {
2641 const char *sname, *dname;
2642 int sencidx, dencidx;
2644 dencidx =
enc_arg(arg1, &dname, &denc);
2652 sencidx =
enc_arg(arg2, &sname, &senc);
2667 volatile VALUE arg1, arg2;
2669 unsigned char *
buf, *
bp, *sp;
2670 const unsigned char *fromp;
2672 const char *sname, *dname;
2674 int explicitly_invalid_replace =
TRUE;
2681 if (!ecflags)
return -1;
2685 explicitly_invalid_replace =
FALSE;
2692 arg2 = argc<=1 ?
Qnil : argv[1];
2699 if (senc && senc == denc) {
2702 if (!
NIL_P(ecopts)) {
2710 return NIL_P(arg2) ? -1 : dencidx;
2718 return NIL_P(arg2) ? -1 : dencidx;
2735 if (fromp != sp+slen) {
2808 if (encidx < 0)
return str;
2809 if (newstr == str) {
2889 int encidx =
str_transcode0(argc, argv, &newstr, ecflags, ecopts);
2897 if (newstr == str) {
2980 const char *arg_name, *result_name;
2983 enc_arg(&arg, &arg_name, &arg_enc);
2987 if (result_name ==
NULL)
2997 volatile VALUE *snamev_p,
volatile VALUE *dnamev_p,
2998 const char **sname_p,
const char **dname_p,
3005 const char *sname, *dname;
3009 argc =
rb_scan_args(argc, argv,
"21:", snamev_p, dnamev_p, &flags_v, &opt);
3011 if (!
NIL_P(flags_v)) {
3018 else if (!
NIL_P(opt)) {
3064 if (num_decorators == -1)
3080 rb_ary_store(convpath, len + num_decorators - 1, pair);
3084 rb_ary_store(convpath, len + num_decorators - 1, pair);
3088 for (i = 0; i < num_decorators; i++)
3100 if (*ary_p ==
Qnil) {
3141 volatile VALUE snamev, dnamev;
3142 const char *sname, *dname;
3148 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3153 if (
NIL_P(convpath))
3173 return RTEST(convpath);
3199 const char **sname_p,
const char **dname_p,
3207 const char *sname, *dname;
3213 volatile VALUE snamev, dnamev;
3220 enc_arg(&snamev, &sname, &senc);
3222 enc_arg(&dnamev, &dname, &denc);
3240 if (ret == -1 || arg.
ret == -1)
3375 volatile VALUE snamev, dnamev;
3376 const char *sname, *dname;
3392 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3438 return rb_sprintf(
"#<%s: uninitialized>", cname);
3688 const unsigned char *
ip, *is;
3689 unsigned char *op, *os;
3690 long output_byteoffset, output_bytesize;
3691 unsigned long output_byteend;
3694 argc =
rb_scan_args(argc, argv,
"23:", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v, &opt);
3696 if (
NIL_P(output_byteoffset_v))
3697 output_byteoffset = 0;
3699 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3701 if (
NIL_P(output_bytesize_v))
3702 output_bytesize = 0;
3704 output_bytesize =
NUM2LONG(output_bytesize_v);
3706 if (!
NIL_P(flags_v)) {
3712 else if (!
NIL_P(opt)) {
3731 if (
NIL_P(output_bytesize_v)) {
3739 if (
NIL_P(output_byteoffset_v))
3742 if (output_byteoffset < 0)
3748 if (output_bytesize < 0)
3751 output_byteend = (
unsigned long)output_byteoffset +
3752 (
unsigned long)output_bytesize;
3754 if (output_byteend < (
unsigned long)output_byteoffset ||
3769 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3770 os = op + output_bytesize;
3774 if (!
NIL_P(input)) {
3780 if (
LONG_MAX / 2 < output_bytesize)
3782 output_bytesize *= 2;
3783 output_byteoffset_v =
Qnil;
3861 rb_bug(
"unexpected result of econv_primitive_convert");
3905 rb_bug(
"unexpected result of econv_primitive_convert");
4047 const char *insert_enc;
4104 if (putbackable < n)
4427 #ifdef ENABLE_ECONV_NEWLINE_OPTION
const char * ascii_incompat_name
#define RB_TYPE_P(obj, type)
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
VALUE(* func_si)(void *, const unsigned char *, size_t)
search_path_queue_t * queue
int rb_enc_get_index(VALUE obj)
static VALUE econv_destination_encoding(VALUE self)
static VALUE sym_undefined_conversion
VALUE rb_eConverterNotFoundError
int(* state_fini_func)(void *)
VALUE rb_ary_entry(VALUE ary, long offset)
unsigned char * in_buf_end
const unsigned char * error_bytes_start
void rb_bug(const char *fmt,...)
rb_econv_result_t last_result
VALUE rb_require_safe(VALUE, int)
const char * dst_encoding
static VALUE sym_invalid_byte_sequence
size_t strlen(const char *)
struct search_path_queue_tag search_path_queue_t
#define DECORATOR_P(sname, dname)
const char * rb_obj_classname(VALUE)
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
#define ECONV_AFTER_OUTPUT
static int max(int a, int b)
int st_lookup(st_table *, st_data_t, st_data_t *)
void st_add_direct(st_table *, st_data_t, st_data_t)
VALUE rb_str_tmp_new(long)
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
static void transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, unsigned char *(*resize_destination)(VALUE, size_t, size_t), const char *src_encoding, const char *dst_encoding, int ecflags, VALUE ecopts)
VALUE rb_eInvalidByteSequenceError
size_t rb_econv_memsize(rb_econv_t *)
static void econv_args(int argc, VALUE *argv, volatile VALUE *snamev_p, volatile VALUE *dnamev_p, const char **sname_p, const char **dname_p, rb_encoding **senc_p, rb_encoding **denc_p, int *ecflags_p, VALUE *ecopts_p)
int(* state_init_func)(void *)
static void rb_transcoding_close(rb_transcoding *tc)
rb_encoding * source_encoding
unsigned char * out_data_start
static int decorate_convpath(VALUE convpath, int ecflags)
static VALUE sym_crlf_newline
#define MAX_ECFLAGS_DECORATORS
static size_t rb_transcoding_memsize(rb_transcoding *tc)
#define rb_check_frozen(obj)
unsigned char * in_data_start
static int str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2, const char **sname_p, rb_encoding **senc_p, const char **dname_p, rb_encoding **denc_p)
rb_encoding * rb_to_encoding(VALUE enc)
VALUE rb_str_new_cstr(const char *)
rb_encoding * destination_encoding
struct rb_transcoding * tc
#define SUSPEND(ret, num)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
static VALUE sym_cr_newline
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
static int str_transcode(int argc, VALUE *argv, VALUE *self)
#define ECONV_NEWLINE_DECORATOR_MASK
VALUE rb_ary_push(VALUE ary, VALUE item)
st_table * st_init_strcasetable(void)
static VALUE econv_last_error(VALUE self)
VALUE rb_hash_freeze(VALUE)
int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags)
struct rb_transcoding * error_tc
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
static rb_econv_t * rb_econv_alloc(int n_hint)
static rb_econv_t * rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
#define MEMMOVE(p1, p2, type, n)
void rb_raise(VALUE exc, const char *fmt,...)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
unsigned int conv_tree_start
VALUE rb_obj_class(VALUE)
static void rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
const rb_transcoder * transcoder
static int output_replacement_character(rb_econv_t *ec)
static rb_econv_result_t rb_trans_conv(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags, int *result_position_ptr)
void callback(ffi_cif *cif, void *resp, void **args, void *ctx)
static rb_econv_result_t transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, rb_transcoding *tc, const int opt)
static VALUE econv_finish(VALUE self)
rb_encoding * rb_utf8_encoding(void)
static VALUE econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
static transcoder_entry_t * make_transcoder_entry(const char *sname, const char *dname)
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
static const rb_transcoder * load_transcoder_entry(transcoder_entry_t *entry)
#define ENC_CODERANGE_BROKEN
static int transcode_search_path(const char *sname, const char *dname, void(*callback)(const char *sname, const char *dname, int depth, void *arg), void *arg)
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
VALUE rb_str_new_frozen(VALUE)
unsigned char * in_buf_start
static rb_econv_t * rb_econv_open0(const char *sname, const char *dname, int ecflags)
static void econv_free(void *ptr)
static VALUE sym_source_buffer_empty
ssize_t(* func_so)(void *, const unsigned char *, size_t, unsigned char *, size_t)
VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags)
static VALUE econv_convert(VALUE self, VALUE source_string)
static VALUE sym_partial_input
static const char transcoder_lib_prefix[]
static rb_econv_t * rb_econv_init_by_convpath(VALUE self, VALUE convpath, const char **sname_p, const char **dname_p, rb_encoding **senc_p, rb_encoding **denc_p)
int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts)
void rb_exc_raise(VALUE mesg)
#define ECONV_ERROR_HANDLER_MASK
static unsigned char * output
static const char * get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
static VALUE str_encode_associate(VALUE str, int encidx)
struct rb_econv_t::@180 last_error
static VALUE ecerr_incomplete_input(VALUE self)
static unsigned char * str_transcoding_resize(VALUE destination, size_t len, size_t new_len)
double dummy_for_alignment
int rb_to_encoding_index(VALUE enc)
static VALUE econv_primitive_errinfo(VALUE self)
ssize_t(* finish_func)(void *, unsigned char *, size_t)
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
unsigned int output_index
#define TRANSCODING_READBUF(tc)
static size_t econv_memsize(const void *ptr)
#define TypedData_Get_Struct(obj, type, data_type, sval)
void Init_transcode(void)
unsigned char * in_data_end
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
size_t rb_str_capacity(VALUE)
static VALUE str_encode_bang(int argc, VALUE *argv, VALUE str)
static VALUE str_encode(int argc, VALUE *argv, VALUE str)
const char * destination_encoding
static int rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
#define SUSPEND_AFTER_OUTPUT(num)
static void Tcl_Interp * ip
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
#define RARRAY_AREF(a, i)
#define StringValueCStr(v)
void rb_econv_binmode(rb_econv_t *ec)
static VALUE econv_get_replacement(VALUE self)
void rb_define_const(VALUE, const char *, VALUE)
VALUE rb_str_cat2(VALUE, const char *)
VALUE rb_method_call(int, VALUE *, VALUE)
static void more_output_buffer(VALUE destination, unsigned char *(*resize_destination)(VALUE, size_t, size_t), int max_output, unsigned char **out_start_ptr, unsigned char **out_pos, unsigned char **out_stop_ptr)
void rb_str_shared_replace(VALUE, VALUE)
void rb_ary_store(VALUE ary, long idx, VALUE val)
static VALUE econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
static st_table * transcoder_table
#define MBCLEN_CHARFOUND_P(ret)
static VALUE make_econv_exception(rb_econv_t *ec)
VALUE rb_cEncodingConverter
static const rb_data_type_t econv_data_type
ssize_t(* func_sio)(void *, const unsigned char *, size_t, VALUE, unsigned char *, size_t)
rb_econv_t * rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags)
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
static VALUE econv_set_replacement(VALUE self, VALUE arg)
#define TypedData_Wrap_Struct(klass, data_type, sval)
VALUE rb_check_hash_type(VALUE)
#define TRANSCODING_STATE(tc)
unsigned char buf[MIME_BUF_SIZE]
static VALUE sym_fallback
char ary[sizeof(double) > sizeof(void *)?sizeof(double):sizeof(void *)]
VALUE rb_enc_associate_index(VALUE obj, int idx)
static VALUE method_fallback(VALUE fallback, VALUE c)
rb_transcoder_asciicompat_type_t asciicompat_type
#define ALLOCA_N(type, n)
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
#define rb_enc_mbc_to_codepoint(p, e, enc)
static VALUE econv_equal(VALUE self, VALUE other)
#define ENC_CODERANGE_SET(obj, cr)
#define ECONV_XML_ATTR_CONTENT_DECORATOR
union rb_transcoding::@178 readbuf
#define SUSPEND_OBUF(num)
static int str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
void rb_register_transcoder(const rb_transcoder *tr)
unsigned char * out_buf_start
static int transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
static VALUE econv_putback(int argc, VALUE *argv, VALUE self)
VALUE rb_str_resize(VALUE, long)
static void search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
static rb_econv_t * check_econv(VALUE self)
VALUE rb_str_scrub(VALUE, VALUE)
static VALUE econv_s_allocate(VALUE klass)
search_path_queue_t ** queue_last_ptr
VALUE rb_sprintf(const char *format,...)
void rb_econv_close(rb_econv_t *ec)
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
static VALUE econv_insert_output(VALUE self, VALUE string)
struct rb_econv_t rb_econv_t
#define RUBY_TYPED_FREE_IMMEDIATELY
static VALUE ecerr_destination_encoding(VALUE self)
unsigned char * out_buf_end
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
static int decorator_names(int ecflags, const char **decorators_ret)
struct rb_transcoding * last_tc
#define STR1_BYTEINDEX(w)
static VALUE aref_fallback(VALUE fallback, VALUE c)
static VALUE make_encobj(const char *name)
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
VALUE rb_assoc_new(VALUE car, VALUE cdr)
const char * source_encoding
int rb_define_dummy_encoding(const char *name)
static VALUE econv_init(int argc, VALUE *argv, VALUE self)
VALUE rb_str_buf_new(long)
static VALUE sym_universal_newline
union rb_transcoding::rb_transcoding_state_t state
rb_hash_aset(hash, RARRAY_AREF(key_value_pair, 0), RARRAY_AREF(key_value_pair, 1))
const char * src_encoding
VALUE rb_obj_encoding(VALUE obj)
VALUE rb_attr_get(VALUE, ID)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
static void trans_open_i(const char *sname, const char *dname, int depth, void *arg)
static VALUE sym_universal
union rb_transcoding::@179 writebuf
int memcmp(const void *s1, const void *s2, size_t len)
static VALUE ecerr_error_char(VALUE self)
const char * ascii_compat_name
static rb_encoding * make_encoding(const char *name)
VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags)
VALUE rb_obj_is_method(VALUE)
RUBY_EXTERN VALUE rb_cString
static VALUE econv_source_encoding(VALUE self)
static VALUE proc_fallback(VALUE fallback, VALUE c)
static VALUE sym_finished
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
#define RSTRING_EMBED_LEN_MAX
VALUE rb_obj_is_proc(VALUE)
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
#define MEMCPY(p1, p2, type, n)
static int make_replacement(rb_econv_t *ec)
#define ENC_CODERANGE_VALID
#define ECONV_XML_ATTR_QUOTE_DECORATOR
static rb_econv_result_t transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, rb_transcoding *tc, const int opt)
void rb_econv_check_error(rb_econv_t *ec)
void rb_str_modify(VALUE)
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
#define ECONV_INVALID_REPLACE
static VALUE sym_after_output
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
static VALUE econv_inspect(VALUE self)
static rb_transcoding * rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
unsigned char * out_data_end
static rb_econv_result_t rb_econv_convert0(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
VALUE rb_proc_call(VALUE, VALUE)
static int asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
#define TRANSCODING_WRITEBUF(tc)
static const unsigned char * transcode_char_start(rb_transcoding *tc, const unsigned char *in_start, const unsigned char *inchar_start, const unsigned char *in_p, size_t *char_len_ptr)
VALUE rb_check_array_type(VALUE ary)
void rb_error_arity(int argc, int min, int max)
static VALUE ecerr_error_bytes(VALUE self)
static rb_econv_result_t rb_transcoding_convert(rb_transcoding *tc, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
VALUE rb_str_catf(VALUE str, const char *format,...)
#define ECONV_PARTIAL_INPUT
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags)
#define RARRAY_LENINT(ary)
VALUE rb_econv_make_exception(rb_econv_t *ec)
static void declare_transcoder(const char *sname, const char *dname, const char *lib)
static int rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
static int rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
static VALUE econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
int rb_respond_to(VALUE, ID)
#define RBASIC_SET_CLASS(obj, cls)
const char * rb_econv_asciicompat_encoding(const char *encname)
const char * destination_encoding_name
static VALUE econv_convpath(VALUE self)
static int trans_sweep(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags, int start)
rb_ivar_set(yielder, id_memo, LONG2NUM(++count))
VALUE rb_enc_default_internal(void)
#define ECONV_UNDEF_HEX_CHARREF
VALUE rb_str_new(const char *, long)
static VALUE ecerr_source_encoding(VALUE self)
static int output_hex_charref(rb_econv_t *ec)
VALUE rb_str_drop_bytes(VALUE, long)
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
static VALUE ecerr_readagain_bytes(VALUE self)
#define rb_enc_asciicompat(enc)
VALUE rb_eUndefinedConversionError
const char * rb_id2name(ID id)
const char * replacement_enc
const char * source_encoding_name
int replacement_allocated
VALUE rb_hash_aref(VALUE, VALUE)
struct search_path_queue_tag * next
int rb_enc_find_index(const char *name)
static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx)
static int econv_opts(VALUE opt, int ecflags)
static VALUE sym_destination_buffer_full
static unsigned char * allocate_converted_string(const char *sname, const char *dname, const unsigned char *str, size_t len, unsigned char *caller_dst_buf, size_t caller_dst_bufsize, size_t *dst_len_ptr)
#define ECONV_CRLF_NEWLINE_DECORATOR
const rb_transcoder * transcoder
static transcoder_entry_t * get_transcoder_entry(const char *sname, const char *dname)
#define OBJ_INFECT_RAW(x, s)
RUBY_EXTERN VALUE rb_cData
int rb_econv_putbackable(rb_econv_t *ec)
struct rb_transcoding rb_transcoding
#define ECONV_UNDEF_REPLACE
#define REALLOC_N(var, type, n)
int rb_enc_str_coderange(VALUE)
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
const unsigned char * replacement_str
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
#define STR1_LENGTH(byte_addr)
VALUE(* func_ii)(void *, VALUE)
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
#define encoding_equal(enc1, enc2)
#define TRANSCODING_WRITEBUF_SIZE(tc)
static rb_encoding * make_dummy_encoding(const char *name)
#define ECONV_XML_TEXT_DECORATOR
rb_encoding * rb_enc_find(const char *name)
transcoder_entry_t ** entries
void st_free_table(st_table *)
static VALUE econv_result_to_symbol(rb_econv_result_t res)
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
#define ECONV_CR_NEWLINE_DECORATOR
static VALUE ecerr_source_encoding_name(VALUE self)
rb_encoding * rb_enc_from_index(int index)
static VALUE econv_primitive_convert(int argc, VALUE *argv, VALUE self)
ssize_t(* func_io)(void *, VALUE, const unsigned char *, size_t)
#define ECONV_INVALID_MASK
static VALUE ecerr_destination_encoding_name(VALUE self)
static VALUE sym_incomplete_input
void rb_str_set_len(VALUE, long)