16 #define ENABLE_ECONV_NEWLINE_OPTION 1 25 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback, sym_aref;
26 static VALUE sym_xml, sym_text, sym_attr;
27 static VALUE sym_universal_newline;
28 static VALUE sym_crlf_newline;
29 static VALUE sym_cr_newline;
30 #ifdef ENABLE_ECONV_NEWLINE_OPTION 31 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
33 static VALUE sym_partial_input;
35 static VALUE sym_invalid_byte_sequence;
36 static VALUE sym_undefined_conversion;
37 static VALUE sym_destination_buffer_full;
38 static VALUE sym_source_buffer_empty;
39 static VALUE sym_finished;
40 static VALUE sym_after_output;
41 static VALUE sym_incomplete_input;
43 static unsigned char *
44 allocate_converted_string(
const char *sname,
const char *dname,
45 const unsigned char *str,
size_t len,
46 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
78 char ary[
sizeof(double) >
sizeof(
void*) ?
sizeof(double) :
sizeof(
void*)];
82 #define TRANSCODING_READBUF(tc) \ 83 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \ 86 #define TRANSCODING_WRITEBUF(tc) \ 87 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \ 88 (tc)->writebuf.ary : \ 90 #define TRANSCODING_WRITEBUF_SIZE(tc) \ 91 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \ 92 sizeof((tc)->writebuf.ary) : \ 93 (size_t)(tc)->transcoder->max_output) 94 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t)) 95 #define TRANSCODING_STATE(tc) \ 96 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \ 152 #define DECORATOR_P(sname, dname) (*(sname) == '\0') 164 make_transcoder_entry(
const char *sname,
const char *dname)
176 entry->
sname = sname;
177 entry->
dname = dname;
187 get_transcoder_entry(
const char *sname,
const char *dname)
210 entry = make_transcoder_entry(sname, dname);
220 declare_transcoder(
const char *sname,
const char *dname,
const char *lib)
224 entry = make_transcoder_entry(sname, dname);
228 static const char transcoder_lib_prefix[] =
"enc/trans/";
236 declare_transcoder(enc1, enc2, lib);
239 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0) 256 const char *dname = (
const char *)key;
275 transcode_search_path(
const char *sname,
const char *dname,
276 void (*callback)(
const char *sname,
const char *dname,
int depth,
void *arg),
333 const char *enc = dname;
341 enc = (
const char *)val;
349 callback((
const char *)val, enc, --depth, arg);
350 enc = (
const char *)val;
366 const char *
const lib = entry->
lib;
368 const size_t total_len =
sizeof(transcoder_lib_prefix) - 1 + len;
373 memcpy(path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
374 memcpy(path +
sizeof(transcoder_lib_prefix) - 1, lib, len);
388 get_replacement_character(
const char *encname,
size_t *len_ret,
const char **repl_encname_ptr)
392 *repl_encname_ptr =
"UTF-8";
393 return "\xEF\xBF\xBD";
397 *repl_encname_ptr =
"US-ASCII";
406 static const unsigned char *
408 const unsigned char *in_start,
409 const unsigned char *inchar_start,
410 const unsigned char *in_p,
411 size_t *char_len_ptr)
413 const unsigned char *
ptr;
414 if (inchar_start - in_start < tc->recognized_len) {
416 inchar_start,
unsigned char, in_p - inchar_start);
427 transcode_restartable0(
const unsigned char **in_pos,
unsigned char **out_pos,
428 const unsigned char *in_stop,
unsigned char *out_stop,
434 ssize_t readagain_len = 0;
436 const unsigned char *inchar_start;
437 const unsigned char *in_p;
439 unsigned char *out_p;
441 in_p = inchar_start = *in_pos;
445 #define SUSPEND(ret, num) \ 447 tc->resume_position = (num); \ 448 if (0 < in_p - inchar_start) \ 449 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \ 450 inchar_start, unsigned char, in_p - inchar_start); \ 453 tc->recognized_len += in_p - inchar_start; \ 454 if (readagain_len) { \ 455 tc->recognized_len -= readagain_len; \ 456 tc->readagain_len = readagain_len; \ 459 resume_label ## num:; \ 461 #define SUSPEND_OBUF(num) \ 463 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \ 466 #define SUSPEND_AFTER_OUTPUT(num) \ 467 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \ 468 SUSPEND(econv_after_output, num); \ 471 #define next_table (tc->next_table) 472 #define next_info (tc->next_info) 473 #define next_byte (tc->next_byte) 474 #define writebuf_len (tc->writebuf_len) 475 #define writebuf_off (tc->writebuf_off) 479 case 1:
goto resume_label1;
480 case 2:
goto resume_label2;
481 case 3:
goto resume_label3;
482 case 4:
goto resume_label4;
483 case 5:
goto resume_label5;
484 case 6:
goto resume_label6;
485 case 7:
goto resume_label7;
486 case 8:
goto resume_label8;
487 case 9:
goto resume_label9;
488 case 10:
goto resume_label10;
489 case 11:
goto resume_label11;
490 case 12:
goto resume_label12;
491 case 13:
goto resume_label13;
492 case 14:
goto resume_label14;
493 case 15:
goto resume_label15;
494 case 16:
goto resume_label16;
495 case 17:
goto resume_label17;
496 case 18:
goto resume_label18;
497 case 19:
goto resume_label19;
498 case 20:
goto resume_label20;
499 case 21:
goto resume_label21;
500 case 22:
goto resume_label22;
501 case 23:
goto resume_label23;
502 case 24:
goto resume_label24;
503 case 25:
goto resume_label25;
504 case 26:
goto resume_label26;
505 case 27:
goto resume_label27;
506 case 28:
goto resume_label28;
507 case 29:
goto resume_label29;
508 case 30:
goto resume_label30;
509 case 31:
goto resume_label31;
510 case 32:
goto resume_label32;
511 case 33:
goto resume_label33;
512 case 34:
goto resume_label34;
522 if (in_stop <= in_p) {
529 #define BYTE_ADDR(index) (tr->byte_array + (index)) 530 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index)) 531 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table))) 532 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table))) 533 #define BL_MIN_BYTE (BL_BASE[0]) 534 #define BL_MAX_BYTE (BL_BASE[1]) 535 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE]) 536 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))]) 538 next_byte = (
unsigned char)*in_p++;
546 switch (next_info & 0x1F) {
549 const unsigned char *p = inchar_start;
556 while (writebuf_off < writebuf_len) {
562 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
563 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
565 while (in_p >= in_stop) {
570 next_byte = (
unsigned char)*in_p++;
571 next_table = (
unsigned int)next_info;
611 const unsigned char *char_start;
613 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
621 next_info, out_p, out_stop - out_p);
627 while (writebuf_off < writebuf_len) {
635 const unsigned char *char_start;
639 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
641 char_start, (
size_t)char_len,
642 out_p, out_stop - out_p);
645 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
647 char_start, (
size_t)char_len,
650 while (writebuf_off < writebuf_len) {
659 const unsigned char *char_start;
663 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
665 char_start, (
size_t)char_len, next_info,
666 out_p, out_stop - out_p);
669 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
671 char_start, (
size_t)char_len, next_info,
674 while (writebuf_off < writebuf_len) {
700 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
701 readagain_len = invalid_len - discard_len;
729 out_p, out_stop - out_p);
735 while (writebuf_off < writebuf_len) {
752 transcode_restartable(
const unsigned char **in_pos,
unsigned char **out_pos,
753 const unsigned char *in_stop,
unsigned char *out_stop,
759 const unsigned char *readagain_pos = readagain_buf;
760 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
766 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|
ECONV_PARTIAL_INPUT);
769 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
774 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
785 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
806 const unsigned char **input_ptr,
const unsigned char *input_stop,
807 unsigned char **output_ptr,
unsigned char *output_stop,
810 return transcode_restartable(
811 input_ptr, output_ptr,
812 input_stop, output_stop,
823 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
838 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
851 rb_econv_alloc(
int n_hint)
905 ec->
elems[i].
tc = rb_transcoding_open_by_transcoder(tr, 0);
933 for (i = 0; i < n; i++) {
935 tr = load_transcoder_entry(entries[i]);
940 ec = rb_econv_alloc(n);
942 for (i = 0; i < n; i++) {
944 ret = rb_econv_add_transcoder_at(ec, tr, ec->
num_trans);
960 trans_open_i(
const char *sname,
const char *dname,
int depth,
void *arg)
967 toarg->
entries[depth] = get_transcoder_entry(sname, dname);
971 rb_econv_open0(
const char *sname,
const char *dname,
int ecflags)
993 if (*sname ==
'\0' && *dname ==
'\0') {
1002 num_trans = transcode_search_path(sname, dname, trans_open_i, (
void *)&toarg);
1003 entries = toarg.entries;
1004 if (num_trans < 0) {
1010 ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
1015 ec->
flags = ecflags;
1022 #define MAX_ECFLAGS_DECORATORS 32 1025 decorator_names(
int ecflags,
const char **decorators_ret)
1045 if (ecflags & ECONV_XML_TEXT_DECORATOR)
1046 decorators_ret[num_decorators++] =
"xml_text_escape";
1047 if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
1048 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1050 decorators_ret[num_decorators++] =
"xml_attr_quote";
1053 decorators_ret[num_decorators++] =
"crlf_newline";
1055 decorators_ret[num_decorators++] =
"cr_newline";
1057 decorators_ret[num_decorators++] =
"universal_newline";
1059 return num_decorators;
1070 num_decorators = decorator_names(ecflags, decorators);
1071 if (num_decorators == -1)
1078 for (i = 0; i < num_decorators; i++)
1084 ec->
flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1091 const unsigned char **input_ptr,
const unsigned char *input_stop,
1092 unsigned char **output_ptr,
unsigned char *output_stop,
1099 const unsigned char **ipp, *is, *iold;
1100 unsigned char **opp, *os, *oold;
1106 for (i = start; i < ec->
num_trans; i++) {
1140 flags &= ~ECONV_AFTER_OUTPUT;
1143 f &= ~ECONV_AFTER_OUTPUT;
1146 te->
last_result = res = rb_transcoding_convert(te->
tc, ipp, is, opp, os, f);
1147 if (iold != *ipp || oold != *opp)
1172 const unsigned char **input_ptr,
const unsigned char *input_stop,
1173 unsigned char **output_ptr,
unsigned char *output_stop,
1175 int *result_position_ptr)
1178 int needreport_index;
1181 unsigned char empty_buf;
1182 unsigned char *empty_ptr = &empty_buf;
1185 input_ptr = (
const unsigned char **)&empty_ptr;
1186 input_stop = empty_ptr;
1190 output_ptr = &empty_ptr;
1191 output_stop = empty_ptr;
1197 needreport_index = -1;
1198 for (i = ec->
num_trans-1; 0 <= i; i--) {
1206 needreport_index = i;
1207 goto found_needreport;
1214 rb_bug(
"unexpected transcode last result");
1224 res = rb_trans_conv(ec,
NULL,
NULL, output_ptr, output_stop,
1226 result_position_ptr);
1238 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1239 sweep_start = needreport_index + 1;
1240 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1242 for (i = ec->
num_trans-1; 0 <= i; i--) {
1251 if (result_position_ptr)
1252 *result_position_ptr = i;
1256 if (result_position_ptr)
1257 *result_position_ptr = -1;
1263 const unsigned char **input_ptr,
const unsigned char *input_stop,
1264 unsigned char **output_ptr,
unsigned char *output_stop,
1268 int result_position;
1276 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1277 len = output_stop - *output_ptr;
1279 *output_ptr = output_stop;
1293 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1294 len = output_stop - *output_ptr;
1297 len = input_stop - *input_ptr;
1300 *(*output_ptr)++ = *(*input_ptr)++;
1304 memcpy(*output_ptr, *input_ptr, len);
1307 if (*input_ptr != input_stop)
1319 if (data_start != data_end) {
1321 if (output_stop - *output_ptr < data_end - data_start) {
1322 len = output_stop - *output_ptr;
1323 memcpy(*output_ptr, data_start, len);
1324 *output_ptr = output_stop;
1329 len = data_end - data_start;
1330 memcpy(*output_ptr, data_start, len);
1349 *input_ptr != input_stop) {
1350 input_stop = *input_ptr;
1351 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1355 else if ((flags & ECONV_AFTER_OUTPUT) ||
1357 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1362 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1383 static int output_replacement_character(
rb_econv_t *ec);
1389 unsigned char utfbuf[1024];
1390 const unsigned char *utf;
1392 int utf_allocated = 0;
1393 char charef_buf[16];
1394 const unsigned char *p;
1403 utfbuf,
sizeof(utfbuf),
1411 if (utf_len % 4 != 0)
1415 while (4 <= utf_len) {
1421 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1443 const unsigned char **input_ptr,
const unsigned char *input_stop,
1444 unsigned char **output_ptr,
unsigned char *output_stop,
1449 unsigned char empty_buf;
1450 unsigned char *empty_ptr = &empty_buf;
1455 input_ptr = (
const unsigned char **)&empty_ptr;
1456 input_stop = empty_ptr;
1460 output_ptr = &empty_ptr;
1461 output_stop = empty_ptr;
1465 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1473 if (output_replacement_character(ec) == 0)
1484 if (output_replacement_character(ec) == 0)
1489 if (output_hex_charref(ec) == 0)
1514 static unsigned char *
1515 allocate_converted_string(
const char *sname,
const char *dname,
1516 const unsigned char *str,
size_t len,
1517 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1518 size_t *dst_len_ptr)
1520 unsigned char *dst_str;
1527 const unsigned char *sp;
1531 dst_bufsize = caller_dst_bufsize;
1541 dst_str = caller_dst_buf;
1543 dst_str =
xmalloc(dst_bufsize);
1546 dp = dst_str+dst_len;
1548 dst_len = dp - dst_str;
1554 if (dst_str == caller_dst_buf) {
1557 memcpy(tmp, dst_str, dst_bufsize/2);
1561 dst_str =
xrealloc(dst_str, dst_bufsize);
1563 dp = dst_str+dst_len;
1565 dst_len = dp - dst_str;
1571 *dst_len_ptr = dst_len;
1575 if (dst_str != caller_dst_buf)
1584 const unsigned char *str,
size_t len,
const char *str_encoding)
1587 unsigned char insert_buf[4096];
1588 const unsigned char *insert_str =
NULL;
1591 int last_trans_index;
1594 unsigned char **buf_start_p;
1595 unsigned char **data_start_p;
1596 unsigned char **data_end_p;
1597 unsigned char **buf_end_p;
1611 insert_str = allocate_converted_string(str_encoding, insert_encoding,
1612 str, len, insert_buf,
sizeof(insert_buf), &insert_len);
1613 if (insert_str ==
NULL)
1628 tc = ec->
elems[last_trans_index].
tc;
1630 if (need < insert_len)
1632 if (last_trans_index == 0) {
1652 tc = ec->
elems[last_trans_index].
tc;
1655 if (*buf_start_p ==
NULL) {
1658 *data_start_p =
buf;
1660 *buf_end_p = buf+need;
1662 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1663 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1664 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1665 *data_start_p = *buf_start_p;
1666 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1668 size_t s = (*data_end_p - *buf_start_p) + need;
1672 *data_start_p =
buf;
1673 *data_end_p = buf + (*data_end_p - *buf_start_p);
1675 *buf_end_p = buf + s;
1679 memcpy(*data_end_p, insert_str, insert_len);
1680 *data_end_p += insert_len;
1687 if (insert_str != str && insert_str != insert_buf)
1688 xfree((
void*)insert_str);
1692 if (insert_str != str && insert_str != insert_buf)
1693 xfree((
void*)insert_str);
1706 rb_transcoding_close(ec->
elems[i].
tc);
1725 size += rb_transcoding_memsize(ec->
elems[i].
tc);
1742 #if SIZEOF_SIZE_T > SIZEOF_INT 1773 tr = load_transcoder_entry(entry);
1805 return data.ascii_compat_name;
1811 unsigned const char *sp, *se;
1812 unsigned char *ds, *
dp, *de;
1830 unsigned long new_capa = (
unsigned long)dlen + len + max_output;
1836 sp = (
const unsigned char *)ss;
1842 len -= (
const char *)sp - ss;
1843 ss = (
const char *)sp;
1880 rb_econv_add_converter(
rb_econv_t *ec,
const char *sname,
const char *dname,
int n)
1888 entry = get_transcoder_entry(sname, dname);
1892 tr = load_transcoder_entry(entry);
1895 return rb_econv_add_transcoder_at(ec, tr, n);
1899 rb_econv_decorate_at(
rb_econv_t *ec,
const char *decorator_name,
int n)
1901 return rb_econv_add_converter(ec,
"", decorator_name, n);
1910 return rb_econv_decorate_at(ec, decorator_name, 0);
1916 return rb_econv_decorate_at(ec, decorator_name, 1);
1918 return rb_econv_decorate_at(ec, decorator_name, 0);
1927 return rb_econv_decorate_at(ec, decorator_name, 0);
1933 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans-1);
1935 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans);
1941 const char *dname = 0;
1945 dname =
"universal_newline";
1948 dname =
"crlf_newline";
1951 dname =
"cr_newline";
1960 for (i=0; i < num_trans; i++) {
1962 rb_transcoding_close(ec->
elems[i].
tc);
1975 econv_description(
const char *sname,
const char *dname,
int ecflags,
VALUE mesg)
1977 int has_description = 0;
1982 if (*sname !=
'\0' || *dname !=
'\0') {
1985 else if (*dname ==
'\0')
1989 has_description = 1;
1996 const char *pre =
"";
1997 if (has_description)
2023 has_description = 1;
2025 if (!has_description) {
2037 econv_description(sname, dname, ecflags, mesg);
2062 else if (readagain_len) {
2063 bytes2 =
rb_str_new(err+error_len, readagain_len);
2099 const char *start, *end;
2123 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2147 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2149 unsigned char **out_start_ptr,
2150 unsigned char **out_pos,
2151 unsigned char **out_stop_ptr)
2153 size_t len = (*out_pos - *out_start_ptr);
2154 size_t new_len = (len + max_output) * 2;
2155 *out_start_ptr = resize_destination(destination, len, new_len);
2156 *out_pos = *out_start_ptr +
len;
2157 *out_stop_ptr = *out_start_ptr + new_len;
2165 const unsigned char *replacement;
2166 const char *repl_enc;
2167 const char *ins_enc;
2179 replacement = (
const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
2182 replacement = (
unsigned char *)
"?";
2196 const unsigned char *str,
size_t len,
const char *encname)
2198 unsigned char *str2;
2200 const char *encname2;
2206 MEMCPY(str2, str,
unsigned char, len);
2211 str2 = allocate_converted_string(encname, encname2, str, len,
NULL, 0, &len2);
2231 if (make_replacement(ec) == -1)
2242 #define hash_fallback rb_hash_aref 2263 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2264 const unsigned char *in_stop,
unsigned char *out_stop,
2266 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2267 const char *src_encoding,
2268 const char *dst_encoding,
2275 unsigned char *out_start = *out_pos;
2291 fallback_func = proc_fallback;
2294 fallback_func = method_fallback;
2297 fallback_func = aref_fallback;
2311 rep = (*fallback_func)(fallback, rep);
2316 if ((
int)ret == -1) {
2326 exc = make_econv_exception(ec);
2332 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2342 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2343 const unsigned char *in_stop,
unsigned char *out_stop,
2345 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2346 const char *src_encoding,
2347 const char *dst_encoding,
2354 unsigned char *out_start = *out_pos;
2355 const unsigned char *
ptr;
2369 unsigned char input_byte;
2370 const unsigned char *p = &input_byte;
2373 if (ptr < in_stop) {
2384 if (&input_byte != p)
2385 ptr += p - &input_byte;
2390 exc = make_econv_exception(ec);
2396 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2417 static unsigned char *
2418 str_transcoding_resize(
VALUE destination,
size_t len,
size_t new_len)
2425 econv_opts(
VALUE opt,
int ecflags)
2432 else if (v==sym_replace) {
2442 else if (v==sym_replace) {
2459 else if (v==sym_attr) {
2470 #ifdef ENABLE_ECONV_NEWLINE_OPTION 2474 if (v == sym_universal) {
2477 else if (v == sym_crlf) {
2480 else if (v == sym_cr) {
2483 else if (v == sym_lf) {
2497 int setflags = 0, newlineflag = 0;
2502 newlineflag |= !
NIL_P(v);
2507 newlineflag |= !
NIL_P(v);
2512 newlineflag |= !
NIL_P(v);
2516 ecflags |= setflags;
2529 if (
NIL_P(opthash)) {
2533 ecflags = econv_opts(opthash, ecflags);
2561 if (!
NIL_P(newhash))
2580 if (
NIL_P(opthash)) {
2585 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2589 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2593 if (!
NIL_P(replacement)) {
2639 const char *sname, *dname;
2640 int sencidx, dencidx;
2642 dencidx = enc_arg(arg1, &dname, &denc);
2650 sencidx = enc_arg(arg2, &sname, &senc);
2667 unsigned char *
buf, *
bp, *sp;
2668 const unsigned char *fromp;
2670 const char *sname, *dname;
2672 int explicitly_invalid_replace =
TRUE;
2679 if (!ecflags)
return -1;
2683 explicitly_invalid_replace =
FALSE;
2690 arg2 = argc<=1 ?
Qnil : argv[1];
2691 dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2697 if (senc && senc == denc) {
2700 if (!
NIL_P(ecopts)) {
2704 if (
NIL_P(dest)) dest = str;
2708 return NIL_P(arg2) ? -1 : dencidx;
2716 return NIL_P(arg2) ? -1 : dencidx;
2732 transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2733 if (fromp != sp+slen) {
2752 str_transcode(
int argc,
VALUE *argv,
VALUE *
self)
2762 return str_transcode0(argc, argv,
self, ecflags, ecopts);
2766 str_encode_associate(
VALUE str,
int encidx)
2798 str_encode_bang(
int argc,
VALUE *argv,
VALUE str)
2806 encidx = str_transcode(argc, argv, &newstr);
2808 if (encidx < 0)
return str;
2809 if (newstr == str) {
2814 return str_encode_associate(str, encidx);
2879 int encidx = str_transcode(argc, argv, &newstr);
2880 return encoded_dup(newstr, str, encidx);
2889 int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts);
2890 return encoded_dup(newstr, str, encidx);
2894 encoded_dup(
VALUE newstr,
VALUE str,
int encidx)
2897 if (newstr == str) {
2905 return str_encode_associate(newstr, encidx);
2909 econv_free(
void *
ptr)
2916 econv_memsize(
const void *ptr)
2923 {
NULL, econv_free, econv_memsize,},
2928 econv_s_allocate(
VALUE klass)
2934 make_dummy_encoding(
const char *
name)
2944 make_encoding(
const char *name)
2949 enc = make_dummy_encoding(name);
2954 make_encobj(
const char *name)
2978 econv_s_asciicompat_encoding(
VALUE klass,
VALUE arg)
2980 const char *arg_name, *result_name;
2983 enc_arg(&arg, &arg_name, &arg_enc);
2987 if (result_name ==
NULL)
2990 result_enc = make_encoding(result_name);
2996 econv_args(
int argc,
VALUE *argv,
2998 const char **sname_p,
const char **dname_p,
3003 VALUE opt, flags_v, ecopts;
3005 const char *sname, *dname;
3009 argc =
rb_scan_args(argc, argv,
"21:", snamev_p, dnamev_p, &flags_v, &opt);
3011 if (!
NIL_P(flags_v)) {
3018 else if (!
NIL_P(opt)) {
3051 *ecflags_p = ecflags;
3056 decorate_convpath(
VALUE convpath,
int ecflags)
3063 num_decorators = decorator_names(ecflags, decorators);
3064 if (num_decorators == -1)
3080 rb_ary_store(convpath, len + num_decorators - 1, pair);
3084 rb_ary_store(convpath, len + num_decorators - 1, pair);
3088 for (i = 0; i < num_decorators; i++)
3095 search_convpath_i(
const char *sname,
const char *dname,
int depth,
void *arg)
3100 if (*ary_p ==
Qnil) {
3108 v =
rb_assoc_new(make_encobj(sname), make_encobj(dname));
3139 econv_s_search_convpath(
int argc,
VALUE *argv,
VALUE klass)
3141 VALUE snamev, dnamev;
3142 const char *sname, *dname;
3148 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3151 transcode_search_path(sname, dname, search_convpath_i, &convpath);
3153 if (
NIL_P(convpath))
3156 if (decorate_convpath(convpath, ecflags) == -1) {
3175 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3177 return RTEST(convpath);
3187 rb_econv_init_by_convpath_i(
const char *sname,
const char *dname,
int depth,
void *arg)
3195 ret = rb_econv_add_converter(a->
ec, sname, dname, a->
index);
3202 rb_econv_init_by_convpath(
VALUE self,
VALUE convpath,
3203 const char **sname_p,
const char **dname_p,
3211 const char *sname, *dname;
3217 VALUE snamev, dnamev;
3224 enc_arg(&snamev, &sname, &senc);
3226 enc_arg(&dnamev, &dname, &denc);
3233 ret = rb_econv_add_converter(ec, sname, dname, ec->
num_trans);
3247 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
3248 if (ret == -1 || arg.
ret == -1) {
3249 VALUE msg =
rb_sprintf(
"adding conversion failed: %s to %s", sname, dname);
3384 econv_init(
int argc,
VALUE *argv,
VALUE self)
3387 VALUE snamev, dnamev;
3388 const char *sname, *dname;
3399 ec = rb_econv_init_by_convpath(
self, convpath, &sname, &dname, &senc, &denc);
3404 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3417 senc = make_dummy_encoding(sname);
3419 denc = make_dummy_encoding(dname);
3443 econv_inspect(
VALUE self)
3450 return rb_sprintf(
"#<%s: uninitialized>", cname);
3456 econv_description(sname, dname, ec->
flags, str);
3463 check_econv(
VALUE self)
3481 econv_source_encoding(
VALUE self)
3496 econv_destination_encoding(
VALUE self)
3527 econv_convpath(
VALUE self)
3695 econv_primitive_convert(
int argc,
VALUE *argv,
VALUE self)
3697 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3700 const unsigned char *ip, *is;
3701 unsigned char *op, *os;
3702 long output_byteoffset, output_bytesize;
3703 unsigned long output_byteend;
3706 argc =
rb_scan_args(argc, argv,
"23:", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v, &opt);
3708 if (
NIL_P(output_byteoffset_v))
3709 output_byteoffset = 0;
3711 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3713 if (
NIL_P(output_bytesize_v))
3714 output_bytesize = 0;
3716 output_bytesize =
NUM2LONG(output_bytesize_v);
3718 if (!
NIL_P(flags_v)) {
3724 else if (!
NIL_P(opt)) {
3743 if (
NIL_P(output_bytesize_v)) {
3751 if (
NIL_P(output_byteoffset_v))
3754 if (output_byteoffset < 0)
3760 if (output_bytesize < 0)
3763 output_byteend = (
unsigned long)output_byteoffset +
3764 (
unsigned long)output_bytesize;
3766 if (output_byteend < (
unsigned long)output_byteoffset ||
3781 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3782 os = op + output_bytesize;
3786 if (!
NIL_P(input)) {
3792 if (
LONG_MAX / 2 < output_bytesize)
3794 output_bytesize *= 2;
3795 output_byteoffset_v =
Qnil;
3803 return econv_result_to_symbol(res);
3841 econv_convert(
VALUE self,
VALUE source_string)
3859 ret = econv_primitive_convert(ac, av,
self);
3861 if (ret == sym_invalid_byte_sequence ||
3862 ret == sym_undefined_conversion ||
3863 ret == sym_incomplete_input) {
3864 VALUE exc = make_econv_exception(ec);
3868 if (ret == sym_finished) {
3872 if (ret != sym_source_buffer_empty) {
3873 rb_bug(
"unexpected result of econv_primitive_convert");
3891 econv_finish(
VALUE self)
3907 ret = econv_primitive_convert(ac, av,
self);
3909 if (ret == sym_invalid_byte_sequence ||
3910 ret == sym_undefined_conversion ||
3911 ret == sym_incomplete_input) {
3912 VALUE exc = make_econv_exception(ec);
3916 if (ret != sym_finished) {
3917 rb_bug(
"unexpected result of econv_primitive_convert");
3999 econv_primitive_errinfo(
VALUE self)
4057 econv_insert_output(
VALUE self,
VALUE string)
4059 const char *insert_enc;
4102 econv_putback(
int argc,
VALUE *argv,
VALUE self)
4116 if (putbackable < n)
4151 econv_last_error(
VALUE self)
4156 exc = make_econv_exception(ec);
4175 econv_get_replacement(
VALUE self)
4181 ret = make_replacement(ec);
4227 return make_econv_exception(ec);
4235 exc = make_econv_exception(ec);
4248 ecerr_source_encoding_name(
VALUE self)
4274 ecerr_source_encoding(
VALUE self)
4286 ecerr_destination_encoding_name(
VALUE self)
4298 ecerr_destination_encoding(
VALUE self)
4319 ecerr_error_char(
VALUE self)
4340 ecerr_error_bytes(
VALUE self)
4352 ecerr_readagain_bytes(
VALUE self)
4382 ecerr_incomplete_input(
VALUE self)
4424 sym_invalid_byte_sequence =
ID2SYM(
rb_intern(
"invalid_byte_sequence"));
4426 sym_destination_buffer_full =
ID2SYM(
rb_intern(
"destination_buffer_full"));
4436 #ifdef ENABLE_ECONV_NEWLINE_OPTION RUBY_EXTERN VALUE rb_cString
const char * ascii_incompat_name
int rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
#define ECONV_XML_TEXT_DECORATOR
search_path_queue_t * queue
int rb_enc_get_index(VALUE obj)
void rb_econv_check_error(rb_econv_t *ec)
RUBY_EXTERN VALUE rb_cData
#define MBCLEN_CHARFOUND_P(ret)
VALUE rb_eConverterNotFoundError
void rb_bug(const char *fmt,...)
VALUE rb_ary_entry(VALUE ary, long offset)
#define MBCLEN_CHARFOUND_LEN(ret)
unsigned char * in_buf_end
const unsigned char * error_bytes_start
rb_econv_result_t last_result
#define rb_enc_mbc_to_codepoint(p, e, enc)
VALUE rb_econv_make_exception(rb_econv_t *ec)
#define RUBY_TYPED_FREE_IMMEDIATELY
const char * dst_encoding
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
size_t strlen(const char *)
struct search_path_queue_tag search_path_queue_t
#define DECORATOR_P(sname, dname)
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
VALUE rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
VALUE rb_eInvalidByteSequenceError
#define ECONV_XML_ATTR_CONTENT_DECORATOR
void rb_econv_binmode(rb_econv_t *ec)
void rb_raise(VALUE exc, const char *fmt,...)
rb_encoding * source_encoding
unsigned char * out_data_start
#define TypedData_Wrap_Struct(klass, data_type, sval)
#define MAX_ECFLAGS_DECORATORS
#define ENC_CODERANGE_SET(obj, cr)
#define TypedData_Get_Struct(obj, type, data_type, sval)
int rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
unsigned char * in_data_start
#define ECONV_ERROR_HANDLER_MASK
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
rb_encoding * rb_to_encoding(VALUE enc)
rb_encoding * destination_encoding
#define ECONV_XML_ATTR_QUOTE_DECORATOR
struct rb_transcoding * tc
#define SUSPEND(ret, num)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
VALUE rb_ary_push(VALUE ary, VALUE item)
VALUE rb_obj_is_method(VALUE)
struct rb_transcoding * error_tc
void rb_str_set_len(VALUE, long)
#define RBASIC_SET_CLASS(obj, cls)
int rb_enc_str_coderange(VALUE)
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
ssize_t(* func_sio)(void *, const unsigned char *, size_t, VALUE, unsigned char *, size_t)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
unsigned int conv_tree_start
VALUE rb_exc_new_str(VALUE etype, VALUE str)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
const rb_transcoder * transcoder
VALUE(* func_ii)(void *, VALUE)
int(* state_init_func)(void *)
ssize_t(* func_so)(void *, const unsigned char *, size_t, unsigned char *, size_t)
rb_encoding * rb_utf8_encoding(void)
VALUE rb_str_tmp_new(long)
VALUE(* func_si)(void *, const unsigned char *, size_t)
unsigned char * in_buf_start
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
#define ENC_CODERANGE_7BIT
const char * rb_obj_classname(VALUE)
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
ssize_t(* finish_func)(void *, unsigned char *, size_t)
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
st_table * st_init_strcasetable(void)
VALUE rb_obj_class(VALUE)
call-seq: obj.class -> class
#define RB_TYPE_P(obj, type)
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
double dummy_for_alignment
int rb_to_encoding_index(VALUE enc)
unsigned int output_index
#define TRANSCODING_READBUF(tc)
void Init_transcode(void)
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
unsigned char * in_data_end
const char * destination_encoding
#define ECONV_INVALID_MASK
struct rb_econv_t rb_econv_t
#define SUSPEND_AFTER_OUTPUT(num)
VALUE rb_str_cat2(VALUE, const char *)
#define ECONV_INVALID_REPLACE
void rb_econv_close(rb_econv_t *ec)
int(* state_fini_func)(void *)
#define ECONV_PARTIAL_INPUT
#define ECONV_AFTER_OUTPUT
void rb_define_const(VALUE, const char *, VALUE)
void rb_ary_store(VALUE ary, long idx, VALUE val)
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
VALUE rb_cEncodingConverter
VALUE rb_require_safe(VALUE, int)
#define ALLOCA_N(type, n)
#define TRANSCODING_STATE(tc)
#define MEMCPY(p1, p2, type, n)
ssize_t(* func_io)(void *, VALUE, const unsigned char *, size_t)
#define ENC_CODERANGE_BROKEN
VALUE rb_enc_associate_index(VALUE obj, int idx)
rb_transcoder_asciicompat_type_t asciicompat_type
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
#define ENC_CODERANGE_VALID
#define SUSPEND_OBUF(num)
VALUE rb_str_resize(VALUE, long)
void rb_register_transcoder(const rb_transcoder *tr)
union rb_transcoding::@120 readbuf
unsigned char * out_buf_start
#define REALLOC_N(var, type, n)
VALUE rb_obj_is_proc(VALUE)
search_path_queue_t ** queue_last_ptr
VALUE rb_sprintf(const char *format,...)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
int rb_econv_putbackable(rb_econv_t *ec)
unsigned char * out_buf_end
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
struct rb_transcoding * last_tc
#define MEMMOVE(p1, p2, type, n)
#define STR1_BYTEINDEX(w)
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
VALUE rb_ivar_set(VALUE, ID, VALUE)
VALUE rb_check_hash_type(VALUE hash)
unsigned char buf[MIME_BUF_SIZE]
VALUE rb_assoc_new(VALUE car, VALUE cdr)
char ary[sizeof(double) > sizeof(void *) ? sizeof(double) :sizeof(void *)]
#define ECONV_CRLF_NEWLINE_DECORATOR
const char * source_encoding
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
int rb_define_dummy_encoding(const char *name)
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
union rb_transcoding::rb_transcoding_state_t state
#define ECONV_NEWLINE_DECORATOR_MASK
const char * src_encoding
VALUE rb_obj_encoding(VALUE obj)
#define ECONV_UNDEF_HEX_CHARREF
#define rb_enc_asciicompat(enc)
VALUE rb_str_new_cstr(const char *)
int memcmp(const void *s1, const void *s2, size_t len)
VALUE rb_proc_call(VALUE, VALUE)
const char * ascii_compat_name
#define ECONV_CR_NEWLINE_DECORATOR
#define RARRAY_LENINT(ary)
struct rb_econv_t::@122 last_error
VALUE rb_hash_freeze(VALUE hash)
int rb_respond_to(VALUE, ID)
VALUE rb_method_call(int, const VALUE *, VALUE)
register unsigned int len
#define StringValueCStr(v)
#define ECONV_UNDEF_REPLACE
void rb_str_modify(VALUE)
rb_encoding * rb_enc_get(VALUE obj)
#define RARRAY_AREF(a, i)
unsigned char * out_data_end
size_t rb_econv_memsize(rb_econv_t *ec)
rb_econv_t * rb_econv_open(const char *sname, const char *dname, int ecflags)
#define TRANSCODING_WRITEBUF(tc)
VALUE rb_check_array_type(VALUE ary)
VALUE rb_hash_aref(VALUE hash, VALUE key)
void rb_error_arity(int argc, int min, int max)
VALUE rb_str_catf(VALUE str, const char *format,...)
void rb_str_shared_replace(VALUE, VALUE)
size_t rb_str_capacity(VALUE str)
void InitVM_transcode(void)
const char * destination_encoding_name
VALUE rb_enc_default_internal(void)
const char * rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
VALUE rb_eUndefinedConversionError
const char * replacement_enc
VALUE rb_str_new_frozen(VALUE)
VALUE rb_str_drop_bytes(VALUE, long)
const char * source_encoding_name
int replacement_allocated
struct search_path_queue_tag * next
int rb_enc_find_index(const char *name)
#define rb_check_frozen(obj)
union rb_transcoding::@121 writebuf
const rb_transcoder * transcoder
VALUE rb_str_buf_new(long)
#define OBJ_INFECT_RAW(x, s)
struct rb_transcoding rb_transcoding
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
const unsigned char * replacement_str
#define STR1_LENGTH(byte_addr)
#define encoding_equal(enc1, enc2)
#define TRANSCODING_WRITEBUF_SIZE(tc)
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
rb_encoding * rb_enc_find(const char *name)
transcoder_entry_t ** entries
VALUE rb_to_int(VALUE)
Converts val into Integer.
VALUE rb_attr_get(VALUE, ID)
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
rb_encoding * rb_enc_from_index(int index)
VALUE rb_str_new(const char *, long)