18#define ENABLE_ECONV_NEWLINE_OPTION 1
21static VALUE rb_eUndefinedConversionError;
22static VALUE rb_eInvalidByteSequenceError;
23static VALUE rb_eConverterNotFoundError;
27static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
28static VALUE sym_xml, sym_text, sym_attr;
29static VALUE sym_universal_newline;
30static VALUE sym_crlf_newline;
31static VALUE sym_cr_newline;
32#ifdef ENABLE_ECONV_NEWLINE_OPTION
33static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
35static VALUE sym_partial_input;
37static VALUE sym_invalid_byte_sequence;
38static VALUE sym_undefined_conversion;
39static VALUE sym_destination_buffer_full;
40static VALUE sym_source_buffer_empty;
41static VALUE sym_finished;
42static VALUE sym_after_output;
43static VALUE sym_incomplete_input;
46allocate_converted_string(
const char *sname,
const char *dname,
47 const unsigned char *
str,
size_t len,
48 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
84#define TRANSCODING_READBUF(tc) \
85 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
88#define TRANSCODING_WRITEBUF(tc) \
89 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
90 (tc)->writebuf.ary : \
92#define TRANSCODING_WRITEBUF_SIZE(tc) \
93 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
94 sizeof((tc)->writebuf.ary) : \
95 (size_t)(tc)->transcoder->max_output)
96#define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
97#define TRANSCODING_STATE(tc) \
98 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
154#define DECORATOR_P(sname, dname) (*(sname) == '\0')
166make_transcoder_entry(
const char *sname,
const char *dname)
178 entry->
sname = sname;
179 entry->
dname = dname;
189get_transcoder_entry(
const char *sname,
const char *dname)
207 const char *
const sname =
tr->src_encoding;
208 const char *
const dname =
tr->dst_encoding;
212 entry = make_transcoder_entry(sname, dname);
222declare_transcoder(
const char *sname,
const char *dname,
const char *lib)
226 entry = make_transcoder_entry(sname, dname);
230static const char transcoder_lib_prefix[] =
"enc/trans/";
238 declare_transcoder(enc1, enc2, lib);
241#define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
258 const char *dname = (
const char *)
key;
277transcode_search_path(
const char *sname,
const char *dname,
278 void (*callback)(
const char *sname,
const char *dname,
int depth,
void *
arg),
335 const char *enc = dname;
343 enc = (
const char *)val;
351 callback((
const char *)val, enc, --depth,
arg);
352 enc = (
const char *)val;
368 const char *
const lib = entry->
lib;
370 const size_t total_len =
sizeof(transcoder_lib_prefix) - 1 +
len;
374 memcpy(
path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
388get_replacement_character(
const char *encname,
size_t *len_ret,
const char **repl_encname_ptr)
392 *repl_encname_ptr =
"UTF-8";
393 return "\xEF\xBF\xBD";
397 *repl_encname_ptr =
"US-ASCII";
406static const unsigned char *
408 const unsigned char *in_start,
409 const unsigned char *inchar_start,
410 const unsigned char *in_p,
411 size_t *char_len_ptr)
413 const unsigned char *
ptr;
414 if (inchar_start - in_start < tc->recognized_len) {
416 inchar_start,
unsigned char, in_p - inchar_start);
427transcode_restartable0(
const unsigned char **in_pos,
unsigned char **out_pos,
428 const unsigned char *in_stop,
unsigned char *out_stop,
433 int unitlen =
tr->input_unit_length;
436 const unsigned char *inchar_start;
437 const unsigned char *in_p;
439 unsigned char *out_p;
441 in_p = inchar_start = *in_pos;
445#define SUSPEND(ret, num) \
447 tc->resume_position = (num); \
448 if (0 < in_p - inchar_start) \
449 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
450 inchar_start, unsigned char, in_p - inchar_start); \
453 tc->recognized_len += in_p - inchar_start; \
454 if (readagain_len) { \
455 tc->recognized_len -= readagain_len; \
456 tc->readagain_len = readagain_len; \
459 resume_label ## num:; \
461#define SUSPEND_OBUF(num) \
463 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
466#define SUSPEND_AFTER_OUTPUT(num) \
467 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
468 SUSPEND(econv_after_output, num); \
471#define next_table (tc->next_table)
472#define next_info (tc->next_info)
473#define next_byte (tc->next_byte)
474#define writebuf_len (tc->writebuf_len)
475#define writebuf_off (tc->writebuf_off)
479 case 1:
goto resume_label1;
480 case 2:
goto resume_label2;
481 case 3:
goto resume_label3;
482 case 4:
goto resume_label4;
483 case 5:
goto resume_label5;
484 case 6:
goto resume_label6;
485 case 7:
goto resume_label7;
486 case 8:
goto resume_label8;
487 case 9:
goto resume_label9;
488 case 10:
goto resume_label10;
489 case 11:
goto resume_label11;
490 case 12:
goto resume_label12;
491 case 13:
goto resume_label13;
492 case 14:
goto resume_label14;
493 case 15:
goto resume_label15;
494 case 16:
goto resume_label16;
495 case 17:
goto resume_label17;
496 case 18:
goto resume_label18;
497 case 19:
goto resume_label19;
498 case 20:
goto resume_label20;
499 case 21:
goto resume_label21;
500 case 22:
goto resume_label22;
501 case 23:
goto resume_label23;
502 case 24:
goto resume_label24;
503 case 25:
goto resume_label25;
504 case 26:
goto resume_label26;
505 case 27:
goto resume_label27;
506 case 28:
goto resume_label28;
507 case 29:
goto resume_label29;
508 case 30:
goto resume_label30;
509 case 31:
goto resume_label31;
510 case 32:
goto resume_label32;
511 case 33:
goto resume_label33;
512 case 34:
goto resume_label34;
522 if (in_stop <= in_p) {
529#define BYTE_ADDR(index) (tr->byte_array + (index))
530#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
531#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
532#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
533#define BL_MIN_BYTE (BL_BASE[0])
534#define BL_MAX_BYTE (BL_BASE[1])
535#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
536#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
549 const unsigned char *p = inchar_start;
562 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
563 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
565 while (in_p >= in_stop) {
611 const unsigned char *char_start;
613 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
619 if (
tr->max_output <= out_stop - out_p)
635 const unsigned char *char_start;
638 if (
tr->max_output <= out_stop - out_p) {
639 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
641 char_start, (
size_t)char_len,
642 out_p, out_stop - out_p);
645 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
647 char_start, (
size_t)char_len,
659 const unsigned char *char_start;
662 if (
tr->max_output <= out_stop - out_p) {
663 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
666 out_p, out_stop - out_p);
669 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
700 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
701 readagain_len = invalid_len - discard_len;
725 if (
tr->finish_func) {
727 if (
tr->max_output <= out_stop - out_p) {
729 out_p, out_stop - out_p);
752transcode_restartable(
const unsigned char **in_pos,
unsigned char **out_pos,
753 const unsigned char *in_stop,
unsigned char *out_stop,
759 const unsigned char *readagain_pos = readagain_buf;
760 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
766 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|
ECONV_PARTIAL_INPUT);
769 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
774 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
785 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
787 if (
tr->state_init_func) {
806 const unsigned char **input_ptr,
const unsigned char *input_stop,
807 unsigned char **output_ptr,
unsigned char *output_stop,
810 return transcode_restartable(
811 input_ptr, output_ptr,
812 input_stop, output_stop,
820 if (
tr->state_fini_func) {
823 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
838 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
851rb_econv_alloc(
int n_hint)
905 ec->
elems[
i].
tc = rb_transcoding_open_by_transcoder(
tr, 0);
933 for (
i = 0;
i <
n;
i++) {
940 ec = rb_econv_alloc(
n);
942 for (
i = 0;
i <
n;
i++) {
944 ret = rb_econv_add_transcoder_at(ec,
tr, ec->
num_trans);
960trans_open_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
967 toarg->
entries[depth] = get_transcoder_entry(sname, dname);
971rb_econv_open0(
const char *sname,
const char *dname,
int ecflags)
982 if (*sname ==
'\0' && *dname ==
'\0') {
990 toarg.num_additional = 0;
991 num_trans = transcode_search_path(sname, dname, trans_open_i, (
void *)&toarg);
999 ec = rb_econv_open_by_transcoder_entries(num_trans,
entries);
1004 ec->
flags = ecflags;
1011#define MAX_ECFLAGS_DECORATORS 32
1014decorator_names(
int ecflags,
const char **decorators_ret)
1035 decorators_ret[num_decorators++] =
"xml_text_escape";
1037 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1039 decorators_ret[num_decorators++] =
"xml_attr_quote";
1042 decorators_ret[num_decorators++] =
"crlf_newline";
1044 decorators_ret[num_decorators++] =
"cr_newline";
1046 decorators_ret[num_decorators++] =
"universal_newline";
1048 return num_decorators;
1059 num_decorators = decorator_names(ecflags, decorators);
1060 if (num_decorators == -1)
1067 for (
i = 0;
i < num_decorators;
i++)
1073 ec->
flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1080 const unsigned char **input_ptr,
const unsigned char *input_stop,
1081 unsigned char **output_ptr,
unsigned char *output_stop,
1088 const unsigned char **ipp, *is, *iold;
1089 unsigned char **opp, *os, *oold;
1129 flags &= ~ECONV_AFTER_OUTPUT;
1132 f &= ~ECONV_AFTER_OUTPUT;
1135 te->
last_result = res = rb_transcoding_convert(te->
tc, ipp, is, opp, os,
f);
1136 if (iold != *ipp || oold != *opp)
1161 const unsigned char **input_ptr,
const unsigned char *input_stop,
1162 unsigned char **output_ptr,
unsigned char *output_stop,
1164 int *result_position_ptr)
1167 int needreport_index;
1170 unsigned char empty_buf;
1171 unsigned char *empty_ptr = &empty_buf;
1174 input_ptr = (
const unsigned char **)&empty_ptr;
1175 input_stop = empty_ptr;
1179 output_ptr = &empty_ptr;
1180 output_stop = empty_ptr;
1194 goto found_needreport;
1201 rb_bug(
"unexpected transcode last result");
1211 res = rb_trans_conv(ec,
NULL,
NULL, output_ptr, output_stop,
1213 result_position_ptr);
1225 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1226 sweep_start = needreport_index + 1;
1227 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1238 if (result_position_ptr)
1239 *result_position_ptr =
i;
1243 if (result_position_ptr)
1244 *result_position_ptr = -1;
1250 const unsigned char **input_ptr,
const unsigned char *input_stop,
1251 unsigned char **output_ptr,
unsigned char *output_stop,
1255 int result_position;
1263 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1264 len = output_stop - *output_ptr;
1266 *output_ptr = output_stop;
1280 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1281 len = output_stop - *output_ptr;
1284 len = input_stop - *input_ptr;
1287 *(*output_ptr)++ = *(*input_ptr)++;
1294 if (*input_ptr != input_stop)
1306 if (data_start != data_end) {
1308 if (output_stop - *output_ptr < data_end - data_start) {
1309 len = output_stop - *output_ptr;
1311 *output_ptr = output_stop;
1316 len = data_end - data_start;
1336 *input_ptr != input_stop) {
1337 input_stop = *input_ptr;
1338 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1344 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1349 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1370static int output_replacement_character(
rb_econv_t *ec);
1376 unsigned char utfbuf[1024];
1377 const unsigned char *utf;
1379 int utf_allocated = 0;
1380 char charef_buf[16];
1381 const unsigned char *p;
1390 utfbuf,
sizeof(utfbuf),
1398 if (utf_len % 4 != 0)
1402 while (4 <= utf_len) {
1408 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1430 const unsigned char **input_ptr,
const unsigned char *input_stop,
1431 unsigned char **output_ptr,
unsigned char *output_stop,
1436 unsigned char empty_buf;
1437 unsigned char *empty_ptr = &empty_buf;
1442 input_ptr = (
const unsigned char **)&empty_ptr;
1443 input_stop = empty_ptr;
1447 output_ptr = &empty_ptr;
1448 output_stop = empty_ptr;
1452 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1460 if (output_replacement_character(ec) == 0)
1471 if (output_replacement_character(ec) == 0)
1476 if (output_hex_charref(ec) == 0)
1497 return tr->src_encoding;
1498 return tr->dst_encoding;
1501static unsigned char *
1502allocate_converted_string(
const char *sname,
const char *dname,
1503 const unsigned char *
str,
size_t len,
1504 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1505 size_t *dst_len_ptr)
1507 unsigned char *dst_str;
1514 const unsigned char *sp;
1518 dst_bufsize = caller_dst_bufsize;
1528 dst_str = caller_dst_buf;
1530 dst_str =
xmalloc(dst_bufsize);
1533 dp = dst_str+dst_len;
1535 dst_len =
dp - dst_str;
1541 if (dst_str == caller_dst_buf) {
1544 memcpy(tmp, dst_str, dst_bufsize/2);
1548 dst_str =
xrealloc(dst_str, dst_bufsize);
1550 dp = dst_str+dst_len;
1552 dst_len =
dp - dst_str;
1558 *dst_len_ptr = dst_len;
1562 if (dst_str != caller_dst_buf)
1571 const unsigned char *
str,
size_t len,
const char *str_encoding)
1574 unsigned char insert_buf[4096];
1575 const unsigned char *insert_str =
NULL;
1578 int last_trans_index;
1581 unsigned char **buf_start_p;
1582 unsigned char **data_start_p;
1583 unsigned char **data_end_p;
1584 unsigned char **buf_end_p;
1598 insert_str = allocate_converted_string(str_encoding, insert_encoding,
1599 str,
len, insert_buf,
sizeof(insert_buf), &insert_len);
1600 if (insert_str ==
NULL)
1615 tc = ec->
elems[last_trans_index].
tc;
1617 if (need < insert_len)
1619 if (last_trans_index == 0) {
1639 tc = ec->
elems[last_trans_index].
tc;
1642 if (*buf_start_p ==
NULL) {
1645 *data_start_p =
buf;
1647 *buf_end_p =
buf+need;
1649 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1650 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1651 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1652 *data_start_p = *buf_start_p;
1653 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1655 size_t s = (*data_end_p - *buf_start_p) + need;
1659 *data_start_p =
buf;
1660 *data_end_p =
buf + (*data_end_p - *buf_start_p);
1662 *buf_end_p =
buf + s;
1666 memcpy(*data_end_p, insert_str, insert_len);
1667 *data_end_p += insert_len;
1674 if (insert_str !=
str && insert_str != insert_buf)
1675 xfree((
void*)insert_str);
1679 if (insert_str !=
str && insert_str != insert_buf)
1680 xfree((
void*)insert_str);
1693 rb_transcoding_close(ec->
elems[
i].
tc);
1729#if SIZEOF_SIZE_T > SIZEOF_INT
1760 tr = load_transcoder_entry(entry);
1798 unsigned const char *sp, *se;
1799 unsigned char *ds, *
dp, *de;
1817 unsigned long new_capa = (
unsigned long)dlen +
len + max_output;
1823 sp = (
const unsigned char *)ss;
1829 len -= (
const char *)sp - ss;
1830 ss = (
const char *)sp;
1866rb_econv_add_converter(
rb_econv_t *ec,
const char *sname,
const char *dname,
int n)
1874 entry = get_transcoder_entry(sname, dname);
1878 tr = load_transcoder_entry(entry);
1881 return rb_econv_add_transcoder_at(ec,
tr,
n);
1885rb_econv_decorate_at(
rb_econv_t *ec,
const char *decorator_name,
int n)
1887 return rb_econv_add_converter(ec,
"", decorator_name,
n);
1896 return rb_econv_decorate_at(ec, decorator_name, 0);
1902 return rb_econv_decorate_at(ec, decorator_name, 1);
1904 return rb_econv_decorate_at(ec, decorator_name, 0);
1913 return rb_econv_decorate_at(ec, decorator_name, 0);
1919 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans-1);
1921 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans);
1927 const char *dname = 0;
1931 dname =
"universal_newline";
1934 dname =
"crlf_newline";
1937 dname =
"cr_newline";
1946 for (
i=0;
i < num_trans;
i++) {
1948 rb_transcoding_close(ec->
elems[
i].
tc);
1957 ec->
flags &= ~ECONV_NEWLINE_DECORATOR_MASK;
1961econv_description(
const char *sname,
const char *dname,
int ecflags,
VALUE mesg)
1963 int has_description = 0;
1968 if (*sname !=
'\0' || *dname !=
'\0') {
1971 else if (*dname ==
'\0')
1975 has_description = 1;
1982 const char *pre =
"";
1983 if (has_description)
2009 has_description = 1;
2011 if (!has_description) {
2023 econv_description(sname, dname, ecflags, mesg);
2048 else if (readagain_len) {
2085 const char *start, *end;
2109 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2133 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2135 unsigned char **out_start_ptr,
2136 unsigned char **out_pos,
2137 unsigned char **out_stop_ptr)
2139 size_t len = (*out_pos - *out_start_ptr);
2140 size_t new_len = (
len + max_output) * 2;
2141 *out_start_ptr = resize_destination(destination,
len, new_len);
2142 *out_pos = *out_start_ptr +
len;
2143 *out_stop_ptr = *out_start_ptr + new_len;
2151 const unsigned char *replacement;
2152 const char *repl_enc;
2153 const char *ins_enc;
2165 replacement = (
const unsigned char *)get_replacement_character(ins_enc, &
len, &repl_enc);
2168 replacement = (
unsigned char *)
"?";
2182 const unsigned char *
str,
size_t len,
const char *encname)
2184 unsigned char *str2;
2186 const char *encname2;
2197 str2 = allocate_converted_string(encname, encname2,
str,
len,
NULL, 0, &len2);
2217 if (make_replacement(ec) == -1)
2228#define hash_fallback rb_hash_aref
2249transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2250 const unsigned char *in_stop,
unsigned char *out_stop,
2252 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2253 const char *src_encoding,
2254 const char *dst_encoding,
2261 unsigned char *out_start = *out_pos;
2297 rep = (*fallback_func)(fallback, rep);
2302 if ((
int)ret == -1) {
2312 exc = make_econv_exception(ec);
2318 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2328transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2329 const unsigned char *in_stop,
unsigned char *out_stop,
2331 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2332 const char *src_encoding,
2333 const char *dst_encoding,
2340 unsigned char *out_start = *out_pos;
2341 const unsigned char *
ptr;
2355 unsigned char input_byte;
2356 const unsigned char *p = &input_byte;
2359 if (
ptr < in_stop) {
2370 if (&input_byte != p)
2371 ptr += p - &input_byte;
2376 exc = make_econv_exception(ec);
2382 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2403static unsigned char *
2404str_transcoding_resize(
VALUE destination,
size_t len,
size_t new_len)
2411econv_opts(
VALUE opt,
int ecflags)
2418 else if (
v==sym_replace) {
2428 else if (
v==sym_replace) {
2445 else if (
v==sym_attr) {
2456#ifdef ENABLE_ECONV_NEWLINE_OPTION
2459 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2460 if (
v == sym_universal) {
2463 else if (
v == sym_crlf) {
2466 else if (
v == sym_cr) {
2469 else if (
v == sym_lf) {
2483 int setflags = 0, newlineflag = 0;
2488 newlineflag |= !
NIL_P(
v);
2493 newlineflag |= !
NIL_P(
v);
2498 newlineflag |= !
NIL_P(
v);
2501 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2502 ecflags |= setflags;
2515 if (
NIL_P(opthash)) {
2519 ecflags = econv_opts(opthash, ecflags);
2547 if (!
NIL_P(newhash))
2566 if (
NIL_P(opthash)) {
2571 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2575 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2579 if (!
NIL_P(replacement)) {
2625 const char *sname, *dname;
2626 int sencidx, dencidx;
2628 dencidx = enc_arg(arg1, &dname, &denc);
2636 sencidx = enc_arg(arg2, &sname, &senc);
2653 unsigned char *
buf, *
bp, *sp;
2654 const unsigned char *fromp;
2656 const char *sname, *dname;
2658 int explicitly_invalid_replace =
TRUE;
2665 if (!ecflags)
return -1;
2669 explicitly_invalid_replace =
FALSE;
2677 dencidx = str_transcode_enc_args(
str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2683 if (senc && senc == denc) {
2686 if (!
NIL_P(ecopts)) {
2694 return NIL_P(arg2) ? -1 : dencidx;
2702 return NIL_P(arg2) ? -1 : dencidx;
2718 transcode_loop(&fromp, &
bp, (sp+slen), (
bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2719 if (fromp != sp+slen) {
2748 return str_transcode0(
argc,
argv,
self, ecflags, ecopts);
2752str_encode_associate(
VALUE str,
int encidx)
2792 encidx = str_transcode(
argc,
argv, &newstr);
2794 if (encidx < 0)
return str;
2795 if (newstr ==
str) {
2800 return str_encode_associate(
str, encidx);
2865 int encidx = str_transcode(
argc,
argv, &newstr);
2866 return encoded_dup(newstr,
str, encidx);
2875 int encidx = str_transcode0(
argc,
argv, &newstr, ecflags, ecopts);
2876 return encoded_dup(newstr,
str, encidx);
2883 if (newstr ==
str) {
2891 return str_encode_associate(newstr, encidx);
2900econv_free(
void *
ptr)
2907econv_memsize(
const void *
ptr)
2914 {
NULL, econv_free, econv_memsize,},
2925make_dummy_encoding(
const char *
name)
2935make_encoding(
const char *
name)
2940 enc = make_dummy_encoding(
name);
2945make_encobj(
const char *
name)
2971 const char *arg_name, *result_name;
2974 enc_arg(&
arg, &arg_name, &arg_enc);
2978 if (result_name ==
NULL)
2981 result_enc = make_encoding(result_name);
2989 const char **sname_p,
const char **dname_p,
2994 VALUE opt, flags_v, ecopts;
2996 const char *sname, *dname;
3002 if (!
NIL_P(flags_v)) {
3009 else if (!
NIL_P(opt)) {
3042 *ecflags_p = ecflags;
3047decorate_convpath(
VALUE convpath,
int ecflags)
3054 num_decorators = decorator_names(ecflags, decorators);
3055 if (num_decorators == -1)
3079 for (
i = 0;
i < num_decorators;
i++)
3086search_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3091 if (*ary_p ==
Qnil) {
3132 VALUE snamev, dnamev;
3133 const char *sname, *dname;
3139 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3142 transcode_search_path(sname, dname, search_convpath_i, &convpath);
3144 if (
NIL_P(convpath)) {
3151 if (decorate_convpath(convpath, ecflags) == -1) {
3170 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3172 return RTEST(convpath);
3182rb_econv_init_by_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3190 ret = rb_econv_add_converter(a->
ec, sname, dname, a->
index);
3197rb_econv_init_by_convpath(
VALUE self,
VALUE convpath,
3198 const char **sname_p,
const char **dname_p,
3206 const char *sname, *dname;
3212 VALUE snamev, dnamev;
3219 enc_arg(&snamev, &sname, &senc);
3221 enc_arg(&dnamev, &dname, &denc);
3242 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &
arg);
3243 if (
ret == -1 ||
arg.ret == -1) {
3244 VALUE msg =
rb_sprintf(
"adding conversion failed: %s to %s", sname, dname);
3382 VALUE snamev, dnamev;
3383 const char *sname, *dname;
3394 ec = rb_econv_init_by_convpath(
self, convpath, &sname, &dname, &senc, &denc);
3399 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3412 senc = make_dummy_encoding(sname);
3414 denc = make_dummy_encoding(dname);
3438econv_inspect(
VALUE self)
3445 return rb_sprintf(
"#<%s: uninitialized>", cname);
3451 econv_description(sname, dname,
ec->
flags,
str);
3458check_econv(
VALUE self)
3476econv_source_encoding(
VALUE self)
3491econv_destination_encoding(
VALUE self)
3522econv_convpath(
VALUE self)
3692 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3695 const unsigned char *ip, *is;
3696 unsigned char *op, *os;
3697 long output_byteoffset, output_bytesize;
3698 unsigned long output_byteend;
3703 if (
NIL_P(output_byteoffset_v))
3704 output_byteoffset = 0;
3706 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3708 if (
NIL_P(output_bytesize_v))
3709 output_bytesize = 0;
3711 output_bytesize =
NUM2LONG(output_bytesize_v);
3713 if (!
NIL_P(flags_v)) {
3719 else if (!
NIL_P(opt)) {
3738 if (
NIL_P(output_bytesize_v)) {
3746 if (
NIL_P(output_byteoffset_v))
3749 if (output_byteoffset < 0)
3755 if (output_bytesize < 0)
3758 output_byteend = (
unsigned long)output_byteoffset +
3759 (
unsigned long)output_bytesize;
3761 if (output_byteend < (
unsigned long)output_byteoffset ||
3776 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3777 os = op + output_bytesize;
3786 if (
LONG_MAX / 2 < output_bytesize)
3788 output_bytesize *= 2;
3789 output_byteoffset_v =
Qnil;
3797 return econv_result_to_symbol(res);
3835econv_convert(
VALUE self,
VALUE source_string)
3853 ret = econv_primitive_convert(ac, av,
self);
3855 if (
ret == sym_invalid_byte_sequence ||
3856 ret == sym_undefined_conversion ||
3857 ret == sym_incomplete_input) {
3862 if (
ret == sym_finished) {
3866 if (
ret != sym_source_buffer_empty) {
3867 rb_bug(
"unexpected result of econv_primitive_convert");
3885econv_finish(
VALUE self)
3901 ret = econv_primitive_convert(ac, av,
self);
3903 if (
ret == sym_invalid_byte_sequence ||
3904 ret == sym_undefined_conversion ||
3905 ret == sym_incomplete_input) {
3910 if (
ret != sym_finished) {
3911 rb_bug(
"unexpected result of econv_primitive_convert");
3993econv_primitive_errinfo(
VALUE self)
4053 const char *insert_enc;
4109 if (putbackable <
n)
4144econv_last_error(
VALUE self)
4149 exc = make_econv_exception(
ec);
4168econv_get_replacement(
VALUE self)
4174 ret = make_replacement(
ec);
4176 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4211 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4220 return make_econv_exception(
ec);
4228 exc = make_econv_exception(
ec);
4241ecerr_source_encoding_name(
VALUE self)
4267ecerr_source_encoding(
VALUE self)
4279ecerr_destination_encoding_name(
VALUE self)
4291ecerr_destination_encoding(
VALUE self)
4312ecerr_error_char(
VALUE self)
4333ecerr_error_bytes(
VALUE self)
4345ecerr_readagain_bytes(
VALUE self)
4375ecerr_incomplete_input(
VALUE self)
4416 sym_invalid_byte_sequence =
ID2SYM(
rb_intern(
"invalid_byte_sequence"));
4418 sym_destination_buffer_full =
ID2SYM(
rb_intern(
"destination_buffer_full"));
4428#ifdef ENABLE_ECONV_NEWLINE_OPTION
4552 rb_define_method(rb_eUndefinedConversionError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4553 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4554 rb_define_method(rb_eUndefinedConversionError,
"source_encoding", ecerr_source_encoding, 0);
4555 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding", ecerr_destination_encoding, 0);
4556 rb_define_method(rb_eUndefinedConversionError,
"error_char", ecerr_error_char, 0);
4558 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4559 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4560 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding", ecerr_source_encoding, 0);
4561 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding", ecerr_destination_encoding, 0);
4562 rb_define_method(rb_eInvalidByteSequenceError,
"error_bytes", ecerr_error_bytes, 0);
4563 rb_define_method(rb_eInvalidByteSequenceError,
"readagain_bytes", ecerr_readagain_bytes, 0);
4564 rb_define_method(rb_eInvalidByteSequenceError,
"incomplete_input?", ecerr_incomplete_input, 0);
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
int rb_enc_get_index(VALUE obj)
int rb_to_encoding_index(VALUE enc)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_encoding * rb_utf8_encoding(void)
rb_encoding * rb_enc_from_index(int index)
rb_encoding * rb_enc_get(VALUE obj)
rb_encoding * rb_enc_find(const char *name)
int rb_define_dummy_encoding(const char *name)
VALUE rb_enc_default_internal(void)
VALUE rb_obj_encoding(VALUE obj)
rb_encoding * rb_to_encoding(VALUE enc)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
VALUE rb_enc_associate_index(VALUE obj, int idx)
int rb_enc_find_index(const char *name)
#define ECONV_XML_ATTR_QUOTE_DECORATOR
#define ECONV_AFTER_OUTPUT
#define ENC_CODERANGE_7BIT
#define ENC_CODERANGE_VALID
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
#define ECONV_XML_ATTR_CONTENT_DECORATOR
#define ECONV_INVALID_MASK
#define ECONV_CRLF_NEWLINE_DECORATOR
@ econv_undefined_conversion
@ econv_source_buffer_empty
@ econv_destination_buffer_full
@ econv_invalid_byte_sequence
#define ECONV_UNDEF_REPLACE
int rb_enc_str_coderange(VALUE)
#define ECONV_XML_TEXT_DECORATOR
#define ECONV_CR_NEWLINE_DECORATOR
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
#define ECONV_INVALID_REPLACE
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define MBCLEN_CHARFOUND_LEN(ret)
#define rb_enc_asciicompat(enc)
struct rb_econv_t rb_econv_t
#define ECONV_PARTIAL_INPUT
#define ECONV_ERROR_HANDLER_MASK
#define ENC_CODERANGE_BROKEN
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
#define MBCLEN_CHARFOUND_P(ret)
#define ECONV_UNDEF_HEX_CHARREF
#define ECONV_NEWLINE_DECORATOR_MASK
#define ENC_CODERANGE_SET(obj, cr)
char str[HTML_ESCAPE_MAX_LEN+1]
VALUE rb_define_class_under(VALUE, const char *, VALUE)
Defines a class under the namespace of outer.
VALUE rb_cData
Data class.
void rb_raise(VALUE exc, const char *fmt,...)
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
void rb_bug(const char *fmt,...)
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
VALUE rb_exc_new_str(VALUE, VALUE)
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
VALUE rb_to_int(VALUE)
Converts val into Integer.
void st_free_table(st_table *tab)
void st_add_direct(st_table *tab, st_data_t key, st_data_t value)
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
st_table * st_init_strcasetable(void)
size_t rb_str_capacity(VALUE str)
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
const char * ascii_compat_name
const char * ascii_incompat_name
unsigned char * out_data_start
struct rb_transcoding * tc
unsigned char * out_buf_start
rb_econv_result_t last_result
unsigned char * out_buf_end
unsigned char * out_data_end
rb_encoding * destination_encoding
unsigned char * in_buf_start
const char * source_encoding_name
unsigned char * in_buf_end
struct rb_transcoding * error_tc
unsigned char * in_data_start
rb_encoding * source_encoding
const char * replacement_enc
const char * source_encoding
struct rb_econv_t::@230 last_error
int replacement_allocated
const char * destination_encoding
const unsigned char * replacement_str
struct rb_transcoding * last_tc
unsigned char * in_data_end
const unsigned char * error_bytes_start
const char * destination_encoding_name
const char * dst_encoding
const char * src_encoding
rb_transcoder_asciicompat_type_t asciicompat_type
union rb_transcoding::@228 readbuf
unsigned int output_index
const rb_transcoder * transcoder
union rb_transcoding::rb_transcoding_state_t state
union rb_transcoding::@229 writebuf
search_path_queue_t * queue
search_path_queue_t ** queue_last_ptr
struct search_path_queue_tag * next
transcoder_entry_t ** entries
const rb_transcoder * transcoder
#define TRANSCODING_WRITEBUF(tc)
rb_econv_t * rb_econv_open(const char *sname, const char *dname, int ecflags)
VALUE rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
#define TRANSCODING_STATE(tc)
int rb_econv_putbackable(rb_econv_t *ec)
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
#define SUSPEND_AFTER_OUTPUT(num)
#define SUSPEND_OBUF(num)
VALUE rb_cEncodingConverter
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
#define TRANSCODING_WRITEBUF_SIZE(tc)
size_t rb_econv_memsize(rb_econv_t *ec)
#define DECORATOR_P(sname, dname)
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
#define SUSPEND(ret, num)
void rb_econv_binmode(rb_econv_t *ec)
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
VALUE rb_econv_make_exception(rb_econv_t *ec)
void rb_econv_check_error(rb_econv_t *ec)
int rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
#define TRANSCODING_READBUF(tc)
void Init_transcode(void)
#define MAX_ECFLAGS_DECORATORS
int rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
struct rb_transcoding rb_transcoding
void InitVM_transcode(void)
void rb_econv_close(rb_econv_t *ec)
struct search_path_queue_tag search_path_queue_t
#define encoding_equal(enc1, enc2)
void rb_register_transcoder(const rb_transcoder *tr)
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
const char * rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
#define STR1_LENGTH(byte_addr)
#define STR1_BYTEINDEX(w)
double dummy_for_alignment
char ary[sizeof(double) > sizeof(void *) ? sizeof(double) :sizeof(void *)]
VALUE(* fallback_func)(VALUE obj, VALUE name)
MJIT_STATIC void rb_error_arity(int argc, int min, int max)