/* Element `no` of `regs->beg`.  `regs` is not a macro parameter: a pointer
 * with that name must be in scope wherever the macro is expanded. */
25#define BEG(no) (regs->beg[(no)])
/* Element `no` of `regs->end`; same requirement on `regs` as BEG(). */
26#define END(no) (regs->end[(no)])
36#if defined HAVE_CRYPT_R
37# if defined HAVE_CRYPT_H
40#elif !defined HAVE_CRYPT
42# define HAVE_CRYPT_R 1
/* Drop any macro definitions of the following public names.
 * NOTE(review): presumably so the same identifiers can be defined as real
 * functions later in this file -- those definitions are not visible in this
 * excerpt; confirm. */
46#undef rb_usascii_str_new
50#undef rb_tainted_str_new_cstr
51#undef rb_usascii_str_new_cstr
52#undef rb_utf8_str_new_cstr
53#undef rb_enc_str_new_cstr
54#undef rb_external_str_new_cstr
55#undef rb_locale_str_new_cstr
56#undef rb_str_dup_frozen
57#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound, in bytes, on a single character;
 * no use of this constant is visible in this excerpt -- confirm. */
86#define RUBY_MAX_CHAR_LEN 16
/* String-specific names for generic FL_USER flag bits. */
/* Set on the shared_str argument by STR_SET_SHARED(). */
87#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() when RBASIC_CLASS(shared_str) is 0. */
88#define STR_BORROWED FL_USER6
/* NOTE(review): no use visible in this excerpt; presumably marks a
 * temporarily locked string -- confirm. */
89#define STR_TMPLOCK FL_USER7
/* Cleared together with STR_NOEMBED by STR_SET_EMBED(). */
90#define STR_NOFREE FL_USER18
/* When set on str, STR_SET_SHARED() skips its whole body. */
91#define STR_FAKESTR FL_USER19
93#define STR_SET_NOEMBED(str) do {\
94 FL_SET((str), STR_NOEMBED);\
95 STR_SET_EMBED_LEN((str), 0);\
/* Clear both the STR_NOEMBED and STR_NOFREE flags on `str` in one FL_UNSET. */
97#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
98#define STR_SET_EMBED_LEN(str, n) do { \
100 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
101 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
104#define STR_SET_LEN(str, n) do { \
105 if (STR_EMBED_P(str)) {\
106 STR_SET_EMBED_LEN((str), (n));\
109 RSTRING(str)->as.heap.len = (n);\
113#define STR_DEC_LEN(str) do {\
114 if (STR_EMBED_P(str)) {\
115 long n = RSTRING_LEN(str);\
117 STR_SET_EMBED_LEN((str), n);\
120 RSTRING(str)->as.heap.len--;\
/* rb_enc_mbminlen() of the encoding returned by rb_enc_get(str).  This file
 * uses the result as the terminator width (see RESIZE_CAPA and
 * STR_HEAP_SIZE). */
124#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
125#define TERM_FILL(ptr, termlen) do {\
126 char *const term_fill_ptr = (ptr);\
127 const int term_fill_len = (termlen);\
128 *term_fill_ptr = '\0';\
129 if (UNLIKELY(term_fill_len > 1))\
130 memset(term_fill_ptr, 0, term_fill_len);\
133#define RESIZE_CAPA(str,capacity) do {\
134 const int termlen = TERM_LEN(str);\
135 RESIZE_CAPA_TERM(str,capacity,termlen);\
137#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
138 if (STR_EMBED_P(str)) {\
139 if (!STR_EMBEDDABLE_P(capacity, termlen)) {\
140 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
141 const long tlen = RSTRING_LEN(str);\
142 memcpy(tmp, RSTRING_PTR(str), tlen);\
143 RSTRING(str)->as.heap.ptr = tmp;\
144 RSTRING(str)->as.heap.len = tlen;\
145 STR_SET_NOEMBED(str);\
146 RSTRING(str)->as.heap.aux.capa = (capacity);\
150 assert(!FL_TEST((str), STR_SHARED)); \
151 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
152 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
153 RSTRING(str)->as.heap.aux.capa = (capacity);\
157#define STR_SET_SHARED(str, shared_str) do { \
158 if (!FL_TEST(str, STR_FAKESTR)) { \
159 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
160 FL_SET((str), STR_SHARED); \
161 FL_SET((shared_str), STR_SHARED_ROOT); \
162 if (RBASIC_CLASS((shared_str)) == 0) \
163 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The as.heap.ptr field of `str`; only meaningful when the heap member of the
 * union is the active one. */
167#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* as.heap.aux.capa widened to size_t, plus TERM_LEN(str).  `str` is
 * evaluated twice. */
168#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Thin alias for get_encoding(str). */
170#define STR_ENC_GET(str) get_encoding(str)
172#if !defined SHARABLE_MIDDLE_SUBSTRING
173# define SHARABLE_MIDDLE_SUBSTRING 0
175#if !SHARABLE_MIDDLE_SUBSTRING
176#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
178#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/* Non-zero when `len` is at most RSTRING_EMBED_LEN_MAX + 1 - termlen.
 * RESIZE_CAPA_TERM() moves an embedded string to a separately allocated
 * buffer when this test fails for the requested capacity. */
181#define STR_EMBEDDABLE_P(len, termlen) \
182 ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen))
/* Forward declaration: the definition appears further down in this file, but
 * the function is called before that point. */
188static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* Forward declaration so str_modifiable() can be called ahead of its
 * definition (the definition is not visible in this excerpt). */
189static inline void str_modifiable(
VALUE str);
197 str_make_independent_expand((
str),
len, 0
L, termlen);
/* File-scope values compared with `==` against argv[] entries in the
 * case-mapping option check; sym_fold is also passed as the single option to
 * rb_str_downcase() when folding both operands for a case-insensitive
 * comparison.  Their initialization is not visible in this excerpt. */
201static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
204get_actual_encoding(
const int encidx,
VALUE str)
206 const unsigned char *q;
212 if (q[0] == 0xFE && q[1] == 0xFF) {
215 if (q[0] == 0xFF && q[1] == 0xFE) {
222 if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
225 if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
/* True when `str` has no FL_EXIVAR flag set and its class is exactly
 * rb_cString (not a subclass).  `str` is evaluated twice. */
265#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
296 str_make_independent(
str);
305 *
key = *value = *fstr =
str;
337 fstr = register_fstring(
str);
340 str_replace_shared_without_enc(
str, fstr);
367setup_fake_str(
struct RString *fake_str,
const char *
name,
long len,
int encidx)
378 return (
VALUE)fake_str;
426 const char *aptr, *bptr;
429 return (alen != blen ||
431 memcmp(aptr, bptr, alen) != 0);
454static inline const char *
455search_nonascii(
const char *p,
const char *e)
459#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
460# if SIZEOF_UINTPTR_T == 8
461# define NONASCII_MASK UINT64_C(0x8080808080808080)
462# elif SIZEOF_UINTPTR_T == 4
463# define NONASCII_MASK UINT32_C(0x80808080)
465# error "don't know what to do."
468# if SIZEOF_UINTPTR_T == 8
469# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
470# elif SIZEOF_UINTPTR_T == 4
471# define NONASCII_MASK 0x80808080UL
473# error "don't know what to do."
478#if !UNALIGNED_WORD_ACCESS
485 case 7:
if (p[-7]&0x80)
return p-7;
486 case 6:
if (p[-6]&0x80)
return p-6;
487 case 5:
if (p[-5]&0x80)
return p-5;
488 case 4:
if (p[-4]&0x80)
return p-4;
490 case 3:
if (p[-3]&0x80)
return p-3;
491 case 2:
if (p[-2]&0x80)
return p-2;
492 case 1:
if (p[-1]&0x80)
return p-1;
497#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
498#define aligned_ptr(value) \
499 __builtin_assume_aligned((value), sizeof(uintptr_t))
501#define aligned_ptr(value) (uintptr_t *)(value)
507 if (*s & NONASCII_MASK) {
508#ifdef WORDS_BIGENDIAN
509 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
511 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
521 case 7:
if (e[-7]&0x80)
return e-7;
522 case 6:
if (e[-6]&0x80)
return e-6;
523 case 5:
if (e[-5]&0x80)
return e-5;
524 case 4:
if (e[-4]&0x80)
return e-4;
526 case 3:
if (e[-3]&0x80)
return e-3;
527 case 2:
if (e[-2]&0x80)
return e-2;
528 case 1:
if (e[-1]&0x80)
return e-1;
536 const char *e = p +
len;
540 p = search_nonascii(p, e);
545 p = search_nonascii(p, e);
552 p = search_nonascii(p, e);
577 p = search_nonascii(p, e);
582 p = search_nonascii(p, e);
595 p = search_nonascii(p, e);
625 str_enc_copy(dest,
src);
652 str_enc_copy(dest,
src);
690str_mod_check(
VALUE s,
const char *p,
long len)
698str_capacity(
VALUE str,
const int termlen)
718must_not_null(
const char *
ptr)
736 return str_alloc(
klass);
898 rb_warning(
"rb_tainted_str_new is deprecated and will be removed in Ruby 3.2.");
905 rb_warning(
"rb_tainted_str_new_cstr is deprecated and will be removed in Ruby 3.2.");
909static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
911 int ecflags,
VALUE ecopts);
922 if (from == to)
return str;
934 from, to, ecflags, ecopts);
949 if (ofs < -olen || olen < ofs)
951 if (ofs < 0) ofs += olen;
958 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
973str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
975 int ecflags,
VALUE ecopts)
981 const unsigned char *start, *sp;
982 unsigned char *dest, *
dp;
983 size_t converted_output = (
size_t)ofs;
990 if (!ec)
return Qnil;
993 sp = (
unsigned char*)
ptr;
995 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
996 (
dp = dest + converted_output),
1000 size_t converted_input = sp - start;
1001 size_t rest =
len - converted_input;
1002 converted_output =
dp - dest;
1004 if (converted_input && converted_output &&
1005 rest < (
LONG_MAX / converted_output)) {
1006 rest = (rest * converted_output) / converted_input;
1011 olen += rest < 2 ? 2 : rest;
1053 if (!ienc || eenc == ienc) {
1149 char *ptr2 =
RSTRING(str2)->as.ary;
1167 rb_fatal(
"about to free a possible shared root");
1185 str_replace_shared_without_enc(str2,
str);
1186 rb_enc_cr_str_exact_copy(str2,
str);
1193 return str_replace_shared(str_alloc(
klass),
str);
1213 return str_new_frozen(0, orig);
1234 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1258 if ((ofs > 0) || (rest > 0) ||
1285 RBASIC(orig)->flags &= ~STR_NOFREE;
1292 rb_enc_cr_str_exact_copy(
str, orig);
/* NOTE(review): presumably the minimum capacity used when creating a string
 * buffer; the code that reads this constant is not visible in this excerpt --
 * confirm. */
1311#define STR_BUF_MIN_SIZE 63
1345 return str_new(0, 0,
len);
/* Forward declaration so str_discard() can be called ahead of its definition
 * (the definition is not visible in this excerpt). */
1387static inline void str_discard(
VALUE str);
1393 if (
str != str2) str_shared_replace(
str, str2);
1472 rb_enc_cr_str_exact_copy(
str, str2);
1475 str_replace_shared(
str, str2);
1485 const VALUE flag_mask =
1550 static ID keyword_ids[2];
1551 VALUE orig, opt, venc, vcapa;
1556 if (!keyword_ids[0]) {
1558 CONST_ID(keyword_ids[1],
"capacity");
1583 if (orig ==
str)
n = 0;
1585 str_modifiable(
str);
1587 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1595 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1608 rb_enc_cr_str_exact_copy(
str, orig);
/* True for every byte whose top two bits are not `10`, i.e. anything that is
 * not a UTF-8 continuation byte.  Note that plain ASCII bytes also satisfy
 * this test; the callers in this file count or step one character per such
 * byte. */
1628#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1644count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1649 d = (d>>6) | (~d>>7);
1650 d &= NONASCII_MASK >> 7;
1653#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1655 return rb_popcount_intptr(d);
1659# if SIZEOF_VOIDP == 8
1668enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
1674 long diff = (
long)(e - p);
1680 if ((
int)
sizeof(
uintptr_t) * 2 < e - p) {
1685 while (p < (
const char *)s) {
1686 if (is_utf8_lead_byte(*p))
len++;
1690 len += count_utf8_lead_bytes_with_word(s);
1693 p = (
const char *)s;
1696 if (is_utf8_lead_byte(*p))
len++;
1707 q = search_nonascii(p, e);
1720 q = search_nonascii(p, e);
1733 for (c=0; p<e; c++) {
1757 long diff = (
long)(e - p);
1764 q = search_nonascii(p, e);
1787 for (c=0; p<e; c++) {
1824 return enc_strlen(p, e, enc, cr);
1898 char *ptr1, *ptr2, *ptr3;
1910 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
1912 memcpy(ptr3, ptr1, len1);
1913 memcpy(ptr3+len1, ptr2, len2);
1939 else if (enc2 < 0) {
1942 else if (enc1 != enc2) {
2007 while (
n <=
len/2) {
2015 rb_enc_cr_str_copy_for_substr(str2,
str);
2057 rb_check_lockedtmp(
str);
2075 str_modifiable(
str);
2076 return !str_dependent_p(
str);
2080str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2116 if (!str_independent(
str))
2117 str_make_independent(
str);
2134 if (!str_independent(
str)) {
2135 str_make_independent_expand(
str,
len, expand, termlen);
2137 else if (expand > 0) {
2147 if (!str_independent(
str))
2148 str_make_independent(
str);
2157 str_modifiable(
str);
2193zero_filled(
const char *s,
int n)
2195 for (;
n > 0; --
n) {
2202str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2204 const char *e = s +
len;
2207 if (zero_filled(s, minlen))
return s;
2213str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2218 if (str_dependent_p(
str)) {
2219 if (!zero_filled(s +
len, termlen))
2220 str_make_independent_expand(
str,
len, 0
L, termlen);
2232 long capa = str_capacity(
str, oldtermlen) + oldtermlen;
2237 rb_check_lockedtmp(
str);
2238 str_make_independent_expand(
str,
len, 0
L, termlen);
2240 else if (str_dependent_p(
str)) {
2241 if (termlen > oldtermlen)
2242 str_make_independent_expand(
str,
len, 0
L, termlen);
2250 if (termlen > oldtermlen) {
2268 if (str_null_char(s,
len, minlen, enc)) {
2271 return str_fill_term(
str, s,
len, minlen);
2278 s = str_fill_term(
str, s,
len, minlen);
2287 return str_null_check(
str, &w);
2295 char *s = str_null_check(
str, &w);
2310 return str_fill_term(
str, s,
len, newminlen);
2338str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2348 const char *p2, *e2;
2351 while (p < e && 0 < nth) {
2358 p2 = search_nonascii(p, e2);
2378 while (p < e && nth--) {
2390 return str_nth_len(p, e, &nth, enc);
2394str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2399 p = str_nth_len(p, e, &nth, enc);
2408str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2410 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2411 if (!pp)
return e - p;
2424str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2432 while (p < (
const char *)s) {
2433 if (is_utf8_lead_byte(*p)) nth--;
2437 nth -= count_utf8_lead_bytes_with_word(s);
2443 if (is_utf8_lead_byte(*p)) {
2444 if (nth == 0)
break;
2454str_utf8_offset(
const char *p,
const char *e,
long nth)
2456 const char *pp = str_utf8_nth(p, e, &nth);
2465 if (single_byte_optimizable(
str) || pos < 0)
2482 RSTRING(str2)->as.heap.ptr += beg;
2483 olen =
RSTRING(str2)->as.heap.len;
2491 rb_enc_cr_str_copy_for_substr(str2,
str);
2505 if (
len < 0)
return 0;
2509 if (single_byte_optimizable(
str)) {
2510 if (beg > blen)
return 0;
2513 if (beg < 0)
return 0;
2515 if (
len > blen - beg)
2517 if (
len < 0)
return 0;
2522 if (
len > -beg)
len = -beg;
2534 slen = str_strlen(
str, enc);
2536 if (beg < 0)
return 0;
2538 if (
len == 0)
goto end;
2545 if (beg > str_strlen(
str, enc))
return 0;
2551 p = str_utf8_nth(s, e, &beg);
2552 if (beg > 0)
return 0;
2553 len = str_utf8_offset(p, e,
len);
2559 p = s + beg * char_sz;
2563 else if (
len * char_sz > e - p)
2568 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2569 if (beg > 0)
return 0;
2573 len = str_offset(p, e,
len, enc, 0);
2590str_substr(
VALUE str,
long beg,
long len,
int empty)
2595 if (!p)
return Qnil;
2601 RSTRING(str2)->as.heap.ptr += ofs;
2606 if (!
len && !empty)
return Qnil;
2610 rb_enc_cr_str_copy_for_substr(str2,
str);
/* From this point on, the name rb_str_dup_frozen is rewritten to
 * rb_str_new_frozen by the preprocessor (the name was #undef'ed earlier in
 * the file). */
2662#define rb_str_dup_frozen rb_str_new_frozen
2697 str_modifiable(
str);
2701 if (
len > (
capa = (
long)str_capacity(
str, termlen)) ||
len < 0) {
2718 independent = str_independent(
str);
2726 if (
len == slen)
return str;
2732 str_make_independent_expand(
str, slen,
len - slen, termlen);
2737 if (slen >
len) slen =
len;
2744 else if (!independent) {
2745 if (
len == slen)
return str;
2746 str_make_independent_expand(
str, slen,
len - slen, termlen);
2754 else if (
len == slen)
return str;
2764 long capa, total, olen, off = -1;
2770 if (
ptr >= sptr &&
ptr <= sptr + olen) {
2774 if (
len == 0)
return 0;
2793 while (total >
capa) {
/* Convenience wrapper: calls str_buf_cat() with the length obtained from
 * strlen(ptr).  `ptr` must therefore be NUL-terminated, and it is evaluated
 * twice -- do not pass an expression with side effects. */
2809#define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
2814 if (
len == 0)
return str;
2834 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
2843 if (str_encindex == ptr_encindex) {
2862 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
2871 *ptr_cr_ret = ptr_cr;
2873 if (str_encindex != ptr_encindex &&
2884 res_encindex = str_encindex;
2889 res_encindex = str_encindex;
2893 res_encindex = ptr_encindex;
2898 res_encindex = str_encindex;
2905 res_encindex = str_encindex;
2921 return rb_enc_cr_str_buf_cat(
str,
ptr,
len,
2938 unsigned int c = (
unsigned char)*
ptr;
/* NOTE(review): presumably a size threshold used when pre-allocating space
 * for concatenation; the code that reads it is not visible in this excerpt --
 * confirm. */
2971#define MIN_PRE_ALLOC_SIZE 48
2994 for (
i = s;
i < num; ++
i) {
3031 str_modifiable(
str);
3036 else if (
argc > 1) {
3144 str_modifiable(
str);
3149 else if (
argc > 1) {
3176 const char *ptr1, *ptr2;
3179 return (len1 != len2 ||
3181 memcmp(ptr1, ptr2, len1) != 0);
/* Smaller of `a` and `b` (yields `a` when they compare equal).  Each argument
 * may be evaluated twice, so pass only side-effect-free expressions. */
3200#define lesser(a,b) (((a)>(b))?(b):(a))
3212 if (idx1 == idx2)
return TRUE;
3231 const char *ptr1, *ptr2;
3234 if (str1 == str2)
return 0;
3237 if (ptr1 == ptr2 || (retval =
memcmp(ptr1, ptr2,
lesser(len1, len2))) == 0) {
3246 if (len1 > len2)
return 1;
3249 if (retval > 0)
return 1;
3269 if (str1 == str2)
return Qtrue;
3276 return rb_str_eql_internal(str1, str2);
3289 if (str1 == str2)
return Qtrue;
3291 return rb_str_eql_internal(str1, str2);
3361 return str_casecmp(str1, s);
3369 char *p1, *p1end, *p2, *p2end;
3378 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3379 while (p1 < p1end && p2 < p2end) {
3381 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3382 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3384 return INT2FIX(c1 < c2 ? -1 : 1);
3391 while (p1 < p1end && p2 < p2end) {
3395 if (0 <= c1 && 0 <= c2) {
3399 return INT2FIX(c1 < c2 ? -1 : 1);
3405 len = l1 < l2 ? l1 : l2;
3408 return INT2FIX(r < 0 ? -1 : 1);
3410 return INT2FIX(l1 < l2 ? -1 : 1);
3448 return str_casecmp_p(str1, s);
3455 VALUE folded_str1, folded_str2;
3456 VALUE fold_opt = sym_fold;
3463 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3464 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3470strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3471 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3473 const char *search_start = str_ptr;
3474 long pos, search_len = str_len - offset;
3478 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3479 if (pos < 0)
return pos;
3481 if (t == search_start + pos)
break;
3482 search_len -= t - search_start;
3483 if (search_len <= 0)
return -1;
3484 offset += t - search_start;
3487 return pos + offset;
/* Shorthand for rb_strseq_index() with its fourth argument fixed to 0.
 * NOTE(review): that argument looks like the `in_byte` flag, which would make
 * `offset` a character offset here -- confirm against rb_strseq_index(). */
3490#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3495 const char *str_ptr, *str_ptr_end, *sub_ptr;
3496 long str_len, sub_len;
3497 int single_byte = single_byte_optimizable(
str);
3509 if (str_len < sub_len)
return -1;
3512 long str_len_char, sub_len_char;
3513 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(
str, enc);
3514 sub_len_char = in_byte ? sub_len : str_strlen(
sub, enc);
3516 offset += str_len_char;
3517 if (offset < 0)
return -1;
3519 if (str_len_char - offset < sub_len_char)
return -1;
3520 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3523 if (sub_len == 0)
return offset;
3526 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3573 if (pos > str_strlen(
str,
NULL))
3600 if (pos == -1)
return Qnil;
3608 char *hit, *adjusted;
3610 long slen, searchlen;
3614 if (slen == 0)
return pos;
3619 searchlen = s - sbeg + 1;
3622 hit =
memrchr(sbeg, c, searchlen);
3625 if (hit != adjusted) {
3626 searchlen = adjusted - sbeg;
3629 if (
memcmp(hit, t, slen) == 0)
3631 searchlen = adjusted - sbeg;
3632 }
while (searchlen > 0);
3649 if (
memcmp(s, t, slen) == 0) {
3652 if (pos == 0)
break;
3671 singlebyte = single_byte_optimizable(
str);
3673 slen = str_strlen(
sub, enc);
3676 if (
len < slen)
return -1;
3677 if (
len - pos < slen) pos =
len - slen;
3678 if (
len == 0)
return pos;
3690 return str_rindex(
str,
sub, s, pos, enc);
3718 long pos,
len = str_strlen(
str, enc);
3731 if (pos >
len) pos =
len;
3742 enc, single_byte_optimizable(
str));
3746 if (pos >= 0)
return LONG2NUM(pos);
3762 pos = rb_str_rindex(
str,
sub, pos);
3763 if (pos >= 0)
return LONG2NUM(pos);
3875 re = get_pat(
argv[0]);
3909 for (
i =
len-1; 0 <=
i && (
unsigned char)p[
i] == 0xff;
i--)
3913 ++((
unsigned char*)p)[
i];
3927 for (len2 =
len-1; 0 < len2; len2--) {
3962 for (
i =
len-1; 0 <=
i && (
unsigned char)p[
i] == 0;
i--)
3966 --((
unsigned char*)p)[
i];
3980 for (len2 =
len-1; 0 < len2; len2--) {
4000enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4010 const int max_gaps = 1;
4021 for (
try = 0;
try <= max_gaps; ++
try) {
4022 ret = enc_succ_char(p,
len, enc);
4033 ret = enc_pred_char(p,
len, enc);
4057 enc_succ_char(carry,
len, enc);
4094 rb_enc_cr_str_copy_for_substr(
str, orig);
4095 return str_succ(
str);
4102 char *sbeg, *s, *e, *last_alnum = 0;
4103 int found_alnum = 0;
4106 long carry_pos = 0, carry_len = 1;
4110 if (slen == 0)
return str;
4114 s = e = sbeg + slen;
4126 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4137 carry_pos = s - sbeg;
4149 neighbor = enc_succ_char(tmp, l, enc);
4163 enc_succ_char(s, l, enc);
4166 MEMCPY(carry, s,
char, l);
4169 carry_pos = s - sbeg;
4175 s = sbeg + carry_pos;
4176 memmove(s + carry_len, s, slen - carry_pos);
4203all_digits_p(
const char *s,
long len)
4255 VALUE end, exclusive;
4265 VALUE current, after_end;
4279 if (c > e || (excl && c == e))
return beg;
4282 if (!excl && c == e)
break;
4284 if (excl && c == e)
break;
4304 if (excl && bi == ei)
break;
4310 ID op = excl ?
'<' :
idLE;
4324 if (
n > 0 || (excl &&
n == 0))
return beg;
4332 if ((*each)(current,
arg))
break;
4333 if (
NIL_P(next))
break;
4378 if ((*each)(current,
arg))
break;
4421 if (b <=
v &&
v < e)
return Qtrue;
4482 return str_substr(
str, idx, 1,
FALSE);
4569 return rb_str_aref(
str,
argv[0]);
4578 str_modifiable(
str);
4579 if (
len > olen)
len = olen;
4607 if (beg == 0 && vlen == 0) {
4612 str_modify_keep_cr(
str);
4628 slen - (beg +
len));
4630 if (vlen < beg &&
len < 0) {
4648 int singlebyte = single_byte_optimizable(
str);
4655 slen = str_strlen(
str, enc);
4662 if (beg + slen < 0) {
4669 if (
len > slen - beg) {
4672 str_modify_keep_cr(
str);
4680 rb_str_splice_0(
str, beg,
len, val);
/* Local alias: rb_str_splice(str, beg, len, val) expands to
 * rb_str_update(str, beg, len, val) with the arguments passed through
 * unchanged. */
4687#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
4694 long start, end,
len;
4723 rb_str_splice_0(
str, start,
len, val);
4742 rb_str_subpat_set(
str, indx,
INT2FIX(0), val);
4872 str_modify_keep_cr(
str);
4874 if (!
NIL_P(result)) {
4907get_pat_quoted(
VALUE pat,
int check)
4934rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
4937 pos = rb_strseq_index(
str, pat, pos, 1);
4938 if (set_backref_str) {
4987 pat = get_pat_quoted(
argv[0], 1);
4989 str_modifiable(
str);
4990 beg = rb_pat_search(pat,
str, 0, 1);
5013 if (iter || !
NIL_P(hash)) {
5023 str_mod_check(
str, p,
len);
5060 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
5144 long beg, beg0, end0;
5145 long offset, blen, slen,
len,
last;
5146 enum {STR, ITER, MAP} mode = STR;
5148 int need_backref = -1;
5170 pat = get_pat_quoted(
argv[0], 1);
5171 beg = rb_pat_search(pat,
str, 0, need_backref);
5173 if (bang)
return Qnil;
5209 str_mod_check(
str, sp, slen);
5214 else if (need_backref) {
5216 if (need_backref < 0) {
5217 need_backref = val != repl;
5224 len = beg0 - offset;
5241 offset = end0 +
len;
5245 beg = rb_pat_search(pat,
str, offset, need_backref);
5250 rb_pat_search(pat,
str,
last, 1);
5252 str_shared_replace(
str, dest);
5279 str_modify_keep_cr(
str);
5365 str_modifiable(
str);
5366 if (
str == str2)
return str;
5370 return str_replace(
str, str2);
5443 char *head, *left = 0;
5448 if (pos < -
len ||
len <= pos)
5455 unsigned char byte =
NUM2INT(w) & 0xFF;
5457 if (!str_independent(
str))
5458 str_make_independent(
str);
5461 ptr = (
unsigned char *)&head[pos];
5495str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
5501 if (beg >
n ||
len < 0)
return Qnil;
5504 if (beg < 0)
return Qnil;
5509 if (!empty)
return Qnil;
5519 RSTRING(str2)->as.heap.ptr += beg;
5526 str_enc_copy(str2,
str);
5570 return str_byte_substr(
str, idx, 1,
FALSE);
5602 return str_byte_substr(
str, beg, end,
TRUE);
5605 return str_byte_aref(
str,
argv[0]);
5633 if (single_byte_optimizable(
str)) {
5661 str_enc_copy(rev,
str);
5679 if (single_byte_optimizable(
str)) {
5682 str_modify_keep_cr(
str);
5692 str_shared_replace(
str, rb_str_reverse(
str));
5696 str_modify_keep_cr(
str);
/* NOTE(review): presumably the buffer size needed for one escaped character
 * sequence; the buffers declared with it are not visible in this excerpt --
 * confirm. */
5812#define CHAR_ESC_LEN 13
5827 else if (c < 0x10000) {
5851 case '\0':
return "\\0";
5852 case '\n':
return "\\n";
5853 case '\r':
return "\\r";
5854 case '\t':
return "\\t";
5855 case '\f':
return "\\f";
5856 case '\013':
return "\\v";
5857 case '\010':
return "\\b";
5858 case '\007':
return "\\a";
5859 case '\033':
return "\\e";
5860 case '\x7f':
return "\\c?";
5872 const char *prev = p;
5883 if (p > prev) str_buf_cat(result, prev, p - prev);
5886 n = (
int)(pend - p);
5899 if (p -
n > prev) str_buf_cat(result, prev, p -
n - prev);
5906 if (p -
n > prev) str_buf_cat(result, prev, p -
n - prev);
5911 if (p > prev) str_buf_cat(result, prev, p - prev);
5934 const char *p, *pend, *prev;
5948 actenc = get_actual_encoding(encidx,
str);
5949 if (actenc != enc) {
5959 if (p > prev) str_buf_cat(result, prev, p - prev);
5962 n = (
int)(pend - p);
5973 if ((asciicompat || unicode_p) &&
5974 (c ==
'"'|| c ==
'\\' ||
5979 (
cc ==
'$' ||
cc ==
'@' ||
cc ==
'{'))))) {
5980 if (p -
n > prev) str_buf_cat(result, prev, p -
n - prev);
5982 if (asciicompat || enc == resenc) {
5988 case '\n':
cc =
'n';
break;
5989 case '\r':
cc =
'r';
break;
5990 case '\t':
cc =
't';
break;
5991 case '\f':
cc =
'f';
break;
5992 case '\013':
cc =
'v';
break;
5993 case '\010':
cc =
'b';
break;
5994 case '\007':
cc =
'a';
break;
5995 case 033:
cc =
'e';
break;
5996 default:
cc = 0;
break;
5999 if (p -
n > prev) str_buf_cat(result, prev, p -
n - prev);
6002 str_buf_cat(result,
buf, 2);
6011 if (p -
n > prev) str_buf_cat(result, prev, p -
n - prev);
6017 if (p > prev) str_buf_cat(result, prev, p - prev);
/* True when `p` is still before `e` and points at '$', '@' or '{'.  The dump
 * code checks this right after consuming a '#' and, if it holds, emits a
 * backslash first.  `p` is evaluated up to four times. */
6023#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6047 const char *p, *pend;
6051 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6062 unsigned char c = *p++;
6065 case '"':
case '\\':
6066 case '\n':
case '\r':
6067 case '\t':
case '\f':
6068 case '\013':
case '\010':
case '\007':
case '\033':
6081 if (
u8 && c > 0x7F) {
6087 else if (
cc <= 0xFFFFF)
6112 unsigned char c = *p++;
6114 if (c ==
'"' || c ==
'\\') {
6118 else if (c ==
'#') {
6119 if (
IS_EVSTR(p, pend)) *q++ =
'\\';
6122 else if (c ==
'\n') {
6126 else if (c ==
'\r') {
6130 else if (c ==
'\t') {
6134 else if (c ==
'\f') {
6138 else if (c ==
'\013') {
6142 else if (c ==
'\010') {
6146 else if (c ==
'\007') {
6150 else if (c ==
'\033') {
6189unescape_ascii(
unsigned int c)
6214undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6216 const char *s = *ss;
6220 unsigned char buf[6];
6238 *
buf = unescape_ascii(*s);
6251 if (*penc != enc_utf8) {
6270 if (hexlen == 0 || hexlen > 6) {
6276 if (0xd800 <= c && c <= 0xdfff) {
6289 if (0xd800 <= c && c <= 0xdfff) {
6340 bool binary =
false;
6344 if (rb_str_is_ascii_only_p(
str) ==
Qfalse) {
6347 if (!str_null_check(
str, &w)) {
6351 if (*s !=
'"')
goto invalid_format;
6369 static const char force_encoding_suffix[] =
".force_encoding(\"";
6370 static const char dup_suffix[] =
".dup";
6371 const char *encname;
6376 size =
sizeof(dup_suffix) - 1;
6379 size =
sizeof(force_encoding_suffix) - 1;
6380 if (s_end - s <=
size)
goto invalid_format;
6381 if (
memcmp(s, force_encoding_suffix,
size) != 0)
goto invalid_format;
6389 s =
memchr(s,
'"', s_end-s);
6391 if (!s)
goto invalid_format;
6392 if (s_end - s != 2)
goto invalid_format;
6393 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
6409 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
6418 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
6434 rb_str_check_dummy_enc(enc);
6445 if (
argv[0]==sym_turkic) {
6448 if (
argv[1]==sym_lithuanian)
6454 else if (
argv[0]==sym_lithuanian) {
6457 if (
argv[1]==sym_turkic)
6465 else if (
argv[0]==sym_ascii)
6467 else if (
argv[0]==sym_fold) {
/* NOTE(review): presumably extra headroom, in bytes, added when sizing
 * case-mapping output buffers; the sizing code is not visible in this
 * excerpt -- confirm. */
6487#define CASE_MAPPING_ADDITIONAL_LENGTH 20
6488#ifndef CASEMAP_DEBUG
6489# define CASEMAP_DEBUG 0
6501mapping_buffer_free(
void *p)
6505 while (current_buffer) {
6506 previous_buffer = current_buffer;
6507 current_buffer = current_buffer->
next;
6514 {0, mapping_buffer_free,}
6522 const OnigUChar *source_current, *source_end;
6523 int target_length = 0;
6524 VALUE buffer_anchor;
6527 size_t buffer_count = 0;
6528 int buffer_length_or_invalid;
6537 while (source_current < source_end) {
6544 *pre_buffer = current_buffer;
6545 pre_buffer = ¤t_buffer->
next;
6548 buffer_length_or_invalid = enc->
case_map(flags,
6549 (
const OnigUChar**)&source_current, source_end,
6550 current_buffer->
space,
6551 current_buffer->
space+current_buffer->
capa,
6553 if (buffer_length_or_invalid < 0) {
6554 current_buffer =
DATA_PTR(buffer_anchor);
6556 mapping_buffer_free(current_buffer);
6559 target_length += current_buffer->
used = buffer_length_or_invalid;
6565 if (buffer_count==1) {
6569 char *target_current;
6573 current_buffer =
DATA_PTR(buffer_anchor);
6574 while (current_buffer) {
6576 target_current += current_buffer->
used;
6577 current_buffer = current_buffer->
next;
6580 current_buffer =
DATA_PTR(buffer_anchor);
6582 mapping_buffer_free(current_buffer);
6585 str_enc_copy(target, source);
6594 const OnigUChar *source_current, *source_end;
6597 int length_or_invalid;
6599 if (old_length == 0)
return Qnil;
6603 if (source == target) {
6604 target_current = (
OnigUChar*)source_current;
6613 &source_current, source_end,
6614 target_current, target_end, enc);
6615 if (length_or_invalid < 0)
6619 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
6621 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
6624 str_enc_copy(target, source);
6633 bool modified =
false;
6636 unsigned int c = *(
unsigned char*)s;
6639 *s =
'A' + (c -
'a');
6664 flags = check_case_options(
argc,
argv, flags);
6665 str_modify_keep_cr(
str);
6666 enc = str_true_enc(
str);
6667 if (case_option_single_p(flags, enc,
str)) {
6668 if (upcase_single(
str))
6672 rb_str_ascii_casemap(
str,
str, &flags, enc);
6674 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
6701 flags = check_case_options(
argc,
argv, flags);
6702 enc = str_true_enc(
str);
6703 if (case_option_single_p(flags, enc,
str)) {
6705 str_enc_copy(ret,
str);
6710 rb_str_ascii_casemap(
str, ret, &flags, enc);
6713 ret = rb_str_casemap(
str, &flags, enc);
6723 bool modified =
false;
6726 unsigned int c = *(
unsigned char*)s;
6729 *s =
'a' + (c -
'A');
6755 flags = check_case_options(
argc,
argv, flags);
6756 str_modify_keep_cr(
str);
6757 enc = str_true_enc(
str);
6758 if (case_option_single_p(flags, enc,
str)) {
6759 if (downcase_single(
str))
6763 rb_str_ascii_casemap(
str,
str, &flags, enc);
6765 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
6829 flags = check_case_options(
argc,
argv, flags);
6830 enc = str_true_enc(
str);
6831 if (case_option_single_p(flags, enc,
str)) {
6833 str_enc_copy(ret,
str);
6834 downcase_single(ret);
6838 rb_str_ascii_casemap(
str, ret, &flags, enc);
6841 ret = rb_str_casemap(
str, &flags, enc);
6872 flags = check_case_options(
argc,
argv, flags);
6873 str_modify_keep_cr(
str);
6874 enc = str_true_enc(
str);
6877 rb_str_ascii_casemap(
str,
str, &flags, enc);
6879 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
6908 flags = check_case_options(
argc,
argv, flags);
6909 enc = str_true_enc(
str);
6913 rb_str_ascii_casemap(
str, ret, &flags, enc);
6916 ret = rb_str_casemap(
str, &flags, enc);
6940 flags = check_case_options(
argc,
argv, flags);
6941 str_modify_keep_cr(
str);
6942 enc = str_true_enc(
str);
6944 rb_str_ascii_casemap(
str,
str, &flags, enc);
6946 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
6974 flags = check_case_options(
argc,
argv, flags);
6975 enc = str_true_enc(
str);
6979 rb_str_ascii_casemap(
str, ret, &flags, enc);
6982 ret = rb_str_casemap(
str, &flags, enc);
7003 if (t->
p == t->
pend)
return -1;
7011 if (t->
p < t->
pend) {
7015 if (t->
now < 0x80 && c < 0x80) {
7017 "invalid range \"%c-%c\" in string transliteration",
7054 const unsigned int errc = -1;
7055 unsigned int trans[256];
7057 struct tr trsrc, trrepl;
7059 unsigned int c, c0,
last = 0;
7060 int modify = 0,
i, l;
7061 unsigned char *s, *send;
7063 int singlebyte = single_byte_optimizable(
str);
/* If the local variable `cr` is currently ENC_CODERANGE_7BIT and `c` is not
 * ASCII according to rb_isascii(), change `cr` to ENC_CODERANGE_VALID;
 * otherwise do nothing.  `cr` is not a macro parameter and must be in scope
 * at the expansion site.  The (void) cast discards the value of the
 * conditional expression. */
7067#define CHECK_IF_ASCII(c) \
7068 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7069 (cr = ENC_CODERANGE_VALID) : 0)
7075 return rb_str_delete_bang(1, &
src,
str);
7090 trsrc.
p + l < trsrc.
pend) {
7096 trsrc.
gen = trrepl.
gen = 0;
7097 trsrc.
now = trrepl.
now = 0;
7098 trsrc.
max = trrepl.
max = 0;
7101 for (
i=0;
i<256;
i++) {
7104 while ((c = trnext(&trsrc, enc)) != errc) {
7113 while ((c = trnext(&trrepl, enc)) != errc)
7116 for (
i=0;
i<256;
i++) {
7117 if (trans[
i] != errc) {
7125 for (
i=0;
i<256;
i++) {
7128 while ((c = trnext(&trsrc, enc)) != errc) {
7129 r = trnext(&trrepl, enc);
7130 if (r == errc) r = trrepl.
now;
7144 str_modify_keep_cr(
str);
7150 unsigned int save = -1;
7151 unsigned char *
buf =
ALLOC_N(
unsigned char, max + termlen), *t =
buf;
7166 if (cflag) c =
last;
7169 else if (cflag) c = errc;
7175 if (c != (
unsigned int)-1) {
7187 if (enc != e1) may_modify = 1;
7189 if ((offset = t -
buf) + tlen > max) {
7191 max = offset + tlen + (send - s);
7196 if (may_modify &&
memcmp(s, t, tlen) != 0) {
7213 c = (
unsigned char)*s;
7214 if (trans[c] != errc) {
7231 long offset, max = (
long)((send - s) * 1.2);
7232 unsigned char *
buf =
ALLOC_N(
unsigned char, max + termlen), *t =
buf;
7245 if (cflag) c =
last;
7248 else if (cflag) c = errc;
7252 c = cflag ?
last : errc;
7260 if (enc != e1) may_modify = 1;
7262 if ((offset = t -
buf) + tlen > max) {
7264 max = offset + tlen + (
long)((send - s) * 1.2);
7270 if (may_modify &&
memcmp(s, t, tlen) != 0) {
7310 return tr_trans(
str,
src, repl, 0);
7353 tr_trans(
str,
src, repl, 0);
7357#define TR_TABLE_SIZE 257
7362 const unsigned int errc = -1;
7366 VALUE table = 0, ptable = 0;
7367 int i, l, cflag = 0;
7377 for (
i=0;
i<256;
i++) {
7380 stable[256] = cflag;
7382 else if (stable[256] && !cflag) {
7385 for (
i=0;
i<256;
i++) {
7389 while ((c = trnext(&
tr, enc)) != errc) {
7391 buf[c & 0xff] = !cflag;
7396 if (!table && (first || *tablep || stable[256])) {
7413 for (
i=0;
i<256;
i++) {
7414 stable[
i] = stable[
i] &&
buf[
i];
7416 if (!table && !cflag) {
7426 return table[c] != 0;
7458 VALUE del = 0, nodel = 0;
7460 int i, ascompat, cr;
7469 tr_setup_table(s, squeez,
i==0, &del, &nodel, enc);
7472 str_modify_keep_cr(
str);
7481 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
7494 if (tr_find(c, squeez, del, nodel)) {
7509 if (modify)
return str;
7550 VALUE del = 0, nodel = 0;
7551 unsigned char *s, *send, *t;
7553 int ascompat, singlebyte = single_byte_optimizable(
str);
7565 if (singlebyte && !single_byte_optimizable(s))
7567 tr_setup_table(s, squeez,
i==0, &del, &nodel, enc);
7571 str_modify_keep_cr(
str);
7580 unsigned int c = *s++;
7581 if (c != save || (
argc > 0 && !squeez[c])) {
7591 if (ascompat && (c = *s) < 0x80) {
7592 if (c != save || (
argc > 0 && !squeez[c])) {
7600 if (c != save || (
argc > 0 && !tr_find(c, squeez, del, nodel))) {
7616 if (modify)
return str;
7656 return tr_trans(
str,
src, repl, 1);
7677 tr_trans(
str,
src, repl, 1);
7714 VALUE del = 0, nodel = 0, tstr;
7738 if (*(
unsigned char*)s++ == c)
n++;
7744 tr_setup_table(tstr, table,
TRUE, &del, &nodel, enc);
7749 tr_setup_table(tstr, table,
FALSE, &del, &nodel, enc);
7760 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
7769 if (tr_find(c, table, del, nodel)) {
7780rb_fs_check(
VALUE val)
7784 if (
NIL_P(val))
return 0;
7789static const char isspacetable[256] = {
7790 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
7791 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7792 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7793 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7794 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7795 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7796 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7797 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7798 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7799 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7800 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7801 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7802 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7803 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7804 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7805 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
7808#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
7813 if (empty_count >= 0 &&
len == 0) {
7814 return empty_count + 1;
7816 if (empty_count > 0) {
7821 }
while (--empty_count > 0);
7826 }
while (--empty_count > 0);
7900 enum {awk, string, regexp, chars} split_type;
7901 long beg, end,
i = 0, empty_count = -1;
7908 if (lim <= 0) limit =
Qnil;
7909 else if (lim == 1) {
7921 if (
NIL_P(limit) && !lim) empty_count = 0;
7924 split_type = regexp;
7926 spat = get_pat_quoted(spat, 0);
7931 else if (!(spat = rb_fs_check(spat))) {
7935 rb_warn(
"$; is set to non-nil value");
7937 if (split_type != awk) {
7941 mustnot_broken(spat);
7942 split_type = string;
7962#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
7968 if (split_type == awk) {
7975 while (
ptr < eptr) {
7984 if (!
NIL_P(limit) && lim <=
i)
break;
7999 while (
ptr < eptr) {
8005 if (rb_isspace(c)) {
8011 if (!
NIL_P(limit) && lim <=
i)
break;
8014 else if (rb_isspace(c)) {
8026 else if (split_type ==
string) {
8027 char *str_start =
ptr;
8028 char *substr_start =
ptr;
8032 mustnot_broken(
str);
8034 while (
ptr < eptr &&
8038 if (t !=
ptr + end) {
8042 SPLIT_STR(substr_start - str_start, (
ptr+end) - substr_start);
8045 if (!
NIL_P(limit) && lim <= ++
i)
break;
8047 beg =
ptr - str_start;
8049 else if (split_type == chars) {
8050 char *str_start =
ptr;
8053 mustnot_broken(
str);
8055 while (
ptr < eptr &&
8059 if (!
NIL_P(limit) && lim <= ++
i)
break;
8061 beg =
ptr - str_start;
8076 if (start == end &&
BEG(0) ==
END(0)) {
8081 else if (last_null == 1) {
8096 beg = start =
END(0);
8100 for (idx=1; idx < regs->
num_regs; idx++) {
8101 if (
BEG(idx) == -1)
continue;
8104 if (!
NIL_P(limit) && lim <= ++
i)
break;
8112 return result ? result :
str;
8122 return rb_str_split_m(1, &sep,
str);
8125#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8140#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8143chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8160 const char *
ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8161 long pos,
len, rslen;
8167 static ID keywords[1];
8200 const char *eol =
NULL;
8202 while (subend < pend) {
8208 if (eol == subend)
break;
8210 if (subptr) eol = subend;
8213 if (!subptr) subptr = subend;
8217 }
while (subend < pend);
8220 subend - subptr + (chomp ? 0 : rslen));
8224 subptr = eol =
NULL;
8243 while (subptr < pend) {
8244 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
8248 if (hit != adjusted) {
8252 subend = hit += rslen;
8255 subend = chomp_newline(subptr, subend, enc);
8268 if (subptr != pend) {
8271 pend = chomp_newline(subptr, pend, enc);
8273 else if (pend - subptr >= rslen &&
8274 memcmp(pend - rslen, rsptr, rslen) == 0) {
8341 return rb_str_enumerate_lines(
argc,
argv,
str, 0);
8409 return rb_str_enumerate_bytes(
str, 0);
8427 return rb_str_enumerate_bytes(
str,
ary);
8450 for (
i = 0;
i <
len;
i +=
n) {
8456 for (
i = 0;
i <
len;
i +=
n) {
8487 return rb_str_enumerate_chars(
str, 0);
8505 return rb_str_enumerate_chars(
str,
ary);
8514 const char *
ptr, *end;
8517 if (single_byte_optimizable(
str))
8518 return rb_str_enumerate_bytes(
str,
ary);
8561 return rb_str_enumerate_codepoints(
str, 0);
8580 return rb_str_enumerate_codepoints(
str,
ary);
8592 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
8594 if (!reg_grapheme_cluster) {
8598 size_t source_len =
sizeof(source_ascii) - 1;
8600#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
8601#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
8602#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
8603#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
8604#define CASE_UTF(e) \
8605 case ENCINDEX_UTF_##e: { \
8606 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
8607 source = source_UTF_##e; \
8608 source_len = sizeof(source_UTF_##e); \
8618 int r =
onig_new(®_grapheme_cluster, source, source + source_len,
8623 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
8626 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
8629 return reg_grapheme_cluster;
8635 size_t grapheme_cluster_count = 0;
8638 const char *
ptr, *end;
8644 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
8652 if (
len <= 0)
break;
8653 grapheme_cluster_count++;
8657 return SIZET2NUM(grapheme_cluster_count);
8666 const char *ptr0, *
ptr, *end;
8669 return rb_str_enumerate_chars(
str,
ary);
8673 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
8681 if (
len <= 0)
break;
8708rb_str_each_grapheme_cluster(
VALUE str)
8711 return rb_str_enumerate_grapheme_clusters(
str, 0);
8729 return rb_str_enumerate_grapheme_clusters(
str,
ary);
8736 const char *p, *p2, *beg, *end;
8740 if (beg >= end)
return 0;
8762 str_modify_keep_cr(
str);
8765 len = chopped_length(
str);
8807 char *pp, *e, *rsptr;
8812 if (
len == 0)
return 0;
8833 if (--e > p && *(e-1) ==
'\r') {
8863 while (e > p && *(e-1) ==
'\n') {
8865 if (e > p && *(e-1) ==
'\r')
8871 if (rslen >
len)
return len;
8874 newline = rsptr[rslen-1];
8877 if (newline ==
'\n')
8891 if (p[
len-1] == newline &&
8893 memcmp(rsptr, pp, rslen) == 0)) {
8924 long len = chompped_length(
str, rs);
8926 str_modify_keep_cr(
str);
8948 str_modifiable(
str);
8989 const char *
const start = s;
8991 if (!s || s >= e)
return 0;
8994 if (single_byte_optimizable(
str)) {
9002 if (!rb_isspace(
cc))
break;
9031 str_modify_keep_cr(
str);
9034 loffset = lstrip_offset(
str, start, start+olen, enc);
9036 long len = olen-loffset;
9037 s = start + loffset;
9040#if !SHARABLE_MIDDLE_SUBSTRING
9078 rb_str_check_dummy_enc(enc);
9079 if (!s || s >= e)
return 0;
9083 if (single_byte_optimizable(
str)) {
9085 while (s < t && ((c = *(t-1)) ==
'\0' ||
ascii_isspace(c))) t--;
9092 if (c && !rb_isspace(c))
break;
9121 str_modify_keep_cr(
str);
9124 roffset = rstrip_offset(
str, start, start+olen, enc);
9126 long len = olen - roffset;
9129#if !SHARABLE_MIDDLE_SUBSTRING
9160 roffset = rstrip_offset(
str, start, start+olen, enc);
9184 long olen, loffset, roffset;
9187 str_modify_keep_cr(
str);
9190 loffset = lstrip_offset(
str, start, start+olen, enc);
9191 roffset = rstrip_offset(
str, start+loffset, start+olen, enc);
9193 if (loffset > 0 || roffset > 0) {
9194 long len = olen-roffset;
9200#if !SHARABLE_MIDDLE_SUBSTRING
9228 long olen, loffset, roffset;
9232 loffset = lstrip_offset(
str, start, start+olen, enc);
9233 roffset = rstrip_offset(
str, start+loffset, start+olen, enc);
9240scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9242 VALUE result, match;
9245 long end, pos = rb_pat_search(pat,
str, *start, set_backref_str);
9271 if (!regs || regs->
num_regs == 1) {
9326 long last = -1, prev = 0;
9329 pat = get_pat_quoted(pat, 1);
9330 mustnot_broken(
str);
9334 while (!
NIL_P(result = scan_once(
str, pat, &start, 0))) {
9344 while (!
NIL_P(result = scan_once(
str, pat, &start, 1))) {
9348 str_mod_check(
str, p,
len);
9466# define CRYPT_END() ALLOCV_END(databuf)
9468 extern char *
crypt(
const char *,
const char *);
9469# define CRYPT_END() (void)0
9472 const char *s, *saltp;
9475 char salt_8bit_clean[3];
9480 mustnot_wchar(salt);
9488 if (!saltp[0] || !saltp[1])
goto short_salt;
9490 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
9491 salt_8bit_clean[0] = saltp[0] & 0x7f;
9492 salt_8bit_clean[1] = saltp[1] & 0x7f;
9493 salt_8bit_clean[2] =
'\0';
9494 saltp = salt_8bit_clean;
9499# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
9500 data->initialized = 0;
9502 res =
crypt_r(s, saltp, data);
9504 res =
crypt(s, saltp);
9549 char *
ptr, *p, *pend;
9552 unsigned long sum0 = 0;
9567 sum0 += (
unsigned char)*p;
9578 if (bits < (
int)
sizeof(
long)*
CHAR_BIT) {
9579 sum0 &= (((
unsigned long)1)<<bits)-1;
9603 long width,
len, flen = 1, fclen = 1;
9606 const char *
f =
" ";
9607 long n,
size, llen, rlen, llen2 = 0, rlen2 = 0;
9609 int singlebyte = 1, cr;
9621 fclen = str_strlen(pad, enc);
9622 singlebyte = single_byte_optimizable(pad);
9623 if (flen == 0 || fclen == 0) {
9627 len = str_strlen(
str, enc);
9630 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ?
n :
n/2);
9634 llen2 = str_offset(
f,
f + flen, llen % fclen, enc, singlebyte);
9635 rlen2 = str_offset(
f,
f + flen, rlen % fclen, enc, singlebyte);
9638 if ((
len = llen / fclen + rlen / fclen) >=
LONG_MAX / flen ||
9651 while (llen >= fclen) {
9668 while (rlen >= fclen) {
9770 sep = get_pat_quoted(sep, 0);
9778 if (pos == 0 &&
RSTRING_LEN(sep) == 0)
goto failed;
9782 if (pos < 0)
goto failed;
9825 pos = rb_str_rindex(
str, sep, pos);
9927 char *strptr, *prefixptr;
9928 long olen, prefixlen;
9936 if (prefixlen <= 0)
return 0;
9938 if (olen < prefixlen)
return 0;
9941 if (
memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
9961 str_modify_keep_cr(
str);
9963 prefixlen = deleted_prefix_length(
str, prefix);
9964 if (prefixlen <= 0)
return Qnil;
9984 prefixlen = deleted_prefix_length(
str, prefix);
10002 char *strptr, *suffixptr, *s;
10003 long olen, suffixlen;
10012 if (suffixlen <= 0)
return 0;
10014 if (olen < suffixlen)
return 0;
10017 s = strptr + olen - suffixlen;
10018 if (
memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10038 long olen, suffixlen,
len;
10039 str_modifiable(
str);
10041 suffixlen = deleted_suffix_length(
str, suffix);
10042 if (suffixlen <= 0)
return Qnil;
10045 str_modify_keep_cr(
str);
10046 len = olen - suffixlen;
10070 suffixlen = deleted_suffix_length(
str, suffix);
10088 val = rb_fs_check(val);
10091 "value of %"PRIsVALUE" must be String or Regexp",
10111 str_modifiable(
str);
10128 str_replace_shared_without_enc(str2,
str);
10187 static const char ellipsis[] =
"...";
10188 const long ellipsislen =
sizeof(ellipsis) - 1;
10192 VALUE estr, ret = 0;
10199 else if (
len <= ellipsislen ||
10263 return enc_str_scrub(enc,
str, repl, cr);
10271 const char *rep, *p, *e, *p1, *sp;
10284 if (!
NIL_P(repl)) {
10285 repl = str_compat_and_valid(repl, enc);
10293#define DEFAULT_REPLACE_CHAR(str) do { \
10294 static const char replace[sizeof(str)-1] = str; \
10295 rep = replace; replen = (int)sizeof(replace); \
10310 else if (!
NIL_P(repl)) {
10325 p = search_nonascii(p, e);
10349 if (e - p < clen) clen = e - p;
10356 for (; clen > 1; clen--) {
10369 str_mod_check(
str, sp, slen);
10370 repl = str_compat_and_valid(repl, enc);
10377 p = search_nonascii(p, e);
10404 str_mod_check(
str, sp, slen);
10405 repl = str_compat_and_valid(repl, enc);
10418 else if (!
NIL_P(repl)) {
10452 if (e - p < clen) clen = e - p;
10453 if (clen <= mbminlen * 2) {
10458 for (; clen > mbminlen; clen-=mbminlen) {
10470 str_mod_check(
str, sp, slen);
10471 repl = str_compat_and_valid(repl, enc);
10497 str_mod_check(
str, sp, slen);
10498 repl = str_compat_and_valid(repl, enc);
10553static ID id_normalize;
10554static ID id_normalized_p;
10555static VALUE mUnicodeNormalize;
10560 static int UnicodeNormalizeRequired = 0;
10563 if (!UnicodeNormalizeRequired) {
10564 rb_require(
"unicode_normalize/normalize.rb");
10565 UnicodeNormalizeRequired = 1;
10597 return unicode_normalize_common(
argc,
argv,
str, id_normalize);
10633 return unicode_normalize_common(
argc,
argv,
str, id_normalized_p);
10676#define sym_equal rb_obj_equal
10679sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
10706 if ((resenc != enc && !rb_str_is_ascii_only_p(
sym)) ||
len != (
long)
strlen(
ptr) ||
10727 if ((resenc != enc && !rb_str_is_ascii_only_p(
str)) ||
11204#define rb_intern(str) rb_intern_const(str)
11358 id_normalized_p =
rb_intern(
"normalized?");
char * crypt_r(const char *key, const char *setting, struct crypt_data *data)
#define range(low, item, hi)
#define ENCINDEX_UTF_32BE
#define ENCINDEX_UTF_32LE
#define ENCINDEX_UTF_16BE
#define ENCINDEX_UTF_16LE
int rb_enc_find_index2(const char *name, long len)
#define ENCINDEX_US_ASCII
int rb_enc_dummy_p(rb_encoding *enc)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
int rb_enc_get_index(VALUE obj)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_encoding * rb_utf8_encoding(void)
rb_encoding * rb_enc_check_str(VALUE str1, VALUE str2)
rb_encoding * rb_ascii8bit_encoding(void)
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
rb_encoding * rb_enc_from_index(int index)
rb_encoding * rb_filesystem_encoding(void)
rb_encoding * rb_default_internal_encoding(void)
int rb_utf8_encindex(void)
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
rb_encoding * rb_enc_get(VALUE obj)
int rb_ascii8bit_encindex(void)
rb_encoding * rb_enc_get_from_index(int index)
int rb_enc_unicode_p(rb_encoding *enc)
void rb_enc_copy(VALUE obj1, VALUE obj2)
int rb_enc_to_index(rb_encoding *enc)
void rb_enc_set_index(VALUE obj, int idx)
rb_encoding * rb_default_external_encoding(void)
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
rb_encoding * rb_locale_encoding(void)
VALUE rb_obj_encoding(VALUE obj)
rb_encoding * rb_to_encoding(VALUE enc)
rb_encoding * rb_usascii_encoding(void)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
VALUE rb_enc_associate_index(VALUE obj, int idx)
int rb_enc_codelen(int c, rb_encoding *enc)
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
int rb_usascii_encindex(void)
#define ENCODING_SET_INLINED(obj, i)
#define ENC_CODERANGE_7BIT
#define ENC_CODERANGE_VALID
#define rb_enc_left_char_head(s, p, e, enc)
#define rb_enc_mbcput(c, buf, enc)
#define ENC_CODERANGE_CLEAN_P(cr)
#define rb_enc_isctype(c, t, enc)
#define ENC_CODERANGE_AND(a, b)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
@ econv_destination_buffer_full
#define rb_enc_step_back(s, p, e, n, enc)
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
#define rb_enc_prev_char(s, p, e, enc)
int rb_enc_symname2_p(const char *, long, rb_encoding *)
#define ENC_CODERANGE(obj)
#define ENC_CODERANGE_UNKNOWN
#define rb_enc_isascii(c, enc)
#define rb_enc_mbmaxlen(enc)
#define ENCODING_GET(obj)
#define ENC_CODERANGE_MASK
#define rb_enc_mbc_to_codepoint(p, e, enc)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
#define MBCLEN_CHARFOUND_LEN(ret)
#define rb_enc_asciicompat(enc)
#define rb_enc_codepoint(p, e, enc)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
#define MBCLEN_INVALID_P(ret)
#define rb_enc_code_to_mbclen(c, enc)
#define rb_enc_isprint(c, enc)
#define MBCLEN_NEEDMORE_P(ret)
#define rb_enc_mbminlen(enc)
#define ENC_CODERANGE_BROKEN
#define MBCLEN_CHARFOUND_P(ret)
void rb_econv_close(rb_econv_t *ec)
#define rb_enc_right_char_head(s, p, e, enc)
#define ENCODING_GET_INLINED(obj)
#define ENC_CODERANGE_CLEAR(obj)
#define ENCODING_IS_ASCII8BIT(obj)
#define ENC_CODERANGE_SET(obj, cr)
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
#define rb_enc_is_newline(p, end, enc)
char str[HTML_ESCAPE_MAX_LEN+1]
#define rb_intern_str(string)
void rb_include_module(VALUE, VALUE)
VALUE rb_define_class(const char *, VALUE)
Defines a top-level class.
VALUE rb_define_module(const char *)
void rb_undef_method(VALUE, const char *)
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *)
union RString::@156::@157::@158 aux
VALUE rb_cObject
Object class.
char ary[RSTRING_EMBED_LEN_MAX+1]
VALUE rb_to_symbol(VALUE name)
struct RString::@156::@157 heap
void rb_syserr_fail(int e, const char *mesg)
void rb_raise(VALUE exc, const char *fmt,...)
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
void rb_fatal(const char *fmt,...)
void rb_warn(const char *fmt,...)
VALUE rb_ensure(VALUE(*)(VALUE), VALUE, VALUE(*)(VALUE), VALUE)
An equivalent to ensure clause.
VALUE rb_any_to_s(VALUE)
Default implementation of #to_s.
VALUE rb_obj_alloc(VALUE)
Allocates an instance of klass.
VALUE rb_obj_frozen_p(VALUE obj)
Determines if the object is frozen.
double rb_str_to_dbl(VALUE, int)
Parses a string representation of a floating point number.
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
VALUE rb_convert_type_with_id(VALUE, int, const char *, ID)
VALUE rb_equal(VALUE, VALUE)
Same as Object#===, case equality.
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
VALUE rb_str_escape(VALUE str)
VALUE rb_check_convert_type_with_id(VALUE, int, const char *, ID)
VALUE rb_to_int(VALUE)
Converts val into Integer.
#define __msan_unpoison_string(x)
#define ONIGENC_CTYPE_DIGIT
ONIG_EXTERN int onig_error_code_to_str(OnigUChar *s, OnigPosition err_code,...)
#define ONIGENC_CASE_ASCII_ONLY
unsigned int OnigCaseFoldType
#define ONIG_MAX_ERROR_MESSAGE_LEN
ONIG_EXTERN int onig_new(OnigRegex *, const OnigUChar *pattern, const OnigUChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
#define ONIGENC_CASE_MODIFIED
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)
#define ONIGENC_CTYPE_ALPHA
#define ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_MBCLEN_CHARFOUND_P(r)
#define ONIGENC_CASE_UPCASE
#define ONIGENC_CASE_FOLD
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
#define ONIGENC_CASE_DOWNCASE
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
ONIG_EXTERN int onigenc_ascii_only_case_map(OnigCaseFoldType *flagP, const OnigUChar **pp, const OnigUChar *end, OnigUChar *to, OnigUChar *to_end, const struct OnigEncodingTypeST *enc)
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
#define ONIGENC_CASE_FOLD_TURKISH_AZERI
#define ONIGENC_CASE_TITLECASE
#define ONIGENC_CASE_FOLD_LITHUANIAN
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define ONIG_OPTION_DEFAULT
VALUE rb_reg_regcomp(VALUE)
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
long rb_reg_search(VALUE, VALUE, long, int)
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
int st_delete(st_table *tab, st_data_t *key, st_data_t *value)
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
int st_update(st_table *tab, st_data_t key, st_update_callback_func *func, st_data_t arg)
VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc)
VALUE rb_str_initialize(VALUE str, const char *ptr, long len, rb_encoding *enc)
#define STR_SET_LEN(str, n)
#define STR_EMBEDDABLE_P(len, termlen)
struct mapping_buffer mapping_buffer
int rb_str_symname_p(VALUE sym)
VALUE rb_str_include_range_p(VALUE beg, VALUE end, VALUE val, VALUE exclusive)
void rb_str_free(VALUE str)
VALUE rb_str_times(VALUE str, VALUE times)
#define SHARABLE_SUBSTRING_P(beg, len, end)
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
#define STR_HEAP_PTR(str)
VALUE rb_str_new_shared(VALUE str)
const char * ruby_escaped_char(int c)
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
VALUE rb_str_cat_cstr(VALUE str, const char *ptr)
VALUE rb_str_new_frozen(VALUE orig)
#define aligned_ptr(value)
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
#define STR_SET_NOEMBED(str)
#define STR_SET_SHARED(str, shared_str)
VALUE rb_str_buf_append(VALUE str, VALUE str2)
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
VALUE rb_utf8_str_new(const char *ptr, long len)
VALUE rb_filesystem_str_new(const char *ptr, long len)
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
#define rb_str_splice(str, beg, len, val)
VALUE rb_str_export(VALUE str)
#define DEFAULT_REPLACE_CHAR(str)
char * rb_string_value_cstr(volatile VALUE *ptr)
VALUE rb_fstring_cstr(const char *ptr)
VALUE rb_utf8_str_new_cstr(const char *ptr)
VALUE rb_sym_to_s(VALUE sym)
void rb_str_shared_replace(VALUE str, VALUE str2)
VALUE rb_external_str_new(const char *ptr, long len)
VALUE rb_str_tmp_new(long len)
char * rb_str_fill_terminator(VALUE str, const int newminlen)
long rb_str_offset(VALUE str, long pos)
char * rb_str_subpos(VALUE str, long beg, long *lenp)
VALUE rb_str_succ(VALUE orig)
#define CASE_MAPPING_ADDITIONAL_LENGTH
int rb_str_hash_cmp(VALUE str1, VALUE str2)
VALUE rb_str_subseq(VALUE str, long beg, long len)
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
#define RUBY_MAX_CHAR_LEN
VALUE rb_str_new_static(const char *ptr, long len)
int rb_enc_str_coderange(VALUE str)
VALUE rb_str_chomp_string(VALUE str, VALUE rs)
#define ENUM_ELEM(ary, e)
MJIT_FUNC_EXPORTED VALUE rb_str_opt_plus(VALUE str1, VALUE str2)
VALUE rb_str_ord(VALUE s)
VALUE rb_str_upto_each(VALUE beg, VALUE end, int excl, int(*each)(VALUE, VALUE), VALUE arg)
size_t rb_str_capacity(VALUE str)
MJIT_FUNC_EXPORTED VALUE rb_id_quote_unprintable(ID id)
VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len, rb_encoding *from, int ecflags, VALUE ecopts)
#define STR_SET_EMBED(str)
const struct st_hash_type rb_fstring_hash_type
#define BARE_STRING_P(str)
VALUE rb_str_dup(VALUE str)
void rb_str_modify(VALUE str)
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
VALUE rb_str_to_str(VALUE str)
st_index_t rb_str_hash(VALUE str)
VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc)
long rb_str_strlen(VALUE str)
VALUE rb_str_resurrect(VALUE str)
VALUE rb_str_quote_unprintable(VALUE str)
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc)
VALUE rb_usascii_str_new(const char *ptr, long len)
VALUE rb_usascii_str_new_cstr(const char *ptr)
#define WANTARRAY(m, size)
VALUE rb_filesystem_str_new_cstr(const char *ptr)
#define rb_str_index(str, sub, offset)
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
VALUE rb_str_plus(VALUE str1, VALUE str2)
long rb_str_sublen(VALUE str, long pos)
VALUE rb_str_equal(VALUE str1, VALUE str2)
VALUE rb_str_tmp_frozen_acquire(VALUE orig)
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
VALUE rb_str_replace(VALUE str, VALUE str2)
VALUE rb_check_string_type(VALUE str)
void rb_str_set_len(VALUE str, long len)
VALUE rb_str_export_locale(VALUE str)
VALUE rb_str_inspect(VALUE str)
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
VALUE rb_str_buf_new_cstr(const char *ptr)
MJIT_FUNC_EXPORTED VALUE rb_obj_as_string_result(VALUE str, VALUE obj)
#define CHECK_IF_ASCII(c)
RUBY_ALIAS_FUNCTION(rb_str_dup_frozen(VALUE str), rb_str_new_frozen,(str))
VALUE rb_tainted_str_new(const char *ptr, long len)
void rb_str_setter(VALUE val, ID id, VALUE *var)
VALUE rb_str_length(VALUE str)
RUBY_FUNC_EXPORTED VALUE rb_fstring(VALUE str)
RUBY_FUNC_EXPORTED VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
int rb_str_comparable(VALUE str1, VALUE str2)
MJIT_FUNC_EXPORTED VALUE rb_fstring_new(const char *ptr, long len)
#define str_buf_cat2(str, ptr)
#define MIN_PRE_ALLOC_SIZE
VALUE rb_str_append(VALUE str, VALUE str2)
#define RESIZE_CAPA_TERM(str, capacity, termlen)
VALUE rb_str_freeze(VALUE str)
VALUE rb_string_value(volatile VALUE *ptr)
void rb_str_modify_expand(VALUE str, long expand)
#define RESIZE_CAPA(str, capacity)
VALUE rb_str_new(const char *ptr, long len)
MJIT_FUNC_EXPORTED VALUE rb_sym_proc_call(ID mid, int argc, const VALUE *argv, int kw_splat, VALUE passed_proc)
VALUE rb_str_scrub(VALUE str, VALUE repl)
int rb_enc_str_asciionly_p(VALUE str)
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
#define STR_SET_EMBED_LEN(str, n)
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
VALUE rb_locale_str_new_cstr(const char *ptr)
#define rb_str_dup_frozen
int rb_str_cmp(VALUE str1, VALUE str2)
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
void rb_str_update(VALUE str, long beg, long len, VALUE val)
char * rb_str_to_cstr(VALUE str)
RUBY_FUNC_EXPORTED size_t rb_str_memsize(VALUE str)
VALUE rb_str_substr(VALUE str, long beg, long len)
VALUE rb_str_unlocktmp(VALUE str)
VALUE rb_str_new_cstr(const char *ptr)
VALUE rb_str_upto_endless_each(VALUE beg, int(*each)(VALUE, VALUE), VALUE arg)
VALUE rb_str_resize(VALUE str, long len)
void rb_must_asciicompat(VALUE str)
VALUE rb_utf8_str_new_static(const char *ptr, long len)
MJIT_FUNC_EXPORTED VALUE rb_str_concat_literals(size_t num, const VALUE *strary)
VALUE rb_str_split(VALUE str, const char *sep0)
VALUE rb_tainted_str_new_cstr(const char *ptr)
MJIT_FUNC_EXPORTED VALUE rb_str_eql(VALUE str1, VALUE str2)
char * rb_string_value_ptr(volatile VALUE *ptr)
VALUE rb_str_dump(VALUE str)
VALUE rb_str_concat(VALUE str1, VALUE str2)
void rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
VALUE rb_locale_str_new(const char *ptr, long len)
VALUE rb_str_buf_new(long capa)
STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX)
VALUE rb_external_str_new_cstr(const char *ptr)
#define SPLIT_STR(beg, len)
#define STR_HEAP_SIZE(str)
VALUE rb_str_drop_bytes(VALUE str, long len)
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
#define TERM_FILL(ptr, termlen)
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
VALUE rb_usascii_str_new_static(const char *ptr, long len)
VALUE rb_obj_as_string(VALUE obj)
int(* case_map)(OnigCaseFoldType *flagP, const OnigUChar **pp, const OnigUChar *end, OnigUChar *to, OnigUChar *to_end, const struct OnigEncodingTypeST *enc)
OnigUChar space[FLEX_ARY_LEN]
struct mapping_buffer * next
#define scan_hex(s, l, e)
st_table * rb_vm_fstring_table(void)
MJIT_STATIC void rb_error_arity(int argc, int min, int max)