14#include "ruby/internal/config.h"
24#include "debug_counter.h"
29#include "internal/array.h"
30#include "internal/compar.h"
31#include "internal/compilers.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
46#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
/*
 * Index the beg[] / end[] arrays reached through a variable named `regs`.
 * The macros name `regs` directly, so they only work where such a pointer
 * is in scope at the expansion site.
 * NOTE(review): presumably `regs` holds regexp match offsets and `no` is the
 * capture-group number -- confirm against the callers.
 */
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_usascii_str_new_cstr
67#undef rb_utf8_str_new_cstr
68#undef rb_enc_str_new_cstr
69#undef rb_external_str_new_cstr
70#undef rb_locale_str_new_cstr
71#undef rb_str_dup_frozen
72#undef rb_str_buf_new_cstr
/* NOTE(review): not referenced in the visible part of this file; presumably
 * an upper bound on the byte length of one character -- confirm. */
103#define RUBY_MAX_CHAR_LEN 16
/* Set by STR_SET_SHARED on the string whose buffer another string points
 * into (the `shared_str` argument). */
104#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on the shared root when that root's RBASIC_CLASS
 * is 0. */
105#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp().
 * NOTE(review): presumably marks a string temporarily locked against
 * modification -- confirm. */
106#define STR_TMPLOCK FL_USER7
/* The heap buffer must not be freed through the string: str_new_static()
 * sets this flag, and str_discard() only calls ruby_sized_xfree() when
 * neither this flag nor STR_SHARED is set. */
107#define STR_NOFREE FL_USER18
/* STR_SET_SHARED guards its work with !FL_TEST(str, STR_FAKESTR).
 * NOTE(review): presumably carried by the strings built in setup_fake_str()
 * -- confirm. */
108#define STR_FAKESTR FL_USER19
110#define STR_SET_NOEMBED(str) do {\
111 FL_SET((str), STR_NOEMBED);\
113 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
116 STR_SET_EMBED_LEN((str), 0);\
/*
 * Clear the STR_NOEMBED and STR_NOFREE flags on str in a single FL_UNSET.
 * Only the flags change; the length and buffer contents are not touched
 * here, so callers set them separately (e.g. with STR_SET_EMBED_LEN).
 */
119#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
121# define STR_SET_EMBED_LEN(str, n) do { \
122 assert(str_embed_capa(str) > (n));\
123 RSTRING(str)->as.embed.len = (n);\
126# define STR_SET_EMBED_LEN(str, n) do { \
128 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
129 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
133#define STR_SET_LEN(str, n) do { \
134 if (STR_EMBED_P(str)) {\
135 STR_SET_EMBED_LEN((str), (n));\
138 RSTRING(str)->as.heap.len = (n);\
142#define STR_DEC_LEN(str) do {\
143 if (STR_EMBED_P(str)) {\
144 long n = RSTRING_LEN(str);\
146 STR_SET_EMBED_LEN((str), n);\
149 RSTRING(str)->as.heap.len--;\
154str_enc_fastpath(
VALUE str)
158 case ENCINDEX_ASCII_8BIT:
160 case ENCINDEX_US_ASCII:
/*
 * Terminator length, in bytes, for str: 1 when str_enc_fastpath(str) is
 * true, otherwise rb_enc_mbminlen() of the encoding obtained from
 * ENCODING_GET(str).
 * `str` appears twice in the expansion, so pass an expression without
 * side effects.
 */
167#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
168#define TERM_FILL(ptr, termlen) do {\
169 char *const term_fill_ptr = (ptr);\
170 const int term_fill_len = (termlen);\
171 *term_fill_ptr = '\0';\
172 if (UNLIKELY(term_fill_len > 1))\
173 memset(term_fill_ptr, 0, term_fill_len);\
176#define RESIZE_CAPA(str,capacity) do {\
177 const int termlen = TERM_LEN(str);\
178 RESIZE_CAPA_TERM(str,capacity,termlen);\
180#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
181 if (STR_EMBED_P(str)) {\
182 if (str_embed_capa(str) < capacity + termlen) {\
183 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
184 const long tlen = RSTRING_LEN(str);\
185 memcpy(tmp, RSTRING_PTR(str), tlen);\
186 RSTRING(str)->as.heap.ptr = tmp;\
187 RSTRING(str)->as.heap.len = tlen;\
188 STR_SET_NOEMBED(str);\
189 RSTRING(str)->as.heap.aux.capa = (capacity);\
193 assert(!FL_TEST((str), STR_SHARED)); \
194 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
195 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
196 RSTRING(str)->as.heap.aux.capa = (capacity);\
200#define STR_SET_SHARED(str, shared_str) do { \
201 if (!FL_TEST(str, STR_FAKESTR)) { \
202 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
203 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
204 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
205 FL_SET((str), STR_SHARED); \
206 FL_SET((shared_str), STR_SHARED_ROOT); \
207 if (RBASIC_CLASS((shared_str)) == 0) \
208 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The raw as.heap.ptr field of str. */
212#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/*
 * Byte size of str's heap buffer: as.heap.aux.capa (cast to size_t) plus
 * TERM_LEN(str).  This is the value handed to SIZED_REALLOC_N and
 * ruby_sized_xfree() as the current allocation size.
 * `str` is expanded more than once.
 */
213#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str), the helper defined in this file. */
216#define STR_ENC_GET(str) get_encoding(str)
218#if !defined SHARABLE_MIDDLE_SUBSTRING
219# define SHARABLE_MIDDLE_SUBSTRING 0
221#if !SHARABLE_MIDDLE_SUBSTRING
222#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
224#define SHARABLE_SUBSTRING_P(beg, len, end) 1
229str_embed_capa(
VALUE str)
232 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.
ary);
239rb_str_reembeddable_p(
VALUE str)
241 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
245rb_str_embed_size(
long capa)
251rb_str_size_as_embedded(
VALUE str)
255 if (STR_EMBED_P(str)) {
256 real_size = rb_str_embed_size(
RSTRING(str)->as.embed.len) + TERM_LEN(str);
260 else if (rb_str_reembeddable_p(str)) {
261 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
265 real_size =
sizeof(
struct RString);
273STR_EMBEDDABLE_P(
long len,
long termlen)
276 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
284static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
285static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
287static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
288static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_EMBED_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
333 if (new_root == old_root) {
339 if (!STR_EMBED_P(new_root)) {
343 size_t offset = (size_t)((uintptr_t)
RSTRING(str)->as.heap.ptr - (uintptr_t)
RSTRING(old_root)->as.embed.ary);
346 RSTRING(str)->as.heap.ptr =
RSTRING(new_root)->as.embed.ary + offset;
350rb_debug_rstring_null_ptr(
const char *func)
352 fprintf(stderr,
"%s is returning NULL!! "
353 "SIGSEGV is highly expected to follow immediately.\n"
354 "If you could reproduce, attach your debugger here, "
355 "and look at the passed string.\n",
360static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
363get_encoding(
VALUE str)
369mustnot_broken(
VALUE str)
371 if (is_broken_string(str)) {
377mustnot_wchar(
VALUE str)
380 if (rb_enc_mbminlen(enc) > 1) {
387static VALUE register_fstring(
VALUE str,
bool copy);
/*
 * True when str does not have FL_EXIVAR set and its class is exactly
 * rb_cString.
 * `str` is expanded twice, so pass an expression without side effects.
 */
394#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
402fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
412 if (rb_objspace_garbage_object_p(str)) {
424 rb_enc_copy(new_str, str);
437 if (STR_SHARED_P(str)) {
439 str_make_independent(str);
442 if (!BARE_STRING_P(str)) {
446 RBASIC(str)->flags |= RSTRING_FSTR;
448 *key = *value = arg->fstr = str;
462 if (
FL_TEST(str, RSTRING_FSTR))
465 bare = BARE_STRING_P(str);
467 if (STR_EMBED_P(str)) {
471 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_SHARED_ROOT|STR_SHARED) == (STR_NOEMBED|STR_SHARED_ROOT)) {
480 fstr = register_fstring(str, FALSE);
483 str_replace_shared_without_enc(str, fstr);
491register_fstring(
VALUE str,
bool copy)
498 st_table *frozen_strings = rb_vm_fstring_table();
501 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
502 }
while (UNDEF_P(args.fstr));
509 assert(RBASIC_CLASS(args.fstr) ==
rb_cString);
514setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
530 return (
VALUE)fake_str;
537rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
rb_encoding *enc)
539 return setup_fake_str(fake_str, name, len, rb_enc_to_index(enc));
547MJIT_FUNC_EXPORTED
VALUE
548rb_fstring_new(
const char *ptr,
long len)
551 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
558 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
562rb_fstring_cstr(
const char *
ptr)
564 return rb_fstring_new(
ptr, strlen(
ptr));
568fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
578 const char *aptr, *bptr;
581 return (alen != blen ||
583 memcmp(aptr, bptr, alen) != 0);
587single_byte_optimizable(
VALUE str)
595 enc = STR_ENC_GET(str);
596 if (rb_enc_mbmaxlen(enc) == 1)
606static inline const char *
607search_nonascii(
const char *p,
const char *e)
609 const uintptr_t *s, *t;
611#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
612# if SIZEOF_UINTPTR_T == 8
613# define NONASCII_MASK UINT64_C(0x8080808080808080)
614# elif SIZEOF_UINTPTR_T == 4
615# define NONASCII_MASK UINT32_C(0x80808080)
617# error "don't know what to do."
620# if SIZEOF_UINTPTR_T == 8
621# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
622# elif SIZEOF_UINTPTR_T == 4
623# define NONASCII_MASK 0x80808080UL
625# error "don't know what to do."
629 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
630#if !UNALIGNED_WORD_ACCESS
631 if ((uintptr_t)p % SIZEOF_VOIDP) {
632 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
637 case 7:
if (p[-7]&0x80)
return p-7;
638 case 6:
if (p[-6]&0x80)
return p-6;
639 case 5:
if (p[-5]&0x80)
return p-5;
640 case 4:
if (p[-4]&0x80)
return p-4;
642 case 3:
if (p[-3]&0x80)
return p-3;
643 case 2:
if (p[-2]&0x80)
return p-2;
644 case 1:
if (p[-1]&0x80)
return p-1;
649#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
650#define aligned_ptr(value) \
651 __builtin_assume_aligned((value), sizeof(uintptr_t))
653#define aligned_ptr(value) (uintptr_t *)(value)
656 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
659 if (*s & NONASCII_MASK) {
660#ifdef WORDS_BIGENDIAN
661 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
663 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
673 case 7:
if (e[-7]&0x80)
return e-7;
674 case 6:
if (e[-6]&0x80)
return e-6;
675 case 5:
if (e[-5]&0x80)
return e-5;
676 case 4:
if (e[-4]&0x80)
return e-4;
678 case 3:
if (e[-3]&0x80)
return e-3;
679 case 2:
if (e[-2]&0x80)
return e-2;
680 case 1:
if (e[-1]&0x80)
return e-1;
688 const char *e = p +
len;
690 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
692 p = search_nonascii(p, e);
696 if (rb_enc_asciicompat(enc)) {
697 p = search_nonascii(p, e);
700 int ret = rb_enc_precise_mbclen(p, e, enc);
704 p = search_nonascii(p, e);
710 int ret = rb_enc_precise_mbclen(p, e, enc);
726 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
729 p = search_nonascii(p, e);
733 else if (rb_enc_asciicompat(enc)) {
734 p = search_nonascii(p, e);
740 int ret = rb_enc_precise_mbclen(p, e, enc);
747 p = search_nonascii(p, e);
753 int ret = rb_enc_precise_mbclen(p, e, enc);
772rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
777 str_enc_copy(dest, src);
778 if (RSTRING_LEN(dest) == 0) {
779 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
790 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
791 search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
802rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
804 str_enc_copy(dest, src);
811 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
817 return enc_coderange_scan(str, enc);
826 cr = enc_coderange_scan(str, get_encoding(str));
837 if (!rb_enc_asciicompat(enc))
839 else if (is_ascii_string(str))
845str_mod_check(
VALUE s,
const char *p,
long len)
847 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
853str_capacity(
VALUE str,
const int termlen)
855 if (STR_EMBED_P(str)) {
857 return str_embed_capa(str) - termlen;
862 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
863 return RSTRING(str)->as.heap.len;
866 return RSTRING(str)->as.heap.aux.capa;
873 return str_capacity(str, TERM_LEN(str));
877must_not_null(
const char *
ptr)
887 size_t size = rb_str_embed_size(
capa);
889 assert(rb_gc_size_allocatable_p(size));
891 assert(size <=
sizeof(
struct RString));
894 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
901str_alloc_heap(
VALUE klass)
903 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
910empty_str_alloc(
VALUE klass)
912 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
913 VALUE str = str_alloc_embed(klass, 0);
914 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
919str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
927 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
929 if (STR_EMBEDDABLE_P(
len, termlen)) {
930 str = str_alloc_embed(klass,
len + termlen);
936 str = str_alloc_heap(klass);
942 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
945 memcpy(RSTRING_PTR(str),
ptr,
len);
947 STR_SET_LEN(str,
len);
948 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
955 return str_new0(klass,
ptr,
len, 1);
976 rb_enc_associate_index(str, rb_utf8_encindex());
988 rb_enc_associate(str, enc);
1000 __msan_unpoison_string(
ptr);
1016 rb_enc_associate_index(str, rb_utf8_encindex());
1024 if (rb_enc_mbminlen(enc) != 1) {
1027 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1031str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1040 rb_encoding *enc = rb_enc_get_from_index(encindex);
1041 str = str_new0(klass,
ptr,
len, rb_enc_mbminlen(enc));
1044 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1045 str = str_alloc_heap(klass);
1049 RBASIC(str)->flags |= STR_NOFREE;
1051 rb_enc_associate_index(str, encindex);
1079static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1081 int ecflags,
VALUE ecopts);
1086 int encidx = rb_enc_to_index(enc);
1087 if (rb_enc_get_index(str) == encidx)
1088 return is_ascii_string(str);
1099 if (!to)
return str;
1100 if (!from) from = rb_enc_get(str);
1101 if (from == to)
return str;
1102 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1103 rb_is_ascii8bit_enc(to)) {
1104 if (STR_ENC_GET(str) != to) {
1106 rb_enc_associate(str, to);
1113 from, to, ecflags, ecopts);
1114 if (
NIL_P(newstr)) {
1122rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1127 olen = RSTRING_LEN(newstr);
1128 if (ofs < -olen || olen < ofs)
1130 if (ofs < 0) ofs += olen;
1132 STR_SET_LEN(newstr, ofs);
1137 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1145 STR_SET_LEN(str, 0);
1146 rb_enc_associate(str, enc);
1152str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1154 int ecflags,
VALUE ecopts)
1159 VALUE econv_wrapper;
1160 const unsigned char *start, *sp;
1161 unsigned char *dest, *dp;
1162 size_t converted_output = (size_t)ofs;
1167 RBASIC_CLEAR_CLASS(econv_wrapper);
1169 if (!ec)
return Qnil;
1172 sp = (
unsigned char*)
ptr;
1174 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1175 (dp = dest + converted_output),
1179 size_t converted_input = sp - start;
1180 size_t rest =
len - converted_input;
1181 converted_output = dp - dest;
1183 if (converted_input && converted_output &&
1184 rest < (LONG_MAX / converted_output)) {
1185 rest = (rest * converted_output) / converted_input;
1190 olen += rest < 2 ? 2 : rest;
1197 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1199 rb_enc_associate(newstr, to);
1218 const int eidx = rb_enc_to_index(eenc);
1221 return rb_enc_str_new(
ptr,
len, eenc);
1225 if ((eidx == rb_ascii8bit_encindex()) ||
1226 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1230 ienc = rb_default_internal_encoding();
1231 if (!ienc || eenc == ienc) {
1232 return rb_enc_str_new(
ptr,
len, eenc);
1236 if ((eidx == rb_ascii8bit_encindex()) ||
1237 (eidx == rb_usascii_encindex()) ||
1238 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1239 return rb_enc_str_new(
ptr,
len, ienc);
1242 str = rb_enc_str_new(NULL, 0, ienc);
1245 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1246 rb_str_initialize(str,
ptr,
len, eenc);
1254 int eidx = rb_enc_to_index(eenc);
1255 if (eidx == rb_usascii_encindex() &&
1256 !is_ascii_string(str)) {
1257 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1260 rb_enc_associate_index(str, eidx);
1319str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1321 const int termlen = TERM_LEN(str);
1326 if (str_embed_capa(str2) >=
len + termlen) {
1327 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1328 STR_SET_EMBED(str2);
1329 memcpy(ptr2, RSTRING_PTR(str),
len);
1330 STR_SET_EMBED_LEN(str2,
len);
1331 TERM_FILL(ptr2+
len, termlen);
1335 if (STR_SHARED_P(str)) {
1336 root =
RSTRING(str)->as.heap.aux.shared;
1344 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1346 rb_fatal(
"about to free a possible shared root");
1348 char *ptr2 = STR_HEAP_PTR(str2);
1350 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1353 FL_SET(str2, STR_NOEMBED);
1356 STR_SET_SHARED(str2, root);
1364 str_replace_shared_without_enc(str2, str);
1365 rb_enc_cr_str_exact_copy(str2, str);
1372 return str_replace_shared(str_alloc_heap(klass), str);
1389rb_str_new_frozen_String(
VALUE orig)
1396rb_str_tmp_frozen_acquire(
VALUE orig)
1399 return str_new_frozen_buffer(0, orig, FALSE);
1403rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1405 if (RBASIC_CLASS(tmp) != 0)
1408 if (STR_EMBED_P(tmp)) {
1421 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1422 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1427 STR_SET_EMBED_LEN(tmp, 0);
1435 return str_new_frozen_buffer(klass, orig, TRUE);
1441 assert(!STR_EMBED_P(orig));
1442 assert(!STR_SHARED_P(orig));
1444 VALUE str = str_alloc_heap(klass);
1445 RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
1446 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1447 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1448 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1449 RBASIC(orig)->flags &= ~STR_NOFREE;
1450 STR_SET_SHARED(orig, str);
1457str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1461 long len = RSTRING_LEN(orig);
1462 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1464 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1465 str = str_new0(klass, RSTRING_PTR(orig),
len, termlen);
1466 assert(STR_EMBED_P(str));
1471 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1472 long rest = RSTRING_LEN(
shared) - ofs -
RSTRING(orig)->as.heap.len;
1475 assert(ofs + rest <= RSTRING_LEN(
shared));
1477 assert(!STR_EMBED_P(
shared));
1481 if ((ofs > 0) || (rest > 0) ||
1484 str = str_new_shared(klass,
shared);
1485 assert(!STR_EMBED_P(str));
1486 RSTRING(str)->as.heap.ptr += ofs;
1487 RSTRING(str)->as.heap.len -= ofs + rest;
1490 if (RBASIC_CLASS(
shared) == 0)
1495 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1496 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1498 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1499 STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
1500 TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
1503 str = heap_str_make_shared(klass, orig);
1507 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1519str_new_empty_String(
VALUE str)
1522 rb_enc_copy(v, str);
/*
 * Floor for a requested buffer capacity: the code that uses this constant
 * raises `capa` to STR_BUF_MIN_SIZE whenever the requested value is smaller.
 */
1526#define STR_BUF_MIN_SIZE 63
1534 if (STR_EMBEDDABLE_P(
capa, 1)) {
1541 if (
capa < STR_BUF_MIN_SIZE) {
1542 capa = STR_BUF_MIN_SIZE;
1547 RSTRING(str)->as.heap.ptr[0] =
'\0';
1567 return str_new(0, 0,
len);
1573 if (
FL_TEST(str, RSTRING_FSTR)) {
1574 st_data_t fstr = (st_data_t)str;
1578 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1579 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1584 if (STR_EMBED_P(str)) {
1585 RB_DEBUG_COUNTER_INC(obj_str_embed);
1587 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1588 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1589 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1592 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1593 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1597RUBY_FUNC_EXPORTED
size_t
1598rb_str_memsize(
VALUE str)
1600 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1601 return STR_HEAP_SIZE(str);
1611 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1614static inline void str_discard(
VALUE str);
1615static void str_shared_replace(
VALUE str,
VALUE str2);
1620 if (str != str2) str_shared_replace(str, str2);
1631 enc = STR_ENC_GET(str2);
1634 termlen = rb_enc_mbminlen(enc);
1636 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1638 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1639 STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
1640 rb_enc_associate(str, enc);
1645 if (STR_EMBED_P(str2)) {
1646 assert(!
FL_TEST(str2, STR_SHARED));
1648 assert(
len + termlen <= str_embed_capa(str2));
1650 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1651 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1652 RSTRING(str2)->as.heap.ptr = new_ptr;
1655 STR_SET_NOEMBED(str2);
1659 STR_SET_NOEMBED(str);
1661 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1662 RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
1664 if (
FL_TEST(str2, STR_SHARED)) {
1666 STR_SET_SHARED(str,
shared);
1669 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1673 STR_SET_EMBED(str2);
1674 RSTRING_PTR(str2)[0] = 0;
1675 STR_SET_EMBED_LEN(str2, 0);
1676 rb_enc_associate(str, enc);
1689 str = rb_funcall(obj, idTo_s, 0);
1690 return rb_obj_as_string_result(str, obj);
1693MJIT_FUNC_EXPORTED
VALUE
1706 len = RSTRING_LEN(str2);
1707 if (STR_SHARED_P(str2)) {
1710 STR_SET_NOEMBED(str);
1712 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1713 STR_SET_SHARED(str,
shared);
1714 rb_enc_cr_str_exact_copy(str, str2);
1717 str_replace_shared(str, str2);
1726 size_t size = rb_str_embed_size(
capa);
1728 assert(rb_gc_size_allocatable_p(size));
1730 assert(size <=
sizeof(
struct RString));
1733 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1742 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1751 const VALUE flag_mask =
1753 RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
1760 if (STR_EMBED_P(str)) {
1761 long len = RSTRING_EMBED_LEN(str);
1763 assert(STR_EMBED_P(dup));
1764 assert(str_embed_capa(dup) >=
len + 1);
1765 STR_SET_EMBED_LEN(dup,
len);
1771 root =
RSTRING(str)->as.heap.aux.shared;
1773 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1774 root = str = str_new_frozen(klass, str);
1777 assert(!STR_SHARED_P(root));
1778 assert(RB_OBJ_FROZEN_RAW(root));
1781 else if (STR_EMBED_P(root)) {
1788 RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
1789 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1790 FL_SET(root, STR_SHARED_ROOT);
1792 flags |= RSTRING_NOEMBED | STR_SHARED;
1797 encidx = rb_enc_get_index(str);
1798 flags &= ~ENCODING_MASK;
1801 if (encidx) rb_enc_associate_index(dup, encidx);
1809 if (
FL_TEST(str, STR_NOEMBED)) {
1810 dup = ec_str_alloc_heap(ec, klass);
1813 dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
1816 return str_duplicate_setup(klass, str, dup);
1823 if (
FL_TEST(str, STR_NOEMBED)) {
1824 dup = str_alloc_heap(klass);
1827 dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
1830 return str_duplicate_setup(klass, str, dup);
1842 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1849 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1850 return ec_str_duplicate(ec,
rb_cString, str);
1865 static ID keyword_ids[2];
1866 VALUE orig, opt, venc, vcapa;
1871 if (!keyword_ids[0]) {
1872 keyword_ids[0] = rb_id_encoding();
1873 CONST_ID(keyword_ids[1],
"capacity");
1876 n = rb_scan_args(argc, argv,
"01:", &orig, &opt);
1881 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1882 enc = rb_to_encoding(venc);
1884 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1887 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
1889 if (
capa < STR_BUF_MIN_SIZE) {
1890 capa = STR_BUF_MIN_SIZE;
1894 len = RSTRING_LEN(orig);
1898 if (orig == str) n = 0;
1900 str_modifiable(str);
1901 if (STR_EMBED_P(str)) {
1902 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1904 assert(
RSTRING(str)->
as.embed.len + 1 <= str_embed_capa(str));
1909 RSTRING(str)->as.heap.ptr = new_ptr;
1911 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1912 const size_t size = (size_t)
capa + termlen;
1913 const char *
const old_ptr = RSTRING_PTR(str);
1914 const size_t osize =
RSTRING(str)->as.heap.len + TERM_LEN(str);
1915 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1916 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1918 RSTRING(str)->as.heap.ptr = new_ptr;
1920 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1921 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1922 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1927 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
1928 rb_enc_cr_str_exact_copy(str, orig);
1930 FL_SET(str, STR_NOEMBED);
1937 rb_enc_associate(str, enc);
/*
 * True for any byte whose top two bits are not 10, i.e. any byte that is
 * not a UTF-8 continuation byte.  Plain ASCII bytes therefore also count as
 * "lead" bytes, which is what makes counting these bytes equal to counting
 * characters.
 */
1948#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1963static inline uintptr_t
1964count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1969 d = (d>>6) | (~d>>7);
1970 d &= NONASCII_MASK >> 7;
1973#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1975 return rb_popcount_intptr(d);
1979# if SIZEOF_VOIDP == 8
1988enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
1993 if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1994 long diff = (long)(e - p);
1995 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2000 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2001 const uintptr_t *s, *t;
2002 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2003 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2004 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2005 while (p < (
const char *)s) {
2006 if (is_utf8_lead_byte(*p))
len++;
2010 len += count_utf8_lead_bytes_with_word(s);
2013 p = (
const char *)s;
2016 if (is_utf8_lead_byte(*p))
len++;
2022 else if (rb_enc_asciicompat(enc)) {
2027 q = search_nonascii(p, e);
2033 p += rb_enc_fast_mbclen(p, e, enc);
2040 q = search_nonascii(p, e);
2046 p += rb_enc_mbclen(p, e, enc);
2053 for (c=0; p<e; c++) {
2054 p += rb_enc_mbclen(p, e, enc);
2069rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2076 if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2077 long diff = (long)(e - p);
2078 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2080 else if (rb_enc_asciicompat(enc)) {
2084 q = search_nonascii(p, e);
2092 ret = rb_enc_precise_mbclen(p, e, enc);
2107 for (c=0; p<e; c++) {
2108 ret = rb_enc_precise_mbclen(p, e, enc);
2115 if (p + rb_enc_mbminlen(enc) <= e)
2116 p += rb_enc_mbminlen(enc);
2132 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2133 if (!enc) enc = STR_ENC_GET(str);
2134 p = RSTRING_PTR(str);
2135 e = RSTRING_END(str);
2139 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2144 return enc_strlen(p, e, enc, cr);
2151 return str_strlen(str, NULL);
2165 return LONG2NUM(str_strlen(str, NULL));
2177rb_str_bytesize(
VALUE str)
2195rb_str_empty(
VALUE str)
2197 return RBOOL(RSTRING_LEN(str) == 0);
2215 char *ptr1, *ptr2, *ptr3;
2220 enc = rb_enc_check_str(str1, str2);
2223 termlen = rb_enc_mbminlen(enc);
2224 if (len1 > LONG_MAX - len2) {
2227 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2228 ptr3 = RSTRING_PTR(str3);
2229 memcpy(ptr3, ptr1, len1);
2230 memcpy(ptr3+len1, ptr2, len2);
2231 TERM_FILL(&ptr3[len1+len2], termlen);
2241MJIT_FUNC_EXPORTED
VALUE
2247 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2250 int enc1 = rb_enc_get_index(str1);
2251 int enc2 = rb_enc_get_index(str2);
2256 else if (enc2 < 0) {
2259 else if (enc1 != enc2) {
2262 else if (len1 > LONG_MAX - len2) {
2295 rb_enc_copy(str2, str);
2302 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2303 if (STR_EMBEDDABLE_P(
len, 1)) {
2305 memset(RSTRING_PTR(str2), 0,
len + 1);
2312 STR_SET_LEN(str2,
len);
2313 rb_enc_copy(str2, str);
2316 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2320 len *= RSTRING_LEN(str);
2321 termlen = TERM_LEN(str);
2323 ptr2 = RSTRING_PTR(str2);
2325 n = RSTRING_LEN(str);
2326 memcpy(ptr2, RSTRING_PTR(str), n);
2327 while (n <=
len/2) {
2328 memcpy(ptr2 + n, ptr2, n);
2331 memcpy(ptr2 + n, ptr2,
len-n);
2333 STR_SET_LEN(str2,
len);
2334 TERM_FILL(&ptr2[
len], termlen);
2335 rb_enc_cr_str_copy_for_substr(str2, str);
2361 VALUE tmp = rb_check_array_type(arg);
2370rb_check_lockedtmp(
VALUE str)
2372 if (
FL_TEST(str, STR_TMPLOCK)) {
2378str_modifiable(
VALUE str)
2380 rb_check_lockedtmp(str);
2385str_dependent_p(
VALUE str)
2387 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2396str_independent(
VALUE str)
2398 str_modifiable(str);
2399 return !str_dependent_p(str);
2403str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2411 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2416 STR_SET_EMBED_LEN(str,
len);
2421 oldptr = RSTRING_PTR(str);
2423 memcpy(
ptr, oldptr,
len);
2425 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2428 STR_SET_NOEMBED(str);
2429 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2430 TERM_FILL(
ptr +
len, termlen);
2439 if (!str_independent(str))
2440 str_make_independent(str);
2447 int termlen = TERM_LEN(str);
2448 long len = RSTRING_LEN(str);
2453 if (expand >= LONG_MAX -
len) {
2457 if (!str_independent(str)) {
2458 str_make_independent_expand(str,
len, expand, termlen);
2460 else if (expand > 0) {
2461 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2467str_modify_keep_cr(
VALUE str)
2469 if (!str_independent(str))
2470 str_make_independent(str);
2477str_discard(
VALUE str)
2479 str_modifiable(str);
2480 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2481 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2482 RSTRING(str)->as.heap.ptr = 0;
2483 RSTRING(str)->as.heap.len = 0;
2494 if (!rb_enc_asciicompat(enc)) {
2514 return RSTRING_PTR(str);
2518zero_filled(
const char *s,
int n)
2520 for (; n > 0; --n) {
2527str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2529 const char *e = s +
len;
2531 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2532 if (zero_filled(s, minlen))
return s;
2538str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2543 if (str_dependent_p(str)) {
2544 if (!zero_filled(s +
len, termlen))
2545 str_make_independent_expand(str,
len, 0L, termlen);
2548 TERM_FILL(s +
len, termlen);
2551 return RSTRING_PTR(str);
2555rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2557 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2558 long len = RSTRING_LEN(str);
2562 rb_check_lockedtmp(str);
2563 str_make_independent_expand(str,
len, 0L, termlen);
2565 else if (str_dependent_p(str)) {
2566 if (termlen > oldtermlen)
2567 str_make_independent_expand(str,
len, 0L, termlen);
2570 if (!STR_EMBED_P(str)) {
2572 assert(!
FL_TEST((str), STR_SHARED));
2575 if (termlen > oldtermlen) {
2576 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2584str_null_check(
VALUE str,
int *w)
2586 char *s = RSTRING_PTR(str);
2587 long len = RSTRING_LEN(str);
2589 const int minlen = rb_enc_mbminlen(enc);
2593 if (str_null_char(s,
len, minlen, enc)) {
2596 return str_fill_term(str, s,
len, minlen);
2599 if (!s || memchr(s, 0,
len)) {
2603 s = str_fill_term(str, s,
len, minlen);
2609rb_str_to_cstr(
VALUE str)
2612 return str_null_check(str, &w);
2620 char *s = str_null_check(str, &w);
2631rb_str_fill_terminator(
VALUE str,
const int newminlen)
2633 char *s = RSTRING_PTR(str);
2634 long len = RSTRING_LEN(str);
2635 return str_fill_term(str, s,
len, newminlen);
2641 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2665str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2668 if (rb_enc_mbmaxlen(enc) == 1) {
2671 else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2672 p += nth * rb_enc_mbmaxlen(enc);
2674 else if (rb_enc_asciicompat(enc)) {
2675 const char *p2, *e2;
2678 while (p < e && 0 < nth) {
2685 p2 = search_nonascii(p, e2);
2694 n = rb_enc_mbclen(p, e, enc);
2705 while (p < e && nth--) {
2706 p += rb_enc_mbclen(p, e, enc);
2717 return str_nth_len(p, e, &nth, enc);
2721str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2726 p = str_nth_len(p, e, &nth, enc);
/*
 * Looks up the nth character of the range [p, e) through str_nth.
 * When str_nth yields NULL the whole span length (e - p) is returned.
 * NOTE(review): the return for a non-NULL pp is not visible in this excerpt;
 * presumably pp - p -- confirm.
 */
2735str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2737 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2738 if (!pp)
return e - p;
2745 return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2746 STR_ENC_GET(str), single_byte_optimizable(str));
2751str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2754 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2755 const uintptr_t *s, *t;
2756 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2757 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2758 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2759 while (p < (
const char *)s) {
2760 if (is_utf8_lead_byte(*p)) nth--;
2764 nth -= count_utf8_lead_bytes_with_word(s);
2766 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2770 if (is_utf8_lead_byte(*p)) {
2771 if (nth == 0)
break;
2781str_utf8_offset(
const char *p,
const char *e,
long nth)
2783 const char *pp = str_utf8_nth(p, e, &nth);
2792 if (single_byte_optimizable(str) || pos < 0)
2795 char *p = RSTRING_PTR(str);
2796 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2801str_subseq(
VALUE str,
long beg,
long len)
2805 const long rstring_embed_capa_max = ((
sizeof(
struct RString) - offsetof(struct
RString,
as.
embed.
ary)) / sizeof(char)) - 1;
2807 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str)) ||
2808 len <= rstring_embed_capa_max) {
2815 RSTRING(str2)->as.heap.ptr += beg;
2827 VALUE str2 = str_subseq(str, beg,
len);
2828 rb_enc_cr_str_copy_for_substr(str2, str);
2837 long blen = RSTRING_LEN(str);
2839 char *p, *s = RSTRING_PTR(str), *e = s + blen;
2841 if (
len < 0)
return 0;
2845 if (single_byte_optimizable(str)) {
2846 if (beg > blen)
return 0;
2849 if (beg < 0)
return 0;
2851 if (
len > blen - beg)
2853 if (
len < 0)
return 0;
2858 if (
len > -beg)
len = -beg;
2859 if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
2861 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
2864 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
2870 slen = str_strlen(str, enc);
2872 if (beg < 0)
return 0;
2874 if (
len == 0)
goto end;
2877 else if (beg > 0 && beg > RSTRING_LEN(str)) {
2881 if (beg > str_strlen(str, enc))
return 0;
2886 enc == rb_utf8_encoding()) {
2887 p = str_utf8_nth(s, e, &beg);
2888 if (beg > 0)
return 0;
2889 len = str_utf8_offset(p, e,
len);
2892 else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2893 int char_sz = rb_enc_mbmaxlen(enc);
2895 p = s + beg * char_sz;
2899 else if (
len * char_sz > e - p)
2904 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2905 if (beg > 0)
return 0;
2909 len = str_offset(p, e,
len, enc, 0);
/*
 * Forward declaration: str_substr is called (with empty = TRUE) before its
 * definition appears below.  In the definition, a zero len combined with a
 * zero `empty` produces Qnil.
 */
2917static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2922 return str_substr(str, beg,
len, TRUE);
2926str_substr(
VALUE str,
long beg,
long len,
int empty)
2930 if (!p)
return Qnil;
2931 if (!
len && !empty)
return Qnil;
2933 beg = p - RSTRING_PTR(str);
2935 VALUE str2 = str_subseq(str, beg,
len);
2936 rb_enc_cr_str_copy_for_substr(str2, str);
2980str_uminus(
VALUE str)
2985 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
2989#define rb_str_dup_frozen rb_str_new_frozen
2994 if (
FL_TEST(str, STR_TMPLOCK)) {
2997 FL_SET(str, STR_TMPLOCK);
3004 if (!
FL_TEST(str, STR_TMPLOCK)) {
3011RUBY_FUNC_EXPORTED
VALUE
3015 return rb_ensure(func, arg, rb_str_unlocktmp, str);
3022 const int termlen = TERM_LEN(str);
3024 str_modifiable(str);
3025 if (STR_SHARED_P(str)) {
3028 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3031 STR_SET_LEN(str,
len);
3032 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3042 int independent = str_independent(str);
3043 long slen = RSTRING_LEN(str);
3051 const int termlen = TERM_LEN(str);
3052 if (STR_EMBED_P(str)) {
3053 if (
len == slen)
return str;
3054 if (str_embed_capa(str) >=
len + termlen) {
3055 STR_SET_EMBED_LEN(str,
len);
3059 str_make_independent_expand(str, slen,
len - slen, termlen);
3061 else if (str_embed_capa(str) >=
len + termlen) {
3062 char *
ptr = STR_HEAP_PTR(str);
3064 if (slen >
len) slen =
len;
3067 STR_SET_EMBED_LEN(str,
len);
3068 if (independent) ruby_xfree(
ptr);
3071 else if (!independent) {
3072 if (
len == slen)
return str;
3073 str_make_independent_expand(str, slen,
len - slen, termlen);
3077 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3078 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3081 else if (
len == slen)
return str;
3089str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3092 str_modify_keep_cr(str);
3097 if (
len == 0)
return 0;
3099 long capa, total, olen, off = -1;
3101 const int termlen = TERM_LEN(str);
3107 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3111 if (STR_EMBED_P(str)) {
3112 capa = str_embed_capa(str) - termlen;
3113 sptr =
RSTRING(str)->as.embed.ary;
3114 olen = RSTRING_EMBED_LEN(str);
3118 sptr =
RSTRING(str)->as.heap.ptr;
3119 olen =
RSTRING(str)->as.heap.len;
3121 if (olen > LONG_MAX -
len) {
3126 if (total >= LONG_MAX / 2) {
3129 while (total >
capa) {
3132 RESIZE_CAPA_TERM(str,
capa, termlen);
3133 sptr = RSTRING_PTR(str);
3138 memcpy(sptr + olen,
ptr,
len);
3139 STR_SET_LEN(str, total);
3140 TERM_FILL(sptr + total, termlen);
/*
 * Shorthands for str_buf_cat4 that always pass false for its last
 * (keep_cr) argument.
 *   str_buf_cat(str, ptr, len)  - caller supplies the byte length.
 *   str_buf_cat2(str, ptr)      - length is taken from rb_strlen_lit(ptr);
 *                                 presumably ptr must be a string literal --
 *                                 confirm against rb_strlen_lit.
 */
3145#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3146#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3151 if (
len == 0)
return str;
3155 return str_buf_cat(str,
ptr,
len);
3170rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3171 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3180 if (str_encindex == ptr_encindex) {
3182 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3186 str_enc = rb_enc_from_index(str_encindex);
3187 ptr_enc = rb_enc_from_index(ptr_encindex);
3188 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3191 if (RSTRING_LEN(str) == 0) {
3194 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3200 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3209 *ptr_cr_ret = ptr_cr;
3211 if (str_encindex != ptr_encindex &&
3214 str_enc = rb_enc_from_index(str_encindex);
3215 ptr_enc = rb_enc_from_index(ptr_encindex);
3220 res_encindex = str_encindex;
3225 res_encindex = str_encindex;
3229 res_encindex = ptr_encindex;
3234 res_encindex = str_encindex;
3241 res_encindex = str_encindex;
3249 str_buf_cat(str,
ptr,
len);
3255 rb_enc_name(str_enc), rb_enc_name(ptr_enc));
3262 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3272 if (rb_enc_asciicompat(enc)) {
3273 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3277 char *buf =
ALLOCA_N(
char, rb_enc_mbmaxlen(enc));
3279 unsigned int c = (
unsigned char)*
ptr;
3280 int len = rb_enc_codelen(c, enc);
3281 rb_enc_mbcput(c, buf, enc);
3282 rb_enc_cr_str_buf_cat(str, buf,
len,
3295 if (str_enc_fastpath(str)) {
3299 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3304 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3314 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
/*
 * Byte threshold used by rb_str_concat_literals: the summed RSTRING_LEN of
 * the input strings is compared against it (len < MIN_PRE_ALLOC_SIZE).
 */
3329#define MIN_PRE_ALLOC_SIZE 48
3331MJIT_FUNC_EXPORTED
VALUE
3332rb_str_concat_literals(
size_t num,
const VALUE *strary)
3341 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3342 if (LIKELY(
len < MIN_PRE_ALLOC_SIZE)) {
3348 rb_enc_copy(str, strary[0]);
3352 for (i = s; i < num; ++i) {
3353 const VALUE v = strary[i];
3357 if (encidx != ENCINDEX_US_ASCII) {
3359 rb_enc_set_index(str, encidx);
3384rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3386 str_modifiable(str);
3391 else if (argc > 1) {
3394 rb_enc_copy(arg_str, str);
3395 for (i = 0; i < argc; i++) {
3430 if (rb_num_to_uint(str2, &code) == 0) {
3443 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3446 buf[0] = (char)code;
3448 if (encidx != rb_enc_to_index(enc)) {
3449 rb_enc_associate_index(str1, encidx);
3454 long pos = RSTRING_LEN(str1);
3459 switch (
len = rb_enc_codelen(code, enc)) {
3460 case ONIGERR_INVALID_CODE_POINT_VALUE:
3463 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3469 rb_enc_mbcput(code, buf, enc);
3470 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3474 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Works from the index of enc (rb_enc_to_index).  Two cases are handled
 * specially: ENCINDEX_ASCII_8BIT and ENCINDEX_US_ASCII.  For US-ASCII with
 * code > 127 the result is ENCINDEX_ASCII_8BIT.
 * NOTE(review): the remaining return paths are not visible in this excerpt.
 */
3483rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3485 int encidx = rb_enc_to_index(enc);
3487 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3492 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3493 return ENCINDEX_ASCII_8BIT;
3516rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3518 str_modifiable(str);
3523 else if (argc > 1) {
3526 rb_enc_copy(arg_str, str);
3527 for (i = 0; i < argc; i++) {
3540 if (e && is_ascii_string(str)) {
3543 return rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
3550 const char *ptr1, *ptr2;
3553 return (len1 != len2 ||
3555 memcmp(ptr1, ptr2, len1) != 0);
3569rb_str_hash_m(
VALUE str)
/*
 * Yields b when a > b, otherwise a (i.e. the smaller of the two).
 * Each argument appears twice in the expansion, so do not pass expressions
 * with side effects.
 */
3575#define lesser(a,b) (((a)>(b))?(b):(a))
3583 if (RSTRING_LEN(str1) == 0)
return TRUE;
3584 if (RSTRING_LEN(str2) == 0)
return TRUE;
3587 if (idx1 == idx2)
return TRUE;
3592 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
3596 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
3606 const char *ptr1, *ptr2;
3609 if (str1 == str2)
return 0;
3612 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3621 if (len1 > len2)
return 1;
3624 if (retval > 0)
return 1;
3651 if (str1 == str2)
return Qtrue;
3658 return rb_str_eql_internal(str1, str2);
3679MJIT_FUNC_EXPORTED
VALUE
3682 if (str1 == str2)
return Qtrue;
3684 return rb_str_eql_internal(str1, str2);
3715 return rb_invcmp(str1, str2);
3757 return str_casecmp(str1, s);
3765 const char *p1, *p1end, *p2, *p2end;
3767 enc = rb_enc_compatible(str1, str2);
3772 p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3773 p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3774 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3775 while (p1 < p1end && p2 < p2end) {
3777 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3778 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3780 return INT2FIX(c1 < c2 ? -1 : 1);
3787 while (p1 < p1end && p2 < p2end) {
3788 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3789 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3791 if (0 <= c1 && 0 <= c2) {
3795 return INT2FIX(c1 < c2 ? -1 : 1);
3799 l1 = rb_enc_mbclen(p1, p1end, enc);
3800 l2 = rb_enc_mbclen(p2, p2end, enc);
3801 len = l1 < l2 ? l1 : l2;
3802 r = memcmp(p1, p2,
len);
3804 return INT2FIX(r < 0 ? -1 : 1);
3806 return INT2FIX(l1 < l2 ? -1 : 1);
3812 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
3813 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
3847 return str_casecmp_p(str1, s);
3854 VALUE folded_str1, folded_str2;
3855 VALUE fold_opt = sym_fold;
3857 enc = rb_enc_compatible(str1, str2);
3862 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3863 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3865 return rb_str_eql(folded_str1, folded_str2);
/*
 * Searches for the bytes [sub_ptr, sub_ptr + sub_len) with rb_memsearch and
 * accepts a hit only if it starts on a character head according to
 * rb_enc_right_char_head.  A hit that is not on a character head is
 * rejected: the remaining search length is reduced and offset is advanced
 * by the distance to the next character head t.
 *
 * Returns the (negative) rb_memsearch result when nothing is found, -1 when
 * the remaining search length drops to zero or below, otherwise pos + offset.
 */
3869strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3870 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3872 const char *search_start = str_ptr;
3873 long pos, search_len = str_len - offset;
3877 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3878 if (pos < 0)
return pos;
/* t is the first character head at or after the raw byte hit. */
3879 t = rb_enc_right_char_head(search_start, search_start+pos, str_ptr_end, enc);
3880 if (t == search_start + pos)
break;
3881 search_len -= t - search_start;
3882 if (search_len <= 0)
return -1;
3883 offset += t - search_start;
3886 return pos + offset;
/*
 * rb_str_index(str, sub, offset) expands to rb_strseq_index with in_byte = 0,
 * so rb_strseq_index converts offset through str_offset before searching.
 */
3889#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
/*
 * Searches str for sub starting from offset.
 *
 * in_byte selects the unit used for the length/offset checks: when nonzero
 * the byte lengths are used directly; when zero, lengths come from
 * str_strlen (str's byte length is still used if it is single-byte
 * optimizable) and offset is converted with str_offset.
 *
 * Returns -1 when sub is a broken string, when str has fewer bytes than
 * sub, when the adjusted offset is negative, or when fewer than
 * sub_len_char units remain after offset.  Returns offset itself when sub
 * is empty.  Otherwise returns the result of strseq_core.
 *
 * NOTE(review): the conditions guarding the offset adjustment block are not
 * visible in this excerpt.
 */
3892rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3894 const char *str_ptr, *str_ptr_end, *sub_ptr;
3895 long str_len, sub_len;
3898 enc = rb_enc_check(str, sub);
3899 if (is_broken_string(sub))
return -1;
3901 str_ptr = RSTRING_PTR(str);
3902 str_ptr_end = RSTRING_END(str);
3903 str_len = RSTRING_LEN(str);
3904 sub_ptr = RSTRING_PTR(sub);
3905 sub_len = RSTRING_LEN(sub);
3907 if (str_len < sub_len)
return -1;
3910 long str_len_char, sub_len_char;
3911 int single_byte = single_byte_optimizable(str);
3912 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3913 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3915 offset += str_len_char;
3916 if (offset < 0)
return -1;
3918 if (str_len_char - offset < sub_len_char)
return -1;
3919 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3922 if (sub_len == 0)
return offset;
3925 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3939rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
3945 if (rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
3952 pos += str_strlen(str, NULL);
3962 if (pos > str_strlen(str, NULL))
3964 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
3965 rb_enc_check(str, sub), single_byte_optimizable(str));
3979 pos = rb_str_index(str, sub, pos);
3983 if (pos == -1)
return Qnil;
3992str_check_byte_pos(
VALUE str,
long pos)
3994 const char *s = RSTRING_PTR(str);
3995 const char *e = RSTRING_END(str);
3996 const char *p = s + pos;
3997 const char *pp = rb_enc_left_char_head(s, p, e, rb_enc_get(str));
4043rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4049 if (rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4056 pos += RSTRING_LEN(str);
4065 if (!str_check_byte_pos(str, pos)) {
4067 "offset %ld does not land on character boundary", pos);
4071 if (pos > RSTRING_LEN(str))
4085 pos = rb_strseq_index(str, sub, pos, 1);
4088 if (pos == -1)
return Qnil;
4096 char *hit, *adjusted;
4098 long slen, searchlen;
4101 sbeg = RSTRING_PTR(str);
4102 slen = RSTRING_LEN(sub);
4103 if (slen == 0)
return s - sbeg;
4104 e = RSTRING_END(str);
4105 t = RSTRING_PTR(sub);
4107 searchlen = s - sbeg + 1;
4110 hit = memrchr(sbeg, c, searchlen);
4112 adjusted = rb_enc_left_char_head(sbeg, hit, e, enc);
4113 if (hit != adjusted) {
4114 searchlen = adjusted - sbeg;
4117 if (memcmp(hit, t, slen) == 0)
4119 searchlen = adjusted - sbeg;
4120 }
while (searchlen > 0);
4131 sbeg = RSTRING_PTR(str);
4132 e = RSTRING_END(str);
4133 t = RSTRING_PTR(sub);
4134 slen = RSTRING_LEN(sub);
4137 if (memcmp(s, t, slen) == 0) {
4140 if (s <= sbeg)
break;
4141 s = rb_enc_prev_char(sbeg, s, e, enc);
4156 enc = rb_enc_check(str, sub);
4157 if (is_broken_string(sub))
return -1;
4158 singlebyte = single_byte_optimizable(str);
4159 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4160 slen = str_strlen(sub, enc);
4163 if (len < slen)
return -1;
4164 if (len - pos < slen) pos = len - slen;
4165 if (len == 0)
return pos;
4167 sbeg = RSTRING_PTR(str);
4170 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4176 s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
4238rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4243 long pos, len = str_strlen(str, enc);
4245 if (rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4256 if (pos > len) pos = len;
4264 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4265 enc, single_byte_optimizable(str));
4276 pos = rb_str_rindex(str, sub, pos);
4277 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Reverse search that works on byte lengths (RSTRING_LEN of str and sub).
 *
 * Returns -1 when sub is a broken string or has more bytes than str.
 * pos is pulled back to len - slen when fewer than slen bytes follow it.
 * For an empty str, pos is returned as is.  The general case is delegated
 * to str_rindex.
 *
 * NOTE(review): the condition around the memcmp against the start of str,
 * and the computation of s, are not visible in this excerpt.
 */
4283rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4289 enc = rb_enc_check(str, sub);
4290 if (is_broken_string(sub))
return -1;
4291 len = RSTRING_LEN(str);
4292 slen = RSTRING_LEN(sub);
4295 if (len < slen)
return -1;
4296 if (len - pos < slen) pos = len - slen;
4297 if (len == 0)
return pos;
4299 sbeg = RSTRING_PTR(str);
4302 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4309 return str_rindex(str, sub, s, enc);
4374rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4378 long pos, len = RSTRING_LEN(str);
4380 if (rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4391 if (pos > len) pos = len;
4397 if (!str_check_byte_pos(str, pos)) {
4399 "offset %ld does not land on character boundary", pos);
4412 pos = rb_str_byterindex(str, sub, pos);
4413 if (pos >= 0)
return LONG2NUM(pos);
4449 switch (OBJ_BUILTIN_TYPE(y)) {
4457 return rb_funcall(y, idEqTilde, 1, x);
4501rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4505 rb_check_arity(argc, 1, 2);
4508 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
/*
 * Takes one or two arguments (enforced by rb_check_arity).
 * argv[0] is turned into a pattern with get_pat; the optional argv[1] is
 * converted with NUM2LONG and defaults to 0 (presumably the start position
 * for the match -- confirm against rb_reg_match_p).
 * Returns the result of rb_reg_match_p.
 */
4540rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4543 rb_check_arity(argc, 1, 2);
4544 re = get_pat(argv[0]);
4545 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4554static enum neighbor_char
4560 if (rb_enc_mbminlen(enc) > 1) {
4562 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4564 return NEIGHBOR_NOT_CHAR;
4566 c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
4567 l = rb_enc_code_to_mbclen(c, enc);
4568 if (!l)
return NEIGHBOR_NOT_CHAR;
4569 if (l != len)
return NEIGHBOR_WRAPPED;
4570 rb_enc_mbcput(c, p, enc);
4571 r = rb_enc_precise_mbclen(p, p + len, enc);
4573 return NEIGHBOR_NOT_CHAR;
4575 return NEIGHBOR_FOUND;
4578 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4581 return NEIGHBOR_WRAPPED;
4582 ++((
unsigned char*)p)[i];
4583 l = rb_enc_precise_mbclen(p, p+len, enc);
4587 return NEIGHBOR_FOUND;
4590 memset(p+l, 0xff, len-l);
4596 for (len2 = len-1; 0 < len2; len2--) {
4597 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4601 memset(p+len2+1, 0xff, len-(len2+1));
4606static enum neighbor_char
4611 if (rb_enc_mbminlen(enc) > 1) {
4613 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4615 return NEIGHBOR_NOT_CHAR;
4617 c = rb_enc_mbc_to_codepoint(p, p + len, enc);
4618 if (!c)
return NEIGHBOR_NOT_CHAR;
4620 l = rb_enc_code_to_mbclen(c, enc);
4621 if (!l)
return NEIGHBOR_NOT_CHAR;
4622 if (l != len)
return NEIGHBOR_WRAPPED;
4623 rb_enc_mbcput(c, p, enc);
4624 r = rb_enc_precise_mbclen(p, p + len, enc);
4626 return NEIGHBOR_NOT_CHAR;
4628 return NEIGHBOR_FOUND;
4631 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4634 return NEIGHBOR_WRAPPED;
4635 --((
unsigned char*)p)[i];
4636 l = rb_enc_precise_mbclen(p, p+len, enc);
4640 return NEIGHBOR_FOUND;
4643 memset(p+l, 0, len-l);
4649 for (len2 = len-1; 0 < len2; len2--) {
4650 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4654 memset(p+len2+1, 0, len-(len2+1));
4668static enum neighbor_char
4669enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4671 enum neighbor_char ret;
4675 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4679 const int max_gaps = 1;
4681 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4682 if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
4683 ctype = ONIGENC_CTYPE_DIGIT;
4684 else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
4685 ctype = ONIGENC_CTYPE_ALPHA;
4687 return NEIGHBOR_NOT_CHAR;
4689 MEMCPY(save, p,
char, len);
4690 for (
try = 0;
try <= max_gaps; ++
try) {
4691 ret = enc_succ_char(p, len, enc);
4692 if (ret == NEIGHBOR_FOUND) {
4693 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4694 if (rb_enc_isctype(c, ctype, enc))
4695 return NEIGHBOR_FOUND;
4698 MEMCPY(p, save,
char, len);
4701 MEMCPY(save, p,
char, len);
4702 ret = enc_pred_char(p, len, enc);
4703 if (ret == NEIGHBOR_FOUND) {
4704 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4705 if (!rb_enc_isctype(c, ctype, enc)) {
4706 MEMCPY(p, save,
char, len);
4711 MEMCPY(p, save,
char, len);
4717 return NEIGHBOR_NOT_CHAR;
4720 if (ctype != ONIGENC_CTYPE_DIGIT) {
4721 MEMCPY(carry, p,
char, len);
4722 return NEIGHBOR_WRAPPED;
4725 MEMCPY(carry, p,
char, len);
4726 enc_succ_char(carry, len, enc);
4727 return NEIGHBOR_WRAPPED;
4796 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
4797 rb_enc_cr_str_copy_for_substr(str, orig);
4798 return str_succ(str);
4805 char *sbeg, *s, *e, *last_alnum = 0;
4806 int found_alnum = 0;
4808 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4809 long carry_pos = 0, carry_len = 1;
4810 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4812 slen = RSTRING_LEN(str);
4813 if (slen == 0)
return str;
4815 enc = STR_ENC_GET(str);
4816 sbeg = RSTRING_PTR(str);
4817 s = e = sbeg + slen;
4819 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4820 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4826 l = rb_enc_precise_mbclen(s, e, enc);
4827 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4828 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4829 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4831 case NEIGHBOR_NOT_CHAR:
4833 case NEIGHBOR_FOUND:
4835 case NEIGHBOR_WRAPPED:
4840 carry_pos = s - sbeg;
4845 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4846 enum neighbor_char neighbor;
4847 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4848 l = rb_enc_precise_mbclen(s, e, enc);
4849 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4850 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4852 neighbor = enc_succ_char(tmp, l, enc);
4854 case NEIGHBOR_FOUND:
4858 case NEIGHBOR_WRAPPED:
4861 case NEIGHBOR_NOT_CHAR:
4864 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4866 enc_succ_char(s, l, enc);
4868 if (!rb_enc_asciicompat(enc)) {
4869 MEMCPY(carry, s,
char, l);
4872 carry_pos = s - sbeg;
4876 RESIZE_CAPA(str, slen + carry_len);
4877 sbeg = RSTRING_PTR(str);
4878 s = sbeg + carry_pos;
4879 memmove(s + carry_len, s, slen - carry_pos);
4880 memmove(s, carry, carry_len);
4882 STR_SET_LEN(str, slen);
4883 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
4899rb_str_succ_bang(
VALUE str)
4907all_digits_p(
const char *s,
long len)
4961 VALUE end, exclusive;
4963 rb_scan_args(argc, argv,
"11", &end, &exclusive);
4965 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
4971 VALUE current, after_end;
4978 enc = rb_enc_check(beg, end);
4979 ascii = (is_ascii_string(beg) && is_ascii_string(end));
4981 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
4982 char c = RSTRING_PTR(beg)[0];
4983 char e = RSTRING_PTR(end)[0];
4985 if (c > e || (excl && c == e))
return beg;
4987 if ((*each)(rb_enc_str_new(&c, 1, enc), arg))
break;
4988 if (!excl && c == e)
break;
4990 if (excl && c == e)
break;
4995 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
4996 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
4997 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5001 width = RSTRING_LENINT(beg);
5002 b = rb_str_to_inum(beg, 10, FALSE);
5003 e = rb_str_to_inum(end, 10, FALSE);
5010 if (excl && bi == ei)
break;
5011 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5016 ID op = excl ?
'<' : idLE;
5017 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5020 while (rb_funcall(b, op, 1, e)) {
5022 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5023 b = rb_funcallv(b, succ, 0, 0);
5030 if (n > 0 || (excl && n == 0))
return beg;
5032 after_end = rb_funcallv(end, succ, 0, 0);
5037 next = rb_funcallv(current, succ, 0, 0);
5038 if ((*each)(current, arg))
break;
5039 if (
NIL_P(next))
break;
5043 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5058 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5059 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5060 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5061 int width = RSTRING_LENINT(beg);
5062 b = rb_str_to_inum(beg, 10, FALSE);
5068 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5076 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5077 b = rb_funcallv(b, succ, 0, 0);
5083 VALUE next = rb_funcallv(current, succ, 0, 0);
5084 if ((*each)(current, arg))
break;
5087 if (RSTRING_LEN(current) == 0)
5098 if (!
rb_equal(str, *argp))
return 0;
5112 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5113 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5114 rb_enc_asciicompat(STR_ENC_GET(val))) {
5115 const char *bp = RSTRING_PTR(beg);
5116 const char *ep = RSTRING_PTR(end);
5117 const char *vp = RSTRING_PTR(val);
5118 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5119 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5127 if (b <= v && v < e)
return Qtrue;
5128 return RBOOL(!
RTEST(exclusive) && v == e);
5135 all_digits_p(bp, RSTRING_LEN(beg)) &&
5136 all_digits_p(ep, RSTRING_LEN(end))) {
5141 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5143 return RBOOL(
NIL_P(val));
5165 else if (RB_TYPE_P(indx,
T_REGEXP)) {
5166 return rb_str_subpat(str, indx,
INT2FIX(0));
5168 else if (RB_TYPE_P(indx,
T_STRING)) {
5169 if (rb_str_index(str, indx, 0) != -1)
5175 long beg, len = str_strlen(str, NULL);
5187 return str_substr(str, idx, 1, FALSE);
5206rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5209 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5210 return rb_str_subpat(str, argv[0], argv[1]);
5218 rb_check_arity(argc, 1, 2);
5219 return rb_str_aref(str, argv[0]);
5225 char *ptr = RSTRING_PTR(str);
5226 long olen = RSTRING_LEN(str), nlen;
5228 str_modifiable(str);
5229 if (len > olen) len = olen;
5231 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5233 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5235 STR_SET_EMBED_LEN(str, nlen);
5236 ptr =
RSTRING(str)->as.embed.ary;
5237 memmove(ptr, oldptr + len, nlen);
5238 if (fl == STR_NOEMBED)
xfree(oldptr);
5241 if (!STR_SHARED_P(str)) {
5243 rb_enc_cr_str_exact_copy(shared, str);
5246 ptr =
RSTRING(str)->as.heap.ptr += len;
5247 RSTRING(str)->as.heap.len = nlen;
5255rb_str_splice_0(
VALUE str,
long beg,
long len,
VALUE val)
5258 long slen, vlen = RSTRING_LEN(val);
5261 if (beg == 0 && vlen == 0) {
5266 str_modify_keep_cr(str);
5270 RESIZE_CAPA(str, slen + vlen - len);
5271 sptr = RSTRING_PTR(str);
5280 memmove(sptr + beg + vlen,
5282 slen - (beg + len));
5284 if (vlen < beg && len < 0) {
5285 MEMZERO(sptr + slen,
char, -len);
5288 memmove(sptr + beg, RSTRING_PTR(val), vlen);
5291 STR_SET_LEN(str, slen);
5292 TERM_FILL(&sptr[slen], TERM_LEN(str));
5302 int singlebyte = single_byte_optimizable(str);
5308 enc = rb_enc_check(str, val);
5309 slen = str_strlen(str, enc);
5311 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5318 assert(beg <= slen);
5319 if (len > slen - beg) {
5322 p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
5323 if (!p) p = RSTRING_END(str);
5324 e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
5325 if (!e) e = RSTRING_END(str);
5327 beg = p - RSTRING_PTR(str);
5329 rb_str_splice_0(str, beg, len, val);
5330 rb_enc_associate(str, enc);
/* rb_str_splice is a file-local alias: it expands to rb_str_update with the same arguments. */
5336#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
5343 long start, end, len;
5353 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5357 nth += regs->num_regs;
5367 enc = rb_enc_check_str(str, val);
5368 rb_str_splice_0(str, start, len, val);
5369 rb_enc_associate(str, enc);
5377 switch (
TYPE(indx)) {
5379 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5383 beg = rb_str_index(str, indx, 0);
5388 rb_str_splice(str, beg, str_strlen(indx, NULL), val);
5396 rb_str_splice(str, beg, len, val);
5404 rb_str_splice(str, idx, 1, val);
5439rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5442 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5443 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5450 rb_check_arity(argc, 2, 3);
5451 return rb_str_aset(str, argv[0], argv[1]);
5483 rb_str_splice(str, pos, 0, str2);
5511rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5518 rb_check_arity(argc, 1, 2);
5519 str_modify_keep_cr(str);
5527 if ((nth += regs->num_regs) <= 0)
return Qnil;
5529 else if (nth >= regs->num_regs)
return Qnil;
5531 len = END(nth) - beg;
5534 else if (argc == 2) {
5542 if (!len)
return Qnil;
5543 beg = p - RSTRING_PTR(str);
5546 else if (RB_TYPE_P(indx,
T_STRING)) {
5547 beg = rb_str_index(str, indx, 0);
5548 if (beg == -1)
return Qnil;
5549 len = RSTRING_LEN(indx);
5560 if (!len)
return Qnil;
5561 beg = p - RSTRING_PTR(str);
5570 beg = p - RSTRING_PTR(str);
5573 result =
rb_str_new(RSTRING_PTR(str)+beg, len);
5574 rb_enc_cr_str_copy_for_substr(result, str);
5582 char *sptr = RSTRING_PTR(str);
5583 long slen = RSTRING_LEN(str);
5584 if (beg + len > slen)
5588 slen - (beg + len));
5590 STR_SET_LEN(str, slen);
5591 TERM_FILL(&sptr[slen], TERM_LEN(str));
5602 switch (OBJ_BUILTIN_TYPE(pat)) {
5621get_pat_quoted(
VALUE pat,
int check)
5625 switch (OBJ_BUILTIN_TYPE(pat)) {
5639 if (check && is_broken_string(pat)) {
/*
 * Searches str for pat starting at pos.  Two paths are visible:
 *   - a substring search through rb_strseq_index with in_byte = 1; when
 *     set_backref_str is nonzero the match is recorded on a frozen copy of
 *     str via rb_backref_set_string (position pos, length RSTRING_LEN(pat));
 *   - a regexp search through rb_reg_search0.
 * NOTE(review): the condition choosing between the two paths is not visible
 * in this excerpt -- presumably the type of pat; confirm.
 */
5646rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5649 pos = rb_strseq_index(str, pat, pos, 1);
5650 if (set_backref_str) {
5652 str = rb_str_new_frozen_String(str);
5653 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
5662 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5682rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5690 rb_check_arity(argc, min_arity, 2);
5696 hash = rb_check_hash_type(argv[1]);
5702 pat = get_pat_quoted(argv[0], 1);
5704 str_modifiable(str);
5705 beg = rb_pat_search(pat, str, 0, 1);
5719 end0 = beg0 + RSTRING_LEN(pat);
5728 if (iter || !
NIL_P(hash)) {
5729 p = RSTRING_PTR(str); len = RSTRING_LEN(str);
5735 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5738 str_mod_check(str, p, len);
5745 enc = rb_enc_compatible(str, repl);
5748 p = RSTRING_PTR(str); len = RSTRING_LEN(str);
5752 rb_enc_name(str_enc),
5753 rb_enc_name(STR_ENC_GET(repl)));
5755 enc = STR_ENC_GET(repl);
5758 rb_enc_associate(str, enc);
5768 rlen = RSTRING_LEN(repl);
5769 len = RSTRING_LEN(str);
5771 RESIZE_CAPA(str, len + rlen - plen);
5773 p = RSTRING_PTR(str);
5775 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
5777 rp = RSTRING_PTR(repl);
5778 memmove(p + beg0, rp, rlen);
5780 STR_SET_LEN(str, len);
5781 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
5808 rb_str_sub_bang(argc, argv, str);
5813str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5817 long beg, beg0, end0;
5818 long offset, blen, slen, len, last;
5819 enum {STR, ITER, MAP} mode = STR;
5821 int need_backref = -1;
5831 hash = rb_check_hash_type(argv[1]);
5840 rb_error_arity(argc, 1, 2);
5843 pat = get_pat_quoted(argv[0], 1);
5844 beg = rb_pat_search(pat, str, 0, need_backref);
5846 if (bang)
return Qnil;
5851 blen = RSTRING_LEN(str) + 30;
5853 sp = RSTRING_PTR(str);
5854 slen = RSTRING_LEN(str);
5856 str_enc = STR_ENC_GET(str);
5857 rb_enc_associate(dest, str_enc);
5865 end0 = beg0 + RSTRING_LEN(pat);
5879 val = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5882 str_mod_check(str, sp, slen);
5887 else if (need_backref) {
5889 if (need_backref < 0) {
5890 need_backref = val != repl;
5897 len = beg0 - offset;
5911 if (RSTRING_LEN(str) <= end0)
break;
5912 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
5914 offset = end0 + len;
5916 cp = RSTRING_PTR(str) + offset;
5917 if (offset > RSTRING_LEN(str))
break;
5918 beg = rb_pat_search(pat, str, offset, need_backref);
5920 if (RSTRING_LEN(str) > offset) {
5923 rb_pat_search(pat, str, last, 1);
5925 str_shared_replace(str, dest);
5953rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
5955 str_modify_keep_cr(str);
5956 return str_gsub(argc, argv, str, 1);
5979 return str_gsub(argc, argv, str, 0);
5997 str_modifiable(str);
5998 if (str == str2)
return str;
6002 return str_replace(str, str2);
6017rb_str_clear(
VALUE str)
6021 STR_SET_EMBED_LEN(str, 0);
6022 RSTRING_PTR(str)[0] = 0;
6023 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6042rb_str_chr(
VALUE str)
6066 pos += RSTRING_LEN(str);
6067 if (pos < 0 || RSTRING_LEN(str) <= pos)
6070 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6089 long len = RSTRING_LEN(str);
6090 char *ptr, *head, *left = 0;
6094 if (pos < -len || len <= pos)
6101 char byte = (char)(
NUM2INT(w) & 0xFF);
6103 if (!str_independent(str))
6104 str_make_independent(str);
6105 enc = STR_ENC_GET(str);
6106 head = RSTRING_PTR(str);
6108 if (!STR_EMBED_P(str)) {
6115 nlen = rb_enc_precise_mbclen(left, head+len, enc);
6122 left = rb_enc_left_char_head(head, ptr, head+len, enc);
6123 width = rb_enc_precise_mbclen(left, head+len, enc);
6125 nlen = rb_enc_precise_mbclen(left, head+len, enc);
/*
 * Substring by byte position: n is the byte length of str.
 *
 * Returns Qnil when beg is greater than n or len is negative.  Two further
 * Qnil returns cover a negative beg and the !empty case.
 * NOTE(review): the conditions enclosing those two returns are not visible
 * in this excerpt.
 *
 * The result is built with str_subseq and receives str's encoding through
 * str_enc_copy; an empty result is then examined further depending on
 * whether str's encoding is ASCII-compatible.
 */
6141str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6143 long n = RSTRING_LEN(str);
6145 if (beg > n || len < 0)
return Qnil;
6148 if (beg < 0)
return Qnil;
6153 if (!empty)
return Qnil;
6157 VALUE str2 = str_subseq(str, beg, len);
6159 str_enc_copy(str2, str);
6161 if (RSTRING_LEN(str2) == 0) {
6162 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6190 long beg, len = RSTRING_LEN(str);
6198 return str_byte_substr(str, beg, len, TRUE);
6203 return str_byte_substr(str, idx, 1, FALSE);
6250rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6255 return str_byte_substr(str, beg, len, TRUE);
6257 rb_check_arity(argc, 1, 2);
6258 return str_byte_aref(str, argv[0]);
6278rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6280 long beg, end, len, slen;
6285 rb_check_arity(argc, 2, 3);
6289 rb_builtin_class_name(argv[0]));
6299 slen = RSTRING_LEN(str);
6300 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6307 assert(beg <= slen);
6308 if (len > slen - beg) {
6312 if (!str_check_byte_pos(str, beg)) {
6314 "offset %ld does not land on character boundary", beg);
6316 if (!str_check_byte_pos(str, end)) {
6318 "offset %ld does not land on character boundary", end);
6321 enc = rb_enc_check(str, val);
6322 str_modify_keep_cr(str);
6323 rb_str_splice_0(str, beg, len, val);
6324 rb_enc_associate(str, enc);
6342rb_str_reverse(
VALUE str)
6349 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6350 enc = STR_ENC_GET(str);
6352 s = RSTRING_PTR(str); e = RSTRING_END(str);
6353 p = RSTRING_END(rev);
6356 if (RSTRING_LEN(str) > 1) {
6357 if (single_byte_optimizable(str)) {
6364 int clen = rb_enc_fast_mbclen(s, e, enc);
6372 cr = rb_enc_asciicompat(enc) ?
6375 int clen = rb_enc_mbclen(s, e, enc);
6384 STR_SET_LEN(rev, RSTRING_LEN(str));
6385 str_enc_copy(rev, str);
6405rb_str_reverse_bang(
VALUE str)
6407 if (RSTRING_LEN(str) > 1) {
6408 if (single_byte_optimizable(str)) {
6411 str_modify_keep_cr(str);
6412 s = RSTRING_PTR(str);
6413 e = RSTRING_END(str) - 1;
6421 str_shared_replace(str, rb_str_reverse(str));
6425 str_modify_keep_cr(str);
6450 i = rb_str_index(str, arg, 0);
6452 return RBOOL(i != -1);
6493 if (rb_check_arity(argc, 0, 1) && (base =
NUM2INT(argv[0])) < 0) {
6496 return rb_str_to_inum(str, base, FALSE);
6520rb_str_to_f(
VALUE str)
6538rb_str_to_s(
VALUE str)
6550 char s[RUBY_MAX_CHAR_LEN];
6551 int n = rb_enc_codelen(c, enc);
6553 rb_enc_mbcput(c, s, enc);
/* Size bound handed to snprintf() when one escaped character is formatted
 * (formats such as "\\x%02X", "\\x{%X}", "\\u%04X", "\\u{%X}").  The scratch
 * buffers that use it are declared with CHAR_ESC_LEN + 1 bytes. */
6558#define CHAR_ESC_LEN 13
6561rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6563 char buf[CHAR_ESC_LEN + 1];
6571 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6573 else if (c < 0x10000) {
6574 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6577 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6582 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6585 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6588 l = (int)strlen(buf);
6594ruby_escaped_char(
int c)
6597 case '\0':
return "\\0";
6598 case '\n':
return "\\n";
6599 case '\r':
return "\\r";
6600 case '\t':
return "\\t";
6601 case '\f':
return "\\f";
6602 case '\013':
return "\\v";
6603 case '\010':
return "\\b";
6604 case '\007':
return "\\a";
6605 case '\033':
return "\\e";
6606 case '\x7f':
return "\\c?";
6612rb_str_escape(
VALUE str)
6616 const char *p = RSTRING_PTR(str);
6617 const char *pend = RSTRING_END(str);
6618 const char *prev = p;
6619 char buf[CHAR_ESC_LEN + 1];
6621 int unicode_p = rb_enc_unicode_p(enc);
6622 int asciicompat = rb_enc_asciicompat(enc);
6627 int n = rb_enc_precise_mbclen(p, pend, enc);
6629 if (p > prev) str_buf_cat(result, prev, p - prev);
6630 n = rb_enc_mbminlen(enc);
6632 n = (int)(pend - p);
6634 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6635 str_buf_cat(result, buf, strlen(buf));
6641 c = rb_enc_mbc_to_codepoint(p, pend, enc);
6643 cc = ruby_escaped_char(c);
6645 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6646 str_buf_cat(result, cc, strlen(cc));
6649 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
6652 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6653 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6657 if (p > prev) str_buf_cat(result, prev, p - prev);
6681 const char *p, *pend, *prev;
6682 char buf[CHAR_ESC_LEN + 1];
6684 rb_encoding *resenc = rb_default_internal_encoding();
6685 int unicode_p = rb_enc_unicode_p(enc);
6686 int asciicompat = rb_enc_asciicompat(enc);
6688 if (resenc == NULL) resenc = rb_default_external_encoding();
6689 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
6690 rb_enc_associate(result, resenc);
6691 str_buf_cat2(result,
"\"");
6693 p = RSTRING_PTR(str); pend = RSTRING_END(str);
6699 n = rb_enc_precise_mbclen(p, pend, enc);
6701 if (p > prev) str_buf_cat(result, prev, p - prev);
6702 n = rb_enc_mbminlen(enc);
6704 n = (int)(pend - p);
6706 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6707 str_buf_cat(result, buf, strlen(buf));
6713 c = rb_enc_mbc_to_codepoint(p, pend, enc);
6715 if ((asciicompat || unicode_p) &&
6716 (c ==
'"'|| c ==
'\\' ||
6720 (cc = rb_enc_codepoint(p,pend,enc),
6721 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6722 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6723 str_buf_cat2(result,
"\\");
6724 if (asciicompat || enc == resenc) {
6730 case '\n': cc =
'n';
break;
6731 case '\r': cc =
'r';
break;
6732 case '\t': cc =
't';
break;
6733 case '\f': cc =
'f';
break;
6734 case '\013': cc =
'v';
break;
6735 case '\010': cc =
'b';
break;
6736 case '\007': cc =
'a';
break;
6737 case 033: cc =
'e';
break;
6738 default: cc = 0;
break;
6741 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6744 str_buf_cat(result, buf, 2);
6756 if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
6757 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
6761 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6762 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6767 if (p > prev) str_buf_cat(result, prev, p - prev);
6768 str_buf_cat2(result,
"\"");
/* True when p is still before e and the byte at p is '$', '@' or '{'.
 * The dump code evaluates it on the byte following a '#' to decide whether a
 * backslash is written to the output. */
6773#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6793 int encidx = rb_enc_get_index(str);
6796 const char *p, *pend;
6799 int u8 = (encidx == rb_utf8_encindex());
6800 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6803 if (!rb_enc_asciicompat(enc)) {
6805 len += strlen(enc->name);
6808 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6811 unsigned char c = *p++;
6814 case '"':
case '\\':
6815 case '\n':
case '\r':
6816 case '\t':
case '\f':
6817 case '\013':
case '\010':
case '\007':
case '\033':
6822 clen = IS_EVSTR(p, pend) ? 2 : 1;
6830 if (u8 && c > 0x7F) {
6831 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6833 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
6836 else if (cc <= 0xFFFFF)
6849 if (clen > LONG_MAX - len) {
6856 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6857 q = RSTRING_PTR(result); qend = q + len + 1;
6861 unsigned char c = *p++;
6863 if (c ==
'"' || c ==
'\\') {
6867 else if (c ==
'#') {
6868 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6871 else if (c ==
'\n') {
6875 else if (c ==
'\r') {
6879 else if (c ==
'\t') {
6883 else if (c ==
'\f') {
6887 else if (c ==
'\013') {
6891 else if (c ==
'\010') {
6895 else if (c ==
'\007') {
6899 else if (c ==
'\033') {
6909 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
6911 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
6914 snprintf(q, qend-q,
"u%04X", cc);
6916 snprintf(q, qend-q,
"u{%X}", cc);
6921 snprintf(q, qend-q,
"x%02X", c);
6927 if (!rb_enc_asciicompat(enc)) {
6928 snprintf(q, qend-q, nonascii_suffix, enc->name);
6929 encidx = rb_ascii8bit_encindex();
6932 rb_enc_associate_index(result, encidx);
6938unescape_ascii(
unsigned int c)
6962undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6964 const char *s = *ss;
6968 unsigned char buf[6];
6986 *buf = unescape_ascii(*s);
6998 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
6999 if (*penc != enc_utf8) {
7001 rb_enc_associate(undumped, enc_utf8);
7018 if (hexlen == 0 || hexlen > 6) {
7024 if (0xd800 <= c && c <= 0xdfff) {
7027 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7037 if (0xd800 <= c && c <= 0xdfff) {
7040 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/* Forward declaration: str_undump() below calls rb_str_is_ascii_only_p()
 * and compares its result against Qfalse. */
7068static VALUE rb_str_is_ascii_only_p(
VALUE str);
7086str_undump(
VALUE str)
7088 const char *s = RSTRING_PTR(str);
7089 const char *s_end = RSTRING_END(str);
7091 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7093 bool binary =
false;
7097 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7100 if (!str_null_check(str, &w)) {
7103 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7104 if (*s !=
'"')
goto invalid_format;
7122 static const char force_encoding_suffix[] =
".force_encoding(\"";
7123 static const char dup_suffix[] =
".dup";
7124 const char *encname;
7129 size =
sizeof(dup_suffix) - 1;
7130 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7132 size =
sizeof(force_encoding_suffix) - 1;
7133 if (s_end - s <= size)
goto invalid_format;
7134 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7142 s = memchr(s,
'"', s_end-s);
7144 if (!s)
goto invalid_format;
7145 if (s_end - s != 2)
goto invalid_format;
7146 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7148 encidx = rb_enc_find_index2(encname, (
long)size);
7152 rb_enc_associate_index(undumped, encidx);
7162 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7171 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7177 if (rb_enc_dummy_p(enc)) {
7184str_true_enc(
VALUE str)
7187 rb_str_check_dummy_enc(enc);
7191static OnigCaseFoldType
7192check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7198 if (argv[0]==sym_turkic) {
7199 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7201 if (argv[1]==sym_lithuanian)
7202 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7207 else if (argv[0]==sym_lithuanian) {
7208 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7210 if (argv[1]==sym_turkic)
7211 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7218 else if (argv[0]==sym_ascii)
7219 flags |= ONIGENC_CASE_ASCII_ONLY;
7220 else if (argv[0]==sym_fold) {
7221 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7222 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7234 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc) == 1))
/* Constant slack added to the capacity of every case-mapping buffer, on top
 * of the part that is derived from the remaining source length. */
7240#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7241#ifndef CASEMAP_DEBUG
7242# define CASEMAP_DEBUG 0
7250 OnigUChar space[FLEX_ARY_LEN];
7254mapping_buffer_free(
void *p)
7258 while (current_buffer) {
7259 previous_buffer = current_buffer;
7260 current_buffer = current_buffer->next;
7261 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7267 {0, mapping_buffer_free,}
7275 const OnigUChar *source_current, *source_end;
7276 int target_length = 0;
7277 VALUE buffer_anchor;
7280 size_t buffer_count = 0;
7281 int buffer_length_or_invalid;
7283 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7285 source_current = (OnigUChar*)RSTRING_PTR(source);
7286 source_end = (OnigUChar*)RSTRING_END(source);
7290 while (source_current < source_end) {
7292 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7293 if (CASEMAP_DEBUG) {
7294 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n", capa);
7297 *pre_buffer = current_buffer;
7298 pre_buffer = &current_buffer->next;
7299 current_buffer->next = NULL;
7300 current_buffer->capa = capa;
7301 buffer_length_or_invalid = enc->case_map(flags,
7302 &source_current, source_end,
7303 current_buffer->space,
7304 current_buffer->space+current_buffer->capa,
7306 if (buffer_length_or_invalid < 0) {
7307 current_buffer =
DATA_PTR(buffer_anchor);
7309 mapping_buffer_free(current_buffer);
7312 target_length += current_buffer->used = buffer_length_or_invalid;
7314 if (CASEMAP_DEBUG) {
7315 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7318 if (buffer_count==1) {
7319 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7322 char *target_current;
7325 target_current = RSTRING_PTR(target);
7326 current_buffer =
DATA_PTR(buffer_anchor);
7327 while (current_buffer) {
7328 memcpy(target_current, current_buffer->space, current_buffer->used);
7329 target_current += current_buffer->used;
7330 current_buffer = current_buffer->next;
7333 current_buffer =
DATA_PTR(buffer_anchor);
7335 mapping_buffer_free(current_buffer);
7340 str_enc_copy(target, source);
7349 const OnigUChar *source_current, *source_end;
7350 OnigUChar *target_current, *target_end;
7351 long old_length = RSTRING_LEN(source);
7352 int length_or_invalid;
7354 if (old_length == 0)
return Qnil;
7356 source_current = (OnigUChar*)RSTRING_PTR(source);
7357 source_end = (OnigUChar*)RSTRING_END(source);
7358 if (source == target) {
7359 target_current = (OnigUChar*)source_current;
7360 target_end = (OnigUChar*)source_end;
7363 target_current = (OnigUChar*)RSTRING_PTR(target);
7364 target_end = (OnigUChar*)RSTRING_END(target);
7367 length_or_invalid = onigenc_ascii_only_case_map(flags,
7368 &source_current, source_end,
7369 target_current, target_end, enc);
7370 if (length_or_invalid < 0)
7372 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7373 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7374 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7376 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7379 str_enc_copy(target, source);
7385upcase_single(
VALUE str)
7387 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7388 bool modified =
false;
7391 unsigned int c = *(
unsigned char*)s;
7393 if (
'a' <= c && c <=
'z') {
7394 *s =
'A' + (c -
'a');
7422rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7425 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7427 flags = check_case_options(argc, argv, flags);
7428 str_modify_keep_cr(str);
7429 enc = str_true_enc(str);
7430 if (case_option_single_p(flags, enc, str)) {
7431 if (upcase_single(str))
7432 flags |= ONIGENC_CASE_MODIFIED;
7434 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7435 rb_str_ascii_casemap(str, str, &flags, enc);
7437 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7439 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7461rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7464 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7467 flags = check_case_options(argc, argv, flags);
7468 enc = str_true_enc(str);
7469 if (case_option_single_p(flags, enc, str)) {
7470 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7471 str_enc_copy(ret, str);
7474 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7476 rb_str_ascii_casemap(str, ret, &flags, enc);
7479 ret = rb_str_casemap(str, &flags, enc);
7486downcase_single(
VALUE str)
7488 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7489 bool modified =
false;
7492 unsigned int c = *(
unsigned char*)s;
7494 if (
'A' <= c && c <=
'Z') {
7495 *s =
'a' + (c -
'A');
7524rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7527 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7529 flags = check_case_options(argc, argv, flags);
7530 str_modify_keep_cr(str);
7531 enc = str_true_enc(str);
7532 if (case_option_single_p(flags, enc, str)) {
7533 if (downcase_single(str))
7534 flags |= ONIGENC_CASE_MODIFIED;
7536 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7537 rb_str_ascii_casemap(str, str, &flags, enc);
7539 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7541 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7563rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7566 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7569 flags = check_case_options(argc, argv, flags);
7570 enc = str_true_enc(str);
7571 if (case_option_single_p(flags, enc, str)) {
7572 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7573 str_enc_copy(ret, str);
7574 downcase_single(ret);
7576 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7578 rb_str_ascii_casemap(str, ret, &flags, enc);
7581 ret = rb_str_casemap(str, &flags, enc);
7609rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7612 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7614 flags = check_case_options(argc, argv, flags);
7615 str_modify_keep_cr(str);
7616 enc = str_true_enc(str);
7617 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7618 if (flags&ONIGENC_CASE_ASCII_ONLY)
7619 rb_str_ascii_casemap(str, str, &flags, enc);
7621 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7623 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7647rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7650 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7653 flags = check_case_options(argc, argv, flags);
7654 enc = str_true_enc(str);
7655 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
7656 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7658 rb_str_ascii_casemap(str, ret, &flags, enc);
7661 ret = rb_str_casemap(str, &flags, enc);
7688rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7691 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7693 flags = check_case_options(argc, argv, flags);
7694 str_modify_keep_cr(str);
7695 enc = str_true_enc(str);
7696 if (flags&ONIGENC_CASE_ASCII_ONLY)
7697 rb_str_ascii_casemap(str, str, &flags, enc);
7699 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7701 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7725rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7728 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7731 flags = check_case_options(argc, argv, flags);
7732 enc = str_true_enc(str);
7733 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
7734 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7736 rb_str_ascii_casemap(str, ret, &flags, enc);
7739 ret = rb_str_casemap(str, &flags, enc);
/* Shorthand for a pointer to unsigned char. */
7744typedef unsigned char *USTR;
7748 unsigned int now, max;
7760 if (t->p == t->pend)
return -1;
7761 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7764 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7766 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7768 if (t->p < t->pend) {
7769 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7772 if (t->now < 0x80 && c < 0x80) {
7774 "invalid range \"%c-%c\" in string transliteration",
7789 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7790 if (t->now == t->max) {
7795 if (t->now < t->max) {
7811 const unsigned int errc = -1;
7812 unsigned int trans[256];
7814 struct tr trsrc, trrepl;
7816 unsigned int c, c0, last = 0;
7817 int modify = 0, i, l;
7818 unsigned char *s, *send;
7820 int singlebyte = single_byte_optimizable(str);
/* If the tracked code range `cr` is still ENC_CODERANGE_7BIT and c is not
 * ASCII, switch `cr` to ENC_CODERANGE_VALID; otherwise leave it untouched.
 * Expects a variable named `cr` to be in scope where the macro is expanded. */
7824#define CHECK_IF_ASCII(c) \
7825 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7826 (cr = ENC_CODERANGE_VALID) : 0)
7830 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7831 if (RSTRING_LEN(repl) == 0) {
7832 return rb_str_delete_bang(1, &src, str);
7836 e1 = rb_enc_check(str, src);
7837 e2 = rb_enc_check(str, repl);
7842 enc = rb_enc_check(src, repl);
7844 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
7845 if (RSTRING_LEN(src) > 1 &&
7846 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7847 trsrc.p + l < trsrc.pend) {
7851 trrepl.p = RSTRING_PTR(repl);
7852 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
7853 trsrc.gen = trrepl.gen = 0;
7854 trsrc.now = trrepl.now = 0;
7855 trsrc.max = trrepl.max = 0;
7858 for (i=0; i<256; i++) {
7861 while ((c = trnext(&trsrc, enc)) != errc) {
7866 if (!hash) hash = rb_hash_new();
7870 while ((c = trnext(&trrepl, enc)) != errc)
7873 for (i=0; i<256; i++) {
7874 if (trans[i] != errc) {
7882 for (i=0; i<256; i++) {
7885 while ((c = trnext(&trsrc, enc)) != errc) {
7886 r = trnext(&trrepl, enc);
7887 if (r == errc) r = trrepl.now;
7890 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
7893 if (!hash) hash = rb_hash_new();
7901 str_modify_keep_cr(str);
7902 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)RSTRING_END(str);
7903 termlen = rb_enc_mbminlen(enc);
7906 long offset, max = RSTRING_LEN(str);
7907 unsigned int save = -1;
7908 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7913 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7914 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
7923 if (cflag) c = last;
7926 else if (cflag) c = errc;
7932 if (c != (
unsigned int)-1) {
7938 tlen = rb_enc_codelen(c, enc);
7944 if (enc != e1) may_modify = 1;
7946 if ((offset = t - buf) + tlen > max) {
7947 size_t MAYBE_UNUSED(old) = max + termlen;
7948 max = offset + tlen + (send - s);
7949 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7952 rb_enc_mbcput(c, t, enc);
7953 if (may_modify && memcmp(s, t, tlen) != 0) {
7959 if (!STR_EMBED_P(str)) {
7960 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7962 TERM_FILL((
char *)t, termlen);
7963 RSTRING(str)->as.heap.ptr = (
char *)buf;
7964 RSTRING(str)->as.heap.len = t - buf;
7965 STR_SET_NOEMBED(str);
7966 RSTRING(str)->as.heap.aux.capa = max;
7968 else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
7970 c = (
unsigned char)*s;
7971 if (trans[c] != errc) {
7988 long offset, max = (long)((send - s) * 1.2);
7989 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7993 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7994 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8002 if (cflag) c = last;
8005 else if (cflag) c = errc;
8009 c = cflag ? last : errc;
8012 tlen = rb_enc_codelen(c, enc);
8017 if (enc != e1) may_modify = 1;
8019 if ((offset = t - buf) + tlen > max) {
8020 size_t MAYBE_UNUSED(old) = max + termlen;
8021 max = offset + tlen + (long)((send - s) * 1.2);
8022 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8026 rb_enc_mbcput(c, t, enc);
8027 if (may_modify && memcmp(s, t, tlen) != 0) {
8035 if (!STR_EMBED_P(str)) {
8036 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8038 TERM_FILL((
char *)t, termlen);
8039 RSTRING(str)->as.heap.ptr = (
char *)buf;
8040 RSTRING(str)->as.heap.len = t - buf;
8041 STR_SET_NOEMBED(str);
8042 RSTRING(str)->as.heap.aux.capa = max;
8048 rb_enc_associate(str, enc);
8067 return tr_trans(str, src, repl, 0);
8114 tr_trans(str, src, repl, 0);
/* Number of distinct unsigned char values; codepoints below this bound are
 * looked up directly by index in the tr tables. */
8118#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Table size including one extra slot at index TR_TABLE_MAX.
 * tr_setup_table() stores its cflag in that slot, and tr_find() falls back to
 * it for codepoints that are not below TR_TABLE_MAX. */
8119#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8121tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8124 const unsigned int errc = -1;
8125 char buf[TR_TABLE_MAX];
8128 VALUE table = 0, ptable = 0;
8129 int i, l, cflag = 0;
8131 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8132 tr.gen =
tr.now =
tr.max = 0;
8134 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8139 for (i=0; i<TR_TABLE_MAX; i++) {
8142 stable[TR_TABLE_MAX] = cflag;
8144 else if (stable[TR_TABLE_MAX] && !cflag) {
8145 stable[TR_TABLE_MAX] = 0;
8147 for (i=0; i<TR_TABLE_MAX; i++) {
8151 while ((c = trnext(&
tr, enc)) != errc) {
8152 if (c < TR_TABLE_MAX) {
8153 buf[(
unsigned char)c] = !cflag;
8158 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8161 table = ptable ? ptable : rb_hash_new();
8165 table = rb_hash_new();
8170 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8171 rb_hash_aset(table, key,
Qtrue);
8175 for (i=0; i<TR_TABLE_MAX; i++) {
8176 stable[i] = stable[i] && buf[i];
8178 if (!table && !cflag) {
8185tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8187 if (c < TR_TABLE_MAX) {
8188 return table[c] != 0;
8194 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8195 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8199 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8202 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8216rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8218 char squeez[TR_TABLE_SIZE];
8221 VALUE del = 0, nodel = 0;
8223 int i, ascompat, cr;
8225 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8227 for (i=0; i<argc; i++) {
8231 enc = rb_enc_check(str, s);
8232 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8235 str_modify_keep_cr(str);
8236 ascompat = rb_enc_asciicompat(enc);
8237 s = t = RSTRING_PTR(str);
8238 send = RSTRING_END(str);
8244 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8255 c = rb_enc_codepoint_len(s, send, &clen, enc);
8257 if (tr_find(c, squeez, del, nodel)) {
8261 if (t != s) rb_enc_mbcput(c, t, enc);
8268 TERM_FILL(t, TERM_LEN(str));
8269 STR_SET_LEN(str, t - RSTRING_PTR(str));
8272 if (modify)
return str;
8292rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8295 rb_str_delete_bang(argc, argv, str);
8309rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8311 char squeez[TR_TABLE_SIZE];
8313 VALUE del = 0, nodel = 0;
8314 unsigned char *s, *send, *t;
8316 int ascompat, singlebyte = single_byte_optimizable(str);
8320 enc = STR_ENC_GET(str);
8323 for (i=0; i<argc; i++) {
8327 enc = rb_enc_check(str, s);
8328 if (singlebyte && !single_byte_optimizable(s))
8330 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8334 str_modify_keep_cr(str);
8335 s = t = (
unsigned char *)RSTRING_PTR(str);
8336 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8337 send = (
unsigned char *)RSTRING_END(str);
8339 ascompat = rb_enc_asciicompat(enc);
8343 unsigned int c = *s++;
8344 if (c != save || (argc > 0 && !squeez[c])) {
8354 if (ascompat && (c = *s) < 0x80) {
8355 if (c != save || (argc > 0 && !squeez[c])) {
8361 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8363 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8364 if (t != s) rb_enc_mbcput(c, t, enc);
8373 TERM_FILL((
char *)t, TERM_LEN(str));
8374 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8375 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8379 if (modify)
return str;
8402rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8405 rb_str_squeeze_bang(argc, argv, str);
8423 return tr_trans(str, src, repl, 1);
8446 tr_trans(str, src, repl, 1);
8475rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8477 char table[TR_TABLE_SIZE];
8479 VALUE del = 0, nodel = 0, tstr;
8489 enc = rb_enc_check(str, tstr);
8492 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8493 (ptstr = RSTRING_PTR(tstr),
8494 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8495 !is_broken_string(str)) {
8497 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8499 s = RSTRING_PTR(str);
8500 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8501 send = RSTRING_END(str);
8503 if (*(
unsigned char*)s++ == c) n++;
8509 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8510 for (i=1; i<argc; i++) {
8513 enc = rb_enc_check(str, tstr);
8514 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8517 s = RSTRING_PTR(str);
8518 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8519 send = RSTRING_END(str);
8520 ascompat = rb_enc_asciicompat(enc);
8524 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8532 c = rb_enc_codepoint_len(s, send, &clen, enc);
8533 if (tr_find(c, table, del, nodel)) {
8544rb_fs_check(
VALUE val)
8548 if (
NIL_P(val))
return 0;
8553static const char isspacetable[256] = {
8554 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8555 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8556 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Looks c up in isspacetable after converting it to unsigned char, so the
 * index always falls inside the 256-entry table.  The table is nonzero for
 * byte values 9..13 (TAB, LF, VT, FF, CR) and 32 (space). */
8572#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8575split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8577 if (empty_count >= 0 && len == 0) {
8578 return empty_count + 1;
8580 if (empty_count > 0) {
8584 rb_ary_push(result, str_new_empty_String(str));
8585 }
while (--empty_count > 0);
8589 rb_yield(str_new_empty_String(str));
8590 }
while (--empty_count > 0);
8595 rb_ary_push(result, str);
8604 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
8608literal_split_pattern(
VALUE spat, split_type_t default_type)
8616 return SPLIT_TYPE_CHARS;
8618 else if (rb_enc_asciicompat(enc)) {
8619 if (len == 1 && ptr[0] ==
' ') {
8620 return SPLIT_TYPE_AWK;
8625 if (rb_enc_ascget(ptr, ptr + len, &l, enc) ==
' ' && len == l) {
8626 return SPLIT_TYPE_AWK;
8629 return default_type;
8642rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8647 split_type_t split_type;
8648 long beg, end, i = 0, empty_count = -1;
8653 if (rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8655 if (lim <= 0) limit =
Qnil;
8656 else if (lim == 1) {
8657 if (RSTRING_LEN(str) == 0)
8668 if (
NIL_P(limit) && !lim) empty_count = 0;
8670 enc = STR_ENC_GET(str);
8671 split_type = SPLIT_TYPE_REGEXP;
8673 spat = get_pat_quoted(spat, 0);
8675 else if (
NIL_P(spat = rb_fs)) {
8676 split_type = SPLIT_TYPE_AWK;
8678 else if (!(spat = rb_fs_check(spat))) {
8684 if (split_type != SPLIT_TYPE_AWK) {
8688 tmp = RREGEXP_SRC(spat);
8689 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8690 if (split_type == SPLIT_TYPE_AWK) {
8692 split_type = SPLIT_TYPE_STRING;
8697 mustnot_broken(spat);
8698 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* Calls split_string() with the given beg/len plus the in-scope `result`,
 * `str` and `empty_count`, and stores the returned count back into
 * `empty_count`. */
8706#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8708 if (result) result = rb_ary_new();
8710 char *ptr = RSTRING_PTR(str);
8711 char *eptr = RSTRING_END(str);
8712 if (split_type == SPLIT_TYPE_AWK) {
8718 if (is_ascii_string(str)) {
8719 while (ptr < eptr) {
8720 c = (
unsigned char)*ptr++;
8722 if (ascii_isspace(c)) {
8728 if (!
NIL_P(limit) && lim <= i)
break;
8731 else if (ascii_isspace(c)) {
8732 SPLIT_STR(beg, end-beg);
8735 if (!
NIL_P(limit)) ++i;
8743 while (ptr < eptr) {
8746 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8755 if (!
NIL_P(limit) && lim <= i)
break;
8759 SPLIT_STR(beg, end-beg);
8762 if (!
NIL_P(limit)) ++i;
8770 else if (split_type == SPLIT_TYPE_STRING) {
8771 char *str_start = ptr;
8772 char *substr_start = ptr;
8773 char *sptr = RSTRING_PTR(spat);
8774 long slen = RSTRING_LEN(spat);
8776 mustnot_broken(str);
8777 enc = rb_enc_check(str, spat);
8778 while (ptr < eptr &&
8779 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8781 char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
8782 if (t != ptr + end) {
8786 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8789 if (!
NIL_P(limit) && lim <= ++i)
break;
8791 beg = ptr - str_start;
8793 else if (split_type == SPLIT_TYPE_CHARS) {
8794 char *str_start = ptr;
8797 mustnot_broken(str);
8798 enc = rb_enc_get(str);
8799 while (ptr < eptr &&
8800 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8801 SPLIT_STR(ptr - str_start, n);
8803 if (!
NIL_P(limit) && lim <= ++i)
break;
8805 beg = ptr - str_start;
8808 long len = RSTRING_LEN(str);
8816 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
8821 if (start == end && BEG(0) == END(0)) {
8826 else if (last_null == 1) {
8827 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8834 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8840 SPLIT_STR(beg, end-beg);
8841 beg = start = END(0);
8845 for (idx=1; idx < regs->num_regs; idx++) {
8846 if (BEG(idx) == -1)
continue;
8847 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8849 if (!
NIL_P(limit) && lim <= ++i)
break;
8851 if (match) rb_match_unbusy(match);
8853 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
8854 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
8857 return result ? result : str;
8867 return rb_str_split_m(1, &sep, str);
/* Evaluates to a new array with capacity `size` when no block is given, and
 * to 0 when a block is given.  The first parameter `m` is not used. */
8870#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8876 rb_ary_push(ary, e);
/* Shorthand that forwards both arguments to enumerator_element(). */
8885#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8888chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8890 const char *prev = rb_enc_prev_char(p, e, e, enc);
8891 if (rb_enc_is_newline(prev, e, enc)) {
8893 prev = rb_enc_prev_char(p, e, e, enc);
8894 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
8906 RSTRING_LEN(rs) != 1 ||
8907 RSTRING_PTR(rs)[0] !=
'\n')) {
8913#define rb_rs get_rs()
8920 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8921 long pos, len, rslen;
8924 if (rb_scan_args(argc, argv,
"01:", &rs, &opts) == 0)
8927 static ID keywords[1];
8929 keywords[0] = rb_intern_const(
"chomp");
8932 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
8936 if (!ENUM_ELEM(ary, str)) {
8944 if (!RSTRING_LEN(str))
goto end;
8946 ptr = subptr = RSTRING_PTR(str);
8947 pend = RSTRING_END(str);
8948 len = RSTRING_LEN(str);
8950 rslen = RSTRING_LEN(rs);
8953 enc = rb_enc_get(str);
8955 enc = rb_enc_check(str, rs);
8960 const char *eol = NULL;
8962 while (subend < pend) {
8963 long chomp_rslen = 0;
8965 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
8967 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
8968 if (rb_enc_is_newline(subend + n, pend, enc)) {
8969 if (eol == subend)
break;
8973 chomp_rslen = -rslen;
8977 if (!subptr) subptr = subend;
8981 }
while (subend < pend);
8983 if (rslen == 0) chomp_rslen = 0;
8985 subend - subptr + (chomp ? chomp_rslen : rslen));
8986 if (ENUM_ELEM(ary, line)) {
8987 str_mod_check(str, ptr, len);
8989 subptr = eol = NULL;
8994 rsptr = RSTRING_PTR(rs);
8995 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
8996 rb_enc_is_newline(rsptr, rsptr + RSTRING_LEN(rs), enc)) {
9004 rsptr = RSTRING_PTR(rs);
9005 rslen = RSTRING_LEN(rs);
9008 while (subptr < pend) {
9009 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9012 adjusted = rb_enc_right_char_head(subptr, hit, pend, enc);
9013 if (hit != adjusted) {
9017 subend = hit += rslen;
9020 subend = chomp_newline(subptr, subend, enc);
9027 if (ENUM_ELEM(ary, line)) {
9028 str_mod_check(str, ptr, len);
9033 if (subptr != pend) {
9036 pend = chomp_newline(subptr, pend, enc);
9038 else if (pend - subptr >= rslen &&
9039 memcmp(pend - rslen, rsptr, rslen) == 0) {
9044 ENUM_ELEM(ary, line);
9065rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9068 return rb_str_enumerate_lines(argc, argv, str, 0);
9081rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9083 VALUE ary = WANTARRAY(
"lines", 0);
9084 return rb_str_enumerate_lines(argc, argv, str, ary);
9098 for (i=0; i<RSTRING_LEN(str); i++) {
9099 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9117rb_str_each_byte(
VALUE str)
9120 return rb_str_enumerate_bytes(str, 0);
9132rb_str_bytes(
VALUE str)
9134 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9135 return rb_str_enumerate_bytes(str, ary);
9153 ptr = RSTRING_PTR(str);
9154 len = RSTRING_LEN(str);
9155 enc = rb_enc_get(str);
9158 for (i = 0; i < len; i += n) {
9159 n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
9164 for (i = 0; i < len; i += n) {
9165 n = rb_enc_mbclen(ptr + i, ptr + len, enc);
9186rb_str_each_char(
VALUE str)
9189 return rb_str_enumerate_chars(str, 0);
9201rb_str_chars(
VALUE str)
9204 return rb_str_enumerate_chars(str, ary);
9208rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9213 const char *ptr, *end;
9216 if (single_byte_optimizable(str))
9217 return rb_str_enumerate_bytes(str, ary);
9220 ptr = RSTRING_PTR(str);
9221 end = RSTRING_END(str);
9222 enc = STR_ENC_GET(str);
9225 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9246rb_str_each_codepoint(
VALUE str)
9249 return rb_str_enumerate_codepoints(str, 0);
9261rb_str_codepoints(
VALUE str)
9264 return rb_str_enumerate_codepoints(str, ary);
9270 int encidx = rb_enc_to_index(enc);
9271 regex_t *reg_grapheme_cluster = NULL;
9272 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9275 if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
9276 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
9278 if (!reg_grapheme_cluster) {
9279 const OnigUChar source_ascii[] =
"\\X";
9281 const OnigUChar *source = source_ascii;
9282 size_t source_len =
sizeof(source_ascii) - 1;
9284#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9285#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9286#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9287#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9288#define CASE_UTF(e) \
9289 case ENCINDEX_UTF_##e: { \
9290 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9291 source = source_UTF_##e; \
9292 source_len = sizeof(source_UTF_##e); \
9295 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9302 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9303 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9305 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9306 onig_error_code_to_str(message, r, &einfo);
9307 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9309 if (encidx == rb_utf8_encindex()) {
9310 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
9313 return reg_grapheme_cluster;
9319 size_t grapheme_cluster_count = 0;
9320 regex_t *reg_grapheme_cluster = NULL;
9322 const char *ptr, *end;
9324 if (!rb_enc_unicode_p(enc)) {
9328 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9329 ptr = RSTRING_PTR(str);
9330 end = RSTRING_END(str);
9333 OnigPosition len = onig_match(reg_grapheme_cluster,
9334 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9335 (
const OnigUChar *)ptr, NULL, 0);
9336 if (len <= 0)
break;
9337 grapheme_cluster_count++;
9341 return SIZET2NUM(grapheme_cluster_count);
9345rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9348 regex_t *reg_grapheme_cluster = NULL;
9350 const char *ptr0, *ptr, *end;
9352 if (!rb_enc_unicode_p(enc)) {
9353 return rb_str_enumerate_chars(str, ary);
9357 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9358 ptr0 = ptr = RSTRING_PTR(str);
9359 end = RSTRING_END(str);
9362 OnigPosition len = onig_match(reg_grapheme_cluster,
9363 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9364 (
const OnigUChar *)ptr, NULL, 0);
9365 if (len <= 0)
break;
9386rb_str_each_grapheme_cluster(
VALUE str)
9389 return rb_str_enumerate_grapheme_clusters(str, 0);
9401rb_str_grapheme_clusters(
VALUE str)
9404 return rb_str_enumerate_grapheme_clusters(str, ary);
9408chopped_length(
VALUE str)
9411 const char *p, *p2, *beg, *end;
9413 beg = RSTRING_PTR(str);
9414 end = beg + RSTRING_LEN(str);
9415 if (beg >= end)
return 0;
9416 p = rb_enc_prev_char(beg, end, end, enc);
9418 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9419 p2 = rb_enc_prev_char(beg, p, end, enc);
9420 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9436rb_str_chop_bang(
VALUE str)
9438 str_modify_keep_cr(str);
9439 if (RSTRING_LEN(str) > 0) {
9441 len = chopped_length(str);
9442 STR_SET_LEN(str, len);
9443 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
9462rb_str_chop(
VALUE str)
9468smart_chomp(
VALUE str,
const char *e,
const char *p)
9471 if (rb_enc_mbminlen(enc) > 1) {
9472 const char *pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
9473 if (rb_enc_is_newline(pp, e, enc)) {
9476 pp = e - rb_enc_mbminlen(enc);
9478 pp = rb_enc_left_char_head(p, pp, e, enc);
9479 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9487 if (--e > p && *(e-1) ==
'\r') {
9504 char *pp, *e, *rsptr;
9506 char *
const p = RSTRING_PTR(str);
9507 long len = RSTRING_LEN(str);
9509 if (len == 0)
return 0;
9512 return smart_chomp(str, e, p);
9515 enc = rb_enc_get(str);
9518 if (rb_enc_mbminlen(enc) > 1) {
9520 pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
9521 if (!rb_enc_is_newline(pp, e, enc))
break;
9523 pp -= rb_enc_mbminlen(enc);
9525 pp = rb_enc_left_char_head(p, pp, e, enc);
9526 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9533 while (e > p && *(e-1) ==
'\n') {
9535 if (e > p && *(e-1) ==
'\r')
9541 if (rslen > len)
return len;
9543 enc = rb_enc_get(rs);
9544 newline = rsptr[rslen-1];
9545 if (rslen == rb_enc_mbminlen(enc)) {
9547 if (newline ==
'\n')
9548 return smart_chomp(str, e, p);
9551 if (rb_enc_is_newline(rsptr, rsptr+rslen, enc))
9552 return smart_chomp(str, e, p);
9556 enc = rb_enc_check(str, rs);
9557 if (is_broken_string(rs)) {
9561 if (p[len-1] == newline &&
9563 memcmp(rsptr, pp, rslen) == 0)) {
9564 if (rb_enc_left_char_head(p, pp, e, enc) == pp)
9577chomp_rs(
int argc,
const VALUE *argv)
9579 rb_check_arity(argc, 0, 1);
9593 long olen = RSTRING_LEN(str);
9594 long len = chompped_length(str, rs);
9595 if (len >= olen)
return Qnil;
9596 str_modify_keep_cr(str);
9597 STR_SET_LEN(str, len);
9598 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
9615rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9618 str_modifiable(str);
9619 if (RSTRING_LEN(str) == 0)
return Qnil;
9620 rs = chomp_rs(argc, argv);
9622 return rb_str_chomp_string(str, rs);
9635rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9637 VALUE rs = chomp_rs(argc, argv);
9645 const char *
const start = s;
9647 if (!s || s >= e)
return 0;
9650 if (single_byte_optimizable(str)) {
9651 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9656 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9676rb_str_lstrip_bang(
VALUE str)
9682 str_modify_keep_cr(str);
9683 enc = STR_ENC_GET(str);
9685 loffset = lstrip_offset(str, start, start+olen, enc);
9687 long len = olen-loffset;
9688 s = start + loffset;
9689 memmove(start, s, len);
9690 STR_SET_LEN(str, len);
9691 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9714rb_str_lstrip(
VALUE str)
9719 loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
9720 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9729 rb_str_check_dummy_enc(enc);
9733 if (!s || s >= e)
return 0;
9737 if (single_byte_optimizable(str)) {
9739 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9744 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
9745 unsigned int c = rb_enc_codepoint(tp, e, enc);
9764rb_str_rstrip_bang(
VALUE str)
9770 str_modify_keep_cr(str);
9771 enc = STR_ENC_GET(str);
9773 roffset = rstrip_offset(str, start, start+olen, enc);
9775 long len = olen - roffset;
9777 STR_SET_LEN(str, len);
9778 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9801rb_str_rstrip(
VALUE str)
9807 enc = STR_ENC_GET(str);
9809 roffset = rstrip_offset(str, start, start+olen, enc);
9811 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9827rb_str_strip_bang(
VALUE str)
9830 long olen, loffset, roffset;
9833 str_modify_keep_cr(str);
9834 enc = STR_ENC_GET(str);
9836 loffset = lstrip_offset(str, start, start+olen, enc);
9837 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9839 if (loffset > 0 || roffset > 0) {
9840 long len = olen-roffset;
9843 memmove(start, start + loffset, len);
9845 STR_SET_LEN(str, len);
9846 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9869rb_str_strip(
VALUE str)
9872 long olen, loffset, roffset;
9876 loffset = lstrip_offset(str, start, start+olen, enc);
9877 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9879 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9884scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9886 VALUE result, match;
9889 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9893 end = pos + RSTRING_LEN(pat);
9906 if (RSTRING_LEN(str) > end)
9907 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
9908 RSTRING_END(str), enc);
9915 if (!regs || regs->num_regs == 1) {
9920 for (i=1; i < regs->num_regs; i++) {
9925 rb_ary_push(result, s);
9978 long last = -1, prev = 0;
9979 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
9981 pat = get_pat_quoted(pat, 1);
9982 mustnot_broken(str);
9984 VALUE ary = rb_ary_new();
9986 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
9989 rb_ary_push(ary, result);
9991 if (last >= 0) rb_pat_search(pat, str, last, 1);
9996 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10000 str_mod_check(str, p, len);
10002 if (last >= 0) rb_pat_search(pat, str, last, 1);
10026rb_str_hex(
VALUE str)
10028 return rb_str_to_inum(str, 16, FALSE);
10053rb_str_oct(
VALUE str)
10055 return rb_str_to_inum(str, -8, FALSE);
10058#ifndef HAVE_CRYPT_R
10064} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10067crypt_mutex_initialize(
void)
10138# define CRYPT_END() ALLOCV_END(databuf)
10140 extern char *crypt(
const char *,
const char *);
10141# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10144 const char *s, *saltp;
10147 char salt_8bit_clean[3];
10151 mustnot_wchar(str);
10152 mustnot_wchar(salt);
10154 saltp = RSTRING_PTR(salt);
10155 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10160 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10161 salt_8bit_clean[0] = saltp[0] & 0x7f;
10162 salt_8bit_clean[1] = saltp[1] & 0x7f;
10163 salt_8bit_clean[2] =
'\0';
10164 saltp = salt_8bit_clean;
10169# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10170 data->initialized = 0;
10172 res = crypt_r(s, saltp, data);
10174 crypt_mutex_initialize();
10176 res = crypt(s, saltp);
10202 c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s));
10217 char *ptr, *p, *pend;
10220 unsigned long sum0 = 0;
10222 if (rb_check_arity(argc, 0, 1) && (bits =
NUM2INT(argv[0])) < 0) {
10225 ptr = p = RSTRING_PTR(str);
10226 len = RSTRING_LEN(str);
10231 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10232 str_mod_check(str, ptr, len);
10235 sum0 += (
unsigned char)*p;
10241 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10246 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10247 sum0 &= (((
unsigned long)1)<<bits)-1;
10255 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10259 mod = rb_funcall(mod,
'-', 1,
INT2FIX(1));
10260 sum = rb_funcall(sum,
'&', 1, mod);
10267rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10271 long width, len, flen = 1, fclen = 1;
10274 const char *f =
" ";
10275 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10277 int singlebyte = 1, cr;
10280 rb_scan_args(argc, argv,
"11", &w, &pad);
10281 enc = STR_ENC_GET(str);
10282 termlen = rb_enc_mbminlen(enc);
10286 enc = rb_enc_check(str, pad);
10287 f = RSTRING_PTR(pad);
10288 flen = RSTRING_LEN(pad);
10289 fclen = str_strlen(pad, enc);
10290 singlebyte = single_byte_optimizable(pad);
10291 if (flen == 0 || fclen == 0) {
10295 len = str_strlen(str, enc);
10296 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10298 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10302 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10303 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10305 size = RSTRING_LEN(str);
10306 if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10307 (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10308 (len += llen2 + rlen2) >= LONG_MAX - size) {
10312 res = str_new0(
rb_cString, 0, len, termlen);
10313 p = RSTRING_PTR(res);
10315 memset(p, *f, llen);
10319 while (llen >= fclen) {
10325 memcpy(p, f, llen2);
10329 memcpy(p, RSTRING_PTR(str), size);
10332 memset(p, *f, rlen);
10336 while (rlen >= fclen) {
10342 memcpy(p, f, rlen2);
10346 TERM_FILL(p, termlen);
10347 STR_SET_LEN(res, p-RSTRING_PTR(res));
10348 rb_enc_associate(res, enc);
10370rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10372 return rb_str_justify(argc, argv, str,
'l');
10386rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10388 return rb_str_justify(argc, argv, str,
'r');
10403rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10405 return rb_str_justify(argc, argv, str,
'c');
10421 sep = get_pat_quoted(sep, 0);
10433 pos = rb_str_index(str, sep, 0);
10434 if (pos < 0)
goto failed;
10439 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10442 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10456 long pos = RSTRING_LEN(str);
10458 sep = get_pat_quoted(sep, 0);
10471 pos = rb_str_rindex(str, sep, pos);
10481 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10483 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10495rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10499 for (i=0; i<argc; i++) {
10500 VALUE tmp = argv[i];
10502 if (rb_reg_start_with_p(tmp, str))
10507 rb_enc_check(str, tmp);
10508 if (RSTRING_LEN(str) < RSTRING_LEN(tmp))
continue;
10509 if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
10525rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10531 for (i=0; i<argc; i++) {
10532 VALUE tmp = argv[i];
10535 enc = rb_enc_check(str, tmp);
10536 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10537 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10538 p = RSTRING_PTR(str);
10541 if (rb_enc_left_char_head(p, s, e, enc) != s)
10543 if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
10559deleted_prefix_length(
VALUE str,
VALUE prefix)
10561 char *strptr, *prefixptr;
10562 long olen, prefixlen;
10565 if (is_broken_string(prefix))
return 0;
10566 rb_enc_check(str, prefix);
10569 prefixlen = RSTRING_LEN(prefix);
10570 if (prefixlen <= 0)
return 0;
10571 olen = RSTRING_LEN(str);
10572 if (olen < prefixlen)
return 0;
10573 strptr = RSTRING_PTR(str);
10574 prefixptr = RSTRING_PTR(prefix);
10575 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10590rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10593 str_modify_keep_cr(str);
10595 prefixlen = deleted_prefix_length(str, prefix);
10596 if (prefixlen <= 0)
return Qnil;
10610rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10614 prefixlen = deleted_prefix_length(str, prefix);
10615 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10617 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
10630deleted_suffix_length(
VALUE str,
VALUE suffix)
10632 char *strptr, *suffixptr, *s;
10633 long olen, suffixlen;
10637 if (is_broken_string(suffix))
return 0;
10638 enc = rb_enc_check(str, suffix);
10641 suffixlen = RSTRING_LEN(suffix);
10642 if (suffixlen <= 0)
return 0;
10643 olen = RSTRING_LEN(str);
10644 if (olen < suffixlen)
return 0;
10645 strptr = RSTRING_PTR(str);
10646 suffixptr = RSTRING_PTR(suffix);
10647 s = strptr + olen - suffixlen;
10648 if (memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10649 if (rb_enc_left_char_head(strptr, s, strptr + olen, enc) != s)
return 0;
10664rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10666 long olen, suffixlen, len;
10667 str_modifiable(str);
10669 suffixlen = deleted_suffix_length(str, suffix);
10670 if (suffixlen <= 0)
return Qnil;
10672 olen = RSTRING_LEN(str);
10673 str_modify_keep_cr(str);
10674 len = olen - suffixlen;
10675 STR_SET_LEN(str, len);
10676 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
10692rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10696 suffixlen = deleted_suffix_length(str, suffix);
10697 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10699 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
10714 val = rb_fs_check(val);
10717 "value of %"PRIsVALUE
" must be String or Regexp",
10721 rb_warn_deprecated(
"`$;'", NULL);
10738 str_modifiable(str);
10739 rb_enc_associate(str, rb_to_encoding(enc));
10756 if (
FL_TEST(str, STR_NOEMBED)) {
10760 str2 = str_alloc_embed(
rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
10762 str_replace_shared_without_enc(str2, str);
10764 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
10797rb_str_valid_encoding_p(
VALUE str)
10817rb_str_is_ascii_only_p(
VALUE str)
10827 static const char ellipsis[] =
"...";
10828 const long ellipsislen =
sizeof(ellipsis) - 1;
10830 const long blen = RSTRING_LEN(str);
10831 const char *
const p = RSTRING_PTR(str), *e = p + blen;
10832 VALUE estr, ret = 0;
10835 if (len * rb_enc_mbminlen(enc) >= blen ||
10836 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
10839 else if (len <= ellipsislen ||
10840 !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
10841 if (rb_enc_asciicompat(enc)) {
10843 rb_enc_associate(ret, enc);
10850 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
10855 rb_enc_from_encoding(enc), 0,
Qnil);
10874 rb_enc_name(enc), rb_enc_name(e));
10893 if (enc == STR_ENC_GET(str)) {
10898 return enc_str_scrub(enc, str, repl, cr);
10906 const char *rep, *p, *e, *p1, *sp;
10919 if (!
NIL_P(repl)) {
10920 repl = str_compat_and_valid(repl, enc);
10923 if (rb_enc_dummy_p(enc)) {
10926 encidx = rb_enc_to_index(enc);
10928#define DEFAULT_REPLACE_CHAR(str) do { \
10929 static const char replace[sizeof(str)-1] = str; \
10930 rep = replace; replen = (int)sizeof(replace); \
10933 slen = RSTRING_LEN(str);
10934 p = RSTRING_PTR(str);
10935 e = RSTRING_END(str);
10939 if (rb_enc_asciicompat(enc)) {
10945 else if (!
NIL_P(repl)) {
10946 rep = RSTRING_PTR(repl);
10947 replen = RSTRING_LEN(repl);
10950 else if (encidx == rb_utf8_encindex()) {
10951 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
10955 DEFAULT_REPLACE_CHAR(
"?");
10960 p = search_nonascii(p, e);
10965 int ret = rb_enc_precise_mbclen(p, e, enc);
10978 long clen = rb_enc_mbmaxlen(enc);
10984 if (e - p < clen) clen = e - p;
10991 for (; clen > 1; clen--) {
10992 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11003 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11004 str_mod_check(str, sp, slen);
11005 repl = str_compat_and_valid(repl, enc);
11012 p = search_nonascii(p, e);
11038 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11039 str_mod_check(str, sp, slen);
11040 repl = str_compat_and_valid(repl, enc);
11049 long mbminlen = rb_enc_mbminlen(enc);
11053 else if (!
NIL_P(repl)) {
11054 rep = RSTRING_PTR(repl);
11055 replen = RSTRING_LEN(repl);
11057 else if (encidx == ENCINDEX_UTF_16BE) {
11058 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11060 else if (encidx == ENCINDEX_UTF_16LE) {
11061 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11063 else if (encidx == ENCINDEX_UTF_32BE) {
11064 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11066 else if (encidx == ENCINDEX_UTF_32LE) {
11067 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11070 DEFAULT_REPLACE_CHAR(
"?");
11074 int ret = rb_enc_precise_mbclen(p, e, enc);
11083 long clen = rb_enc_mbmaxlen(enc);
11087 if (e - p < clen) clen = e - p;
11088 if (clen <= mbminlen * 2) {
11093 for (; clen > mbminlen; clen-=mbminlen) {
11094 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11104 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11105 str_mod_check(str, sp, slen);
11106 repl = str_compat_and_valid(repl, enc);
11131 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11132 str_mod_check(str, sp, slen);
11133 repl = str_compat_and_valid(repl, enc);
11154 VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) :
Qnil;
11169str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11171 VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) :
Qnil;
11177static ID id_normalize;
11178static ID id_normalized_p;
11179static VALUE mUnicodeNormalize;
11182unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11184 static int UnicodeNormalizeRequired = 0;
11187 if (!UnicodeNormalizeRequired) {
11188 rb_require(
"unicode_normalize/normalize.rb");
11189 UnicodeNormalizeRequired = 1;
11192 if (rb_check_arity(argc, 0, 1)) argv2[1] = argv[0];
11193 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11230rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11232 return unicode_normalize_common(argc, argv, str, id_normalize);
11246rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11248 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11275rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11277 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11412#define sym_equal rb_obj_equal
11415sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11419 int c = rb_enc_precise_mbclen(s, send, enc);
11423 c = rb_enc_mbc_to_codepoint(s, send, enc);
11424 if (!rb_enc_isprint(c, enc))
return FALSE;
11431rb_str_symname_p(
VALUE sym)
11436 rb_encoding *resenc = rb_default_internal_encoding();
11438 if (resenc == NULL) resenc = rb_default_external_encoding();
11439 enc = STR_ENC_GET(sym);
11440 ptr = RSTRING_PTR(sym);
11441 len = RSTRING_LEN(sym);
11442 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (
long)strlen(ptr) ||
11450rb_str_quote_unprintable(
VALUE str)
11458 resenc = rb_default_internal_encoding();
11459 if (resenc == NULL) resenc = rb_default_external_encoding();
11460 enc = STR_ENC_GET(str);
11461 ptr = RSTRING_PTR(str);
11462 len = RSTRING_LEN(str);
11463 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11464 !sym_printable(ptr, ptr + len, enc)) {
11465 return rb_str_escape(str);
11470MJIT_FUNC_EXPORTED
VALUE
11471rb_id_quote_unprintable(
ID id)
11473 VALUE str = rb_id2str(
id);
11474 if (!rb_str_symname_p(str)) {
11475 return rb_str_escape(str);
11493sym_inspect(
VALUE sym)
11500 if (!rb_str_symname_p(str)) {
11502 len = RSTRING_LEN(str);
11504 dest = RSTRING_PTR(str);
11505 memmove(dest + 1, dest, len);
11510 str = rb_enc_str_new(0, len + 1, enc);
11511 dest = RSTRING_PTR(str);
11512 memcpy(dest + 1, ptr, len);
11537MJIT_FUNC_EXPORTED
VALUE
11538rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11642 return rb_str_match(
rb_sym2str(sym), other);
11657sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11659 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11672sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11674 return rb_str_match_m_p(argc, argv, sym);
11692 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11706sym_length(
VALUE sym)
11720sym_empty(
VALUE sym)
11754sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11770sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11786sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11800sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11802 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11815sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
11817 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11829sym_encoding(
VALUE sym)
11835string_for_symbol(
VALUE name)
11854 name = string_for_symbol(name);
11864 name = string_for_symbol(name);
11888 return rb_fstring(str);
11895 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
11907 if (UNLIKELY(rb_enc_autoload_p(enc))) {
11908 rb_enc_autoload(enc);
11912 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
11925 assert(rb_vm_fstring_table());
11926 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
11983 sym_ascii =
ID2SYM(rb_intern_const(
"ascii"));
11984 sym_turkic =
ID2SYM(rb_intern_const(
"turkic"));
11985 sym_lithuanian =
ID2SYM(rb_intern_const(
"lithuanian"));
11986 sym_fold =
ID2SYM(rb_intern_const(
"fold"));
12079 id_normalize = rb_intern_const(
"normalize");
12080 id_normalized_p = rb_intern_const(
"normalized?");
12089 rb_gc_register_address(&rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy; the check is performed only when RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that is not controlled by RUBY_DEBUG.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports always regardless of runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_cObject
Documented in include/ruby/internal/globals.h.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns an "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns an "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
Return value of rb_econv_convert().
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because the destination buffer is full.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. a newline ("\n") string.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby string instead of a C string.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
@ RSTRING_EMBED_LEN_MAX
Max possible number of characters that can be embedded.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
char ary[RSTRING_EMBED_LEN_MAX+1]
When a string is short enough, it uses this area to store the contents themselves.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t VALUE
Type that represents a Ruby object.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.