Class: String

Inherits:
Object show all
Includes:
Comparable
Defined in:
string.c

Direct Known Subclasses

Warning::buffer

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Comparable

#<, #<=, #>, #>=, #between?, #clamp

Constructor Details

#new(string = '', **opts) ⇒ Object

:include: doc/string/new.rdoc



1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
# File 'string.c', line 1828

static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
    static ID keyword_ids[2];
    VALUE orig, opt, venc, vcapa;
    VALUE kwargs[2];
    rb_encoding *enc = 0;
    int n;

    if (!keyword_ids[0]) {
        keyword_ids[0] = rb_id_encoding();
        CONST_ID(keyword_ids[1], "capacity");
    }

    n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (!NIL_P(opt)) {
        rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
        venc = kwargs[0];
        vcapa = kwargs[1];
        if (!UNDEF_P(venc) && !NIL_P(venc)) {
            enc = rb_to_encoding(venc);
        }
        if (!UNDEF_P(vcapa) && !NIL_P(vcapa)) {
            long capa = NUM2LONG(vcapa);
            long len = 0;
            int termlen = enc ? rb_enc_mbminlen(enc) : 1;

            if (capa < STR_BUF_MIN_SIZE) {
                capa = STR_BUF_MIN_SIZE;
            }
            if (n == 1) {
                StringValue(orig);
                len = RSTRING_LEN(orig);
                if (capa < len) {
                    capa = len;
                }
                if (orig == str) n = 0;
            }
            str_modifiable(str);
            if (STR_EMBED_P(str) || FL_TEST(str, STR_SHARED|STR_NOFREE)) {
                /* make noembed always */
                const size_t size = (size_t)capa + termlen;
                const char *const old_ptr = RSTRING_PTR(str);
                const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
                char *new_ptr = ALLOC_N(char, size);
                if (STR_EMBED_P(str)) RUBY_ASSERT(osize <= str_embed_capa(str));
                memcpy(new_ptr, old_ptr, osize < size ? osize : size);
                FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
                RSTRING(str)->as.heap.ptr = new_ptr;
            }
            else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
                SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
                        (size_t)capa + termlen, STR_HEAP_SIZE(str));
            }
            STR_SET_LEN(str, len);
            TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
            if (n == 1) {
                memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
                rb_enc_cr_str_exact_copy(str, orig);
            }
            FL_SET(str, STR_NOEMBED);
            RSTRING(str)->as.heap.aux.capa = capa;
        }
        else if (n == 1) {
            rb_str_replace(str, orig);
        }
        if (enc) {
            rb_enc_associate(str, enc);
            ENC_CODERANGE_CLEAR(str);
        }
    }
    else if (n == 1) {
        rb_str_replace(str, orig);
    }
    return str;
}

Class Method Details

.new(*args) ⇒ Object

:nodoc:



1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
# File 'string.c', line 1906

static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
    if (klass != rb_cString) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    static ID keyword_ids[2];
    VALUE orig, opt, encoding = Qnil, capacity = Qnil;
    VALUE kwargs[2];
    rb_encoding *enc = NULL;

    int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (NIL_P(opt)) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    keyword_ids[0] = rb_id_encoding();
    CONST_ID(keyword_ids[1], "capacity");
    rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
    encoding = kwargs[0];
    capacity = kwargs[1];

    int termlen = 1;

    if (n == 1) {
        orig = StringValue(orig);
    }
    else {
        orig = Qnil;
    }

    if (UNDEF_P(encoding)) {
        if (!NIL_P(orig)) {
            encoding = rb_obj_encoding(orig);
        }
    }

    if (!UNDEF_P(encoding)) {
        enc = rb_to_encoding(encoding);
        termlen = rb_enc_mbminlen(enc);
    }

    // If capacity is nil, we're basically just duping `orig`.
    if (UNDEF_P(capacity)) {
        if (NIL_P(orig)) {
            VALUE empty_str = str_new(klass, "", 0);
            if (enc) {
                rb_enc_associate(empty_str, enc);
            }
            return empty_str;
        }
        VALUE copy = str_duplicate(klass, orig);
        rb_enc_associate(copy, enc);
        ENC_CODERANGE_CLEAR(copy);
        return copy;
    }

    long capa = 0;
    capa = NUM2LONG(capacity);
    if (capa < 0) {
        capa = 0;
    }

    if (!NIL_P(orig)) {
        long orig_capa = rb_str_capacity(orig);
        if (orig_capa > capa) {
            capa = orig_capa;
        }
    }

    VALUE str = str_new0(klass, NULL, capa, termlen);
    STR_SET_LEN(str, 0);
    TERM_FILL(RSTRING_PTR(str), termlen);

    if (enc) {
        rb_enc_associate(str, enc);
    }

    if (!NIL_P(orig)) {
        rb_str_buf_append(str, orig);
    }

    return str;
}

.try_convert(object) ⇒ Object?

If object is a String object, returns object.

Otherwise if object responds to :to_str, calls object.to_str and returns the result.

Returns nil if object does not respond to :to_str.

Raises an exception unless object.to_str returns a String object.

Returns:



2704
2705
2706
2707
2708
# File 'string.c', line 2704

static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    return rb_check_string_type(str);
}

Instance Method Details

#%(object) ⇒ Object

Returns the result of formatting object into the format specification self (see Kernel#sprintf for formatting details):

"%05d" % 123 # => "00123"

If self contains multiple substitutions, object must be an Array or Hash containing the values to be substituted:

"%-5s: %016x" % [ "ID", self.object_id ] # => "ID   : 00002b054ec93168"
"foo = %{foo}" % {foo: 'bar'} # => "foo = bar"
"foo = %{foo}, baz = %{baz}" % {foo: 'bar', baz: 'bat'} # => "foo = bar, baz = bat"


2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
# File 'string.c', line 2403

static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE tmp = rb_check_array_type(arg);

    if (!NIL_P(tmp)) {
        return rb_str_format(RARRAY_LENINT(tmp), RARRAY_CONST_PTR(tmp), str);
    }
    return rb_str_format(1, &arg, str);
}

#*(integer) ⇒ Object

Returns a new String containing integer copies of self:

"Ho! " * 3 # => "Ho! Ho! Ho! "
"Ho! " * 0 # => ""


2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
# File 'string.c', line 2327

VALUE
rb_str_times(VALUE str, VALUE times)
{
    VALUE str2;
    long n, len;
    char *ptr2;
    int termlen;

    if (times == INT2FIX(1)) {
        return str_duplicate(rb_cString, str);
    }
    if (times == INT2FIX(0)) {
        str2 = str_alloc_embed(rb_cString, 0);
        rb_enc_copy(str2, str);
        return str2;
    }
    len = NUM2LONG(times);
    if (len < 0) {
        rb_raise(rb_eArgError, "negative argument");
    }
    if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
        if (STR_EMBEDDABLE_P(len, 1)) {
            str2 = str_alloc_embed(rb_cString, len + 1);
            memset(RSTRING_PTR(str2), 0, len + 1);
        }
        else {
            str2 = str_alloc_heap(rb_cString);
            RSTRING(str2)->as.heap.aux.capa = len;
            RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
        }
        STR_SET_LEN(str2, len);
        rb_enc_copy(str2, str);
        return str2;
    }
    if (len && LONG_MAX/len <  RSTRING_LEN(str)) {
        rb_raise(rb_eArgError, "argument too big");
    }

    len *= RSTRING_LEN(str);
    termlen = TERM_LEN(str);
    str2 = str_new0(rb_cString, 0, len, termlen);
    ptr2 = RSTRING_PTR(str2);
    if (len) {
        n = RSTRING_LEN(str);
        memcpy(ptr2, RSTRING_PTR(str), n);
        while (n <= len/2) {
            memcpy(ptr2 + n, ptr2, n);
            n *= 2;
        }
        memcpy(ptr2 + n, ptr2, len-n);
    }
    STR_SET_LEN(str2, len);
    TERM_FILL(&ptr2[len], termlen);
    rb_enc_cr_str_copy_for_substr(str2, str);

    return str2;
}

#+(other_string) ⇒ Object

Returns a new String containing other_string concatenated to self:

"Hello from " + self.to_s # => "Hello from main"


2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
# File 'string.c', line 2255

VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
    VALUE str3;
    rb_encoding *enc;
    char *ptr1, *ptr2, *ptr3;
    long len1, len2;
    int termlen;

    StringValue(str2);
    enc = rb_enc_check_str(str1, str2);
    RSTRING_GETMEM(str1, ptr1, len1);
    RSTRING_GETMEM(str2, ptr2, len2);
    termlen = rb_enc_mbminlen(enc);
    if (len1 > LONG_MAX - len2) {
        rb_raise(rb_eArgError, "string size too big");
    }
    str3 = str_new0(rb_cString, 0, len1+len2, termlen);
    ptr3 = RSTRING_PTR(str3);
    memcpy(ptr3, ptr1, len1);
    memcpy(ptr3+len1, ptr2, len2);
    TERM_FILL(&ptr3[len1+len2], termlen);

    ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
                           ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
    RB_GC_GUARD(str1);
    RB_GC_GUARD(str2);
    return str3;
}

#+self

Returns self if self is not frozen.

Otherwise returns self.dup, which is not frozen.

Returns:

  • (self)


3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
# File 'string.c', line 3021

static VALUE
str_uplus(VALUE str)
{
    if (OBJ_FROZEN(str)) {
        return rb_str_dup(str);
    }
    else {
        return str;
    }
}

#-Object #dedupObject Also known as: dedup

Returns a frozen, possibly pre-existing copy of the string.

The returned String will be deduplicated as long as it does not have any instance variables set on it and is not a String subclass.

Note that -string variant is more convenient for defining constants:

FILENAME = -'config/database.yml'

while dedup is better suitable for using the method in chains of calculations:

@url_list.concat(urls.map(&:dedup))


3053
3054
3055
3056
3057
3058
3059
3060
# File 'string.c', line 3053

static VALUE
str_uminus(VALUE str)
{
    if (!BARE_STRING_P(str) && !rb_obj_frozen_p(str)) {
        str = rb_str_dup(str);
    }
    return rb_fstring(str);
}

#<<(object) ⇒ String

Concatenates object to self and returns self:

s = 'foo'
s << 'bar' # => "foobar"
s          # => "foobar"

If object is an Integer, the value is considered a codepoint and converted to a character before concatenation:

s = 'foo'
s << 33 # => "foo!"

Related: String#concat, which takes multiple arguments.

Returns:



3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
# File 'string.c', line 3505

VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
    unsigned int code;
    rb_encoding *enc = STR_ENC_GET(str1);
    int encidx;

    if (RB_INTEGER_TYPE_P(str2)) {
        if (rb_num_to_uint(str2, &code) == 0) {
        }
        else if (FIXNUM_P(str2)) {
            rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
        }
        else {
            rb_raise(rb_eRangeError, "bignum out of char range");
        }
    }
    else {
        return rb_str_append(str1, str2);
    }

    encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
    if (encidx >= 0) {
        char buf[1];
        buf[0] = (char)code;
        rb_str_cat(str1, buf, 1);
        if (encidx != rb_enc_to_index(enc)) {
            rb_enc_associate_index(str1, encidx);
            ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
        }
    }
    else {
        long pos = RSTRING_LEN(str1);
        int cr = ENC_CODERANGE(str1);
        int len;
        char *buf;

        switch (len = rb_enc_codelen(code, enc)) {
          case ONIGERR_INVALID_CODE_POINT_VALUE:
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
            break;
          case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
          case 0:
            rb_raise(rb_eRangeError, "%u out of char range", code);
            break;
        }
        buf = ALLOCA_N(char, len + 1);
        rb_enc_mbcput(code, buf, enc);
        if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
        }
        rb_str_resize(str1, pos+len);
        memcpy(RSTRING_PTR(str1) + pos, buf, len);
        if (cr == ENC_CODERANGE_7BIT && code > 127) {
            cr = ENC_CODERANGE_VALID;
        }
        else if (cr == ENC_CODERANGE_BROKEN) {
            cr = ENC_CODERANGE_UNKNOWN;
        }
        ENC_CODERANGE_SET(str1, cr);
    }
    return str1;
}

#<=>(other_string) ⇒ -1, ...

Compares self and other_string, returning:

  • -1 if other_string is larger.

  • 0 if the two are equal.

  • 1 if other_string is smaller.

  • nil if the two are incomparable.

Examples:

'foo' <=> 'foo' # => 0
'foo' <=> 'food' # => -1
'food' <=> 'foo' # => 1
'FOO' <=> 'foo' # => -1
'foo' <=> 'FOO' # => 1
'foo' <=> 1 # => nil

Returns:

  • (-1, 0, 1, nil)


3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
# File 'string.c', line 3797

static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    int result;
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return rb_invcmp(str1, str2);
    }
    result = rb_str_cmp(str1, s);
    return INT2FIX(result);
}

#==(object) ⇒ Boolean #===(object) ⇒ Boolean

Returns true if object has the same length and content; as self; false otherwise:

s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false

Returns false if the two strings’ encodings are not compatible:

"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false

If object is not an instance of String but responds to to_str, then the two strings are compared using object.==.

Overloads:

  • #==(object) ⇒ Boolean

    Returns:

    • (Boolean)
  • #===(object) ⇒ Boolean

    Returns:

    • (Boolean)


3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
# File 'string.c', line 3736

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
        if (!rb_respond_to(str2, idTo_str)) {
            return Qfalse;
        }
        return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#==(object) ⇒ Boolean #===(object) ⇒ Boolean

Returns true if object has the same length and content; as self; false otherwise:

s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false

Returns false if the two strings’ encodings are not compatible:

"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false

If object is not an instance of String but responds to to_str, then the two strings are compared using object.==.

Overloads:

  • #==(object) ⇒ Boolean

    Returns:

    • (Boolean)
  • #===(object) ⇒ Boolean

    Returns:

    • (Boolean)


3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
# File 'string.c', line 3736

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
        if (!rb_respond_to(str2, idTo_str)) {
            return Qfalse;
        }
        return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#=~(regexp) ⇒ Integer? #=~(object) ⇒ Integer?

Returns the Integer index of the first substring that matches the given regexp, or nil if no match found:

'foo' =~ /f/ # => 0
'foo' =~ /o/ # => 1
'foo' =~ /x/ # => nil

Note: also updates Regexp@Global+Variables.

If the given object is not a Regexp, returns the value returned by object =~ self.

Note that string =~ regexp is different from regexp =~ string (see Regexp#=~):

number= nil
"no. 9" =~ /(?<number>\d+)/
number # => nil (not assigned)
/(?<number>\d+)/ =~ "no. 9"
number #=> "9"

Overloads:



4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
# File 'string.c', line 4517

static VALUE
rb_str_match(VALUE x, VALUE y)
{
    switch (OBJ_BUILTIN_TYPE(y)) {
      case T_STRING:
        rb_raise(rb_eTypeError, "type mismatch: String given");

      case T_REGEXP:
        return rb_reg_match(y, x);

      default:
        return rb_funcall(y, idEqTilde, 1, x);
    }
}

#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil

Returns the substring of self specified by the arguments. See examples at String Slices.

Overloads:

  • #[](index) ⇒ nil

    Returns:

    • (nil)
  • #[](start, length) ⇒ nil

    Returns:

    • (nil)
  • #[](range) ⇒ nil

    Returns:

    • (nil)
  • #[](regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #[](substring) ⇒ nil

    Returns:

    • (nil)


5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
# File 'string.c', line 5273

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            return rb_str_subpat(str, argv[0], argv[1]);
        }
        else {
            long beg = NUM2LONG(argv[0]);
            long len = NUM2LONG(argv[1]);
            return rb_str_substr(str, beg, len);
        }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#[]=(index) ⇒ Object #[]=(start, length) ⇒ Object #[]=(range) ⇒ Object #[]=(regexp, capture = 0) ⇒ Object #[]=(substring) ⇒ Object

Replaces all, some, or none of the contents of self; returns new_string. See String Slices.

A few examples:

s = 'foo'
s[2] = 'rtune'     # => "rtune"
s                  # => "fortune"
s[1, 5] = 'init'   # => "init"
s                  # => "finite"
s[3..4] = 'al'     # => "al"
s                  # => "finale"
s[/e$/] = 'ly'     # => "ly"
s                  # => "finally"
s['lly'] = 'ncial' # => "ncial"
s                  # => "financial"


5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
# File 'string.c', line 5510

static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 3) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
        }
        else {
            rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
        }
        return argv[2];
    }
    rb_check_arity(argc, 2, 3);
    return rb_str_aset(str, argv[0], argv[1]);
}

#ascii_only?Boolean

Returns true if self contains only ASCII characters, false otherwise:

'abc'.ascii_only?         # => true
"abc\u{6666}".ascii_only? # => false

Returns:

  • (Boolean)


11010
11011
11012
11013
11014
11015
11016
# File 'string.c', line 11010

static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return RBOOL(cr == ENC_CODERANGE_7BIT);
}

#bString

:include: doc/string/b.rdoc

Returns:



10946
10947
10948
10949
10950
10951
10952
10953
10954
10955
10956
10957
10958
10959
10960
10961
10962
10963
10964
10965
10966
10967
10968
10969
10970
10971
10972
10973
10974
10975
10976
10977
# File 'string.c', line 10946

static VALUE
rb_str_b(VALUE str)
{
    VALUE str2;
    if (STR_EMBED_P(str)) {
        str2 = str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
    }
    else {
        str2 = str_alloc_heap(rb_cString);
    }
    str_replace_shared_without_enc(str2, str);

    if (rb_enc_asciicompat(STR_ENC_GET(str))) {
        // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
        // If we know the receiver's code range then we know the result's code range.
        int cr = ENC_CODERANGE(str);
        switch (cr) {
          case ENC_CODERANGE_7BIT:
            ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
            break;
          case ENC_CODERANGE_BROKEN:
          case ENC_CODERANGE_VALID:
            ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
            break;
          default:
            ENC_CODERANGE_CLEAR(str2);
            break;
        }
    }

    return str2;
}

#byteindex(substring, offset = 0) ⇒ Integer? #byteindex(regexp, offset = 0) ⇒ Integer?

Returns the Integer byte-based index of the first occurrence of the given substring, or nil if none found:

'foo'.byteindex('f') # => 0
'foo'.byteindex('o') # => 1
'foo'.byteindex('oo') # => 1
'foo'.byteindex('ooo') # => nil

Returns the Integer byte-based index of the first match for the given Regexp regexp, or nil if none found:

'foo'.byteindex(/f/) # => 0
'foo'.byteindex(/o/) # => 1
'foo'.byteindex(/oo/) # => 1
'foo'.byteindex(/ooo/) # => nil

Integer argument offset, if given, specifies the byte-based position in the string to begin the search:

'foo'.byteindex('o', 1) # => 1
'foo'.byteindex('o', 2) # => 2
'foo'.byteindex('o', 3) # => nil

If offset is negative, counts backward from the end of self:

'foo'.byteindex('o', -1) # => 2
'foo'.byteindex('o', -2) # => 1
'foo'.byteindex('o', -3) # => 1
'foo'.byteindex('o', -4) # => nil

If offset does not land on character (codepoint) boundary, IndexError is raised.

Related: String#index, String#byterindex.

Overloads:

  • #byteindex(substring, offset = 0) ⇒ Integer?

    Returns:

  • #byteindex(regexp, offset = 0) ⇒ Integer?

    Returns:



4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
# File 'string.c', line 4129

static VALUE
rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        long slen = RSTRING_LEN(str);
        pos = NUM2LONG(initpos);
        if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }
    else {
        pos = 0;
    }

    str_ensure_byte_pos(str, pos);

    if (RB_TYPE_P(sub, T_REGEXP)) {
        if (rb_reg_search(sub, str, pos, 0) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = BEG(0);
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_byteindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
    }
    return Qnil;
}

#byterindex(substring, offset = self.bytesize) ⇒ Integer? #byterindex(regexp, offset = self.bytesize) ⇒ Integer?

Returns the Integer byte-based index of the last occurrence of the given substring, or nil if none found:

'foo'.byterindex('f') # => 0
'foo'.byterindex('o') # => 2
'foo'.byterindex('oo') # => 1
'foo'.byterindex('ooo') # => nil

Returns the Integer byte-based index of the last match for the given Regexp regexp, or nil if none found:

'foo'.byterindex(/f/) # => 0
'foo'.byterindex(/o/) # => 2
'foo'.byterindex(/oo/) # => 1
'foo'.byterindex(/ooo/) # => nil

The last match means starting at the possible last position, not the last of longest matches.

'foo'.byterindex(/o+/) # => 2
$~ #=> #<MatchData "o">

To get the last longest match, needs to combine with negative lookbehind.

'foo'.byterindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">

Or String#byteindex with negative lookforward.

'foo'.byteindex(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">

Integer argument offset, if given and non-negative, specifies the maximum starting byte-based position in the

string to _end_ the search:

 'foo'.byterindex('o', 0) # => nil
 'foo'.byterindex('o', 1) # => 1
 'foo'.byterindex('o', 2) # => 2
 'foo'.byterindex('o', 3) # => 2

If offset is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset:

'foo'.byterindex('o', -1) # => 2
'foo'.byterindex('o', -2) # => 1
'foo'.byterindex('o', -3) # => nil
'foo'.byterindex('o', -4) # => nil

If offset does not land on character (codepoint) boundary, IndexError is raised.

Related: String#byteindex.

Overloads:

  • #byterindex(substring, offset = self.bytesize) ⇒ Integer?

    Returns:

  • #byterindex(regexp, offset = self.bytesize) ⇒ Integer?

    Returns:



4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
# File 'string.c', line 4450

static VALUE
rb_str_byterindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos, len = RSTRING_LEN(str);

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0 && (pos += len) < 0) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
        if (pos > len) pos = len;
    }
    else {
        pos = len;
    }

    str_ensure_byte_pos(str, pos);

    if (RB_TYPE_P(sub, T_REGEXP)) {
        if (rb_reg_search(sub, str, pos, 1) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = BEG(0);
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_byterindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
    }
    return Qnil;
}

#bytesObject

:include: doc/string/bytes.rdoc



9244
9245
9246
9247
9248
9249
# File 'string.c', line 9244

static VALUE
rb_str_bytes(VALUE str)
{
    VALUE ary = WANTARRAY("bytes", RSTRING_LEN(str));
    return rb_str_enumerate_bytes(str, ary);
}

#bytesizeInteger

:include: doc/string/bytesize.rdoc

Returns:



2221
2222
2223
2224
2225
# File 'string.c', line 2221

VALUE
rb_str_bytesize(VALUE str)
{
    return LONG2NUM(RSTRING_LEN(str));
}

#byteslice(index, length = 1) ⇒ String? #byteslice(range) ⇒ String?

Returns a substring of self, or nil if the substring cannot be constructed.

With integer arguments index and length given, returns the substring beginning at the given index of the given length (if possible), or nil if length is negative or index falls outside of self:

s = '0123456789' # => "0123456789"
s.byteslice(2)   # => "2"
s.byteslice(200) # => nil
s.byteslice(4, 3)  # => "456"
s.byteslice(4, 30) # => "456789"
s.byteslice(4, -1) # => nil
s.byteslice(40, 2) # => nil

In either case above, counts backwards from the end of self if index is negative:

s = '0123456789'   # => "0123456789"
s.byteslice(-4)    # => "6"
s.byteslice(-4, 3) # => "678"

With Range argument range given, returns byteslice(range.begin, range.size):

s = '0123456789'    # => "0123456789"
s.byteslice(4..6)   # => "456"
s.byteslice(-6..-4) # => "456"
s.byteslice(5..2)   # => "" # range.size is zero.
s.byteslice(40..42) # => nil

In all cases, a returned string has the same encoding as self:

s.encoding              # => #<Encoding:UTF-8>
s.byteslice(4).encoding # => #<Encoding:UTF-8>

Overloads:



6324
6325
6326
6327
6328
6329
6330
6331
6332
6333
6334
# File 'string.c', line 6324

static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        long beg = NUM2LONG(argv[0]);
        long len = NUM2LONG(argv[1]);
        return str_byte_substr(str, beg, len, TRUE);
    }
    rb_check_arity(argc, 1, 2);
    return str_byte_aref(str, argv[0]);
}

#bytesplice(index, length, str) ⇒ String #bytesplice(index, length, str, str_index, str_length) ⇒ String #bytesplice(range, str) ⇒ String #bytesplice(range, str, str_range) ⇒ String

Replaces some or all of the content of self with str, and returns self. The portion of the string affected is determined using the same criteria as String#byteslice, except that length cannot be omitted. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly.

If str_index and str_length, or str_range are given, the content of self is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); however the substring of str is not allocated as a new string.

The form that take an Integer will raise an IndexError if the value is out of range; the Range form will raise a RangeError. If the beginning or ending offset does not land on character (codepoint) boundary, an IndexError will be raised.

Overloads:

  • #bytesplice(index, length, str) ⇒ String

    Returns:

  • #bytesplice(index, length, str, str_index, str_length) ⇒ String

    Returns:

  • #bytesplice(range, str) ⇒ String

    Returns:

  • #bytesplice(range, str, str_range) ⇒ String

    Returns:



6379
6380
6381
6382
6383
6384
6385
6386
6387
6388
6389
6390
6391
6392
6393
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
6419
6420
6421
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
6435
6436
6437
# File 'string.c', line 6379

static VALUE
rb_str_bytesplice(int argc, VALUE *argv, VALUE str)
{
    long beg, len, vbeg, vlen;
    VALUE val;
    rb_encoding *enc;
    int cr;

    rb_check_arity(argc, 2, 5);
    if (!(argc == 2 || argc == 3 || argc == 5)) {
        rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
    }
    if (argc == 2 || (argc == 3 && !RB_INTEGER_TYPE_P(argv[0]))) {
        if (!rb_range_beg_len(argv[0], &beg, &len, RSTRING_LEN(str), 2)) {
            rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                     rb_builtin_class_name(argv[0]));
        }
        val = argv[1];
        StringValue(val);
        if (argc == 2) {
            /* bytesplice(range, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
        else {
            /* bytesplice(range, str, str_range) */
            if (!rb_range_beg_len(argv[2], &vbeg, &vlen, RSTRING_LEN(val), 2)) {
                rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                         rb_builtin_class_name(argv[2]));
            }
        }
    }
    else {
        beg = NUM2LONG(argv[0]);
        len = NUM2LONG(argv[1]);
        val = argv[2];
        StringValue(val);
        if (argc == 3) {
            /* bytesplice(index, length, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
        else {
            /* bytesplice(index, length, str, str_index, str_length) */
            vbeg = NUM2LONG(argv[3]);
            vlen = NUM2LONG(argv[4]);
        }
    }
    str_check_beg_len(str, &beg, &len);
    str_check_beg_len(val, &vbeg, &vlen);
    enc = rb_enc_check(str, val);
    str_modify_keep_cr(str);
    rb_str_update_1(str, beg, len, val, vbeg, vlen);
    rb_enc_associate(str, enc);
    cr = ENC_CODERANGE_AND(ENC_CODERANGE(str), ENC_CODERANGE(val));
    if (cr != ENC_CODERANGE_BROKEN)
        ENC_CODERANGE_SET(str, cr);
    return str;
}

#capitalize(*options) ⇒ String

Returns a string containing the characters in self; the first character is upcased; the remaining characters are downcased:

s = 'hello World!' # => "hello World!"
s.capitalize       # => "Hello world!"

The casing may be affected by the given options; see Case Mapping.

Related: String#capitalize!.

Returns:



7754
7755
7756
7757
7758
7759
7760
7761
7762
7763
7764
7765
7766
7767
7768
7769
7770
7771
7772
# File 'string.c', line 7754

static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str;
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#capitalize!(*options) ⇒ self?

Upcases the first character in self; downcases the remaining characters; returns self if any changes were made, nil otherwise:

s = 'hello World!' # => "hello World!"
s.capitalize!      # => "Hello world!"
s                  # => "Hello world!"
s.capitalize!      # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#capitalize.

Returns:

  • (self, nil)


7716
7717
7718
7719
7720
7721
7722
7723
7724
7725
7726
7727
7728
7729
7730
7731
7732
7733
# File 'string.c', line 7716

static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#casecmp(other_string) ⇒ -1, ...

Compares self.downcase and other_string.downcase; returns:

  • -1 if other_string.downcase is larger.

  • 0 if the two are equal.

  • 1 if other_string.downcase is smaller.

  • nil if the two are incomparable.

Examples:

'foo'.casecmp('foo') # => 0
'foo'.casecmp('food') # => -1
'food'.casecmp('foo') # => 1
'FOO'.casecmp('foo') # => 0
'foo'.casecmp('FOO') # => 0
'foo'.casecmp(1) # => nil

See Case Mapping.

Related: String#casecmp?.

Returns:

  • (-1, 0, 1, nil)


3838
3839
3840
3841
3842
3843
3844
3845
3846
# File 'string.c', line 3838

static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return Qnil;
    }
    return str_casecmp(str1, s);
}

#casecmp?(other_string) ⇒ true, ...

Returns true if self and other_string are equal after Unicode case folding, otherwise false:

'foo'.casecmp?('foo') # => true
'foo'.casecmp?('food') # => false
'food'.casecmp?('foo') # => false
'FOO'.casecmp?('foo') # => true
'foo'.casecmp?('FOO') # => true

Returns nil if the two values are incomparable:

'foo'.casecmp?(1) # => nil

See Case Mapping.

Related: String#casecmp.

Returns:

  • (true, false, nil)


3928
3929
3930
3931
3932
3933
3934
3935
3936
# File 'string.c', line 3928

static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return Qnil;
    }
    return str_casecmp_p(str1, s);
}

#center(size, pad_string = ' ') ⇒ Object

:include: doc/string/center.rdoc

Related: String#ljust, String#rjust.



10551
10552
10553
10554
10555
# File 'string.c', line 10551

static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'c');
}

#charsObject

:include: doc/string/chars.rdoc



9313
9314
9315
9316
9317
9318
# File 'string.c', line 9313

static VALUE
rb_str_chars(VALUE str)
{
    VALUE ary = WANTARRAY("chars", rb_str_strlen(str));
    return rb_str_enumerate_chars(str, ary);
}

#chomp(line_sep = $/) ⇒ Object

:include: doc/string/chomp.rdoc



9777
9778
9779
9780
9781
9782
9783
# File 'string.c', line 9777

static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, 0, chompped_length(str, rs));
}

#chomp!(line_sep = $/) ⇒ self?

Like String#chomp, but modifies self in place; returns nil if no modification made, self otherwise.

Returns:

  • (self, nil)


9757
9758
9759
9760
9761
9762
9763
9764
9765
9766
# File 'string.c', line 9757

static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE rs;
    str_modifiable(str);
    if (RSTRING_LEN(str) == 0 && argc < 2) return Qnil;
    rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return Qnil;
    return rb_str_chomp_string(str, rs);
}

#chopObject

:include: doc/string/chop.rdoc



9604
9605
9606
9607
9608
# File 'string.c', line 9604

static VALUE
rb_str_chop(VALUE str)
{
    return rb_str_subseq(str, 0, chopped_length(str));
}

#chop!self?

Like String#chop, but modifies self in place; returns nil if self is empty, self otherwise.

Related: String#chomp!.

Returns:

  • (self, nil)


9578
9579
9580
9581
9582
9583
9584
9585
9586
9587
9588
9589
9590
9591
9592
9593
# File 'string.c', line 9578

static VALUE
rb_str_chop_bang(VALUE str)
{
    str_modify_keep_cr(str);
    if (RSTRING_LEN(str) > 0) {
        long len;
        len = chopped_length(str);
        STR_SET_LEN(str, len);
        TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
        if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
            ENC_CODERANGE_CLEAR(str);
        }
        return str;
    }
    return Qnil;
}

#chrString

Returns a string containing the first character of self:

s = 'foo' # => "foo"
s.chr     # => "f"

Returns:



6116
6117
6118
6119
6120
# File 'string.c', line 6116

static VALUE
rb_str_chr(VALUE str)
{
    return rb_str_substr(str, 0, 1);
}

#clearself

Removes the contents of self:

s = 'foo' # => "foo"
s.clear   # => ""

Returns:

  • (self)


6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
# File 'string.c', line 6091

static VALUE
rb_str_clear(VALUE str)
{
    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_LEN(str, 0);
    RSTRING_PTR(str)[0] = 0;
    if (rb_enc_asciicompat(STR_ENC_GET(str)))
        ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
    else
        ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
    return str;
}

#codepointsObject

:include: doc/string/codepoints.rdoc



9373
9374
9375
9376
9377
9378
# File 'string.c', line 9373

static VALUE
rb_str_codepoints(VALUE str)
{
    VALUE ary = WANTARRAY("codepoints", rb_str_strlen(str));
    return rb_str_enumerate_codepoints(str, ary);
}

#concat(*objects) ⇒ String

Concatenates each object in objects to self and returns self:

s = 'foo'
s.concat('bar', 'baz') # => "foobarbaz"
s                      # => "foobarbaz"

For each given object object that is an Integer, the value is considered a codepoint and converted to a character before concatenation:

s = 'foo'
s.concat(32, 'bar', 32, 'baz') # => "foo bar baz"

Related: String#<<, which takes a single argument.

Returns:



3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
# File 'string.c', line 3466

static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        return rb_str_concat(str, argv[0]);
    }
    else if (argc > 1) {
        int i;
        VALUE arg_str = rb_str_tmp_new(0);
        rb_enc_copy(arg_str, str);
        for (i = 0; i < argc; i++) {
            rb_str_concat(arg_str, argv[i]);
        }
        rb_str_buf_append(str, arg_str);
    }

    return str;
}

#count(*selectors) ⇒ Integer

Returns the total number of characters in self that are specified by the given selectors (see Multiple Character Selectors):

a = "hello world"
a.count "lo"                   #=> 5
a.count "lo", "o"              #=> 2
a.count "hello", "^l"          #=> 4
a.count "ej-m"                 #=> 4

"hello^world".count "\\^aeiou" #=> 4
"hello-world".count "a\\-eo"   #=> 4

c = "hello world\\r\\n"
c.count "\\"                   #=> 2
c.count "\\A"                  #=> 0
c.count "X-\\w"                #=> 3

Returns:



8584
8585
8586
8587
8588
8589
8590
8591
8592
8593
8594
8595
8596
8597
8598
8599
8600
8601
8602
8603
8604
8605
8606
8607
8608
8609
8610
8611
8612
8613
8614
8615
8616
8617
8618
8619
8620
8621
8622
8623
8624
8625
8626
8627
8628
8629
8630
8631
8632
8633
8634
8635
8636
8637
8638
8639
8640
8641
8642
8643
8644
8645
8646
8647
8648
8649
8650
8651
# File 'string.c', line 8584

static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
    char table[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0, tstr;
    char *s, *send;
    int i;
    int ascompat;
    size_t n = 0;

    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);

    tstr = argv[0];
    StringValue(tstr);
    enc = rb_enc_check(str, tstr);
    if (argc == 1) {
        const char *ptstr;
        if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
            (ptstr = RSTRING_PTR(tstr),
             ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
            !is_broken_string(str)) {
            int clen;
            unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);

            s = RSTRING_PTR(str);
            if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
            send = RSTRING_END(str);
            while (s < send) {
                if (*(unsigned char*)s++ == c) n++;
            }
            return SIZET2NUM(n);
        }
    }

    tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
    for (i=1; i<argc; i++) {
        tstr = argv[i];
        StringValue(tstr);
        enc = rb_enc_check(str, tstr);
        tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
    }

    s = RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
    send = RSTRING_END(str);
    ascompat = rb_enc_asciicompat(enc);
    while (s < send) {
        unsigned int c;

        if (ascompat && (c = *(unsigned char*)s) < 0x80) {
            if (table[c]) {
                n++;
            }
            s++;
        }
        else {
            int clen;
            c = rb_enc_codepoint_len(s, send, &clen, enc);
            if (tr_find(c, table, del, nodel)) {
                n++;
            }
            s += clen;
        }
    }

    return SIZET2NUM(n);
}

#crypt(salt_str) ⇒ Object

Returns the string generated by calling crypt(3) standard library function with str and salt_str, in this order, as its arguments. Please do not use this method any longer. It is legacy; provided only for backward compatibility with ruby scripts in earlier days. It is bad to use in contemporary programs for several reasons:

  • Behaviour of C’s crypt(3) depends on the OS it is run. The generated string lacks data portability.

  • On some OSes such as Mac OS, crypt(3) never fails (i.e. silently ends up in unexpected results).

  • On some OSes such as Mac OS, crypt(3) is not thread safe.

  • So-called “traditional” usage of crypt(3) is very very very weak. According to its manpage, Linux’s traditional crypt(3) output has only 2**56 variations; too easy to brute force today. And this is the default behaviour.

  • In order to make things robust some OSes implement so-called “modular” usage. To go through, you have to do a complex build-up of the salt_str parameter, by hand. Failure in generation of a proper salt string tends not to yield any errors; typos in parameters are normally not detectable.

    • For instance, in the following example, the second invocation of String#crypt is wrong; it has a typo in “round=” (lacks “s”). However the call does not fail and something unexpected is generated.

      "foo".crypt("$5$rounds=1000$salt$") # OK, proper usage
      "foo".crypt("$5$round=1000$salt$")  # Typo not detected
      
  • Even in the “modular” mode, some hash functions are considered archaic and no longer recommended at all; for instance module $1$ is officially abandoned by its author: see phk.freebsd.dk/sagas/md5crypt_eol/ . For another instance module $3$ is considered completely broken: see the manpage of FreeBSD.

  • On some OS such as Mac OS, there is no modular mode. Yet, as written above, crypt(3) on Mac OS never fails. This means even if you build up a proper salt string it generates a traditional DES hash anyways, and there is no way for you to be aware of.

    "foo".crypt("$5$rounds=1000$salt$") # => "$5fNPQMxC5j6."
    

If for some reason you cannot migrate to other secure contemporary password hashing algorithms, install the string-crypt gem and require 'string/crypt' to continue using it.



10281
10282
10283
10284
10285
10286
10287
10288
10289
10290
10291
10292
10293
10294
10295
10296
10297
10298
10299
10300
10301
10302
10303
10304
10305
10306
10307
10308
10309
10310
10311
10312
10313
10314
10315
10316
10317
10318
10319
10320
10321
10322
10323
10324
10325
10326
10327
10328
10329
10330
10331
10332
10333
10334
10335
# File 'string.c', line 10281

static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
#ifdef HAVE_CRYPT_R
    VALUE databuf;
    struct crypt_data *data;
#   define CRYPT_END() ALLOCV_END(databuf)
#else
    extern char *crypt(const char *, const char *);
#   define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
#endif
    VALUE result;
    const char *s, *saltp;
    char *res;
#ifdef BROKEN_CRYPT
    char salt_8bit_clean[3];
#endif

    StringValue(salt);
    mustnot_wchar(str);
    mustnot_wchar(salt);
    s = StringValueCStr(str);
    saltp = RSTRING_PTR(salt);
    if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
        rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
    }

#ifdef BROKEN_CRYPT
    if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
        salt_8bit_clean[0] = saltp[0] & 0x7f;
        salt_8bit_clean[1] = saltp[1] & 0x7f;
        salt_8bit_clean[2] = '\0';
        saltp = salt_8bit_clean;
    }
#endif
#ifdef HAVE_CRYPT_R
    data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
    data->initialized = 0;
# endif
    res = crypt_r(s, saltp, data);
#else
    crypt_mutex_initialize();
    rb_nativethread_lock_lock(&crypt_mutex.lock);
    res = crypt(s, saltp);
#endif
    if (!res) {
        int err = errno;
        CRYPT_END();
        rb_syserr_fail(err, "crypt");
    }
    result = rb_str_new_cstr(res);
    CRYPT_END();
    return result;
}

#delete(*selectors) ⇒ Object

Returns a copy of self with characters specified by selectors removed (see Multiple Character Selectors):

"hello".delete "l","lo"        #=> "heo"
"hello".delete "lo"            #=> "he"
"hello".delete "aeiou", "^e"   #=> "hell"
"hello".delete "ej-m"          #=> "ho"


8401
8402
8403
8404
8405
8406
8407
# File 'string.c', line 8401

static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_delete_bang(argc, argv, str);
    return str;
}

#delete!(*selectors) ⇒ self?

Like String#delete, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)


8325
8326
8327
8328
8329
8330
8331
8332
8333
8334
8335
8336
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347
8348
8349
8350
8351
8352
8353
8354
8355
8356
8357
8358
8359
8360
8361
8362
8363
8364
8365
8366
8367
8368
8369
8370
8371
8372
8373
8374
8375
8376
8377
8378
8379
8380
8381
8382
8383
8384
# File 'string.c', line 8325

static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    char *s, *send, *t;
    VALUE del = 0, nodel = 0;
    int modify = 0;
    int i, ascompat, cr;

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
    for (i=0; i<argc; i++) {
        VALUE s = argv[i];

        StringValue(s);
        enc = rb_enc_check(str, s);
        tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
    }

    str_modify_keep_cr(str);
    ascompat = rb_enc_asciicompat(enc);
    s = t = RSTRING_PTR(str);
    send = RSTRING_END(str);
    cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
    while (s < send) {
        unsigned int c;
        int clen;

        if (ascompat && (c = *(unsigned char*)s) < 0x80) {
            if (squeez[c]) {
                modify = 1;
            }
            else {
                if (t != s) *t = c;
                t++;
            }
            s++;
        }
        else {
            c = rb_enc_codepoint_len(s, send, &clen, enc);

            if (tr_find(c, squeez, del, nodel)) {
                modify = 1;
            }
            else {
                if (t != s) rb_enc_mbcput(c, t, enc);
                t += clen;
                if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
            }
            s += clen;
        }
    }
    TERM_FILL(t, TERM_LEN(str));
    STR_SET_LEN(str, t - RSTRING_PTR(str));
    ENC_CODERANGE_SET(str, cr);

    if (modify) return str;
    return Qnil;
}

#delete_prefix(prefix) ⇒ Object

:include: doc/string/delete_prefix.rdoc



10786
10787
10788
10789
10790
10791
10792
10793
10794
10795
# File 'string.c', line 10786

static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    long prefixlen;

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return str_duplicate(rb_cString, str);

    return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
}

#delete_prefix!(prefix) ⇒ self?

Like String#delete_prefix, except that self is modified in place. Returns self if the prefix is removed, nil otherwise.

Returns:

  • (self, nil)


10766
10767
10768
10769
10770
10771
10772
10773
10774
10775
10776
# File 'string.c', line 10766

static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    long prefixlen;
    str_modify_keep_cr(str);

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return Qnil;

    return rb_str_drop_bytes(str, prefixlen);
}

#delete_suffix(suffix) ⇒ Object

:include: doc/string/delete_suffix.rdoc



10869
10870
10871
10872
10873
10874
10875
10876
10877
10878
# File 'string.c', line 10869

static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    long suffixlen;

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return str_duplicate(rb_cString, str);

    return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
}

#delete_suffix!(suffix) ⇒ self?

Like String#delete_suffix, except that self is modified in place. Returns self if the suffix is removed, nil otherwise.

Returns:

  • (self, nil)


10841
10842
10843
10844
10845
10846
10847
10848
10849
10850
10851
10852
10853
10854
10855
10856
10857
10858
10859
# File 'string.c', line 10841

static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    long olen, suffixlen, len;
    str_modifiable(str);

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return Qnil;

    olen = RSTRING_LEN(str);
    str_modify_keep_cr(str);
    len = olen - suffixlen;
    STR_SET_LEN(str, len);
    TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}

#downcase(*options) ⇒ String

Returns a string containing the downcased characters in self:

s = 'Hello World!' # => "Hello World!"
s.downcase         # => "hello world!"

The casing may be affected by the given options; see Case Mapping.

Related: String#downcase!, String#upcase, String#upcase!.

Returns:



7670
7671
7672
7673
7674
7675
7676
7677
7678
7679
7680
7681
7682
7683
7684
7685
7686
7687
7688
7689
7690
7691
7692
7693
# File 'string.c', line 7670

static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        downcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#downcase!(*options) ⇒ self?

Downcases the characters in self; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.downcase!        # => "hello world!"
s                  # => "hello world!"
s.downcase!        # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#downcase, String#upcase, String#upcase!.

Returns:

  • (self, nil)


7631
7632
7633
7634
7635
7636
7637
7638
7639
7640
7641
7642
7643
7644
7645
7646
7647
7648
7649
7650
7651
# File 'string.c', line 7631

static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#dumpString

Returns a printable version of self, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:

"hello \n ''".dump    # => "\"hello \\n ''\""
"\f\x00\xff\\\"".dump # => "\"\\f\\x00\\xFF\\\\\\\"\""

Related: String#undump (inverse of String#dump).

Returns:



6895
6896
6897
6898
6899
6900
6901
6902
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
6928
6929
6930
6931
6932
6933
6934
6935
6936
6937
6938
6939
6940
6941
6942
6943
6944
6945
6946
6947
6948
6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959
6960
6961
6962
6963
6964
6965
6966
6967
6968
6969
6970
6971
6972
6973
6974
6975
6976
6977
6978
6979
6980
6981
6982
6983
6984
6985
6986
6987
6988
6989
6990
6991
6992
6993
6994
6995
6996
6997
6998
6999
7000
7001
7002
7003
7004
7005
7006
7007
7008
7009
7010
7011
7012
7013
7014
7015
7016
7017
7018
7019
7020
7021
7022
7023
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
7037
7038
7039
7040
# File 'string.c', line 6895

VALUE
rb_str_dump(VALUE str)
{
    int encidx = rb_enc_get_index(str);
    rb_encoding *enc = rb_enc_from_index(encidx);
    long len;
    const char *p, *pend;
    char *q, *qend;
    VALUE result;
    int u8 = (encidx == rb_utf8_encindex());
    static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")";

    len = 2;			/* "" */
    if (!rb_enc_asciicompat(enc)) {
        len += strlen(nonascii_suffix) - rb_strlen_lit("%s");
        len += strlen(enc->name);
    }

    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    while (p < pend) {
        int clen;
        unsigned char c = *p++;

        switch (c) {
          case '"':  case '\\':
          case '\n': case '\r':
          case '\t': case '\f':
          case '\013': case '\010': case '\007': case '\033':
            clen = 2;
            break;

          case '#':
            clen = IS_EVSTR(p, pend) ? 2 : 1;
            break;

          default:
            if (ISPRINT(c)) {
                clen = 1;
            }
            else {
                if (u8 && c > 0x7F) {	/* \u notation */
                    int n = rb_enc_precise_mbclen(p-1, pend, enc);
                    if (MBCLEN_CHARFOUND_P(n)) {
                        unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
                        if (cc <= 0xFFFF)
                            clen = 6;  /* \uXXXX */
                        else if (cc <= 0xFFFFF)
                            clen = 9;  /* \u{XXXXX} */
                        else
                            clen = 10; /* \u{XXXXXX} */
                        p += MBCLEN_CHARFOUND_LEN(n)-1;
                        break;
                    }
                }
                clen = 4;	/* \xNN */
            }
            break;
        }

        if (clen > LONG_MAX - len) {
            rb_raise(rb_eRuntimeError, "string size too big");
        }
        len += clen;
    }

    result = rb_str_new(0, len);
    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    q = RSTRING_PTR(result); qend = q + len + 1;

    *q++ = '"';
    while (p < pend) {
        unsigned char c = *p++;

        if (c == '"' || c == '\\') {
            *q++ = '\\';
            *q++ = c;
        }
        else if (c == '#') {
            if (IS_EVSTR(p, pend)) *q++ = '\\';
            *q++ = '#';
        }
        else if (c == '\n') {
            *q++ = '\\';
            *q++ = 'n';
        }
        else if (c == '\r') {
            *q++ = '\\';
            *q++ = 'r';
        }
        else if (c == '\t') {
            *q++ = '\\';
            *q++ = 't';
        }
        else if (c == '\f') {
            *q++ = '\\';
            *q++ = 'f';
        }
        else if (c == '\013') {
            *q++ = '\\';
            *q++ = 'v';
        }
        else if (c == '\010') {
            *q++ = '\\';
            *q++ = 'b';
        }
        else if (c == '\007') {
            *q++ = '\\';
            *q++ = 'a';
        }
        else if (c == '\033') {
            *q++ = '\\';
            *q++ = 'e';
        }
        else if (ISPRINT(c)) {
            *q++ = c;
        }
        else {
            *q++ = '\\';
            if (u8) {
                int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
                if (MBCLEN_CHARFOUND_P(n)) {
                    int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
                    p += n;
                    if (cc <= 0xFFFF)
                        snprintf(q, qend-q, "u%04X", cc);    /* \uXXXX */
                    else
                        snprintf(q, qend-q, "u{%X}", cc);  /* \u{XXXXX} or \u{XXXXXX} */
                    q += strlen(q);
                    continue;
                }
            }
            snprintf(q, qend-q, "x%02X", c);
            q += 3;
        }
    }
    *q++ = '"';
    *q = '\0';
    if (!rb_enc_asciicompat(enc)) {
        snprintf(q, qend-q, nonascii_suffix, enc->name);
        encidx = rb_ascii8bit_encindex();
    }
    /* result from dump is ASCII */
    rb_enc_associate_index(result, encidx);
    ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
    return result;
}

#dupObject

:nodoc:



1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
# File 'string.c', line 1794

VALUE
rb_str_dup_m(VALUE str)
{
    if (LIKELY(BARE_STRING_P(str))) {
        return str_duplicate(rb_obj_class(str), str);
    }
    else {
        return rb_obj_dup(str);
    }
}

#each_byte {|byte| ... } ⇒ self #each_byteObject

:include: doc/string/each_byte.rdoc

Overloads:

  • #each_byte {|byte| ... } ⇒ self

    Yields:

    • (byte)

    Returns:

    • (self)


9229
9230
9231
9232
9233
9234
# File 'string.c', line 9229

static VALUE
rb_str_each_byte(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
    return rb_str_enumerate_bytes(str, 0);
}

#each_char {|c| ... } ⇒ self #each_charObject

:include: doc/string/each_char.rdoc

Overloads:

  • #each_char {|c| ... } ⇒ self

    Yields:

    • (c)

    Returns:

    • (self)


9298
9299
9300
9301
9302
9303
# File 'string.c', line 9298

static VALUE
rb_str_each_char(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_chars(str, 0);
}

#each_codepoint {|integer| ... } ⇒ self #each_codepointObject

:include: doc/string/each_codepoint.rdoc

Overloads:

  • #each_codepoint {|integer| ... } ⇒ self

    Yields:

    • (integer)

    Returns:

    • (self)


9358
9359
9360
9361
9362
9363
# File 'string.c', line 9358

static VALUE
rb_str_each_codepoint(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_codepoints(str, 0);
}

#each_grapheme_cluster {|gc| ... } ⇒ self #each_grapheme_clusterObject

:include: doc/string/each_grapheme_cluster.rdoc

Overloads:

  • #each_grapheme_cluster {|gc| ... } ⇒ self

    Yields:

    • (gc)

    Returns:

    • (self)


9528
9529
9530
9531
9532
9533
# File 'string.c', line 9528

static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
    return rb_str_enumerate_grapheme_clusters(str, 0);
}

#each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self #each_line(line_sep = $/, chomp: false) ⇒ Object

:include: doc/string/each_line.rdoc

Overloads:

  • #each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self

    Yields:

    • (substring)

    Returns:

    • (self)


9177
9178
9179
9180
9181
9182
# File 'string.c', line 9177

static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);
    return rb_str_enumerate_lines(argc, argv, str, 0);
}

#empty?Boolean

Returns true if the length of self is zero, false otherwise:

"hello".empty? # => false
" ".empty? # => false
"".empty? # => true

Returns:

  • (Boolean)


2239
2240
2241
2242
2243
# File 'string.c', line 2239

static VALUE
rb_str_empty(VALUE str)
{
    return RBOOL(RSTRING_LEN(str) == 0);
}

#encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String

:include: doc/string/encode.rdoc

Overloads:

  • #encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String

    Returns:

  • #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String

    Returns:



2905
2906
2907
2908
2909
2910
2911
# File 'transcode.c', line 2905

static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr = str;
    int encidx = str_transcode(argc, argv, &newstr);
    return encoded_dup(newstr, str, encidx);
}

#encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self

Like #encode, but applies encoding changes to self; returns self.

Overloads:

  • #encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self

    Returns:

    • (self)
  • #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self

    Returns:

    • (self)


2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
# File 'transcode.c', line 2874

static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr;
    int encidx;

    rb_check_frozen(str);

    newstr = str;
    encidx = str_transcode(argc, argv, &newstr);

    if (encidx < 0) return str;
    if (newstr == str) {
        rb_enc_associate_index(str, encidx);
        return str;
    }
    rb_str_shared_replace(str, newstr);
    return str_encode_associate(str, encidx);
}

#encodingEncoding

Returns the Encoding object that represents the encoding of obj.

Returns:



1146
1147
1148
1149
1150
1151
1152
1153
1154
# File 'encoding.c', line 1146

VALUE
rb_obj_encoding(VALUE obj)
{
    int idx = rb_enc_get_index(obj);
    if (idx < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }
    return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
}

#end_with?(*strings) ⇒ Boolean

:include: doc/string/end_with_p.rdoc

Returns:

  • (Boolean)


10682
10683
10684
10685
10686
10687
10688
10689
10690
10691
10692
10693
10694
10695
10696
10697
10698
10699
10700
10701
10702
10703
10704
10705
10706
# File 'string.c', line 10682

static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
        VALUE tmp = argv[i];
        const char *p, *s, *e;
        long slen, tlen;
        rb_encoding *enc;

        StringValue(tmp);
        enc = rb_enc_check(str, tmp);
        if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
        if ((slen = RSTRING_LEN(str)) < tlen) continue;
        p = RSTRING_PTR(str);
        e = p + slen;
        s = e - tlen;
        if (!at_char_boundary(p, s, e, enc))
            continue;
        if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
            return Qtrue;
    }
    return Qfalse;
}

#eql?(object) ⇒ Boolean

Returns true if object has the same length and content;

as +self+; +false+ otherwise:

  s = 'foo'
  s.eql?('foo') # => true
  s.eql?('food') # => false
  s.eql?('FOO') # => false

Returns +false+ if the two strings' encodings are not compatible:

  "\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false

Returns:

  • (Boolean)


3767
3768
3769
3770
3771
3772
3773
# File 'string.c', line 3767

VALUE
rb_str_eql(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
    return rb_str_eql_internal(str1, str2);
}

#force_encoding(encoding) ⇒ self

:include: doc/string/force_encoding.rdoc

Returns:

  • (self)


10913
10914
10915
10916
10917
10918
10919
10920
10921
10922
10923
10924
10925
10926
10927
10928
10929
10930
10931
10932
10933
10934
10935
10936
# File 'string.c', line 10913

static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    str_modifiable(str);

    rb_encoding *encoding = rb_to_encoding(enc);
    int idx = rb_enc_to_index(encoding);

    // If the encoding is unchanged, we do nothing.
    if (ENCODING_GET(str) == idx) {
        return str;
    }

    rb_enc_associate_index(str, idx);

    // If the coderange was 7bit and the new encoding is ASCII-compatible
    // we can keep the coderange.
    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
        return str;
    }

    ENC_CODERANGE_CLEAR(str);
    return str;
}

#freezeObject

:nodoc:



3004
3005
3006
3007
3008
3009
3010
# File 'string.c', line 3004

VALUE
rb_str_freeze(VALUE str)
{
    if (OBJ_FROZEN(str)) return str;
    rb_str_resize(str, RSTRING_LEN(str));
    return rb_obj_freeze(str);
}

#getbyte(index) ⇒ Integer?

Returns the byte at zero-based index as an integer, or nil if index is out of range:

s = 'abcde'   # => "abcde"
s.getbyte(0)  # => 97
s.getbyte(-1) # => 101
s.getbyte(5)  # => nil

Related: String#setbyte.

Returns:



6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
# File 'string.c', line 6135

VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long pos = NUM2LONG(index);

    if (pos < 0)
        pos += RSTRING_LEN(str);
    if (pos < 0 ||  RSTRING_LEN(str) <= pos)
        return Qnil;

    return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
}

#grapheme_clustersObject

:include: doc/string/grapheme_clusters.rdoc



9543
9544
9545
9546
9547
9548
# File 'string.c', line 9543

static VALUE
rb_str_grapheme_clusters(VALUE str)
{
    VALUE ary = WANTARRAY("grapheme_clusters", rb_str_strlen(str));
    return rb_str_enumerate_grapheme_clusters(str, ary);
}

#gsub(pattern, replacement) ⇒ Object #gsub(pattern) {|match| ... } ⇒ Object #gsub(pattern) ⇒ Object

Returns a copy of self with all occurrences of the given pattern replaced.

See Substitution Methods.

Returns an Enumerator if no replacement and no block given.

Related: String#sub, String#sub!, String#gsub!.

Overloads:

  • #gsub(pattern) {|match| ... } ⇒ Object

    Yields:



6051
6052
6053
6054
6055
# File 'string.c', line 6051

static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    return str_gsub(argc, argv, str, 0);
}

#gsub!(pattern, replacement) ⇒ self? #gsub!(pattern) {|match| ... } ⇒ self? #gsub!(pattern) ⇒ Object

Performs the specified substring replacement(s) on self; returns self if any replacement occurred, nil otherwise.

See Substitution Methods.

Returns an Enumerator if no replacement and no block given.

Related: String#sub, String#gsub, String#sub!.

Overloads:

  • #gsub!(pattern, replacement) ⇒ self?

    Returns:

    • (self, nil)
  • #gsub!(pattern) {|match| ... } ⇒ self?

    Yields:

    Returns:

    • (self, nil)


6027
6028
6029
6030
6031
6032
# File 'string.c', line 6027

static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    str_modify_keep_cr(str);
    return str_gsub(argc, argv, str, 1);
}

#hashInteger

Returns the integer hash value for self. The value is based on the length, content and encoding of self.

Related: Object#hash.

Returns:



3656
3657
3658
3659
3660
3661
# File 'string.c', line 3656

static VALUE
rb_str_hash_m(VALUE str)
{
    st_index_t hval = rb_str_hash(str);
    return ST2FIX(hval);
}

#hexInteger

Interprets the leading substring of self as a string of hexadecimal digits (with an optional sign and an optional 0x) and returns the corresponding number; returns zero if there is no such leading substring:

'0x0a'.hex        # => 10
'-1234'.hex       # => -4660
'0'.hex           # => 0
'non-numeric'.hex # => 0

Related: String#oct.

Returns:



10174
10175
10176
10177
10178
# File 'string.c', line 10174

static VALUE
rb_str_hex(VALUE str)
{
    return rb_str_to_inum(str, 16, FALSE);
}

#include?(other_string) ⇒ Boolean

Returns true if self contains other_string, false otherwise:

s = 'foo'
s.include?('f')    # => true
s.include?('fo')   # => true
s.include?('food') # => false

Returns:

  • (Boolean)


6552
6553
6554
6555
6556
6557
6558
6559
6560
6561
# File 'string.c', line 6552

VALUE
rb_str_include(VALUE str, VALUE arg)
{
    long i;

    StringValue(arg);
    i = rb_str_index(str, arg, 0);

    return RBOOL(i != -1);
}

#index(substring, offset = 0) ⇒ Integer? #index(regexp, offset = 0) ⇒ Integer?

:include: doc/string/index.rdoc

Overloads:



4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
# File 'string.c', line 4028

static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        long slen = str_strlen(str, enc); /* str's enc */
        pos = NUM2LONG(initpos);
        if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }
    else {
        pos = 0;
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         enc, single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 0) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = rb_str_sublen(str, BEG(0));
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_index(str, sub, pos);
        if (pos >= 0) {
            pos = rb_str_sublen(str, pos);
            return LONG2NUM(pos);
        }
    }
    return Qnil;
}

#replace(other_string) ⇒ self

Replaces the contents of self with the contents of other_string:

s = 'foo'        # => "foo"
s.replace('bar') # => "bar"

Returns:

  • (self)


6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
# File 'string.c', line 6069

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#insert(index, other_string) ⇒ self

Inserts the given other_string into self; returns self.

If the Integer index is positive, inserts other_string at offset index:

'foo'.insert(1, 'bar') # => "fbaroo"

If the Integer index is negative, counts backward from the end of self and inserts other_string at offset index+1 (that is, after self[index]):

'foo'.insert(-2, 'bar') # => "fobaro"

Returns:

  • (self)


5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
# File 'string.c', line 5544

static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long pos = NUM2LONG(idx);

    if (pos == -1) {
        return rb_str_append(str, str2);
    }
    else if (pos < 0) {
        pos++;
    }
    rb_str_update(str, pos, 0, str2);
    return str;
}

#inspectString

Returns a printable version of self, enclosed in double-quotes, and with special characters escaped:

s = "foo\tbar\tbaz\n"
s.inspect
# => "\"foo\\tbar\\tbaz\\n\""

Returns:



6781
6782
6783
6784
6785
6786
6787
6788
6789
6790
6791
6792
6793
6794
6795
6796
6797
6798
6799
6800
6801
6802
6803
6804
6805
6806
6807
6808
6809
6810
6811
6812
6813
6814
6815
6816
6817
6818
6819
6820
6821
6822
6823
6824
6825
6826
6827
6828
6829
6830
6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6856
6857
6858
6859
6860
6861
6862
6863
6864
6865
6866
6867
6868
6869
6870
6871
6872
6873
6874
6875
6876
# File 'string.c', line 6781

VALUE
rb_str_inspect(VALUE str)
{
    int encidx = ENCODING_GET(str);
    rb_encoding *enc = rb_enc_from_index(encidx);
    const char *p, *pend, *prev;
    char buf[CHAR_ESC_LEN + 1];
    VALUE result = rb_str_buf_new(0);
    rb_encoding *resenc = rb_default_internal_encoding();
    int unicode_p = rb_enc_unicode_p(enc);
    int asciicompat = rb_enc_asciicompat(enc);

    if (resenc == NULL) resenc = rb_default_external_encoding();
    if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
    rb_enc_associate(result, resenc);
    str_buf_cat2(result, "\"");

    p = RSTRING_PTR(str); pend = RSTRING_END(str);
    prev = p;
    while (p < pend) {
        unsigned int c, cc;
        int n;

        n = rb_enc_precise_mbclen(p, pend, enc);
        if (!MBCLEN_CHARFOUND_P(n)) {
            if (p > prev) str_buf_cat(result, prev, p - prev);
            n = rb_enc_mbminlen(enc);
            if (pend < p + n)
                n = (int)(pend - p);
            while (n--) {
                snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
                str_buf_cat(result, buf, strlen(buf));
                prev = ++p;
            }
            continue;
        }
        n = MBCLEN_CHARFOUND_LEN(n);
        c = rb_enc_mbc_to_codepoint(p, pend, enc);
        p += n;
        if ((asciicompat || unicode_p) &&
          (c == '"'|| c == '\\' ||
            (c == '#' &&
             p < pend &&
             MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
             (cc = rb_enc_codepoint(p,pend,enc),
              (cc == '$' || cc == '@' || cc == '{'))))) {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            str_buf_cat2(result, "\\");
            if (asciicompat || enc == resenc) {
                prev = p - n;
                continue;
            }
        }
        switch (c) {
          case '\n': cc = 'n'; break;
          case '\r': cc = 'r'; break;
          case '\t': cc = 't'; break;
          case '\f': cc = 'f'; break;
          case '\013': cc = 'v'; break;
          case '\010': cc = 'b'; break;
          case '\007': cc = 'a'; break;
          case 033: cc = 'e'; break;
          default: cc = 0; break;
        }
        if (cc) {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            buf[0] = '\\';
            buf[1] = (char)cc;
            str_buf_cat(result, buf, 2);
            prev = p;
            continue;
        }
        /* The special casing of 0x85 (NEXT_LINE) here is because
         * Oniguruma historically treats it as printable, but it
         * doesn't match the print POSIX bracket class or character
         * property in regexps.
         *
         * See Ruby Bug #16842 for details:
         * https://bugs.ruby-lang.org/issues/16842
         */
        if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
            (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
            continue;
        }
        else {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            rb_str_buf_cat_escaped_char(result, c, unicode_p);
            prev = p;
            continue;
        }
    }
    if (p > prev) str_buf_cat(result, prev, p - prev);
    str_buf_cat2(result, "\"");

    return result;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"


860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
# File 'symbol.c', line 860

VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (sym) {
            // ok
        }
        else if (USE_SYMBOL_GC) {
            rb_encoding *enc = rb_enc_get(str);
            rb_encoding *ascii = rb_usascii_encoding();
            if (enc != ascii && sym_check_asciionly(str, false)) {
                str = rb_str_dup(str);
                rb_enc_associate(str, ascii);
                OBJ_FREEZE(str);
                enc = ascii;
            }
            else {
                str = rb_str_dup(str);
                OBJ_FREEZE(str);
            }
            str = rb_fstring(str);
            int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) type = ID_JUNK;
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
        }
        else {
            ID id = intern_str(str, 0);
            sym = ID2SYM(id);
        }
    }
    GLOBAL_SYMBOLS_LEAVE();
    return sym;
}

#lengthInteger

:include: doc/string/length.rdoc

Returns:



2207
2208
2209
2210
2211
# File 'string.c', line 2207

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#lines(Line_sep = $/, chomp: false) ⇒ Object

Forms substrings (“lines”) of self according to the given arguments (see String#each_line for details); returns the lines in an array.



9193
9194
9195
9196
9197
9198
# File 'string.c', line 9193

static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
    VALUE ary = WANTARRAY("lines", 0);
    return rb_str_enumerate_lines(argc, argv, str, ary);
}

#ljust(size, pad_string = ' ') ⇒ Object

:include: doc/string/ljust.rdoc

Related: String#rjust, String#center.



10518
10519
10520
10521
10522
# File 'string.c', line 10518

static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'l');
}

#lstripObject

Returns a copy of self with leading whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s        # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.lstrip # => "abc\u0000\t\n\v\f\r "

Related: String#rstrip, String#strip.



9856
9857
9858
9859
9860
9861
9862
9863
9864
9865
# File 'string.c', line 9856

static VALUE
rb_str_lstrip(VALUE str)
{
    char *start;
    long len, loffset;
    RSTRING_GETMEM(str, start, len);
    loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
    if (loffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, loffset, len - loffset);
}

#lstrip!self?

Like String#lstrip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#rstrip!, String#strip!.

Returns:

  • (self, nil)


9818
9819
9820
9821
9822
9823
9824
9825
9826
9827
9828
9829
9830
9831
9832
9833
9834
9835
9836
9837
9838
# File 'string.c', line 9818

static VALUE
rb_str_lstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start, *s;
    long olen, loffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    if (loffset > 0) {
        long len = olen-loffset;
        s = start + loffset;
        memmove(start, s, len);
        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#match(pattern, offset = 0) ⇒ MatchData? #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object

Returns a MatchData object (or nil) based on self and the given pattern.

Note: also updates Regexp@Global+Variables.

  • Computes regexp by converting pattern (if not already a Regexp).

    regexp = Regexp.new(pattern)
    
  • Computes matchdata, which will be either a MatchData object or nil (see Regexp#match):

    matchdata = <tt>regexp.match(self)
    

With no block given, returns the computed matchdata:

'foo'.match('f') # => #<MatchData "f">
'foo'.match('o') # => #<MatchData "o">
'foo'.match('x') # => nil

If Integer argument offset is given, the search begins at index offset:

'foo'.match('f', 1) # => nil
'foo'.match('o', 1) # => #<MatchData "o">

With a block given, calls the block with the computed matchdata and returns the block’s return value:

'foo'.match(/o/) {|matchdata| matchdata } # => #<MatchData "o">
'foo'.match(/x/) {|matchdata| matchdata } # => nil
'foo'.match(/f/, 1) {|matchdata| matchdata } # => nil

Overloads:

  • #match(pattern, offset = 0) ⇒ MatchData?

    Returns:

  • #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object

    Yields:

    • (matchdata)

    Returns:



4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
# File 'string.c', line 4571

static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    VALUE re, result;
    if (argc < 1)
        rb_check_arity(argc, 1, 2);
    re = argv[0];
    argv[0] = str;
    result = rb_funcallv(get_pat(re), rb_intern("match"), argc, argv);
    if (!NIL_P(result) && rb_block_given_p()) {
        return rb_yield(result);
    }
    return result;
}

#match?(pattern, offset = 0) ⇒ Boolean

Returns true or false based on whether a match is found for self and pattern.

Note: does not update Regexp@Global+Variables.

Computes regexp by converting pattern (if not already a Regexp).

regexp = Regexp.new(pattern)

Returns true if self+.match(regexp) returns a MatchData object, false otherwise:

'foo'.match?(/o/) # => true
'foo'.match?('o') # => true
'foo'.match?(/x/) # => false

If Integer argument offset is given, the search begins at index offset:

'foo'.match?('f', 1) # => false
'foo'.match?('o', 1) # => true

Returns:

  • (Boolean)


4610
4611
4612
4613
4614
4615
4616
4617
# File 'string.c', line 4610

static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    VALUE re;
    rb_check_arity(argc, 1, 2);
    re = get_pat(argv[0]);
    return rb_reg_match_p(re, str, argc > 1 ? NUM2LONG(argv[1]) : 0);
}

#succString

Returns the successor to self. The successor is calculated by incrementing characters.

The first character to be incremented is the rightmost alphanumeric: or, if no alphanumerics, the rightmost character:

'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'

The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:

'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"

The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:

'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"

The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:

s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"

Carrying can occur between and among mixtures of alphanumeric characters:

s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"

The successor to an empty String is a new empty String:

''.succ # => ""

Returns:



4862
4863
4864
4865
4866
4867
4868
4869
# File 'string.c', line 4862

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!self

Equivalent to String#succ, but modifies self in place; returns self.

Returns:

  • (self)


4966
4967
4968
4969
4970
4971
4972
# File 'string.c', line 4966

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#octInteger

Interprets the leading substring of self as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:

'123'.oct             # => 83
'-377'.oct            # => -255
'0377non-numeric'.oct # => 255
'non-numeric'.oct     # => 0

If self starts with 0, radix indicators are honored; see Kernel#Integer.

Related: String#hex.

Returns:



10201
10202
10203
10204
10205
# File 'string.c', line 10201

static VALUE
rb_str_oct(VALUE str)
{
    return rb_str_to_inum(str, -8, FALSE);
}

#ordInteger

:include: doc/string/ord.rdoc

Returns:



10346
10347
10348
10349
10350
10351
10352
10353
# File 'string.c', line 10346

static VALUE
rb_str_ord(VALUE s)
{
    unsigned int c;

    c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s));
    return UINT2NUM(c);
}

#partition(string_or_regexp) ⇒ Array

:include: doc/string/partition.rdoc

Returns:



10565
10566
10567
10568
10569
10570
10571
10572
10573
10574
10575
10576
10577
10578
10579
10580
10581
10582
10583
10584
10585
10586
10587
10588
10589
10590
10591
10592
# File 'string.c', line 10565

static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
    long pos;

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
        if (rb_reg_search(sep, str, 0, 0) < 0) {
            goto failed;
        }
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
    }
    else {
        pos = rb_str_index(str, sep, 0);
        if (pos < 0) goto failed;
    }
    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                          sep,
                          rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                             RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));

  failed:
    return rb_ary_new3(3, str_duplicate(rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
}

#prepend(*other_strings) ⇒ String

Prepends each string in other_strings to self and returns self:

s = 'foo'
s.prepend('bar', 'baz') # => "barbazfoo"
s                       # => "barbazfoo"

Related: String#concat.

Returns:



3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
# File 'string.c', line 3602

static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        rb_str_update(str, 0L, 0L, argv[0]);
    }
    else if (argc > 1) {
        int i;
        VALUE arg_str = rb_str_tmp_new(0);
        rb_enc_copy(arg_str, str);
        for (i = 0; i < argc; i++) {
            rb_str_append(arg_str, argv[i]);
        }
        rb_str_update(str, 0L, 0L, arg_str);
    }

    return str;
}

#replace(other_string) ⇒ self

Replaces the contents of self with the contents of other_string:

s = 'foo'        # => "foo"
s.replace('bar') # => "bar"

Returns:

  • (self)


6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
# File 'string.c', line 6069

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#reverseString

Returns a new string with the characters from self in reverse order.

'stressed'.reverse # => "desserts"

Returns:



6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
# File 'string.c', line 6449

static VALUE
rb_str_reverse(VALUE str)
{
    rb_encoding *enc;
    VALUE rev;
    char *s, *e, *p;
    int cr;

    if (RSTRING_LEN(str) <= 1) return str_duplicate(rb_cString, str);
    enc = STR_ENC_GET(str);
    rev = rb_str_new(0, RSTRING_LEN(str));
    s = RSTRING_PTR(str); e = RSTRING_END(str);
    p = RSTRING_END(rev);
    cr = ENC_CODERANGE(str);

    if (RSTRING_LEN(str) > 1) {
        if (single_byte_optimizable(str)) {
            while (s < e) {
                *--p = *s++;
            }
        }
        else if (cr == ENC_CODERANGE_VALID) {
            while (s < e) {
                int clen = rb_enc_fast_mbclen(s, e, enc);

                p -= clen;
                memcpy(p, s, clen);
                s += clen;
            }
        }
        else {
            cr = rb_enc_asciicompat(enc) ?
                ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
            while (s < e) {
                int clen = rb_enc_mbclen(s, e, enc);

                if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
                p -= clen;
                memcpy(p, s, clen);
                s += clen;
            }
        }
    }
    STR_SET_LEN(rev, RSTRING_LEN(str));
    str_enc_copy_direct(rev, str);
    ENC_CODERANGE_SET(rev, cr);

    return rev;
}

#reverse!self

Returns self with its characters reversed:

s = 'stressed'
s.reverse! # => "desserts"
s          # => "desserts"

Returns:

  • (self)


6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536
# File 'string.c', line 6512

static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) > 1) {
        if (single_byte_optimizable(str)) {
            char *s, *e, c;

            str_modify_keep_cr(str);
            s = RSTRING_PTR(str);
            e = RSTRING_END(str) - 1;
            while (s < e) {
                c = *s;
                *s++ = *e;
                *e-- = c;
            }
        }
        else {
            str_shared_replace(str, rb_str_reverse(str));
        }
    }
    else {
        str_modify_keep_cr(str);
    }
    return str;
}

#rindex(substring, offset = self.length) ⇒ Integer? #rindex(regexp, offset = self.length) ⇒ Integer?

Returns the Integer index of the last occurrence of the given substring, or nil if none found:

'foo'.rindex('f') # => 0
'foo'.rindex('o') # => 2
'foo'.rindex('oo') # => 1
'foo'.rindex('ooo') # => nil

Returns the Integer index of the last match for the given Regexp regexp, or nil if none found:

'foo'.rindex(/f/) # => 0
'foo'.rindex(/o/) # => 2
'foo'.rindex(/oo/) # => 1
'foo'.rindex(/ooo/) # => nil

The last match means starting at the possible last position, not the last of longest matches.

'foo'.rindex(/o+/) # => 2
$~ #=> #<MatchData "o">

To get the last longest match, needs to combine with negative lookbehind.

'foo'.rindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">

Or String#index with negative lookforward.

'foo'.index(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">

Integer argument offset, if given and non-negative, specifies the maximum starting position in the

string to _end_ the search:

 'foo'.rindex('o', 0) # => nil
 'foo'.rindex('o', 1) # => 1
 'foo'.rindex('o', 2) # => 2
 'foo'.rindex('o', 3) # => 2

If offset is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset:

'foo'.rindex('o', -1) # => 2
'foo'.rindex('o', -2) # => 1
'foo'.rindex('o', -3) # => nil
'foo'.rindex('o', -4) # => nil

Related: String#index.

Overloads:

  • #rindex(substring, offset = self.length) ⇒ Integer?

    Returns:

  • #rindex(regexp, offset = self.length) ⇒ Integer?

    Returns:



4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
# File 'string.c', line 4314

static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos, len = str_strlen(str, enc); /* str's enc */

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0 && (pos += len) < 0) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
        if (pos > len) pos = len;
    }
    else {
        pos = len;
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        /* enc = rb_enc_check(str, sub); */
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         enc, single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 1) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = rb_str_sublen(str, BEG(0));
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_rindex(str, sub, pos);
        if (pos >= 0) {
            pos = rb_str_sublen(str, pos);
            return LONG2NUM(pos);
        }
    }
    return Qnil;
}

#rjust(size, pad_string = ' ') ⇒ Object

:include: doc/string/rjust.rdoc

Related: String#ljust, String#center.



10534
10535
10536
10537
10538
# File 'string.c', line 10534

static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'r');
}

#rpartition(sep) ⇒ Array

:include: doc/string/rpartition.rdoc

Returns:



10602
10603
10604
10605
10606
10607
10608
10609
10610
10611
10612
10613
10614
10615
10616
10617
10618
10619
10620
10621
10622
10623
10624
10625
10626
10627
10628
10629
10630
10631
10632
# File 'string.c', line 10602

static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
    long pos = RSTRING_LEN(str);

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
        if (rb_reg_search(sep, str, pos, 1) < 0) {
            goto failed;
        }
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
    }
    else {
        pos = rb_str_sublen(str, pos);
        pos = rb_str_rindex(str, sep, pos);
        if (pos < 0) {
            goto failed;
        }
    }

    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                          sep,
                          rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                        RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
  failed:
    return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(rb_cString, str));
}

#rstripObject

Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s        # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.rstrip # => "\u0000\t\n\v\f\r abc"

Related: String#lstrip, String#strip.



9943
9944
9945
9946
9947
9948
9949
9950
9951
9952
9953
9954
9955
9956
# File 'string.c', line 9943

static VALUE
rb_str_rstrip(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);

    if (roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, 0, olen-roffset);
}

#rstrip!self?

Like String#rstrip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#lstrip!, String#strip!.

Returns:

  • (self, nil)


9906
9907
9908
9909
9910
9911
9912
9913
9914
9915
9916
9917
9918
9919
9920
9921
9922
9923
9924
9925
# File 'string.c', line 9906

static VALUE
rb_str_rstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);
    if (roffset > 0) {
        long len = olen - roffset;

        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#scan(string_or_regexp) ⇒ Array #scan(string_or_regexp) {|matches| ... } ⇒ self

Matches a pattern against self; the pattern is:

  • string_or_regexp itself, if it is a Regexp.

  • Regexp.quote(string_or_regexp), if string_or_regexp is a string.

Iterates through self, generating a collection of matching results:

  • If the pattern contains no groups, each result is the matched string, $&.

  • If the pattern contains groups, each result is an array containing one entry per group.

With no block given, returns an array of the results:

s = 'cruel world'
s.scan(/\w+/)      # => ["cruel", "world"]
s.scan(/.../)      # => ["cru", "el ", "wor"]
s.scan(/(...)/)    # => [["cru"], ["el "], ["wor"]]
s.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]]

With a block given, calls the block with each result; returns self:

s.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
s.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"

Output:

<<cruel>> <<world>>
rceu lowlr

Overloads:

  • #scan(string_or_regexp) ⇒ Array

    Returns:

  • #scan(string_or_regexp) {|matches| ... } ⇒ self

    Yields:

    • (matches)

    Returns:

    • (self)


10122
10123
10124
10125
10126
10127
10128
10129
10130
10131
10132
10133
10134
10135
10136
10137
10138
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
10150
10151
10152
10153
# File 'string.c', line 10122

static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE result;
    long start = 0;
    long last = -1, prev = 0;
    char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);
    if (!rb_block_given_p()) {
        VALUE ary = rb_ary_new();

        while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
            last = prev;
            prev = start;
            rb_ary_push(ary, result);
        }
        if (last >= 0) rb_pat_search(pat, str, last, 1);
        else rb_backref_set(Qnil);
        return ary;
    }

    while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
        last = prev;
        prev = start;
        rb_yield(result);
        str_mod_check(str, p, len);
    }
    if (last >= 0) rb_pat_search(pat, str, last, 1);
    return str;
}

#scrub(replacement_string = default_replacement) ⇒ Object #scrub {|bytes| ... } ⇒ Object

:include: doc/string/scrub.rdoc

Overloads:



11345
11346
11347
11348
11349
11350
11351
# File 'string.c', line 11345

static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    return NIL_P(new) ? str_duplicate(rb_cString, str): new;
}

#scrub!self #scrub!(replacement_string = default_replacement) ⇒ self #scrub! {|bytes| ... } ⇒ self

Like String#scrub, except that any replacements are made in self.

Overloads:

  • #scrub!self

    Returns:

    • (self)
  • #scrub!(replacement_string = default_replacement) ⇒ self

    Returns:

    • (self)
  • #scrub! {|bytes| ... } ⇒ self

    Yields:

    Returns:

    • (self)


11362
11363
11364
11365
11366
11367
11368
11369
# File 'string.c', line 11362

static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    if (!NIL_P(new)) rb_str_replace(str, new);
    return str;
}

#setbyte(index, integer) ⇒ Integer

Sets the byte at zero-based index to integer; returns integer:

s = 'abcde'      # => "abcde"
s.setbyte(0, 98) # => 98
s                # => "bbcde"

Related: String#getbyte.

Returns:



6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
# File 'string.c', line 6160

static VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
    long pos = NUM2LONG(index);
    long len = RSTRING_LEN(str);
    char *ptr, *head, *left = 0;
    rb_encoding *enc;
    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;

    if (pos < -len || len <= pos)
        rb_raise(rb_eIndexError, "index %ld out of string", pos);
    if (pos < 0)
        pos += len;

    VALUE v = rb_to_int(value);
    VALUE w = rb_int_and(v, INT2FIX(0xff));
    char byte = (char)(NUM2INT(w) & 0xFF);

    if (!str_independent(str))
        str_make_independent(str);
    enc = STR_ENC_GET(str);
    head = RSTRING_PTR(str);
    ptr = &head[pos];
    if (!STR_EMBED_P(str)) {
        cr = ENC_CODERANGE(str);
        switch (cr) {
          case ENC_CODERANGE_7BIT:
            left = ptr;
            *ptr = byte;
            if (ISASCII(byte)) goto end;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else
                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
            goto end;
          case ENC_CODERANGE_VALID:
            left = rb_enc_left_char_head(head, ptr, head+len, enc);
            width = rb_enc_precise_mbclen(left, head+len, enc);
            *ptr = byte;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
                ENC_CODERANGE_CLEAR(str);
            goto end;
        }
    }
    ENC_CODERANGE_CLEAR(str);
    *ptr = byte;

  end:
    return value;
}

#lengthInteger

:include: doc/string/length.rdoc

Returns:



2207
2208
2209
2210
2211
# File 'string.c', line 2207

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil

Returns the substring of self specified by the arguments. See examples at String Slices.

Overloads:

  • #[](index) ⇒ nil

    Returns:

    • (nil)
  • #[](start, length) ⇒ nil

    Returns:

    • (nil)
  • #[](range) ⇒ nil

    Returns:

    • (nil)
  • #[](regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #[](substring) ⇒ nil

    Returns:

    • (nil)


5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
# File 'string.c', line 5273

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            return rb_str_subpat(str, argv[0], argv[1]);
        }
        else {
            long beg = NUM2LONG(argv[0]);
            long len = NUM2LONG(argv[1]);
            return rb_str_substr(str, beg, len);
        }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#slice!(index) ⇒ nil #slice!(start, length) ⇒ nil #slice!(range) ⇒ nil #slice!(regexp, capture = 0) ⇒ nil #slice!(substring) ⇒ nil

Removes and returns the substring of self specified by the arguments. See String Slices.

A few examples:

string = "This is a string"
string.slice!(2)        #=> "i"
string.slice!(3..6)     #=> " is "
string.slice!(/s.*t/)   #=> "sa st"
string.slice!("r")      #=> "r"
string                  #=> "Thing"

Overloads:

  • #slice!(index) ⇒ nil

    Returns:

    • (nil)
  • #slice!(start, length) ⇒ nil

    Returns:

    • (nil)
  • #slice!(range) ⇒ nil

    Returns:

    • (nil)
  • #slice!(regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #slice!(substring) ⇒ nil

    Returns:

    • (nil)


5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
5626
5627
5628
5629
5630
5631
5632
5633
5634
5635
5636
5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
# File 'string.c', line 5582

static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE result = Qnil;
    VALUE indx;
    long beg, len = 1;
    char *p;

    rb_check_arity(argc, 1, 2);
    str_modify_keep_cr(str);
    indx = argv[0];
    if (RB_TYPE_P(indx, T_REGEXP)) {
        if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);
        int nth = 0;
        if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
            if ((nth += regs->num_regs) <= 0) return Qnil;
        }
        else if (nth >= regs->num_regs) return Qnil;
        beg = BEG(nth);
        len = END(nth) - beg;
        goto subseq;
    }
    else if (argc == 2) {
        beg = NUM2LONG(indx);
        len = NUM2LONG(argv[1]);
        goto num_index;
    }
    else if (FIXNUM_P(indx)) {
        beg = FIX2LONG(indx);
        if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
        if (!len) return Qnil;
        beg = p - RSTRING_PTR(str);
        goto subseq;
    }
    else if (RB_TYPE_P(indx, T_STRING)) {
        beg = rb_str_index(str, indx, 0);
        if (beg == -1) return Qnil;
        len = RSTRING_LEN(indx);
        result = str_duplicate(rb_cString, indx);
        goto squash;
    }
    else {
        switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
          case Qnil:
            return Qnil;
          case Qfalse:
            beg = NUM2LONG(indx);
            if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
            if (!len) return Qnil;
            beg = p - RSTRING_PTR(str);
            goto subseq;
          default:
            goto num_index;
        }
    }

  num_index:
    if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
    beg = p - RSTRING_PTR(str);

  subseq:
    result = rb_str_new(RSTRING_PTR(str)+beg, len);
    rb_enc_cr_str_copy_for_substr(result, str);

  squash:
    if (len > 0) {
        if (beg == 0) {
            rb_str_drop_bytes(str, len);
        }
        else {
            char *sptr = RSTRING_PTR(str);
            long slen = RSTRING_LEN(str);
            if (beg + len > slen) /* pathological check */
                len = slen - beg;
            memmove(sptr + beg,
                    sptr + beg + len,
                    slen - (beg + len));
            slen -= len;
            STR_SET_LEN(str, slen);
            TERM_FILL(&sptr[slen], TERM_LEN(str));
        }
    }
    return result;
}

#split(field_sep = $;, limit = nil) ⇒ Array #split(field_sep = $;, limit = nil) {|substring| ... } ⇒ self

:include: doc/string/split.rdoc

Overloads:

  • #split(field_sep = $;, limit = nil) ⇒ Array

    Returns:

  • #split(field_sep = $;, limit = nil) {|substring| ... } ⇒ self

    Yields:

    • (substring)

    Returns:

    • (self)


8751
8752
8753
8754
8755
8756
8757
8758
8759
8760
8761
8762
8763
8764
8765
8766
8767
8768
8769
8770
8771
8772
8773
8774
8775
8776
8777
8778
8779
8780
8781
8782
8783
8784
8785
8786
8787
8788
8789
8790
8791
8792
8793
8794
8795
8796
8797
8798
8799
8800
8801
8802
8803
8804
8805
8806
8807
8808
8809
8810
8811
8812
8813
8814
8815
8816
8817
8818
8819
8820
8821
8822
8823
8824
8825
8826
8827
8828
8829
8830
8831
8832
8833
8834
8835
8836
8837
8838
8839
8840
8841
8842
8843
8844
8845
8846
8847
8848
8849
8850
8851
8852
8853
8854
8855
8856
8857
8858
8859
8860
8861
8862
8863
8864
8865
8866
8867
8868
8869
8870
8871
8872
8873
8874
8875
8876
8877
8878
8879
8880
8881
8882
8883
8884
8885
8886
8887
8888
8889
8890
8891
8892
8893
8894
8895
8896
8897
8898
8899
8900
8901
8902
8903
8904
8905
8906
8907
8908
8909
8910
8911
8912
8913
8914
8915
8916
8917
8918
8919
8920
8921
8922
8923
8924
8925
8926
8927
8928
8929
8930
8931
8932
8933
8934
8935
8936
8937
8938
8939
8940
8941
8942
8943
8944
8945
8946
8947
8948
8949
8950
8951
8952
8953
8954
8955
8956
8957
8958
8959
8960
8961
8962
8963
8964
8965
8966
8967
8968
8969
8970
8971
# File 'string.c', line 8751

static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    VALUE spat;
    VALUE limit;
    split_type_t split_type;
    long beg, end, i = 0, empty_count = -1;
    int lim = 0;
    VALUE result, tmp;

    result = rb_block_given_p() ? Qfalse : Qnil;
    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
        lim = NUM2INT(limit);
        if (lim <= 0) limit = Qnil;
        else if (lim == 1) {
            if (RSTRING_LEN(str) == 0)
                return result ? rb_ary_new2(0) : str;
            tmp = str_duplicate(rb_cString, str);
            if (!result) {
                rb_yield(tmp);
                return str;
            }
            return rb_ary_new3(1, tmp);
        }
        i = 1;
    }
    if (NIL_P(limit) && !lim) empty_count = 0;

    enc = STR_ENC_GET(str);
    split_type = SPLIT_TYPE_REGEXP;
    if (!NIL_P(spat)) {
        spat = get_pat_quoted(spat, 0);
    }
    else if (NIL_P(spat = rb_fs)) {
        split_type = SPLIT_TYPE_AWK;
    }
    else if (!(spat = rb_fs_check(spat))) {
        rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
    }
    else {
        rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "$; is set to non-nil value");
    }
    if (split_type != SPLIT_TYPE_AWK) {
        switch (BUILTIN_TYPE(spat)) {
          case T_REGEXP:
            rb_reg_options(spat); /* check if uninitialized */
            tmp = RREGEXP_SRC(spat);
            split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
            if (split_type == SPLIT_TYPE_AWK) {
                spat = tmp;
                split_type = SPLIT_TYPE_STRING;
            }
            break;

          case T_STRING:
            mustnot_broken(spat);
            split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
            break;

          default:
            UNREACHABLE_RETURN(Qnil);
        }
    }

#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))

    beg = 0;
    char *ptr = RSTRING_PTR(str);
    char *eptr = RSTRING_END(str);
    if (split_type == SPLIT_TYPE_AWK) {
        char *bptr = ptr;
        int skip = 1;
        unsigned int c;

        if (result) result = rb_ary_new();
        end = beg;
        if (is_ascii_string(str)) {
            while (ptr < eptr) {
                c = (unsigned char)*ptr++;
                if (skip) {
                    if (ascii_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (ascii_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
        else {
            while (ptr < eptr) {
                int n;

                c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
                ptr += n;
                if (skip) {
                    if (rb_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (rb_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
    }
    else if (split_type == SPLIT_TYPE_STRING) {
        char *str_start = ptr;
        char *substr_start = ptr;
        char *sptr = RSTRING_PTR(spat);
        long slen = RSTRING_LEN(spat);

        if (result) result = rb_ary_new();
        mustnot_broken(str);
        enc = rb_enc_check(str, spat);
        while (ptr < eptr &&
               (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
            /* Check we are at the start of a char */
            char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
            if (t != ptr + end) {
                ptr = t;
                continue;
            }
            SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
            ptr += end + slen;
            substr_start = ptr;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else if (split_type == SPLIT_TYPE_CHARS) {
        char *str_start = ptr;
        int n;

        if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
        mustnot_broken(str);
        enc = rb_enc_get(str);
        while (ptr < eptr &&
               (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
            SPLIT_STR(ptr - str_start, n);
            ptr += n;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else {
        if (result) result = rb_ary_new();
        long len = RSTRING_LEN(str);
        long start = beg;
        long idx;
        int last_null = 0;
        struct re_registers *regs;
        VALUE match = 0;

        for (; rb_reg_search(spat, str, start, 0) >= 0;
             (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
            match = rb_backref_get();
            if (!result) rb_match_busy(match);
            regs = RMATCH_REGS(match);
            end = BEG(0);
            if (start == end && BEG(0) == END(0)) {
                if (!ptr) {
                    SPLIT_STR(0, 0);
                    break;
                }
                else if (last_null == 1) {
                    SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
                    beg = start;
                }
                else {
                    if (start == len)
                        start++;
                    else
                        start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
                    last_null = 1;
                    continue;
                }
            }
            else {
                SPLIT_STR(beg, end-beg);
                beg = start = END(0);
            }
            last_null = 0;

            for (idx=1; idx < regs->num_regs; idx++) {
                if (BEG(idx) == -1) continue;
                SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
            }
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        if (match) rb_match_unbusy(match);
    }
    if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
        SPLIT_STR(beg, RSTRING_LEN(str)-beg);
    }

    return result ? result : str;
}

#squeeze(*selectors) ⇒ Object

Returns a copy of self with characters specified by selectors “squeezed” (see Multiple Character Selectors):

“Squeezed” means that each multiple-character run of a selected character is squeezed down to a single character; with no arguments given, squeezes all characters:

"yellow moon".squeeze                  #=> "yelow mon"
"  now   is  the".squeeze(" ")         #=> " now is the"
"putters shoot balls".squeeze("m-z")   #=> "puters shot balls"


8511
8512
8513
8514
8515
8516
8517
# File 'string.c', line 8511

static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_squeeze_bang(argc, argv, str);
    return str;
}

#squeeze!(*selectors) ⇒ self?

Like String#squeeze, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)


8418
8419
8420
8421
8422
8423
8424
8425
8426
8427
8428
8429
8430
8431
8432
8433
8434
8435
8436
8437
8438
8439
8440
8441
8442
8443
8444
8445
8446
8447
8448
8449
8450
8451
8452
8453
8454
8455
8456
8457
8458
8459
8460
8461
8462
8463
8464
8465
8466
8467
8468
8469
8470
8471
8472
8473
8474
8475
8476
8477
8478
8479
8480
8481
8482
8483
8484
8485
8486
8487
8488
8489
8490
8491
# File 'string.c', line 8418

static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0;
    unsigned char *s, *send, *t;
    int i, modify = 0;
    int ascompat, singlebyte = single_byte_optimizable(str);
    unsigned int save;

    if (argc == 0) {
        enc = STR_ENC_GET(str);
    }
    else {
        for (i=0; i<argc; i++) {
            VALUE s = argv[i];

            StringValue(s);
            enc = rb_enc_check(str, s);
            if (singlebyte && !single_byte_optimizable(s))
                singlebyte = 0;
            tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
        }
    }

    str_modify_keep_cr(str);
    s = t = (unsigned char *)RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return Qnil;
    send = (unsigned char *)RSTRING_END(str);
    save = -1;
    ascompat = rb_enc_asciicompat(enc);

    if (singlebyte) {
        while (s < send) {
            unsigned int c = *s++;
            if (c != save || (argc > 0 && !squeez[c])) {
                *t++ = save = c;
            }
        }
    }
    else {
        while (s < send) {
            unsigned int c;
            int clen;

            if (ascompat && (c = *s) < 0x80) {
                if (c != save || (argc > 0 && !squeez[c])) {
                    *t++ = save = c;
                }
                s++;
            }
            else {
                c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);

                if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
                    if (t != s) rb_enc_mbcput(c, t, enc);
                    save = c;
                    t += clen;
                }
                s += clen;
            }
        }
    }

    TERM_FILL((char *)t, TERM_LEN(str));
    if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
        STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
        modify = 1;
    }

    if (modify) return str;
    return Qnil;
}

#start_with?(*string_or_regexp) ⇒ Boolean

:include: doc/string/start_with_p.rdoc

Returns:

  • (Boolean)


10642
10643
10644
10645
10646
10647
10648
10649
10650
10651
10652
10653
10654
10655
10656
10657
10658
10659
10660
10661
10662
10663
10664
10665
10666
10667
10668
10669
10670
10671
10672
# File 'string.c', line 10642

static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
        VALUE tmp = argv[i];
        if (RB_TYPE_P(tmp, T_REGEXP)) {
            if (rb_reg_start_with_p(tmp, str))
                return Qtrue;
        }
        else {
            const char *p, *s, *e;
            long slen, tlen;
            rb_encoding *enc;

            StringValue(tmp);
            enc = rb_enc_check(str, tmp);
            if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
            if ((slen = RSTRING_LEN(str)) < tlen) continue;
            p = RSTRING_PTR(str);
            e = p + slen;
            s = p + tlen;
            if (!at_char_right_boundary(p, s, e, enc))
                continue;
            if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
                return Qtrue;
        }
    }
    return Qfalse;
}

#stripObject

Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s       # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.strip # => "abc"

Related: String#lstrip, String#rstrip.



10011
10012
10013
10014
10015
10016
10017
10018
10019
10020
10021
10022
10023
10024
# File 'string.c', line 10011

static VALUE
rb_str_strip(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, loffset, olen-loffset-roffset);
}

#strip!self?

Like String#strip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#lstrip!, String#strip!.

Returns:

  • (self, nil)


9969
9970
9971
9972
9973
9974
9975
9976
9977
9978
9979
9980
9981
9982
9983
9984
9985
9986
9987
9988
9989
9990
9991
9992
9993
# File 'string.c', line 9969

static VALUE
rb_str_strip_bang(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset > 0 || roffset > 0) {
        long len = olen-roffset;
        if (loffset > 0) {
            len -= loffset;
            memmove(start, start + loffset, len);
        }
        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#sub(pattern, replacement) ⇒ Object #sub(pattern) {|match| ... } ⇒ Object

Returns a copy of self with only the first occurrence (not all occurrences) of the given pattern replaced.

See Substitution Methods.

Related: String#sub!, String#gsub, String#gsub!.

Overloads:

  • #sub(pattern) {|match| ... } ⇒ Object

    Yields:



5878
5879
5880
5881
5882
5883
5884
# File 'string.c', line 5878

static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_sub_bang(argc, argv, str);
    return str;
}

#sub!(pattern, replacement) ⇒ self? #sub!(pattern) {|match| ... } ⇒ self?

Returns self with only the first occurrence (not all occurrences) of the given pattern replaced.

See Substitution Methods.

Related: String#sub, String#gsub, String#gsub!.

Overloads:

  • #sub!(pattern, replacement) ⇒ self?

    Returns:

    • (self, nil)
  • #sub!(pattern) {|match| ... } ⇒ self?

    Yields:

    Returns:

    • (self, nil)


5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860
5861
# File 'string.c', line 5753

static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE pat, repl, hash = Qnil;
    int iter = 0;
    long plen;
    int min_arity = rb_block_given_p() ? 1 : 2;
    long beg;

    rb_check_arity(argc, min_arity, 2);
    if (argc == 1) {
        iter = 1;
    }
    else {
        repl = argv[1];
        hash = rb_check_hash_type(argv[1]);
        if (NIL_P(hash)) {
            StringValue(repl);
        }
    }

    pat = get_pat_quoted(argv[0], 1);

    str_modifiable(str);
    beg = rb_pat_search(pat, str, 0, 1);
    if (beg >= 0) {
        rb_encoding *enc;
        int cr = ENC_CODERANGE(str);
        long beg0, end0;
        VALUE match, match0 = Qnil;
        struct re_registers *regs;
        char *p, *rp;
        long len, rlen;

        match = rb_backref_get();
        regs = RMATCH_REGS(match);
        if (RB_TYPE_P(pat, T_STRING)) {
            beg0 = beg;
            end0 = beg0 + RSTRING_LEN(pat);
            match0 = pat;
        }
        else {
            beg0 = BEG(0);
            end0 = END(0);
            if (iter) match0 = rb_reg_nth_match(0, match);
        }

        if (iter || !NIL_P(hash)) {
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);

            if (iter) {
                repl = rb_obj_as_string(rb_yield(match0));
            }
            else {
                repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
                repl = rb_obj_as_string(repl);
            }
            str_mod_check(str, p, len);
            rb_check_frozen(str);
        }
        else {
            repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
        }

        enc = rb_enc_compatible(str, repl);
        if (!enc) {
            rb_encoding *str_enc = STR_ENC_GET(str);
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);
            if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
                coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
                rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
                         rb_enc_name(str_enc),
                         rb_enc_name(STR_ENC_GET(repl)));
            }
            enc = STR_ENC_GET(repl);
        }
        rb_str_modify(str);
        rb_enc_associate(str, enc);
        if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
            int cr2 = ENC_CODERANGE(repl);
            if (cr2 == ENC_CODERANGE_BROKEN ||
                (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
                cr = ENC_CODERANGE_UNKNOWN;
            else
                cr = cr2;
        }
        plen = end0 - beg0;
        rlen = RSTRING_LEN(repl);
        len = RSTRING_LEN(str);
        if (rlen > plen) {
            RESIZE_CAPA(str, len + rlen - plen);
        }
        p = RSTRING_PTR(str);
        if (rlen != plen) {
            memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
        }
        rp = RSTRING_PTR(repl);
        memmove(p + beg0, rp, rlen);
        len += rlen - plen;
        STR_SET_LEN(str, len);
        TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
        ENC_CODERANGE_SET(str, cr);

        RB_GC_GUARD(match);

        return str;
    }
    return Qnil;
}

#succString

Returns the successor to self. The successor is calculated by incrementing characters.

The first character to be incremented is the rightmost alphanumeric: or, if no alphanumerics, the rightmost character:

'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'

The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:

'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"

The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:

'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"

The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:

s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"

Carrying can occur between and among mixtures of alphanumeric characters:

s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"

The successor to an empty String is a new empty String:

''.succ # => ""

Returns:



4862
4863
4864
4865
4866
4867
4868
4869
# File 'string.c', line 4862

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!self

Equivalent to String#succ, but modifies self in place; returns self.

Returns:

  • (self)


4966
4967
4968
4969
4970
4971
4972
# File 'string.c', line 4966

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#sum(n = 16) ⇒ Integer

:include: doc/string/sum.rdoc

Returns:



10362
10363
10364
10365
10366
10367
10368
10369
10370
10371
10372
10373
10374
10375
10376
10377
10378
10379
10380
10381
10382
10383
10384
10385
10386
10387
10388
10389
10390
10391
10392
10393
10394
10395
10396
10397
10398
10399
10400
10401
10402
10403
10404
10405
10406
10407
10408
10409
10410
10411
10412
10413
# File 'string.c', line 10362

static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
    int bits = 16;
    char *ptr, *p, *pend;
    long len;
    VALUE sum = INT2FIX(0);
    unsigned long sum0 = 0;

    if (rb_check_arity(argc, 0, 1) && (bits = NUM2INT(argv[0])) < 0) {
        bits = 0;
    }
    ptr = p = RSTRING_PTR(str);
    len = RSTRING_LEN(str);
    pend = p + len;

    while (p < pend) {
        if (FIXNUM_MAX - UCHAR_MAX < sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            str_mod_check(str, ptr, len);
            sum0 = 0;
        }
        sum0 += (unsigned char)*p;
        p++;
    }

    if (bits == 0) {
        if (sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
        }
    }
    else {
        if (sum == INT2FIX(0)) {
            if (bits < (int)sizeof(long)*CHAR_BIT) {
                sum0 &= (((unsigned long)1)<<bits)-1;
            }
            sum = LONG2FIX(sum0);
        }
        else {
            VALUE mod;

            if (sum0) {
                sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            }

            mod = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
            mod = rb_funcall(mod, '-', 1, INT2FIX(1));
            sum = rb_funcall(sum, '&', 1, mod);
        }
    }
    return sum;
}

#swapcase(*options) ⇒ String

Returns a string containing the characters in self, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:

s = 'Hello World!' # => "Hello World!"
s.swapcase         # => "hELLO wORLD!"

The casing may be affected by the given options; see Case Mapping.

Related: String#swapcase!.

Returns:



7832
7833
7834
7835
7836
7837
7838
7839
7840
7841
7842
7843
7844
7845
7846
7847
7848
7849
7850
# File 'string.c', line 7832

static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str_duplicate(rb_cString, str);
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#swapcase!(*options) ⇒ self?

Upcases each lowercase character in self; downcases uppercase character; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.swapcase!        # => "hELLO wORLD!"
s                  # => "hELLO wORLD!"
''.swapcase!       # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#swapcase.

Returns:

  • (self, nil)


7795
7796
7797
7798
7799
7800
7801
7802
7803
7804
7805
7806
7807
7808
7809
7810
7811
# File 'string.c', line 7795

static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#to_cObject

Returns self interpreted as a Complex object; leading whitespace and trailing garbage are ignored:

'9'.to_c                 # => (9+0i)
'2.5'.to_c               # => (2.5+0i)
'2.5/1'.to_c             # => ((5/2)+0i)
'-3/2'.to_c              # => ((-3/2)+0i)
'-i'.to_c                # => (0-1i)
'45i'.to_c               # => (0+45i)
'3-4i'.to_c              # => (3-4i)
'-4e2-4e-2i'.to_c        # => (-400.0-0.04i)
'-0.0-0.0i'.to_c         # => (-0.0-0.0i)
'1/2+3/4i'.to_c          # => ((1/2)+(3/4)*i)
'1.0@0'.to_c             # => (1+0.0i)
"1.0@#{Math::PI/2}".to_c # => (0.0+1i)
"1.0@#{Math::PI}".to_c   # => (-1+0.0i)

Returns Complex zero if the string cannot be converted:

'ruby'.to_c        # => (0+0i)

See Kernel#Complex.



2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
# File 'complex.c', line 2260

static VALUE
string_to_c(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    (void)parse_comp(rb_str_fill_terminator(self, 1), FALSE, &num);

    return num;
}

#to_fFloat

Returns the result of interpreting leading characters in self as a Float:

'3.14159'.to_f  # => 3.14159
'1.234e-2'.to_f # => 0.01234

Characters past a leading valid number (in the given base) are ignored:

'3.14 (pi to two places)'.to_f # => 3.14

Returns zero if there is no leading valid number:

'abcdef'.to_f # => 0.0

Returns:



6627
6628
6629
6630
6631
# File 'string.c', line 6627

static VALUE
rb_str_to_f(VALUE str)
{
    return DBL2NUM(rb_str_to_dbl(str, FALSE));
}

#to_i(base = 10) ⇒ Integer

Returns the result of interpreting leading characters in self as an integer in the given base (which must be in (0, 2..36)):

'123456'.to_i     # => 123456
'123def'.to_i(16) # => 1195503

With base zero, string object may contain leading characters to specify the actual base:

'123def'.to_i(0)   # => 123
'0123def'.to_i(0)  # => 83
'0b123def'.to_i(0) # => 1
'0o123def'.to_i(0) # => 83
'0d123def'.to_i(0) # => 123
'0x123def'.to_i(0) # => 1195503

Characters past a leading valid number (in the given base) are ignored:

'12.345'.to_i   # => 12
'12345'.to_i(2) # => 1

Returns zero if there is no leading valid number:

'abcdef'.to_i # => 0
'2'.to_i(2)   # => 0

Returns:



6596
6597
6598
6599
6600
6601
6602
6603
6604
6605
# File 'string.c', line 6596

static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    if (rb_check_arity(argc, 0, 1) && (base = NUM2INT(argv[0])) < 0) {
        rb_raise(rb_eArgError, "invalid radix %d", base);
    }
    return rb_str_to_inum(str, base, FALSE);
}

#to_rObject

Returns the result of interpreting leading characters in str as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str, zero is returned. This method never raises an exception.

'  2  '.to_r       #=> (2/1)
'300/2'.to_r       #=> (150/1)
'-9.2'.to_r        #=> (-46/5)
'-9.2e2'.to_r      #=> (-920/1)
'1_234_567'.to_r   #=> (1234567/1)
'21 June 09'.to_r  #=> (21/1)
'21/06/09'.to_r    #=> (7/2)
'BWV 1079'.to_r    #=> (0/1)

NOTE: “0.3”.to_r isn’t the same as 0.3.to_r. The former is equivalent to “3/10”.to_r, but the latter isn’t so.

"0.3".to_r == 3/10r  #=> true
0.3.to_r   == 3/10r  #=> false

See also Kernel#Rational.



2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
# File 'rational.c', line 2524

static VALUE
string_to_r(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    num = parse_rat(RSTRING_PTR(self), RSTRING_END(self), 0, TRUE);

    if (RB_FLOAT_TYPE_P(num) && !FLOAT_ZERO_P(num))
        rb_raise(rb_eFloatDomainError, "Infinity");
    return num;
}

#to_sself, String

Returns self if self is a String, or self converted to a String if self is a subclass of String.

Returns:



6642
6643
6644
6645
6646
6647
6648
6649
# File 'string.c', line 6642

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
        return str_duplicate(rb_cString, str);
    }
    return str;
}

#to_sself, String

Returns self if self is a String, or self converted to a String if self is a subclass of String.

Returns:



6642
6643
6644
6645
6646
6647
6648
6649
# File 'string.c', line 6642

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
        return str_duplicate(rb_cString, str);
    }
    return str;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"


860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
# File 'symbol.c', line 860

VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (sym) {
            // ok
        }
        else if (USE_SYMBOL_GC) {
            rb_encoding *enc = rb_enc_get(str);
            rb_encoding *ascii = rb_usascii_encoding();
            if (enc != ascii && sym_check_asciionly(str, false)) {
                str = rb_str_dup(str);
                rb_enc_associate(str, ascii);
                OBJ_FREEZE(str);
                enc = ascii;
            }
            else {
                str = rb_str_dup(str);
                OBJ_FREEZE(str);
            }
            str = rb_fstring(str);
            int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) type = ID_JUNK;
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
        }
        else {
            ID id = intern_str(str, 0);
            sym = ID2SYM(id);
        }
    }
    GLOBAL_SYMBOLS_LEAVE();
    return sym;
}

#tr(selector, replacements) ⇒ Object

Returns a copy of self with each character specified by string selector translated to the corresponding character in string replacements. The correspondence is positional:

  • Each occurrence of the first character specified by selector is translated to the first character in replacements.

  • Each occurrence of the second character specified by selector is translated to the second character in replacements.

  • And so on.

Example:

'hello'.tr('el', 'ip') #=> "hippo"

If replacements is shorter than selector, it is implicitly padded with its own last character:

'hello'.tr('aeiou', '-')   # => "h-ll-"
'hello'.tr('aeiou', 'AA-') # => "hAll-"

Arguments selector and replacements must be valid character selectors (see Character Selectors), and may use any of its valid forms, including negation, ranges, and escaping:

# Negation.
'hello'.tr('^aeiou', '-') # => "-e--o"
# Ranges.
'ibm'.tr('b-z', 'a-z') # => "hal"
# Escapes.
'hel^lo'.tr('\^aeiou', '-')     # => "h-l-l-"    # Escaped leading caret.
'i-b-m'.tr('b\-z', 'a-z')       # => "ibabm"     # Escaped embedded hyphen.
'foo\\bar'.tr('ab\\', 'XYZ')    # => "fooZYXr"   # Escaped backslash.


8220
8221
8222
8223
8224
8225
8226
# File 'string.c', line 8220

static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    str = str_duplicate(rb_cString, str);
    tr_trans(str, src, repl, 0);
    return str;
}

#tr!(selector, replacements) ⇒ self?

Like String#tr, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)


8174
8175
8176
8177
8178
# File 'string.c', line 8174

static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 0);
}

#tr_s(selector, replacements) ⇒ String

Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).

'hello'.tr_s('l', 'r')   #=> "hero"
'hello'.tr_s('el', '-')  #=> "h-o"
'hello'.tr_s('el', 'hx') #=> "hhxo"

Related: String#squeeze.

Returns:



8552
8553
8554
8555
8556
8557
8558
# File 'string.c', line 8552

static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    str = str_duplicate(rb_cString, str);
    tr_trans(str, src, repl, 1);
    return str;
}

#tr_s!(selector, replacements) ⇒ self?

Like String#tr_s, but modifies self in place. Returns self if any changes were made, nil otherwise.

Related: String#squeeze!.

Returns:

  • (self, nil)


8530
8531
8532
8533
8534
# File 'string.c', line 8530

static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 1);
}

#undumpString

Returns an unescaped version of self:

s_orig = "\f\x00\xff\\\""    # => "\f\u0000\xFF\\\""
s_dumped = s_orig.dump       # => "\"\\f\\x00\\xFF\\\\\\\"\""
s_undumped = s_dumped.undump # => "\f\u0000\xFF\\\""
s_undumped == s_orig         # => true

Related: String#dump (inverse of String#undump).

Returns:



7190
7191
7192
7193
7194
7195
7196
7197
7198
7199
7200
7201
7202
7203
7204
7205
7206
7207
7208
7209
7210
7211
7212
7213
7214
7215
7216
7217
7218
7219
7220
7221
7222
7223
7224
7225
7226
7227
7228
7229
7230
7231
7232
7233
7234
7235
7236
7237
7238
7239
7240
7241
7242
7243
7244
7245
7246
7247
7248
7249
7250
7251
7252
7253
7254
7255
7256
7257
7258
7259
7260
7261
7262
7263
7264
7265
7266
7267
7268
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278
7279
# File 'string.c', line 7190

static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
        rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
        rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
        if (s >= s_end) {
            rb_raise(rb_eRuntimeError, "unterminated dumped string");
        }

        if (*s == '"') {
            /* epilogue */
            s++;
            if (s == s_end) {
                /* ascii compatible dumped string */
                break;
            }
            else {
                static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
                static const char dup_suffix[] = ".dup";
                const char *encname;
                int encidx;
                ptrdiff_t size;

                /* check separately for strings dumped by older versions */
                size = sizeof(dup_suffix) - 1;
                if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

                size = sizeof(force_encoding_suffix) - 1;
                if (s_end - s <= size) goto invalid_format;
                if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
                s += size;

                if (utf8) {
                    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
                }

                encname = s;
                s = memchr(s, '"', s_end-s);
                size = s - encname;
                if (!s) goto invalid_format;
                if (s_end - s != 2) goto invalid_format;
                if (s[0] != '"' || s[1] != ')') goto invalid_format;

                encidx = rb_enc_find_index2(encname, (long)size);
                if (encidx < 0) {
                    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
                }
                rb_enc_associate_index(undumped, encidx);
            }
            break;
        }

        if (*s == '\\') {
            s++;
            if (s >= s_end) {
                rb_raise(rb_eRuntimeError, "invalid escape");
            }
            undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
        }
        else {
            rb_str_cat(undumped, s++, 1);
        }
    }

    RB_GC_GUARD(str);

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}

#unicode_normalize(form = :nfc) ⇒ String

Returns a copy of self with Unicode normalization applied.

Argument form must be one of the following symbols (see Unicode normalization forms):

  • :nfc: Canonical decomposition, followed by canonical composition.

  • :nfd: Canonical decomposition.

  • :nfkc: Compatibility decomposition, followed by canonical composition.

  • :nfkd: Compatibility decomposition.

The encoding of self must be one of:

  • Encoding::UTF_8

  • Encoding::UTF_16BE

  • Encoding::UTF_16LE

  • Encoding::UTF_32BE

  • Encoding::UTF_32LE

  • Encoding::GB18030

  • Encoding::UCS_2BE

  • Encoding::UCS_4BE

Examples:

"a\u0300".unicode_normalize      # => "a"
"\u00E0".unicode_normalize(:nfd) # => "a "

Related: String#unicode_normalize!, String#unicode_normalized?.

Returns:



11423
11424
11425
11426
11427
# File 'string.c', line 11423

static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalize);
}

#unicode_normalize!(form = :nfc) ⇒ self

Like String#unicode_normalize, except that the normalization is performed on self.

Related String#unicode_normalized?.

Returns:

  • (self)


11439
11440
11441
11442
11443
# File 'string.c', line 11439

static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
}

#unicode_normalized?(form = :nfc) ⇒ Boolean

Returns true if self is in the given form of Unicode normalization, false otherwise. The form must be one of :nfc, :nfd, :nfkc, or :nfkd.

Examples:

"a\u0300".unicode_normalized?       # => false
"a\u0300".unicode_normalized?(:nfd) # => true
"\u00E0".unicode_normalized?        # => true
"\u00E0".unicode_normalized?(:nfd)  # => false

Raises an exception if self is not in a Unicode encoding:

s = "\xE0".force_encoding('ISO-8859-1')
s.unicode_normalized? # Raises Encoding::CompatibilityError.

Related: String#unicode_normalize, String#unicode_normalize!.

Returns:

  • (Boolean)


11468
11469
11470
11471
11472
# File 'string.c', line 11468

static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalized_p);
}

#upcase(*options) ⇒ String

Returns a string containing the upcased characters in self:

s = 'Hello World!' # => "Hello World!"
s.upcase           # => "HELLO WORLD!"

The casing may be affected by the given options; see Case Mapping.

Related: String#upcase!, String#downcase, String#downcase!.

Returns:



7568
7569
7570
7571
7572
7573
7574
7575
7576
7577
7578
7579
7580
7581
7582
7583
7584
7585
7586
7587
7588
7589
7590
7591
# File 'string.c', line 7568

static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        upcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#upcase!(*options) ⇒ self?

Upcases the characters in self; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.upcase!          # => "HELLO WORLD!"
s                  # => "HELLO WORLD!"
s.upcase!          # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#upcase, String#downcase, String#downcase!.

Returns:

  • (self, nil)


7529
7530
7531
7532
7533
7534
7535
7536
7537
7538
7539
7540
7541
7542
7543
7544
7545
7546
7547
7548
7549
# File 'string.c', line 7529

static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (upcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#upto(other_string, exclusive = false) {|string| ... } ⇒ self #upto(other_string, exclusive = false) ⇒ Object

With a block given, calls the block with each String value returned by successive calls to String#succ; the first value is self, the next is self.succ, and so on; the sequence terminates when value other_string is reached; returns self:

'a8'.upto('b6') {|s| print s, ' ' } # => "a8"

Output:

a8 a9 b0 b1 b2 b3 b4 b5 b6

If argument exclusive is given as a truthy object, the last value is omitted:

'a8'.upto('b6', true) {|s| print s, ' ' } # => "a8"

Output:

a8 a9 b0 b1 b2 b3 b4 b5

If other_string would not be reached, does not call the block:

'25'.upto('5') {|s| fail s }
'aa'.upto('a') {|s| fail s }

With no block given, returns a new Enumerator:

'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")>

Overloads:

  • #upto(other_string, exclusive = false) {|string| ... } ⇒ self

    Yields:

    • (string)

    Returns:

    • (self)


5026
5027
5028
5029
5030
5031
5032
5033
5034
# File 'string.c', line 5026

static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE end, exclusive;

    rb_scan_args(argc, argv, "11", &end, &exclusive);
    RETURN_ENUMERATOR(beg, argc, argv);
    return rb_str_upto_each(beg, end, RTEST(exclusive), str_upto_i, Qnil);
}

#valid_encoding?Boolean

Returns true if self is encoded correctly, false otherwise:

"\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
"\xc2".force_encoding("UTF-8").valid_encoding?     # => false
"\x80".force_encoding("UTF-8").valid_encoding?     # => false

Returns:

  • (Boolean)


10990
10991
10992
10993
10994
10995
10996
# File 'string.c', line 10990

static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return RBOOL(cr != ENC_CODERANGE_BROKEN);
}