Class: String

Inherits:
Object show all
Includes:
Comparable
Defined in:
string.c

Direct Known Subclasses

Warning::buffer

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Comparable

#<, #<=, #>, #>=, #between?, #clamp

Constructor Details

#new(string = '', **opts) ⇒ Object

:include: doc/string/new.rdoc


1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
# File 'string.c', line 1983

static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
    static ID keyword_ids[2];
    VALUE orig, opt, venc, vcapa;
    VALUE kwargs[2];
    rb_encoding *enc = 0;
    int n;

    if (!keyword_ids[0]) {
        keyword_ids[0] = rb_id_encoding();
        CONST_ID(keyword_ids[1], "capacity");
    }

    n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (!NIL_P(opt)) {
        rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
        venc = kwargs[0];
        vcapa = kwargs[1];
        if (!UNDEF_P(venc) && !NIL_P(venc)) {
            enc = rb_to_encoding(venc);
        }
        if (!UNDEF_P(vcapa) && !NIL_P(vcapa)) {
            long capa = NUM2LONG(vcapa);
            long len = 0;
            int termlen = enc ? rb_enc_mbminlen(enc) : 1;

            if (capa < STR_BUF_MIN_SIZE) {
                capa = STR_BUF_MIN_SIZE;
            }
            if (n == 1) {
                StringValue(orig);
                len = RSTRING_LEN(orig);
                if (capa < len) {
                    capa = len;
                }
                if (orig == str) n = 0;
            }
            str_modifiable(str);
            if (STR_EMBED_P(str) || FL_TEST(str, STR_SHARED|STR_NOFREE)) {
                /* make noembed always */
                const size_t size = (size_t)capa + termlen;
                const char *const old_ptr = RSTRING_PTR(str);
                const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
                char *new_ptr = ALLOC_N(char, size);
                if (STR_EMBED_P(str)) RUBY_ASSERT((long)osize <= str_embed_capa(str));
                memcpy(new_ptr, old_ptr, osize < size ? osize : size);
                FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
                RSTRING(str)->as.heap.ptr = new_ptr;
            }
            else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
                SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
                        (size_t)capa + termlen, STR_HEAP_SIZE(str));
            }
            STR_SET_LEN(str, len);
            TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
            if (n == 1) {
                memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
                rb_enc_cr_str_exact_copy(str, orig);
            }
            FL_SET(str, STR_NOEMBED);
            RSTRING(str)->as.heap.aux.capa = capa;
        }
        else if (n == 1) {
            rb_str_replace(str, orig);
        }
        if (enc) {
            rb_enc_associate(str, enc);
            ENC_CODERANGE_CLEAR(str);
        }
    }
    else if (n == 1) {
        rb_str_replace(str, orig);
    }
    return str;
}

Class Method Details

.new(*args) ⇒ Object

:nodoc:


2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
# File 'string.c', line 2061

static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
    if (klass != rb_cString) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    static ID keyword_ids[2];
    VALUE orig, opt, encoding = Qnil, capacity = Qnil;
    VALUE kwargs[2];
    rb_encoding *enc = NULL;

    int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (NIL_P(opt)) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    keyword_ids[0] = rb_id_encoding();
    CONST_ID(keyword_ids[1], "capacity");
    rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
    encoding = kwargs[0];
    capacity = kwargs[1];

    if (n == 1) {
        orig = StringValue(orig);
    }
    else {
        orig = Qnil;
    }

    if (UNDEF_P(encoding)) {
        if (!NIL_P(orig)) {
            encoding = rb_obj_encoding(orig);
        }
    }

    if (!UNDEF_P(encoding)) {
        enc = rb_to_encoding(encoding);
    }

    // If capacity is nil, we're basically just duping `orig`.
    if (UNDEF_P(capacity)) {
        if (NIL_P(orig)) {
            VALUE empty_str = str_new(klass, "", 0);
            if (enc) {
                rb_enc_associate(empty_str, enc);
            }
            return empty_str;
        }
        VALUE copy = str_duplicate(klass, orig);
        rb_enc_associate(copy, enc);
        ENC_CODERANGE_CLEAR(copy);
        return copy;
    }

    long capa = 0;
    capa = NUM2LONG(capacity);
    if (capa < 0) {
        capa = 0;
    }

    if (!NIL_P(orig)) {
        long orig_capa = rb_str_capacity(orig);
        if (orig_capa > capa) {
            capa = orig_capa;
        }
    }

    VALUE str = str_enc_new(klass, NULL, capa, enc);
    STR_SET_LEN(str, 0);
    TERM_FILL(RSTRING_PTR(str), enc ? rb_enc_mbmaxlen(enc) : 1);

    if (!NIL_P(orig)) {
        rb_str_buf_append(str, orig);
    }

    return str;
}

.try_convert(object) ⇒ Object?

If object is a String object, returns object.

Otherwise if object responds to :to_str, calls object.to_str and returns the result.

Returns nil if object does not respond to :to_str.

Raises an exception unless object.to_str returns a String object.

Returns:


2873
2874
2875
2876
2877
# File 'string.c', line 2873

static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    return rb_check_string_type(str);
}

Instance Method Details

#%(object) ⇒ Object

Returns the result of formatting object into the format specification self (see Kernel#sprintf for formatting details):

"%05d" % 123 # => "00123"

If self contains multiple substitutions, object must be an Array or Hash containing the values to be substituted:

"%-5s: %016x" % [ "ID", self.object_id ] # => "ID   : 00002b054ec93168"
"foo = %{foo}" % {foo: 'bar'} # => "foo = bar"
"foo = %{foo}, baz = %{baz}" % {foo: 'bar', baz: 'bat'} # => "foo = bar, baz = bat"

2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
# File 'string.c', line 2551

static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE tmp = rb_check_array_type(arg);

    if (!NIL_P(tmp)) {
        return rb_str_format(RARRAY_LENINT(tmp), RARRAY_CONST_PTR(tmp), str);
    }
    return rb_str_format(1, &arg, str);
}

#*(integer) ⇒ Object

Returns a new String containing integer copies of self:

"Ho! " * 3 # => "Ho! Ho! Ho! "
"Ho! " * 0 # => ""

2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
# File 'string.c', line 2475

VALUE
rb_str_times(VALUE str, VALUE times)
{
    VALUE str2;
    long n, len;
    char *ptr2;
    int termlen;

    if (times == INT2FIX(1)) {
        return str_duplicate(rb_cString, str);
    }
    if (times == INT2FIX(0)) {
        str2 = str_alloc_embed(rb_cString, 0);
        rb_enc_copy(str2, str);
        return str2;
    }
    len = NUM2LONG(times);
    if (len < 0) {
        rb_raise(rb_eArgError, "negative argument");
    }
    if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
        if (STR_EMBEDDABLE_P(len, 1)) {
            str2 = str_alloc_embed(rb_cString, len + 1);
            memset(RSTRING_PTR(str2), 0, len + 1);
        }
        else {
            str2 = str_alloc_heap(rb_cString);
            RSTRING(str2)->as.heap.aux.capa = len;
            RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
        }
        STR_SET_LEN(str2, len);
        rb_enc_copy(str2, str);
        return str2;
    }
    if (len && LONG_MAX/len <  RSTRING_LEN(str)) {
        rb_raise(rb_eArgError, "argument too big");
    }

    len *= RSTRING_LEN(str);
    termlen = TERM_LEN(str);
    str2 = str_enc_new(rb_cString, 0, len, STR_ENC_GET(str));
    ptr2 = RSTRING_PTR(str2);
    if (len) {
        n = RSTRING_LEN(str);
        memcpy(ptr2, RSTRING_PTR(str), n);
        while (n <= len/2) {
            memcpy(ptr2 + n, ptr2, n);
            n *= 2;
        }
        memcpy(ptr2 + n, ptr2, len-n);
    }
    STR_SET_LEN(str2, len);
    TERM_FILL(&ptr2[len], termlen);
    rb_enc_cr_str_copy_for_substr(str2, str);

    return str2;
}

#+(other_string) ⇒ Object

Returns a new String containing other_string concatenated to self:

"Hello from " + self.to_s # => "Hello from main"

2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
# File 'string.c', line 2403

VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
    VALUE str3;
    rb_encoding *enc;
    char *ptr1, *ptr2, *ptr3;
    long len1, len2;
    int termlen;

    StringValue(str2);
    enc = rb_enc_check_str(str1, str2);
    RSTRING_GETMEM(str1, ptr1, len1);
    RSTRING_GETMEM(str2, ptr2, len2);
    termlen = rb_enc_mbminlen(enc);
    if (len1 > LONG_MAX - len2) {
        rb_raise(rb_eArgError, "string size too big");
    }
    str3 = str_enc_new(rb_cString, 0, len1+len2, enc);
    ptr3 = RSTRING_PTR(str3);
    memcpy(ptr3, ptr1, len1);
    memcpy(ptr3+len1, ptr2, len2);
    TERM_FILL(&ptr3[len1+len2], termlen);

    ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
                           ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
    RB_GC_GUARD(str1);
    RB_GC_GUARD(str2);
    return str3;
}

#+self

Returns self if self is not frozen and can be mutated without warning issuance.

Otherwise returns self.dup, which is not frozen.

Returns:

  • (self)

3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
# File 'string.c', line 3200

static VALUE
str_uplus(VALUE str)
{
    if (OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
        return rb_str_dup(str);
    }
    else {
        return str;
    }
}

#-Object #dedupObject Also known as: dedup

Returns a frozen, possibly pre-existing copy of the string.

The returned String will be deduplicated as long as it does not have any instance variables set on it and is not a String subclass.

Note that -string variant is more convenient for defining constants:

FILENAME = -'config/database.yml'

while dedup is better suitable for using the method in chains of calculations:

@url_list.concat(urls.map(&:dedup))

3232
3233
3234
3235
3236
3237
3238
3239
# File 'string.c', line 3232

static VALUE
str_uminus(VALUE str)
{
    if (!BARE_STRING_P(str) && !rb_obj_frozen_p(str)) {
        str = rb_str_dup(str);
    }
    return rb_fstring(str);
}

#<<(object) ⇒ String

Concatenates object to self and returns self:

s = 'foo'
s << 'bar' # => "foobar"
s          # => "foobar"

If object is an Integer, the value is considered a codepoint and converted to a character before concatenation:

s = 'foo'
s << 33 # => "foo!"

If that codepoint is not representable in the encoding of string, RangeError is raised.

s = 'foo'
s.encoding              # => <Encoding:UTF-8>
s << 0x00110000         # 1114112 out of char range (RangeError)
s = 'foo'.encode('EUC-JP')
s << 0x00800080         # invalid codepoint 0x800080 in EUC-JP (RangeError)

If the encoding is US-ASCII and the codepoint is 0..0xff, string is automatically promoted to ASCII-8BIT.

s = 'foo'.encode('US-ASCII')
s << 0xff
s.encoding              # => #<Encoding:BINARY (ASCII-8BIT)>

Related: String#concat, which takes multiple arguments.

Returns:


3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
# File 'string.c', line 3921

VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
    unsigned int code;
    rb_encoding *enc = STR_ENC_GET(str1);
    int encidx;

    if (RB_INTEGER_TYPE_P(str2)) {
        if (rb_num_to_uint(str2, &code) == 0) {
        }
        else if (FIXNUM_P(str2)) {
            rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
        }
        else {
            rb_raise(rb_eRangeError, "bignum out of char range");
        }
    }
    else {
        return rb_str_append(str1, str2);
    }

    encidx = rb_ascii8bit_appendable_encoding_index(enc, code);

    if (encidx >= 0) {
        rb_str_buf_cat_byte(str1, (unsigned char)code);
    }
    else {
        long pos = RSTRING_LEN(str1);
        int cr = ENC_CODERANGE(str1);
        int len;
        char *buf;

        switch (len = rb_enc_codelen(code, enc)) {
          case ONIGERR_INVALID_CODE_POINT_VALUE:
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
            break;
          case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
          case 0:
            rb_raise(rb_eRangeError, "%u out of char range", code);
            break;
        }
        buf = ALLOCA_N(char, len + 1);
        rb_enc_mbcput(code, buf, enc);
        if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
        }
        rb_str_resize(str1, pos+len);
        memcpy(RSTRING_PTR(str1) + pos, buf, len);
        if (cr == ENC_CODERANGE_7BIT && code > 127) {
            cr = ENC_CODERANGE_VALID;
        }
        else if (cr == ENC_CODERANGE_BROKEN) {
            cr = ENC_CODERANGE_UNKNOWN;
        }
        ENC_CODERANGE_SET(str1, cr);
    }
    return str1;
}

#<=>(other_string) ⇒ -1, ...

Compares self and other_string, returning:

  • -1 if other_string is larger.

  • 0 if the two are equal.

  • 1 if other_string is smaller.

  • nil if the two are incomparable.

Examples:

'foo' <=> 'foo' # => 0
'foo' <=> 'food' # => -1
'food' <=> 'foo' # => 1
'FOO' <=> 'foo' # => -1
'foo' <=> 'FOO' # => 1
'foo' <=> 1 # => nil

Returns:

  • (-1, 0, 1, nil)

4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
# File 'string.c', line 4211

static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    int result;
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return rb_invcmp(str1, str2);
    }
    result = rb_str_cmp(str1, s);
    return INT2FIX(result);
}

#==(object) ⇒ Boolean #===(object) ⇒ Boolean

Returns true if object has the same length and content; as self; false otherwise:

s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false

Returns false if the two strings’ encodings are not compatible:

"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false

If object is not an instance of String but responds to to_str, then the two strings are compared using object.==.

Overloads:

  • #==(object) ⇒ Boolean

    Returns:

    • (Boolean)
  • #===(object) ⇒ Boolean

    Returns:

    • (Boolean)

4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
# File 'string.c', line 4150

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
        if (!rb_respond_to(str2, idTo_str)) {
            return Qfalse;
        }
        return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#==(object) ⇒ Boolean #===(object) ⇒ Boolean

Returns true if object has the same length and content; as self; false otherwise:

s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false

Returns false if the two strings’ encodings are not compatible:

"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false

If object is not an instance of String but responds to to_str, then the two strings are compared using object.==.

Overloads:

  • #==(object) ⇒ Boolean

    Returns:

    • (Boolean)
  • #===(object) ⇒ Boolean

    Returns:

    • (Boolean)

4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
# File 'string.c', line 4150

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
        if (!rb_respond_to(str2, idTo_str)) {
            return Qfalse;
        }
        return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#=~(regexp) ⇒ Integer? #=~(object) ⇒ Integer?

Returns the Integer index of the first substring that matches the given regexp, or nil if no match found:

'foo' =~ /f/ # => 0
'foo' =~ /o/ # => 1
'foo' =~ /x/ # => nil

Note: also updates Regexp@Global+Variables.

If the given object is not a Regexp, returns the value returned by object =~ self.

Note that string =~ regexp is different from regexp =~ string (see Regexp#=~):

number= nil
"no. 9" =~ /(?<number>\d+)/
number # => nil (not assigned)
/(?<number>\d+)/ =~ "no. 9"
number #=> "9"

Overloads:


4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
# File 'string.c', line 4926

static VALUE
rb_str_match(VALUE x, VALUE y)
{
    switch (OBJ_BUILTIN_TYPE(y)) {
      case T_STRING:
        rb_raise(rb_eTypeError, "type mismatch: String given");

      case T_REGEXP:
        return rb_reg_match(y, x);

      default:
        return rb_funcall(y, idEqTilde, 1, x);
    }
}

#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil

Returns the substring of self specified by the arguments. See examples at String Slices.

Overloads:

  • #[](index) ⇒ nil

    Returns:

    • (nil)
  • #[](start, length) ⇒ nil

    Returns:

    • (nil)
  • #[](range) ⇒ nil

    Returns:

    • (nil)
  • #[](regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #[](substring) ⇒ nil

    Returns:

    • (nil)

5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
# File 'string.c', line 5684

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            return rb_str_subpat(str, argv[0], argv[1]);
        }
        else {
            return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
        }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#[]=(index) ⇒ Object #[]=(start, length) ⇒ Object #[]=(range) ⇒ Object #[]=(regexp, capture = 0) ⇒ Object #[]=(substring) ⇒ Object

Replaces all, some, or none of the contents of self; returns new_string. See String Slices.

A few examples:

s = 'foo'
s[2] = 'rtune'     # => "rtune"
s                  # => "fortune"
s[1, 5] = 'init'   # => "init"
s                  # => "finite"
s[3..4] = 'al'     # => "al"
s                  # => "finale"
s[/e$/] = 'ly'     # => "ly"
s                  # => "finally"
s['lly'] = 'ncial' # => "ncial"
s                  # => "financial"

5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
# File 'string.c', line 5920

static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 3) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
        }
        else {
            rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
        }
        return argv[2];
    }
    rb_check_arity(argc, 2, 3);
    return rb_str_aset(str, argv[0], argv[1]);
}

#append_as_bytes(*objects) ⇒ String

Concatenates each object in objects into self without any encoding validation or conversion and returns self:

s = 'foo'
s.append_as_bytes(" \xE2\x82")  # => "foo \xE2\x82"
s.valid_encoding?               # => false
s.append_as_bytes("\xAC 12")
s.valid_encoding?               # => true

For each given object object that is an Integer, the value is considered a Byte. If the Integer is bigger than one byte, only the lower byte is considered, similar to String#setbyte:

s = ""
s.append_as_bytes(0, 257)             # =>  "\u0000\u0001"

Related: String#<<, String#concat, which do an encoding aware concatenation.

Returns:


3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
# File 'string.c', line 3775

VALUE
rb_str_append_as_bytes(int argc, VALUE *argv, VALUE str)
{
    long needed_capacity = 0;
    volatile VALUE t0;
    enum ruby_value_type *types = ALLOCV_N(enum ruby_value_type, t0, argc);

    for (int index = 0; index < argc; index++) {
        VALUE obj = argv[index];
        enum ruby_value_type type = types[index] = rb_type(obj);
        switch (type) {
          case T_FIXNUM:
          case T_BIGNUM:
            needed_capacity++;
            break;
          case T_STRING:
            needed_capacity += RSTRING_LEN(obj);
            break;
          default:
            rb_raise(
                rb_eTypeError,
                "wrong argument type %"PRIsVALUE" (expected String or Integer)",
                rb_obj_class(obj)
            );
            break;
        }
    }

    str_ensure_available_capa(str, needed_capacity);
    char *sptr = RSTRING_END(str);

    for (int index = 0; index < argc; index++) {
        VALUE obj = argv[index];
        enum ruby_value_type type = types[index];
        switch (type) {
          case T_FIXNUM:
          case T_BIGNUM: {
            argv[index] = obj = rb_int_and(obj, INT2FIX(0xff));
            char byte = (char)(NUM2INT(obj) & 0xFF);
            *sptr = byte;
            sptr++;
            break;
          }
          case T_STRING: {
            const char *ptr;
            long len;
            RSTRING_GETMEM(obj, ptr, len);
            memcpy(sptr, ptr, len);
            sptr += len;
            break;
          }
          default:
            rb_bug("append_as_bytes arguments should have been validated");
        }
    }

    STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
    TERM_FILL(sptr, TERM_LEN(str)); /* sentinel */

    int cr = ENC_CODERANGE(str);
    switch (cr) {
      case ENC_CODERANGE_7BIT: {
        for (int index = 0; index < argc; index++) {
            VALUE obj = argv[index];
            enum ruby_value_type type = types[index];
            switch (type) {
              case T_FIXNUM:
              case T_BIGNUM: {
                if (!ISASCII(NUM2INT(obj))) {
                    goto clear_cr;
                }
                break;
              }
              case T_STRING: {
                if (ENC_CODERANGE(obj) != ENC_CODERANGE_7BIT) {
                    goto clear_cr;
                }
                break;
              }
              default:
                rb_bug("append_as_bytes arguments should have been validated");
            }
        }
        break;
      }
      case ENC_CODERANGE_VALID:
        if (ENCODING_GET_INLINED(str) == ENCINDEX_ASCII_8BIT) {
            goto keep_cr;
        }
        else {
            goto clear_cr;
        }
        break;
      default:
        goto clear_cr;
        break;
    }

    RB_GC_GUARD(t0);

  clear_cr:
    // If no fast path was hit, we clear the coderange.
    // append_as_bytes is predominently meant to be used in
    // buffering situation, hence it's likely the coderange
    // will never be scanned, so it's not worth spending time
    // precomputing the coderange except for simple and common
    // situations.
    ENC_CODERANGE_CLEAR(str);
  keep_cr:
    return str;
}

#ascii_only?Boolean

Returns true if self contains only ASCII characters, false otherwise:

'abc'.ascii_only?         # => true
"abc\u{6666}".ascii_only? # => false

Returns:

  • (Boolean)

11444
11445
11446
11447
11448
11449
11450
# File 'string.c', line 11444

static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return RBOOL(cr == ENC_CODERANGE_7BIT);
}

#bString

:include: doc/string/b.rdoc

Returns:


11380
11381
11382
11383
11384
11385
11386
11387
11388
11389
11390
11391
11392
11393
11394
11395
11396
11397
11398
11399
11400
11401
11402
11403
11404
11405
11406
11407
11408
11409
11410
11411
# File 'string.c', line 11380

static VALUE
rb_str_b(VALUE str)
{
    VALUE str2;
    if (STR_EMBED_P(str)) {
        str2 = str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
    }
    else {
        str2 = str_alloc_heap(rb_cString);
    }
    str_replace_shared_without_enc(str2, str);

    if (rb_enc_asciicompat(STR_ENC_GET(str))) {
        // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
        // If we know the receiver's code range then we know the result's code range.
        int cr = ENC_CODERANGE(str);
        switch (cr) {
          case ENC_CODERANGE_7BIT:
            ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
            break;
          case ENC_CODERANGE_BROKEN:
          case ENC_CODERANGE_VALID:
            ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
            break;
          default:
            ENC_CODERANGE_CLEAR(str2);
            break;
        }
    }

    return str2;
}

#byteindex(substring, offset = 0) ⇒ Integer? #byteindex(regexp, offset = 0) ⇒ Integer?

Returns the Integer byte-based index of the first occurrence of the given substring, or nil if none found:

'foo'.byteindex('f') # => 0
'foo'.byteindex('o') # => 1
'foo'.byteindex('oo') # => 1
'foo'.byteindex('ooo') # => nil

Returns the Integer byte-based index of the first match for the given Regexp regexp, or nil if none found:

'foo'.byteindex(/f/) # => 0
'foo'.byteindex(/o/) # => 1
'foo'.byteindex(/oo/) # => 1
'foo'.byteindex(/ooo/) # => nil

Integer argument offset, if given, specifies the byte-based position in the string to begin the search:

'foo'.byteindex('o', 1) # => 1
'foo'.byteindex('o', 2) # => 2
'foo'.byteindex('o', 3) # => nil

If offset is negative, counts backward from the end of self:

'foo'.byteindex('o', -1) # => 2
'foo'.byteindex('o', -2) # => 1
'foo'.byteindex('o', -3) # => 1
'foo'.byteindex('o', -4) # => nil

If offset does not land on character (codepoint) boundary, IndexError is raised.

Related: String#index, String#byterindex.

Overloads:

  • #byteindex(substring, offset = 0) ⇒ Integer?

    Returns:

  • #byteindex(regexp, offset = 0) ⇒ Integer?

    Returns:


4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
# File 'string.c', line 4545

static VALUE
rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        long slen = RSTRING_LEN(str);
        pos = NUM2LONG(initpos);
        if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }
    else {
        pos = 0;
    }

    str_ensure_byte_pos(str, pos);

    if (RB_TYPE_P(sub, T_REGEXP)) {
        if (rb_reg_search(sub, str, pos, 0) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = BEG(0);
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_byteindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
    }
    return Qnil;
}

#byterindex(substring, offset = self.bytesize) ⇒ Integer? #byterindex(regexp, offset = self.bytesize) ⇒ Integer?

Returns the Integer byte-based index of the last occurrence of the given substring, or nil if none found:

'foo'.byterindex('f') # => 0
'foo'.byterindex('o') # => 2
'foo'.byterindex('oo') # => 1
'foo'.byterindex('ooo') # => nil

Returns the Integer byte-based index of the last match for the given Regexp regexp, or nil if none found:

'foo'.byterindex(/f/) # => 0
'foo'.byterindex(/o/) # => 2
'foo'.byterindex(/oo/) # => 1
'foo'.byterindex(/ooo/) # => nil

The last match means starting at the possible last position, not the last of longest matches.

'foo'.byterindex(/o+/) # => 2
$~ #=> #<MatchData "o">

To get the last longest match, needs to combine with negative lookbehind.

'foo'.byterindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">

Or String#byteindex with negative lookforward.

'foo'.byteindex(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">

Integer argument offset, if given and non-negative, specifies the maximum starting byte-based position in the string to end the search:

'foo'.byterindex('o', 0) # => nil
'foo'.byterindex('o', 1) # => 1
'foo'.byterindex('o', 2) # => 2
'foo'.byterindex('o', 3) # => 2

If offset is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset:

'foo'.byterindex('o', -1) # => 2
'foo'.byterindex('o', -2) # => 1
'foo'.byterindex('o', -3) # => nil
'foo'.byterindex('o', -4) # => nil

If offset does not land on character (codepoint) boundary, IndexError is raised.

Related: String#byteindex.

Overloads:

  • #byterindex(substring, offset = self.bytesize) ⇒ Integer?

    Returns:

  • #byterindex(regexp, offset = self.bytesize) ⇒ Integer?

    Returns:


4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
# File 'string.c', line 4859

static VALUE
rb_str_byterindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos, len = RSTRING_LEN(str);

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0 && (pos += len) < 0) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
        if (pos > len) pos = len;
    }
    else {
        pos = len;
    }

    str_ensure_byte_pos(str, pos);

    if (RB_TYPE_P(sub, T_REGEXP)) {
        if (rb_reg_search(sub, str, pos, 1) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = BEG(0);
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_byterindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
    }
    return Qnil;
}

#bytesObject

:include: doc/string/bytes.rdoc


9678
9679
9680
9681
9682
9683
# File 'string.c', line 9678

static VALUE
rb_str_bytes(VALUE str)
{
    VALUE ary = WANTARRAY("bytes", RSTRING_LEN(str));
    return rb_str_enumerate_bytes(str, ary);
}

#bytesizeInteger

:include: doc/string/bytesize.rdoc

Returns:


2369
2370
2371
2372
2373
# File 'string.c', line 2369

VALUE
rb_str_bytesize(VALUE str)
{
    return LONG2NUM(RSTRING_LEN(str));
}

#byteslice(index, length = 1) ⇒ String? #byteslice(range) ⇒ String?

Returns a substring of self, or nil if the substring cannot be constructed.

With integer arguments index and length given, returns the substring beginning at the given index of the given length (if possible), or nil if length is negative or index falls outside of self:

s = '0123456789' # => "0123456789"
s.byteslice(2)   # => "2"
s.byteslice(200) # => nil
s.byteslice(4, 3)  # => "456"
s.byteslice(4, 30) # => "456789"
s.byteslice(4, -1) # => nil
s.byteslice(40, 2) # => nil

In either case above, counts backwards from the end of self if index is negative:

s = '0123456789'   # => "0123456789"
s.byteslice(-4)    # => "6"
s.byteslice(-4, 3) # => "678"

With Range argument range given, returns byteslice(range.begin, range.size):

s = '0123456789'    # => "0123456789"
s.byteslice(4..6)   # => "456"
s.byteslice(-6..-4) # => "456"
s.byteslice(5..2)   # => "" # range.size is zero.
s.byteslice(40..42) # => nil

In all cases, a returned string has the same encoding as self:

s.encoding              # => #<Encoding:UTF-8>
s.byteslice(4).encoding # => #<Encoding:UTF-8>

Overloads:


6740
6741
6742
6743
6744
6745
6746
6747
6748
6749
6750
# File 'string.c', line 6740

static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        long beg = NUM2LONG(argv[0]);
        long len = NUM2LONG(argv[1]);
        return str_byte_substr(str, beg, len, TRUE);
    }
    rb_check_arity(argc, 1, 2);
    return str_byte_aref(str, argv[0]);
}

#bytesplice(index, length, str) ⇒ String #bytesplice(index, length, str, str_index, str_length) ⇒ String #bytesplice(range, str) ⇒ String #bytesplice(range, str, str_range) ⇒ String

Replaces some or all of the content of self with str, and returns self. The portion of the string affected is determined using the same criteria as String#byteslice, except that length cannot be omitted. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly.

If str_index and str_length, or str_range are given, the content of self is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); however the substring of str is not allocated as a new string.

The form that take an Integer will raise an IndexError if the value is out of range; the Range form will raise a RangeError. If the beginning or ending offset does not land on character (codepoint) boundary, an IndexError will be raised.

Overloads:

  • #bytesplice(index, length, str) ⇒ String

    Returns:

  • #bytesplice(index, length, str, str_index, str_length) ⇒ String

    Returns:

  • #bytesplice(range, str) ⇒ String

    Returns:

  • #bytesplice(range, str, str_range) ⇒ String

    Returns:


6796
6797
6798
6799
6800
6801
6802
6803
6804
6805
6806
6807
6808
6809
6810
6811
6812
6813
6814
6815
6816
6817
6818
6819
6820
6821
6822
6823
6824
6825
6826
6827
6828
6829
6830
6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6856
# File 'string.c', line 6796

static VALUE
rb_str_bytesplice(int argc, VALUE *argv, VALUE str)
{
    long beg, len, vbeg, vlen;
    VALUE val;
    int cr;

    rb_check_arity(argc, 2, 5);
    if (!(argc == 2 || argc == 3 || argc == 5)) {
        rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
    }
    if (argc == 2 || (argc == 3 && !RB_INTEGER_TYPE_P(argv[0]))) {
        if (!rb_range_beg_len(argv[0], &beg, &len, RSTRING_LEN(str), 2)) {
            rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                     rb_builtin_class_name(argv[0]));
        }
        val = argv[1];
        StringValue(val);
        if (argc == 2) {
            /* bytesplice(range, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
        else {
            /* bytesplice(range, str, str_range) */
            if (!rb_range_beg_len(argv[2], &vbeg, &vlen, RSTRING_LEN(val), 2)) {
                rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                         rb_builtin_class_name(argv[2]));
            }
        }
    }
    else {
        beg = NUM2LONG(argv[0]);
        len = NUM2LONG(argv[1]);
        val = argv[2];
        StringValue(val);
        if (argc == 3) {
            /* bytesplice(index, length, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
        else {
            /* bytesplice(index, length, str, str_index, str_length) */
            vbeg = NUM2LONG(argv[3]);
            vlen = NUM2LONG(argv[4]);
        }
    }
    str_check_beg_len(str, &beg, &len);
    str_check_beg_len(val, &vbeg, &vlen);
    str_modify_keep_cr(str);

    if (RB_UNLIKELY(ENCODING_GET_INLINED(str) != ENCODING_GET_INLINED(val))) {
        rb_enc_associate(str, rb_enc_check(str, val));
    }

    rb_str_update_1(str, beg, len, val, vbeg, vlen);
    cr = ENC_CODERANGE_AND(ENC_CODERANGE(str), ENC_CODERANGE(val));
    if (cr != ENC_CODERANGE_BROKEN)
        ENC_CODERANGE_SET(str, cr);
    return str;
}

#capitalize(*options) ⇒ String

Returns a string containing the characters in self; the first character is upcased; the remaining characters are downcased:

s = 'hello World!' # => "hello World!"
s.capitalize       # => "Hello world!"

The casing may be affected by the given options; see Case Mapping.

Related: String#capitalize!.

Returns:


8173
8174
8175
8176
8177
8178
8179
8180
8181
8182
8183
8184
8185
8186
8187
8188
8189
8190
8191
# File 'string.c', line 8173

static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str;
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#capitalize!(*options) ⇒ self?

Upcases the first character in self; downcases the remaining characters; returns self if any changes were made, nil otherwise:

s = 'hello World!' # => "hello World!"
s.capitalize!      # => "Hello world!"
s                  # => "Hello world!"
s.capitalize!      # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#capitalize.

Returns:

  • (self, nil)

8135
8136
8137
8138
8139
8140
8141
8142
8143
8144
8145
8146
8147
8148
8149
8150
8151
8152
# File 'string.c', line 8135

static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#casecmp(other_string) ⇒ -1, ...

Compares self.downcase and other_string.downcase; returns:

  • -1 if other_string.downcase is larger.

  • 0 if the two are equal.

  • 1 if other_string.downcase is smaller.

  • nil if the two are incomparable.

Examples:

'foo'.casecmp('foo') # => 0
'foo'.casecmp('food') # => -1
'food'.casecmp('foo') # => 1
'FOO'.casecmp('foo') # => 0
'foo'.casecmp('FOO') # => 0
'foo'.casecmp(1) # => nil

See Case Mapping.

Related: String#casecmp?.

Returns:

  • (-1, 0, 1, nil)

4252
4253
4254
4255
4256
4257
4258
4259
4260
# File 'string.c', line 4252

static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return Qnil;
    }
    return str_casecmp(str1, s);
}

#casecmp?(other_string) ⇒ true, ...

Returns true if self and other_string are equal after Unicode case folding, otherwise false:

'foo'.casecmp?('foo') # => true
'foo'.casecmp?('food') # => false
'food'.casecmp?('foo') # => false
'FOO'.casecmp?('foo') # => true
'foo'.casecmp?('FOO') # => true

Returns nil if the two values are incomparable:

'foo'.casecmp?(1) # => nil

See Case Mapping.

Related: String#casecmp.

Returns:

  • (true, false, nil)

4342
4343
4344
4345
4346
4347
4348
4349
4350
# File 'string.c', line 4342

static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
        return Qnil;
    }
    return str_casecmp_p(str1, s);
}

#center(size, pad_string = ' ') ⇒ Object

:include: doc/string/center.rdoc

Related: String#ljust, String#rjust.


10985
10986
10987
10988
10989
# File 'string.c', line 10985

static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'c');
}

#charsObject

:include: doc/string/chars.rdoc


9747
9748
9749
9750
9751
9752
# File 'string.c', line 9747

static VALUE
rb_str_chars(VALUE str)
{
    VALUE ary = WANTARRAY("chars", rb_str_strlen(str));
    return rb_str_enumerate_chars(str, ary);
}

#chomp(line_sep = $/) ⇒ Object

:include: doc/string/chomp.rdoc


10211
10212
10213
10214
10215
10216
10217
# File 'string.c', line 10211

static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, 0, chompped_length(str, rs));
}

#chomp!(line_sep = $/) ⇒ self?

Like String#chomp, but modifies self in place; returns nil if no modification made, self otherwise.

Returns:

  • (self, nil)

10191
10192
10193
10194
10195
10196
10197
10198
10199
10200
# File 'string.c', line 10191

static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE rs;
    str_modifiable(str);
    if (RSTRING_LEN(str) == 0 && argc < 2) return Qnil;
    rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return Qnil;
    return rb_str_chomp_string(str, rs);
}

#chopObject

:include: doc/string/chop.rdoc


10038
10039
10040
10041
10042
# File 'string.c', line 10038

static VALUE
rb_str_chop(VALUE str)
{
    return rb_str_subseq(str, 0, chopped_length(str));
}

#chop!self?

Like String#chop, but modifies self in place; returns nil if self is empty, self otherwise.

Related: String#chomp!.

Returns:

  • (self, nil)

10012
10013
10014
10015
10016
10017
10018
10019
10020
10021
10022
10023
10024
10025
10026
10027
# File 'string.c', line 10012

static VALUE
rb_str_chop_bang(VALUE str)
{
    str_modify_keep_cr(str);
    if (RSTRING_LEN(str) > 0) {
        long len;
        len = chopped_length(str);
        STR_SET_LEN(str, len);
        TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
        if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
            ENC_CODERANGE_CLEAR(str);
        }
        return str;
    }
    return Qnil;
}

#chrString

Returns a string containing the first character of self:

s = 'foo' # => "foo"
s.chr     # => "f"

Returns:


6526
6527
6528
6529
6530
# File 'string.c', line 6526

static VALUE
rb_str_chr(VALUE str)
{
    return rb_str_substr(str, 0, 1);
}

#clearself

Removes the contents of self:

s = 'foo' # => "foo"
s.clear   # => ""

Returns:

  • (self)

6501
6502
6503
6504
6505
6506
6507
6508
6509
6510
6511
6512
6513
# File 'string.c', line 6501

static VALUE
rb_str_clear(VALUE str)
{
    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_LEN(str, 0);
    RSTRING_PTR(str)[0] = 0;
    if (rb_enc_asciicompat(STR_ENC_GET(str)))
        ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
    else
        ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
    return str;
}

#codepointsObject

:include: doc/string/codepoints.rdoc


9807
9808
9809
9810
9811
9812
# File 'string.c', line 9807

static VALUE
rb_str_codepoints(VALUE str)
{
    VALUE ary = WANTARRAY("codepoints", rb_str_strlen(str));
    return rb_str_enumerate_codepoints(str, ary);
}

#concat(*objects) ⇒ String

Concatenates each object in objects to self and returns self:

s = 'foo'
s.concat('bar', 'baz') # => "foobarbaz"
s                      # => "foobarbaz"

For each given object object that is an Integer, the value is considered a codepoint and converted to a character before concatenation:

s = 'foo'
s.concat(32, 'bar', 32, 'baz') # => "foo bar baz"

Related: String#<<, which takes a single argument.

Returns:


3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
# File 'string.c', line 3731

static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        return rb_str_concat(str, argv[0]);
    }
    else if (argc > 1) {
        int i;
        VALUE arg_str = rb_str_tmp_new(0);
        rb_enc_copy(arg_str, str);
        for (i = 0; i < argc; i++) {
            rb_str_concat(arg_str, argv[i]);
        }
        rb_str_buf_append(str, arg_str);
    }

    return str;
}

#count(*selectors) ⇒ Integer

Returns the total number of characters in self that are specified by the given selectors (see Multiple Character Selectors):

a = "hello world"
a.count "lo"                   #=> 5
a.count "lo", "o"              #=> 2
a.count "hello", "^l"          #=> 4
a.count "ej-m"                 #=> 4

"hello^world".count "\\^aeiou" #=> 4
"hello-world".count "a\\-eo"   #=> 4

c = "hello world\\r\\n"
c.count "\\"                   #=> 2
c.count "\\A"                  #=> 0
c.count "X-\\w"                #=> 3

Returns:


9018
9019
9020
9021
9022
9023
9024
9025
9026
9027
9028
9029
9030
9031
9032
9033
9034
9035
9036
9037
9038
9039
9040
9041
9042
9043
9044
9045
9046
9047
9048
9049
9050
9051
9052
9053
9054
9055
9056
9057
9058
9059
9060
9061
9062
9063
9064
9065
9066
9067
9068
9069
9070
9071
9072
9073
9074
9075
9076
9077
9078
9079
9080
9081
9082
9083
9084
9085
# File 'string.c', line 9018

static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
    char table[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0, tstr;
    char *s, *send;
    int i;
    int ascompat;
    size_t n = 0;

    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);

    tstr = argv[0];
    StringValue(tstr);
    enc = rb_enc_check(str, tstr);
    if (argc == 1) {
        const char *ptstr;
        if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
            (ptstr = RSTRING_PTR(tstr),
             ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
            !is_broken_string(str)) {
            int clen;
            unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);

            s = RSTRING_PTR(str);
            if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
            send = RSTRING_END(str);
            while (s < send) {
                if (*(unsigned char*)s++ == c) n++;
            }
            return SIZET2NUM(n);
        }
    }

    tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
    for (i=1; i<argc; i++) {
        tstr = argv[i];
        StringValue(tstr);
        enc = rb_enc_check(str, tstr);
        tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
    }

    s = RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
    send = RSTRING_END(str);
    ascompat = rb_enc_asciicompat(enc);
    while (s < send) {
        unsigned int c;

        if (ascompat && (c = *(unsigned char*)s) < 0x80) {
            if (table[c]) {
                n++;
            }
            s++;
        }
        else {
            int clen;
            c = rb_enc_codepoint_len(s, send, &clen, enc);
            if (tr_find(c, table, del, nodel)) {
                n++;
            }
            s += clen;
        }
    }

    return SIZET2NUM(n);
}

#crypt(salt_str) ⇒ Object

Returns the string generated by calling crypt(3) standard library function with str and salt_str, in this order, as its arguments. Please do not use this method any longer. It is legacy; provided only for backward compatibility with ruby scripts in earlier days. It is bad to use in contemporary programs for several reasons:

  • Behaviour of C’s crypt(3) depends on the OS it is run. The generated string lacks data portability.

  • On some OSes such as Mac OS, crypt(3) never fails (i.e. silently ends up in unexpected results).

  • On some OSes such as Mac OS, crypt(3) is not thread safe.

  • So-called “traditional” usage of crypt(3) is very very very weak. According to its manpage, Linux’s traditional crypt(3) output has only 2**56 variations; too easy to brute force today. And this is the default behaviour.

  • In order to make things robust some OSes implement so-called “modular” usage. To go through, you have to do a complex build-up of the salt_str parameter, by hand. Failure in generation of a proper salt string tends not to yield any errors; typos in parameters are normally not detectable.

    • For instance, in the following example, the second invocation of String#crypt is wrong; it has a typo in “round=” (lacks “s”). However the call does not fail and something unexpected is generated.

      "foo".crypt("$5$rounds=1000$salt$") # OK, proper usage
      "foo".crypt("$5$round=1000$salt$")  # Typo not detected
      
  • Even in the “modular” mode, some hash functions are considered archaic and no longer recommended at all; for instance module $1$ is officially abandoned by its author: see phk.freebsd.dk/sagas/md5crypt_eol/ . For another instance module $3$ is considered completely broken: see the manpage of FreeBSD.

  • On some OS such as Mac OS, there is no modular mode. Yet, as written above, crypt(3) on Mac OS never fails. This means even if you build up a proper salt string it generates a traditional DES hash anyways, and there is no way for you to be aware of.

    "foo".crypt("$5$rounds=1000$salt$") # => "$5fNPQMxC5j6."
    

If for some reason you cannot migrate to other secure contemporary password hashing algorithms, install the string-crypt gem and require 'string/crypt' to continue using it.


10715
10716
10717
10718
10719
10720
10721
10722
10723
10724
10725
10726
10727
10728
10729
10730
10731
10732
10733
10734
10735
10736
10737
10738
10739
10740
10741
10742
10743
10744
10745
10746
10747
10748
10749
10750
10751
10752
10753
10754
10755
10756
10757
10758
10759
10760
10761
10762
10763
10764
10765
10766
10767
10768
10769
# File 'string.c', line 10715

static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
#ifdef HAVE_CRYPT_R
    VALUE databuf;
    struct crypt_data *data;
#   define CRYPT_END() ALLOCV_END(databuf)
#else
    extern char *crypt(const char *, const char *);
#   define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
#endif
    VALUE result;
    const char *s, *saltp;
    char *res;
#ifdef BROKEN_CRYPT
    char salt_8bit_clean[3];
#endif

    StringValue(salt);
    mustnot_wchar(str);
    mustnot_wchar(salt);
    s = StringValueCStr(str);
    saltp = RSTRING_PTR(salt);
    if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
        rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
    }

#ifdef BROKEN_CRYPT
    if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
        salt_8bit_clean[0] = saltp[0] & 0x7f;
        salt_8bit_clean[1] = saltp[1] & 0x7f;
        salt_8bit_clean[2] = '\0';
        saltp = salt_8bit_clean;
    }
#endif
#ifdef HAVE_CRYPT_R
    data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
    data->initialized = 0;
# endif
    res = crypt_r(s, saltp, data);
#else
    crypt_mutex_initialize();
    rb_nativethread_lock_lock(&crypt_mutex.lock);
    res = crypt(s, saltp);
#endif
    if (!res) {
        int err = errno;
        CRYPT_END();
        rb_syserr_fail(err, "crypt");
    }
    result = rb_str_new_cstr(res);
    CRYPT_END();
    return result;
}

#delete(*selectors) ⇒ Object

Returns a copy of self with characters specified by selectors removed (see Multiple Character Selectors):

"hello".delete "l","lo"        #=> "heo"
"hello".delete "lo"            #=> "he"
"hello".delete "aeiou", "^e"   #=> "hell"
"hello".delete "ej-m"          #=> "ho"

8835
8836
8837
8838
8839
8840
8841
# File 'string.c', line 8835

static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_delete_bang(argc, argv, str);
    return str;
}

#delete!(*selectors) ⇒ self?

Like String#delete, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)

8759
8760
8761
8762
8763
8764
8765
8766
8767
8768
8769
8770
8771
8772
8773
8774
8775
8776
8777
8778
8779
8780
8781
8782
8783
8784
8785
8786
8787
8788
8789
8790
8791
8792
8793
8794
8795
8796
8797
8798
8799
8800
8801
8802
8803
8804
8805
8806
8807
8808
8809
8810
8811
8812
8813
8814
8815
8816
8817
8818
# File 'string.c', line 8759

static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    char *s, *send, *t;
    VALUE del = 0, nodel = 0;
    int modify = 0;
    int i, ascompat, cr;

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
    for (i=0; i<argc; i++) {
        VALUE s = argv[i];

        StringValue(s);
        enc = rb_enc_check(str, s);
        tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
    }

    str_modify_keep_cr(str);
    ascompat = rb_enc_asciicompat(enc);
    s = t = RSTRING_PTR(str);
    send = RSTRING_END(str);
    cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
    while (s < send) {
        unsigned int c;
        int clen;

        if (ascompat && (c = *(unsigned char*)s) < 0x80) {
            if (squeez[c]) {
                modify = 1;
            }
            else {
                if (t != s) *t = c;
                t++;
            }
            s++;
        }
        else {
            c = rb_enc_codepoint_len(s, send, &clen, enc);

            if (tr_find(c, squeez, del, nodel)) {
                modify = 1;
            }
            else {
                if (t != s) rb_enc_mbcput(c, t, enc);
                t += clen;
                if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
            }
            s += clen;
        }
    }
    TERM_FILL(t, TERM_LEN(str));
    STR_SET_LEN(str, t - RSTRING_PTR(str));
    ENC_CODERANGE_SET(str, cr);

    if (modify) return str;
    return Qnil;
}

#delete_prefix(prefix) ⇒ Object

:include: doc/string/delete_prefix.rdoc


11220
11221
11222
11223
11224
11225
11226
11227
11228
11229
# File 'string.c', line 11220

static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    long prefixlen;

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return str_duplicate(rb_cString, str);

    return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
}

#delete_prefix!(prefix) ⇒ self?

Like String#delete_prefix, except that self is modified in place. Returns self if the prefix is removed, nil otherwise.

Returns:

  • (self, nil)

11200
11201
11202
11203
11204
11205
11206
11207
11208
11209
11210
# File 'string.c', line 11200

static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    long prefixlen;
    str_modify_keep_cr(str);

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return Qnil;

    return rb_str_drop_bytes(str, prefixlen);
}

#delete_suffix(suffix) ⇒ Object

:include: doc/string/delete_suffix.rdoc


11303
11304
11305
11306
11307
11308
11309
11310
11311
11312
# File 'string.c', line 11303

static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    long suffixlen;

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return str_duplicate(rb_cString, str);

    return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
}

#delete_suffix!(suffix) ⇒ self?

Like String#delete_suffix, except that self is modified in place. Returns self if the suffix is removed, nil otherwise.

Returns:

  • (self, nil)

11275
11276
11277
11278
11279
11280
11281
11282
11283
11284
11285
11286
11287
11288
11289
11290
11291
11292
11293
# File 'string.c', line 11275

static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    long olen, suffixlen, len;
    str_modifiable(str);

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return Qnil;

    olen = RSTRING_LEN(str);
    str_modify_keep_cr(str);
    len = olen - suffixlen;
    STR_SET_LEN(str, len);
    TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}

#downcase(*options) ⇒ String

Returns a string containing the downcased characters in self:

s = 'Hello World!' # => "Hello World!"
s.downcase         # => "hello world!"

The casing may be affected by the given options; see Case Mapping.

Related: String#downcase!, String#upcase, String#upcase!.

Returns:


8089
8090
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
8103
8104
8105
8106
8107
8108
8109
8110
8111
8112
# File 'string.c', line 8089

static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        downcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#downcase!(*options) ⇒ self?

Downcases the characters in self; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.downcase!        # => "hello world!"
s                  # => "hello world!"
s.downcase!        # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#downcase, String#upcase, String#upcase!.

Returns:

  • (self, nil)

8050
8051
8052
8053
8054
8055
8056
8057
8058
8059
8060
8061
8062
8063
8064
8065
8066
8067
8068
8069
8070
# File 'string.c', line 8050

static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#dumpString

Returns a printable version of self, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:

"hello \n ''".dump    # => "\"hello \\n ''\""
"\f\x00\xff\\\"".dump # => "\"\\f\\x00\\xFF\\\\\\\"\""

Related: String#undump (inverse of String#dump).

Returns:


7314
7315
7316
7317
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
7336
7337
7338
7339
7340
7341
7342
7343
7344
7345
7346
7347
7348
7349
7350
7351
7352
7353
7354
7355
7356
7357
7358
7359
7360
7361
7362
7363
7364
7365
7366
7367
7368
7369
7370
7371
7372
7373
7374
7375
7376
7377
7378
7379
7380
7381
7382
7383
7384
7385
7386
7387
7388
7389
7390
7391
7392
7393
7394
7395
7396
7397
7398
7399
7400
7401
7402
7403
7404
7405
7406
7407
7408
7409
7410
7411
7412
7413
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
7428
7429
7430
7431
7432
7433
7434
7435
7436
7437
7438
7439
7440
7441
7442
7443
7444
7445
7446
7447
7448
7449
7450
7451
7452
7453
7454
7455
7456
7457
7458
7459
# File 'string.c', line 7314

VALUE
rb_str_dump(VALUE str)
{
    int encidx = rb_enc_get_index(str);
    rb_encoding *enc = rb_enc_from_index(encidx);
    long len;
    const char *p, *pend;
    char *q, *qend;
    VALUE result;
    int u8 = (encidx == rb_utf8_encindex());
    static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")";

    len = 2;			/* "" */
    if (!rb_enc_asciicompat(enc)) {
        len += strlen(nonascii_suffix) - rb_strlen_lit("%s");
        len += strlen(enc->name);
    }

    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    while (p < pend) {
        int clen;
        unsigned char c = *p++;

        switch (c) {
          case '"':  case '\\':
          case '\n': case '\r':
          case '\t': case '\f':
          case '\013': case '\010': case '\007': case '\033':
            clen = 2;
            break;

          case '#':
            clen = IS_EVSTR(p, pend) ? 2 : 1;
            break;

          default:
            if (ISPRINT(c)) {
                clen = 1;
            }
            else {
                if (u8 && c > 0x7F) {	/* \u notation */
                    int n = rb_enc_precise_mbclen(p-1, pend, enc);
                    if (MBCLEN_CHARFOUND_P(n)) {
                        unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
                        if (cc <= 0xFFFF)
                            clen = 6;  /* \uXXXX */
                        else if (cc <= 0xFFFFF)
                            clen = 9;  /* \u{XXXXX} */
                        else
                            clen = 10; /* \u{XXXXXX} */
                        p += MBCLEN_CHARFOUND_LEN(n)-1;
                        break;
                    }
                }
                clen = 4;	/* \xNN */
            }
            break;
        }

        if (clen > LONG_MAX - len) {
            rb_raise(rb_eRuntimeError, "string size too big");
        }
        len += clen;
    }

    result = rb_str_new(0, len);
    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    q = RSTRING_PTR(result); qend = q + len + 1;

    *q++ = '"';
    while (p < pend) {
        unsigned char c = *p++;

        if (c == '"' || c == '\\') {
            *q++ = '\\';
            *q++ = c;
        }
        else if (c == '#') {
            if (IS_EVSTR(p, pend)) *q++ = '\\';
            *q++ = '#';
        }
        else if (c == '\n') {
            *q++ = '\\';
            *q++ = 'n';
        }
        else if (c == '\r') {
            *q++ = '\\';
            *q++ = 'r';
        }
        else if (c == '\t') {
            *q++ = '\\';
            *q++ = 't';
        }
        else if (c == '\f') {
            *q++ = '\\';
            *q++ = 'f';
        }
        else if (c == '\013') {
            *q++ = '\\';
            *q++ = 'v';
        }
        else if (c == '\010') {
            *q++ = '\\';
            *q++ = 'b';
        }
        else if (c == '\007') {
            *q++ = '\\';
            *q++ = 'a';
        }
        else if (c == '\033') {
            *q++ = '\\';
            *q++ = 'e';
        }
        else if (ISPRINT(c)) {
            *q++ = c;
        }
        else {
            *q++ = '\\';
            if (u8) {
                int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
                if (MBCLEN_CHARFOUND_P(n)) {
                    int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
                    p += n;
                    if (cc <= 0xFFFF)
                        snprintf(q, qend-q, "u%04X", cc);    /* \uXXXX */
                    else
                        snprintf(q, qend-q, "u{%X}", cc);  /* \u{XXXXX} or \u{XXXXXX} */
                    q += strlen(q);
                    continue;
                }
            }
            snprintf(q, qend-q, "x%02X", c);
            q += 3;
        }
    }
    *q++ = '"';
    *q = '\0';
    if (!rb_enc_asciicompat(enc)) {
        snprintf(q, qend-q, nonascii_suffix, enc->name);
        encidx = rb_ascii8bit_encindex();
    }
    /* result from dump is ASCII */
    rb_enc_associate_index(result, encidx);
    ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
    return result;
}

#dupObject

:nodoc:


1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
# File 'string.c', line 1926

VALUE
rb_str_dup_m(VALUE str)
{
    if (LIKELY(BARE_STRING_P(str))) {
        return str_duplicate(rb_obj_class(str), str);
    }
    else {
        return rb_obj_dup(str);
    }
}

#each_byte {|byte| ... } ⇒ self #each_byteObject

:include: doc/string/each_byte.rdoc

Overloads:

  • #each_byte {|byte| ... } ⇒ self

    Yields:

    • (byte)

    Returns:

    • (self)

9663
9664
9665
9666
9667
9668
# File 'string.c', line 9663

static VALUE
rb_str_each_byte(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
    return rb_str_enumerate_bytes(str, 0);
}

#each_char {|c| ... } ⇒ self #each_charObject

:include: doc/string/each_char.rdoc

Overloads:

  • #each_char {|c| ... } ⇒ self

    Yields:

    • (c)

    Returns:

    • (self)

9732
9733
9734
9735
9736
9737
# File 'string.c', line 9732

static VALUE
rb_str_each_char(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_chars(str, 0);
}

#each_codepoint {|integer| ... } ⇒ self #each_codepointObject

:include: doc/string/each_codepoint.rdoc

Overloads:

  • #each_codepoint {|integer| ... } ⇒ self

    Yields:

    • (integer)

    Returns:

    • (self)

9792
9793
9794
9795
9796
9797
# File 'string.c', line 9792

static VALUE
rb_str_each_codepoint(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_codepoints(str, 0);
}

#each_grapheme_cluster {|gc| ... } ⇒ self #each_grapheme_clusterObject

:include: doc/string/each_grapheme_cluster.rdoc

Overloads:

  • #each_grapheme_cluster {|gc| ... } ⇒ self

    Yields:

    • (gc)

    Returns:

    • (self)

9962
9963
9964
9965
9966
9967
# File 'string.c', line 9962

static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
    return rb_str_enumerate_grapheme_clusters(str, 0);
}

#each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self #each_line(line_sep = $/, chomp: false) ⇒ Object

:include: doc/string/each_line.rdoc

Overloads:

  • #each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self

    Yields:

    • (substring)

    Returns:

    • (self)

9611
9612
9613
9614
9615
9616
# File 'string.c', line 9611

static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);
    return rb_str_enumerate_lines(argc, argv, str, 0);
}

#empty?Boolean

Returns true if the length of self is zero, false otherwise:

"hello".empty? # => false
" ".empty? # => false
"".empty? # => true

Returns:

  • (Boolean)

2387
2388
2389
2390
2391
# File 'string.c', line 2387

static VALUE
rb_str_empty(VALUE str)
{
    return RBOOL(RSTRING_LEN(str) == 0);
}

#encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String

:include: doc/string/encode.rdoc

Overloads:

  • #encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String

    Returns:

  • #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String

    Returns:


2905
2906
2907
2908
2909
2910
2911
# File 'transcode.c', line 2905

static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr = str;
    int encidx = str_transcode(argc, argv, &newstr);
    return encoded_dup(newstr, str, encidx);
}

#encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self

Like #encode, but applies encoding changes to self; returns self.

Overloads:

  • #encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self

    Returns:

    • (self)
  • #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self

    Returns:

    • (self)

2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
# File 'transcode.c', line 2874

static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr;
    int encidx;

    rb_check_frozen(str);

    newstr = str;
    encidx = str_transcode(argc, argv, &newstr);

    if (encidx < 0) return str;
    if (newstr == str) {
        rb_enc_associate_index(str, encidx);
        return str;
    }
    rb_str_shared_replace(str, newstr);
    return str_encode_associate(str, encidx);
}

#encodingEncoding

Returns the Encoding object that represents the encoding of obj.

Returns:


1162
1163
1164
1165
1166
1167
1168
1169
1170
# File 'encoding.c', line 1162

VALUE
rb_obj_encoding(VALUE obj)
{
    int idx = rb_enc_get_index(obj);
    if (idx < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }
    return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
}

#end_with?(*strings) ⇒ Boolean

:include: doc/string/end_with_p.rdoc

Returns:

  • (Boolean)

11116
11117
11118
11119
11120
11121
11122
11123
11124
11125
11126
11127
11128
11129
11130
11131
11132
11133
11134
11135
11136
11137
11138
11139
11140
# File 'string.c', line 11116

static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
        VALUE tmp = argv[i];
        const char *p, *s, *e;
        long slen, tlen;
        rb_encoding *enc;

        StringValue(tmp);
        enc = rb_enc_check(str, tmp);
        if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
        if ((slen = RSTRING_LEN(str)) < tlen) continue;
        p = RSTRING_PTR(str);
        e = p + slen;
        s = e - tlen;
        if (!at_char_boundary(p, s, e, enc))
            continue;
        if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
            return Qtrue;
    }
    return Qfalse;
}

#eql?(object) ⇒ Boolean

Returns true if object has the same length and content;

as +self+; +false+ otherwise:

  s = 'foo'
  s.eql?('foo') # => true
  s.eql?('food') # => false
  s.eql?('FOO') # => false

Returns +false+ if the two strings' encodings are not compatible:

  "\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false

Returns:

  • (Boolean)

4181
4182
4183
4184
4185
4186
4187
# File 'string.c', line 4181

VALUE
rb_str_eql(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
    return rb_str_eql_internal(str1, str2);
}

#force_encoding(encoding) ⇒ self

:include: doc/string/force_encoding.rdoc

Returns:

  • (self)

11347
11348
11349
11350
11351
11352
11353
11354
11355
11356
11357
11358
11359
11360
11361
11362
11363
11364
11365
11366
11367
11368
11369
11370
# File 'string.c', line 11347

static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    str_modifiable(str);

    rb_encoding *encoding = rb_to_encoding(enc);
    int idx = rb_enc_to_index(encoding);

    // If the encoding is unchanged, we do nothing.
    if (ENCODING_GET(str) == idx) {
        return str;
    }

    rb_enc_associate_index(str, idx);

    // If the coderange was 7bit and the new encoding is ASCII-compatible
    // we can keep the coderange.
    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
        return str;
    }

    ENC_CODERANGE_CLEAR(str);
    return str;
}

#freezeObject

:nodoc:


3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
# File 'string.c', line 3179

VALUE
rb_str_freeze(VALUE str)
{
    if (CHILLED_STRING_P(str)) {
        FL_UNSET_RAW(str, STR_CHILLED);
    }

    if (OBJ_FROZEN(str)) return str;
    rb_str_resize(str, RSTRING_LEN(str));
    return rb_obj_freeze(str);
}

#getbyte(index) ⇒ Integer?

Returns the byte at zero-based index as an integer, or nil if index is out of range:

s = 'abcde'   # => "abcde"
s.getbyte(0)  # => 97
s.getbyte(-1) # => 101
s.getbyte(5)  # => nil

Related: String#setbyte.

Returns:


6545
6546
6547
6548
6549
6550
6551
6552
6553
6554
6555
6556
# File 'string.c', line 6545

VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long pos = NUM2LONG(index);

    if (pos < 0)
        pos += RSTRING_LEN(str);
    if (pos < 0 ||  RSTRING_LEN(str) <= pos)
        return Qnil;

    return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
}

#grapheme_clustersObject

:include: doc/string/grapheme_clusters.rdoc


9977
9978
9979
9980
9981
9982
# File 'string.c', line 9977

static VALUE
rb_str_grapheme_clusters(VALUE str)
{
    VALUE ary = WANTARRAY("grapheme_clusters", rb_str_strlen(str));
    return rb_str_enumerate_grapheme_clusters(str, ary);
}

#gsub(pattern, replacement) ⇒ Object #gsub(pattern) {|match| ... } ⇒ Object #gsub(pattern) ⇒ Object

Returns a copy of self with all occurrences of the given pattern replaced.

See Substitution Methods.

Returns an Enumerator if no replacement and no block given.

Related: String#sub, String#sub!, String#gsub!.

Overloads:

  • #gsub(pattern) {|match| ... } ⇒ Object

    Yields:


6461
6462
6463
6464
6465
# File 'string.c', line 6461

static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    return str_gsub(argc, argv, str, 0);
}

#gsub!(pattern, replacement) ⇒ self? #gsub!(pattern) {|match| ... } ⇒ self? #gsub!(pattern) ⇒ Object

Performs the specified substring replacement(s) on self; returns self if any replacement occurred, nil otherwise.

See Substitution Methods.

Returns an Enumerator if no replacement and no block given.

Related: String#sub, String#gsub, String#sub!.

Overloads:

  • #gsub!(pattern, replacement) ⇒ self?

    Returns:

    • (self, nil)
  • #gsub!(pattern) {|match| ... } ⇒ self?

    Yields:

    Returns:

    • (self, nil)

6437
6438
6439
6440
6441
6442
# File 'string.c', line 6437

static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    str_modify_keep_cr(str);
    return str_gsub(argc, argv, str, 1);
}

#hashInteger

Returns the integer hash value for self. The value is based on the length, content and encoding of self.

Related: Object#hash.

Returns:


4070
4071
4072
4073
4074
4075
# File 'string.c', line 4070

static VALUE
rb_str_hash_m(VALUE str)
{
    st_index_t hval = rb_str_hash(str);
    return ST2FIX(hval);
}

#hexInteger

Interprets the leading substring of self as a string of hexadecimal digits (with an optional sign and an optional 0x) and returns the corresponding number; returns zero if there is no such leading substring:

'0x0a'.hex        # => 10
'-1234'.hex       # => -4660
'0'.hex           # => 0
'non-numeric'.hex # => 0

Related: String#oct.

Returns:


10608
10609
10610
10611
10612
# File 'string.c', line 10608

static VALUE
rb_str_hex(VALUE str)
{
    return rb_str_to_inum(str, 16, FALSE);
}

#include?(other_string) ⇒ Boolean

Returns true if self contains other_string, false otherwise:

s = 'foo'
s.include?('f')    # => true
s.include?('fo')   # => true
s.include?('food') # => false

Returns:

  • (Boolean)

6971
6972
6973
6974
6975
6976
6977
6978
6979
6980
# File 'string.c', line 6971

VALUE
rb_str_include(VALUE str, VALUE arg)
{
    long i;

    StringValue(arg);
    i = rb_str_index(str, arg, 0);

    return RBOOL(i != -1);
}

#index(substring, offset = 0) ⇒ Integer? #index(regexp, offset = 0) ⇒ Integer?

:include: doc/string/index.rdoc

Overloads:


4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
# File 'string.c', line 4442

static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        long slen = str_strlen(str, enc); /* str's enc */
        pos = NUM2LONG(initpos);
        if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }
    else {
        pos = 0;
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         enc, single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 0) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = rb_str_sublen(str, BEG(0));
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_index(str, sub, pos);
        if (pos >= 0) {
            pos = rb_str_sublen(str, pos);
            return LONG2NUM(pos);
        }
    }
    return Qnil;
}

#replace(other_string) ⇒ self

Replaces the contents of self with the contents of other_string:

s = 'foo'        # => "foo"
s.replace('bar') # => "bar"

Returns:

  • (self)

6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
# File 'string.c', line 6479

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#insert(index, other_string) ⇒ self

Inserts the given other_string into self; returns self.

If the Integer index is positive, inserts other_string at offset index:

'foo'.insert(1, 'bar') # => "fbaroo"

If the Integer index is negative, counts backward from the end of self and inserts other_string at offset index+1 (that is, after self[index]):

'foo'.insert(-2, 'bar') # => "fobaro"

Returns:

  • (self)

5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
# File 'string.c', line 5954

static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long pos = NUM2LONG(idx);

    if (pos == -1) {
        return rb_str_append(str, str2);
    }
    else if (pos < 0) {
        pos++;
    }
    rb_str_update(str, pos, 0, str2);
    return str;
}

#inspectString

Returns a printable version of self, enclosed in double-quotes, and with special characters escaped:

s = "foo\tbar\tbaz\n"
s.inspect
# => "\"foo\\tbar\\tbaz\\n\""

Returns:


7200
7201
7202
7203
7204
7205
7206
7207
7208
7209
7210
7211
7212
7213
7214
7215
7216
7217
7218
7219
7220
7221
7222
7223
7224
7225
7226
7227
7228
7229
7230
7231
7232
7233
7234
7235
7236
7237
7238
7239
7240
7241
7242
7243
7244
7245
7246
7247
7248
7249
7250
7251
7252
7253
7254
7255
7256
7257
7258
7259
7260
7261
7262
7263
7264
7265
7266
7267
7268
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
7282
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
7293
7294
7295
# File 'string.c', line 7200

VALUE
rb_str_inspect(VALUE str)
{
    int encidx = ENCODING_GET(str);
    rb_encoding *enc = rb_enc_from_index(encidx);
    const char *p, *pend, *prev;
    char buf[CHAR_ESC_LEN + 1];
    VALUE result = rb_str_buf_new(0);
    rb_encoding *resenc = rb_default_internal_encoding();
    int unicode_p = rb_enc_unicode_p(enc);
    int asciicompat = rb_enc_asciicompat(enc);

    if (resenc == NULL) resenc = rb_default_external_encoding();
    if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
    rb_enc_associate(result, resenc);
    str_buf_cat2(result, "\"");

    p = RSTRING_PTR(str); pend = RSTRING_END(str);
    prev = p;
    while (p < pend) {
        unsigned int c, cc;
        int n;

        n = rb_enc_precise_mbclen(p, pend, enc);
        if (!MBCLEN_CHARFOUND_P(n)) {
            if (p > prev) str_buf_cat(result, prev, p - prev);
            n = rb_enc_mbminlen(enc);
            if (pend < p + n)
                n = (int)(pend - p);
            while (n--) {
                snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
                str_buf_cat(result, buf, strlen(buf));
                prev = ++p;
            }
            continue;
        }
        n = MBCLEN_CHARFOUND_LEN(n);
        c = rb_enc_mbc_to_codepoint(p, pend, enc);
        p += n;
        if ((asciicompat || unicode_p) &&
          (c == '"'|| c == '\\' ||
            (c == '#' &&
             p < pend &&
             MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
             (cc = rb_enc_codepoint(p,pend,enc),
              (cc == '$' || cc == '@' || cc == '{'))))) {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            str_buf_cat2(result, "\\");
            if (asciicompat || enc == resenc) {
                prev = p - n;
                continue;
            }
        }
        switch (c) {
          case '\n': cc = 'n'; break;
          case '\r': cc = 'r'; break;
          case '\t': cc = 't'; break;
          case '\f': cc = 'f'; break;
          case '\013': cc = 'v'; break;
          case '\010': cc = 'b'; break;
          case '\007': cc = 'a'; break;
          case 033: cc = 'e'; break;
          default: cc = 0; break;
        }
        if (cc) {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            buf[0] = '\\';
            buf[1] = (char)cc;
            str_buf_cat(result, buf, 2);
            prev = p;
            continue;
        }
        /* The special casing of 0x85 (NEXT_LINE) here is because
         * Oniguruma historically treats it as printable, but it
         * doesn't match the print POSIX bracket class or character
         * property in regexps.
         *
         * See Ruby Bug #16842 for details:
         * https://bugs.ruby-lang.org/issues/16842
         */
        if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
            (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
            continue;
        }
        else {
            if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
            rb_str_buf_cat_escaped_char(result, c, unicode_p);
            prev = p;
            continue;
        }
    }
    if (p > prev) str_buf_cat(result, prev, p - prev);
    str_buf_cat2(result, "\"");

    return result;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"

877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
# File 'symbol.c', line 877

VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (sym) {
            // ok
        }
        else if (USE_SYMBOL_GC) {
            rb_encoding *enc = rb_enc_get(str);
            rb_encoding *ascii = rb_usascii_encoding();
            if (enc != ascii && sym_check_asciionly(str, false)) {
                str = rb_str_dup(str);
                rb_enc_associate(str, ascii);
                OBJ_FREEZE(str);
                enc = ascii;
            }
            else {
                str = rb_str_dup(str);
                OBJ_FREEZE(str);
            }
            str = rb_fstring(str);
            int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) type = ID_JUNK;
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
        }
        else {
            ID id = intern_str(str, 0);
            sym = ID2SYM(id);
        }
    }
    GLOBAL_SYMBOLS_LEAVE();
    return sym;
}

#lengthInteger

:include: doc/string/length.rdoc

Returns:


2355
2356
2357
2358
2359
# File 'string.c', line 2355

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#lines(Line_sep = $/, chomp: false) ⇒ Object

Forms substrings (“lines”) of self according to the given arguments (see String#each_line for details); returns the lines in an array.


9627
9628
9629
9630
9631
9632
# File 'string.c', line 9627

static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
    VALUE ary = WANTARRAY("lines", 0);
    return rb_str_enumerate_lines(argc, argv, str, ary);
}

#ljust(size, pad_string = ' ') ⇒ Object

:include: doc/string/ljust.rdoc

Related: String#rjust, String#center.


10952
10953
10954
10955
10956
# File 'string.c', line 10952

static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'l');
}

#lstripObject

Returns a copy of self with leading whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s        # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.lstrip # => "abc\u0000\t\n\v\f\r "

Related: String#rstrip, String#strip.


10290
10291
10292
10293
10294
10295
10296
10297
10298
10299
# File 'string.c', line 10290

static VALUE
rb_str_lstrip(VALUE str)
{
    char *start;
    long len, loffset;
    RSTRING_GETMEM(str, start, len);
    loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
    if (loffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, loffset, len - loffset);
}

#lstrip!self?

Like String#lstrip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#rstrip!, String#strip!.

Returns:

  • (self, nil)

10252
10253
10254
10255
10256
10257
10258
10259
10260
10261
10262
10263
10264
10265
10266
10267
10268
10269
10270
10271
10272
# File 'string.c', line 10252

static VALUE
rb_str_lstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start, *s;
    long olen, loffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    if (loffset > 0) {
        long len = olen-loffset;
        s = start + loffset;
        memmove(start, s, len);
        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#match(pattern, offset = 0) ⇒ MatchData? #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object

Returns a MatchData object (or nil) based on self and the given pattern.

Note: also updates Regexp@Global+Variables.

  • Computes regexp by converting pattern (if not already a Regexp).

    regexp = Regexp.new(pattern)
    
  • Computes matchdata, which will be either a MatchData object or nil (see Regexp#match):

    matchdata = <tt>regexp.match(self)
    

With no block given, returns the computed matchdata:

'foo'.match('f') # => #<MatchData "f">
'foo'.match('o') # => #<MatchData "o">
'foo'.match('x') # => nil

If Integer argument offset is given, the search begins at index offset:

'foo'.match('f', 1) # => nil
'foo'.match('o', 1) # => #<MatchData "o">

With a block given, calls the block with the computed matchdata and returns the block’s return value:

'foo'.match(/o/) {|matchdata| matchdata } # => #<MatchData "o">
'foo'.match(/x/) {|matchdata| matchdata } # => nil
'foo'.match(/f/, 1) {|matchdata| matchdata } # => nil

Overloads:

  • #match(pattern, offset = 0) ⇒ MatchData?

    Returns:

  • #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object

    Yields:

    • (matchdata)

    Returns:


4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
# File 'string.c', line 4980

static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    VALUE re, result;
    if (argc < 1)
        rb_check_arity(argc, 1, 2);
    re = argv[0];
    argv[0] = str;
    result = rb_funcallv(get_pat(re), rb_intern("match"), argc, argv);
    if (!NIL_P(result) && rb_block_given_p()) {
        return rb_yield(result);
    }
    return result;
}

#match?(pattern, offset = 0) ⇒ Boolean

Returns true or false based on whether a match is found for self and pattern.

Note: does not update Regexp@Global+Variables.

Computes regexp by converting pattern (if not already a Regexp).

regexp = Regexp.new(pattern)

Returns true if self+.match(regexp) returns a MatchData object, false otherwise:

'foo'.match?(/o/) # => true
'foo'.match?('o') # => true
'foo'.match?(/x/) # => false

If Integer argument offset is given, the search begins at index offset:

'foo'.match?('f', 1) # => false
'foo'.match?('o', 1) # => true

Returns:

  • (Boolean)

5019
5020
5021
5022
5023
5024
5025
5026
# File 'string.c', line 5019

static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    VALUE re;
    rb_check_arity(argc, 1, 2);
    re = get_pat(argv[0]);
    return rb_reg_match_p(re, str, argc > 1 ? NUM2LONG(argv[1]) : 0);
}

#succString

Returns the successor to self. The successor is calculated by incrementing characters.

The first character to be incremented is the rightmost alphanumeric: or, if no alphanumerics, the rightmost character:

'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'

The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:

'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"

The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:

'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"

The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:

s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"

Carrying can occur between and among mixtures of alphanumeric characters:

s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"

The successor to an empty String is a new empty String:

''.succ # => ""

Returns:


5271
5272
5273
5274
5275
5276
5277
5278
# File 'string.c', line 5271

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!self

Equivalent to String#succ, but modifies self in place; returns self.

Returns:

  • (self)

5375
5376
5377
5378
5379
5380
5381
# File 'string.c', line 5375

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#octInteger

Interprets the leading substring of self as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:

'123'.oct             # => 83
'-377'.oct            # => -255
'0377non-numeric'.oct # => 255
'non-numeric'.oct     # => 0

If self starts with 0, radix indicators are honored; see Kernel#Integer.

Related: String#hex.

Returns:


10635
10636
10637
10638
10639
# File 'string.c', line 10635

static VALUE
rb_str_oct(VALUE str)
{
    return rb_str_to_inum(str, -8, FALSE);
}

#ordInteger

:include: doc/string/ord.rdoc

Returns:


10780
10781
10782
10783
10784
10785
10786
10787
# File 'string.c', line 10780

static VALUE
rb_str_ord(VALUE s)
{
    unsigned int c;

    c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s));
    return UINT2NUM(c);
}

#partition(string_or_regexp) ⇒ Array

:include: doc/string/partition.rdoc

Returns:


10999
11000
11001
11002
11003
11004
11005
11006
11007
11008
11009
11010
11011
11012
11013
11014
11015
11016
11017
11018
11019
11020
11021
11022
11023
11024
11025
11026
# File 'string.c', line 10999

static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
    long pos;

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
        if (rb_reg_search(sep, str, 0, 0) < 0) {
            goto failed;
        }
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
    }
    else {
        pos = rb_str_index(str, sep, 0);
        if (pos < 0) goto failed;
    }
    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                          sep,
                          rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                             RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));

  failed:
    return rb_ary_new3(3, str_duplicate(rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
}

#prepend(*other_strings) ⇒ String

Prepends each string in other_strings to self and returns self:

s = 'foo'
s.prepend('bar', 'baz') # => "barbazfoo"
s                       # => "barbazfoo"

Related: String#concat.

Returns:


4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
# File 'string.c', line 4013

static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        rb_str_update(str, 0L, 0L, argv[0]);
    }
    else if (argc > 1) {
        int i;
        VALUE arg_str = rb_str_tmp_new(0);
        rb_enc_copy(arg_str, str);
        for (i = 0; i < argc; i++) {
            rb_str_append(arg_str, argv[i]);
        }
        rb_str_update(str, 0L, 0L, arg_str);
    }

    return str;
}

#replace(other_string) ⇒ self

Replaces the contents of self with the contents of other_string:

s = 'foo'        # => "foo"
s.replace('bar') # => "bar"

Returns:

  • (self)

6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
# File 'string.c', line 6479

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#reverseString

Returns a new string with the characters from self in reverse order.

'stressed'.reverse # => "desserts"

Returns:


6868
6869
6870
6871
6872
6873
6874
6875
6876
6877
6878
6879
6880
6881
6882
6883
6884
6885
6886
6887
6888
6889
6890
6891
6892
6893
6894
6895
6896
6897
6898
6899
6900
6901
6902
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916
# File 'string.c', line 6868

static VALUE
rb_str_reverse(VALUE str)
{
    rb_encoding *enc;
    VALUE rev;
    char *s, *e, *p;
    int cr;

    if (RSTRING_LEN(str) <= 1) return str_duplicate(rb_cString, str);
    enc = STR_ENC_GET(str);
    rev = rb_str_new(0, RSTRING_LEN(str));
    s = RSTRING_PTR(str); e = RSTRING_END(str);
    p = RSTRING_END(rev);
    cr = ENC_CODERANGE(str);

    if (RSTRING_LEN(str) > 1) {
        if (single_byte_optimizable(str)) {
            while (s < e) {
                *--p = *s++;
            }
        }
        else if (cr == ENC_CODERANGE_VALID) {
            while (s < e) {
                int clen = rb_enc_fast_mbclen(s, e, enc);

                p -= clen;
                memcpy(p, s, clen);
                s += clen;
            }
        }
        else {
            cr = rb_enc_asciicompat(enc) ?
                ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
            while (s < e) {
                int clen = rb_enc_mbclen(s, e, enc);

                if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
                p -= clen;
                memcpy(p, s, clen);
                s += clen;
            }
        }
    }
    STR_SET_LEN(rev, RSTRING_LEN(str));
    str_enc_copy_direct(rev, str);
    ENC_CODERANGE_SET(rev, cr);

    return rev;
}

#reverse!self

Returns self with its characters reversed:

s = 'stressed'
s.reverse! # => "desserts"
s          # => "desserts"

Returns:

  • (self)

6931
6932
6933
6934
6935
6936
6937
6938
6939
6940
6941
6942
6943
6944
6945
6946
6947
6948
6949
6950
6951
6952
6953
6954
6955
# File 'string.c', line 6931

static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) > 1) {
        if (single_byte_optimizable(str)) {
            char *s, *e, c;

            str_modify_keep_cr(str);
            s = RSTRING_PTR(str);
            e = RSTRING_END(str) - 1;
            while (s < e) {
                c = *s;
                *s++ = *e;
                *e-- = c;
            }
        }
        else {
            str_shared_replace(str, rb_str_reverse(str));
        }
    }
    else {
        str_modify_keep_cr(str);
    }
    return str;
}

#rindex(substring, offset = self.length) ⇒ Integer? #rindex(regexp, offset = self.length) ⇒ Integer?

Returns the Integer index of the last occurrence of the given substring, or nil if none found:

'foo'.rindex('f') # => 0
'foo'.rindex('o') # => 2
'foo'.rindex('oo') # => 1
'foo'.rindex('ooo') # => nil

Returns the Integer index of the last match for the given Regexp regexp, or nil if none found:

'foo'.rindex(/f/) # => 0
'foo'.rindex(/o/) # => 2
'foo'.rindex(/oo/) # => 1
'foo'.rindex(/ooo/) # => nil

The last match means starting at the possible last position, not the last of longest matches.

'foo'.rindex(/o+/) # => 2
$~ #=> #<MatchData "o">

To get the last longest match, needs to combine with negative lookbehind.

'foo'.rindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">

Or String#index with negative lookforward.

'foo'.index(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">

Integer argument offset, if given and non-negative, specifies the maximum starting position in the string to end the search:

'foo'.rindex('o', 0) # => nil
'foo'.rindex('o', 1) # => 1
'foo'.rindex('o', 2) # => 2
'foo'.rindex('o', 3) # => 2

If offset is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset:

'foo'.rindex('o', -1) # => 2
'foo'.rindex('o', -2) # => 1
'foo'.rindex('o', -3) # => nil
'foo'.rindex('o', -4) # => nil

Related: String#index.

Overloads:

  • #rindex(substring, offset = self.length) ⇒ Integer?

    Returns:

  • #rindex(regexp, offset = self.length) ⇒ Integer?

    Returns:


4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
# File 'string.c', line 4723

static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos, len = str_strlen(str, enc); /* str's enc */

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0 && (pos += len) < 0) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
        if (pos > len) pos = len;
    }
    else {
        pos = len;
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        /* enc = rb_enc_check(str, sub); */
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         enc, single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 1) >= 0) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);
            pos = rb_str_sublen(str, BEG(0));
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_rindex(str, sub, pos);
        if (pos >= 0) {
            pos = rb_str_sublen(str, pos);
            return LONG2NUM(pos);
        }
    }
    return Qnil;
}

#rjust(size, pad_string = ' ') ⇒ Object

:include: doc/string/rjust.rdoc

Related: String#ljust, String#center.


10968
10969
10970
10971
10972
# File 'string.c', line 10968

static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'r');
}

#rpartition(sep) ⇒ Array

:include: doc/string/rpartition.rdoc

Returns:


11036
11037
11038
11039
11040
11041
11042
11043
11044
11045
11046
11047
11048
11049
11050
11051
11052
11053
11054
11055
11056
11057
11058
11059
11060
11061
11062
11063
11064
11065
11066
# File 'string.c', line 11036

static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
    long pos = RSTRING_LEN(str);

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
        if (rb_reg_search(sep, str, pos, 1) < 0) {
            goto failed;
        }
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
    }
    else {
        pos = rb_str_sublen(str, pos);
        pos = rb_str_rindex(str, sep, pos);
        if (pos < 0) {
            goto failed;
        }
    }

    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                          sep,
                          rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                        RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
  failed:
    return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(rb_cString, str));
}

#rstripObject

Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s        # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.rstrip # => "\u0000\t\n\v\f\r abc"

Related: String#lstrip, String#strip.


10377
10378
10379
10380
10381
10382
10383
10384
10385
10386
10387
10388
10389
10390
# File 'string.c', line 10377

static VALUE
rb_str_rstrip(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);

    if (roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, 0, olen-roffset);
}

#rstrip!self?

Like String#rstrip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#lstrip!, String#strip!.

Returns:

  • (self, nil)

10340
10341
10342
10343
10344
10345
10346
10347
10348
10349
10350
10351
10352
10353
10354
10355
10356
10357
10358
10359
# File 'string.c', line 10340

static VALUE
rb_str_rstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);
    if (roffset > 0) {
        long len = olen - roffset;

        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#scan(string_or_regexp) ⇒ Array #scan(string_or_regexp) {|matches| ... } ⇒ self

Matches a pattern against self; the pattern is:

  • string_or_regexp itself, if it is a Regexp.

  • Regexp.quote(string_or_regexp), if string_or_regexp is a string.

Iterates through self, generating a collection of matching results:

  • If the pattern contains no groups, each result is the matched string, $&.

  • If the pattern contains groups, each result is an array containing one entry per group.

With no block given, returns an array of the results:

s = 'cruel world'
s.scan(/\w+/)      # => ["cruel", "world"]
s.scan(/.../)      # => ["cru", "el ", "wor"]
s.scan(/(...)/)    # => [["cru"], ["el "], ["wor"]]
s.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]]

With a block given, calls the block with each result; returns self:

s.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
s.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"

Output:

<<cruel>> <<world>>
rceu lowlr

Overloads:

  • #scan(string_or_regexp) ⇒ Array

    Returns:

  • #scan(string_or_regexp) {|matches| ... } ⇒ self

    Yields:

    • (matches)

    Returns:

    • (self)

10556
10557
10558
10559
10560
10561
10562
10563
10564
10565
10566
10567
10568
10569
10570
10571
10572
10573
10574
10575
10576
10577
10578
10579
10580
10581
10582
10583
10584
10585
10586
10587
# File 'string.c', line 10556

static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE result;
    long start = 0;
    long last = -1, prev = 0;
    char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);
    if (!rb_block_given_p()) {
        VALUE ary = rb_ary_new();

        while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
            last = prev;
            prev = start;
            rb_ary_push(ary, result);
        }
        if (last >= 0) rb_pat_search(pat, str, last, 1);
        else rb_backref_set(Qnil);
        return ary;
    }

    while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
        last = prev;
        prev = start;
        rb_yield(result);
        str_mod_check(str, p, len);
    }
    if (last >= 0) rb_pat_search(pat, str, last, 1);
    return str;
}

#scrub(replacement_string = default_replacement) ⇒ Object #scrub {|bytes| ... } ⇒ Object

:include: doc/string/scrub.rdoc

Overloads:


11779
11780
11781
11782
11783
11784
11785
# File 'string.c', line 11779

static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    return NIL_P(new) ? str_duplicate(rb_cString, str): new;
}

#scrub!self #scrub!(replacement_string = default_replacement) ⇒ self #scrub! {|bytes| ... } ⇒ self

Like String#scrub, except that any replacements are made in self.

Overloads:

  • #scrub!self

    Returns:

    • (self)
  • #scrub!(replacement_string = default_replacement) ⇒ self

    Returns:

    • (self)
  • #scrub! {|bytes| ... } ⇒ self

    Yields:

    Returns:

    • (self)

11796
11797
11798
11799
11800
11801
11802
11803
# File 'string.c', line 11796

static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    if (!NIL_P(new)) rb_str_replace(str, new);
    return str;
}

#setbyte(index, integer) ⇒ Integer

Sets the byte at zero-based index to integer; returns integer:

s = 'abcde'      # => "abcde"
s.setbyte(0, 98) # => 98
s                # => "bbcde"

Related: String#getbyte.

Returns:


6570
6571
6572
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583
6584
6585
6586
6587
6588
6589
6590
6591
6592
6593
6594
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
6605
6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
# File 'string.c', line 6570

VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
    long pos = NUM2LONG(index);
    long len = RSTRING_LEN(str);
    char *ptr, *head, *left = 0;
    rb_encoding *enc;
    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;

    if (pos < -len || len <= pos)
        rb_raise(rb_eIndexError, "index %ld out of string", pos);
    if (pos < 0)
        pos += len;

    VALUE v = rb_to_int(value);
    VALUE w = rb_int_and(v, INT2FIX(0xff));
    char byte = (char)(NUM2INT(w) & 0xFF);

    if (!str_independent(str))
        str_make_independent(str);
    enc = STR_ENC_GET(str);
    head = RSTRING_PTR(str);
    ptr = &head[pos];
    if (!STR_EMBED_P(str)) {
        cr = ENC_CODERANGE(str);
        switch (cr) {
          case ENC_CODERANGE_7BIT:
            left = ptr;
            *ptr = byte;
            if (ISASCII(byte)) goto end;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else
                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
            goto end;
          case ENC_CODERANGE_VALID:
            left = rb_enc_left_char_head(head, ptr, head+len, enc);
            width = rb_enc_precise_mbclen(left, head+len, enc);
            *ptr = byte;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
                ENC_CODERANGE_CLEAR(str);
            goto end;
        }
    }
    ENC_CODERANGE_CLEAR(str);
    *ptr = byte;

  end:
    return value;
}

#lengthInteger

:include: doc/string/length.rdoc

Returns:


2355
2356
2357
2358
2359
# File 'string.c', line 2355

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil

Returns the substring of self specified by the arguments. See examples at String Slices.

Overloads:

  • #[](index) ⇒ nil

    Returns:

    • (nil)
  • #[](start, length) ⇒ nil

    Returns:

    • (nil)
  • #[](range) ⇒ nil

    Returns:

    • (nil)
  • #[](regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #[](substring) ⇒ nil

    Returns:

    • (nil)

5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
# File 'string.c', line 5684

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            return rb_str_subpat(str, argv[0], argv[1]);
        }
        else {
            return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
        }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#slice!(index) ⇒ nil #slice!(start, length) ⇒ nil #slice!(range) ⇒ nil #slice!(regexp, capture = 0) ⇒ nil #slice!(substring) ⇒ nil

Removes and returns the substring of self specified by the arguments. See String Slices.

A few examples:

string = "This is a string"
string.slice!(2)        #=> "i"
string.slice!(3..6)     #=> " is "
string.slice!(/s.*t/)   #=> "sa st"
string.slice!("r")      #=> "r"
string                  #=> "Thing"

Overloads:

  • #slice!(index) ⇒ nil

    Returns:

    • (nil)
  • #slice!(start, length) ⇒ nil

    Returns:

    • (nil)
  • #slice!(range) ⇒ nil

    Returns:

    • (nil)
  • #slice!(regexp, capture = 0) ⇒ nil

    Returns:

    • (nil)
  • #slice!(substring) ⇒ nil

    Returns:

    • (nil)

5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
# File 'string.c', line 5992

static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE result = Qnil;
    VALUE indx;
    long beg, len = 1;
    char *p;

    rb_check_arity(argc, 1, 2);
    str_modify_keep_cr(str);
    indx = argv[0];
    if (RB_TYPE_P(indx, T_REGEXP)) {
        if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);
        int nth = 0;
        if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
            if ((nth += regs->num_regs) <= 0) return Qnil;
        }
        else if (nth >= regs->num_regs) return Qnil;
        beg = BEG(nth);
        len = END(nth) - beg;
        goto subseq;
    }
    else if (argc == 2) {
        beg = NUM2LONG(indx);
        len = NUM2LONG(argv[1]);
        goto num_index;
    }
    else if (FIXNUM_P(indx)) {
        beg = FIX2LONG(indx);
        if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
        if (!len) return Qnil;
        beg = p - RSTRING_PTR(str);
        goto subseq;
    }
    else if (RB_TYPE_P(indx, T_STRING)) {
        beg = rb_str_index(str, indx, 0);
        if (beg == -1) return Qnil;
        len = RSTRING_LEN(indx);
        result = str_duplicate(rb_cString, indx);
        goto squash;
    }
    else {
        switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
          case Qnil:
            return Qnil;
          case Qfalse:
            beg = NUM2LONG(indx);
            if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
            if (!len) return Qnil;
            beg = p - RSTRING_PTR(str);
            goto subseq;
          default:
            goto num_index;
        }
    }

  num_index:
    if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
    beg = p - RSTRING_PTR(str);

  subseq:
    result = rb_str_new(RSTRING_PTR(str)+beg, len);
    rb_enc_cr_str_copy_for_substr(result, str);

  squash:
    if (len > 0) {
        if (beg == 0) {
            rb_str_drop_bytes(str, len);
        }
        else {
            char *sptr = RSTRING_PTR(str);
            long slen = RSTRING_LEN(str);
            if (beg + len > slen) /* pathological check */
                len = slen - beg;
            memmove(sptr + beg,
                    sptr + beg + len,
                    slen - (beg + len));
            slen -= len;
            STR_SET_LEN(str, slen);
            TERM_FILL(&sptr[slen], TERM_LEN(str));
        }
    }
    return result;
}

#split(field_sep = $;, limit = 0) ⇒ Array #split(field_sep = $;, limit = 0) {|substring| ... } ⇒ self

:include: doc/string/split.rdoc

Overloads:

  • #split(field_sep = $;, limit = 0) ⇒ Array

    Returns:

  • #split(field_sep = $;, limit = 0) {|substring| ... } ⇒ self

    Yields:

    • (substring)

    Returns:

    • (self)

9185
9186
9187
9188
9189
9190
9191
9192
9193
9194
9195
9196
9197
9198
9199
9200
9201
9202
9203
9204
9205
9206
9207
9208
9209
9210
9211
9212
9213
9214
9215
9216
9217
9218
9219
9220
9221
9222
9223
9224
9225
9226
9227
9228
9229
9230
9231
9232
9233
9234
9235
9236
9237
9238
9239
9240
9241
9242
9243
9244
9245
9246
9247
9248
9249
9250
9251
9252
9253
9254
9255
9256
9257
9258
9259
9260
9261
9262
9263
9264
9265
9266
9267
9268
9269
9270
9271
9272
9273
9274
9275
9276
9277
9278
9279
9280
9281
9282
9283
9284
9285
9286
9287
9288
9289
9290
9291
9292
9293
9294
9295
9296
9297
9298
9299
9300
9301
9302
9303
9304
9305
9306
9307
9308
9309
9310
9311
9312
9313
9314
9315
9316
9317
9318
9319
9320
9321
9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
9368
9369
9370
9371
9372
9373
9374
9375
9376
9377
9378
9379
9380
9381
9382
9383
9384
9385
9386
9387
9388
9389
9390
9391
9392
9393
9394
9395
9396
9397
9398
9399
9400
9401
9402
9403
9404
9405
# File 'string.c', line 9185

static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    VALUE spat;
    VALUE limit;
    split_type_t split_type;
    long beg, end, i = 0, empty_count = -1;
    int lim = 0;
    VALUE result, tmp;

    result = rb_block_given_p() ? Qfalse : Qnil;
    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
        lim = NUM2INT(limit);
        if (lim <= 0) limit = Qnil;
        else if (lim == 1) {
            if (RSTRING_LEN(str) == 0)
                return result ? rb_ary_new2(0) : str;
            tmp = str_duplicate(rb_cString, str);
            if (!result) {
                rb_yield(tmp);
                return str;
            }
            return rb_ary_new3(1, tmp);
        }
        i = 1;
    }
    if (NIL_P(limit) && !lim) empty_count = 0;

    enc = STR_ENC_GET(str);
    split_type = SPLIT_TYPE_REGEXP;
    if (!NIL_P(spat)) {
        spat = get_pat_quoted(spat, 0);
    }
    else if (NIL_P(spat = rb_fs)) {
        split_type = SPLIT_TYPE_AWK;
    }
    else if (!(spat = rb_fs_check(spat))) {
        rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
    }
    else {
        rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "$; is set to non-nil value");
    }
    if (split_type != SPLIT_TYPE_AWK) {
        switch (BUILTIN_TYPE(spat)) {
          case T_REGEXP:
            rb_reg_options(spat); /* check if uninitialized */
            tmp = RREGEXP_SRC(spat);
            split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
            if (split_type == SPLIT_TYPE_AWK) {
                spat = tmp;
                split_type = SPLIT_TYPE_STRING;
            }
            break;

          case T_STRING:
            mustnot_broken(spat);
            split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
            break;

          default:
            UNREACHABLE_RETURN(Qnil);
        }
    }

#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))

    beg = 0;
    char *ptr = RSTRING_PTR(str);
    char *eptr = RSTRING_END(str);
    if (split_type == SPLIT_TYPE_AWK) {
        char *bptr = ptr;
        int skip = 1;
        unsigned int c;

        if (result) result = rb_ary_new();
        end = beg;
        if (is_ascii_string(str)) {
            while (ptr < eptr) {
                c = (unsigned char)*ptr++;
                if (skip) {
                    if (ascii_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (ascii_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
        else {
            while (ptr < eptr) {
                int n;

                c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
                ptr += n;
                if (skip) {
                    if (rb_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (rb_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
    }
    else if (split_type == SPLIT_TYPE_STRING) {
        char *str_start = ptr;
        char *substr_start = ptr;
        char *sptr = RSTRING_PTR(spat);
        long slen = RSTRING_LEN(spat);

        if (result) result = rb_ary_new();
        mustnot_broken(str);
        enc = rb_enc_check(str, spat);
        while (ptr < eptr &&
               (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
            /* Check we are at the start of a char */
            char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
            if (t != ptr + end) {
                ptr = t;
                continue;
            }
            SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
            ptr += end + slen;
            substr_start = ptr;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else if (split_type == SPLIT_TYPE_CHARS) {
        char *str_start = ptr;
        int n;

        if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
        mustnot_broken(str);
        enc = rb_enc_get(str);
        while (ptr < eptr &&
               (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
            SPLIT_STR(ptr - str_start, n);
            ptr += n;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else {
        if (result) result = rb_ary_new();
        long len = RSTRING_LEN(str);
        long start = beg;
        long idx;
        int last_null = 0;
        struct re_registers *regs;
        VALUE match = 0;

        for (; rb_reg_search(spat, str, start, 0) >= 0;
             (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
            match = rb_backref_get();
            if (!result) rb_match_busy(match);
            regs = RMATCH_REGS(match);
            end = BEG(0);
            if (start == end && BEG(0) == END(0)) {
                if (!ptr) {
                    SPLIT_STR(0, 0);
                    break;
                }
                else if (last_null == 1) {
                    SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
                    beg = start;
                }
                else {
                    if (start == len)
                        start++;
                    else
                        start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
                    last_null = 1;
                    continue;
                }
            }
            else {
                SPLIT_STR(beg, end-beg);
                beg = start = END(0);
            }
            last_null = 0;

            for (idx=1; idx < regs->num_regs; idx++) {
                if (BEG(idx) == -1) continue;
                SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
            }
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        if (match) rb_match_unbusy(match);
    }
    if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
        SPLIT_STR(beg, RSTRING_LEN(str)-beg);
    }

    return result ? result : str;
}

#squeeze(*selectors) ⇒ Object

Returns a copy of self with characters specified by selectors “squeezed” (see Multiple Character Selectors):

“Squeezed” means that each multiple-character run of a selected character is squeezed down to a single character; with no arguments given, squeezes all characters:

"yellow moon".squeeze                  #=> "yelow mon"
"  now   is  the".squeeze(" ")         #=> " now is the"
"putters shoot balls".squeeze("m-z")   #=> "puters shot balls"

8945
8946
8947
8948
8949
8950
8951
# File 'string.c', line 8945

static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_squeeze_bang(argc, argv, str);
    return str;
}

#squeeze!(*selectors) ⇒ self?

Like String#squeeze, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)

8852
8853
8854
8855
8856
8857
8858
8859
8860
8861
8862
8863
8864
8865
8866
8867
8868
8869
8870
8871
8872
8873
8874
8875
8876
8877
8878
8879
8880
8881
8882
8883
8884
8885
8886
8887
8888
8889
8890
8891
8892
8893
8894
8895
8896
8897
8898
8899
8900
8901
8902
8903
8904
8905
8906
8907
8908
8909
8910
8911
8912
8913
8914
8915
8916
8917
8918
8919
8920
8921
8922
8923
8924
8925
# File 'string.c', line 8852

static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0;
    unsigned char *s, *send, *t;
    int i, modify = 0;
    int ascompat, singlebyte = single_byte_optimizable(str);
    unsigned int save;

    if (argc == 0) {
        enc = STR_ENC_GET(str);
    }
    else {
        for (i=0; i<argc; i++) {
            VALUE s = argv[i];

            StringValue(s);
            enc = rb_enc_check(str, s);
            if (singlebyte && !single_byte_optimizable(s))
                singlebyte = 0;
            tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
        }
    }

    str_modify_keep_cr(str);
    s = t = (unsigned char *)RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return Qnil;
    send = (unsigned char *)RSTRING_END(str);
    save = -1;
    ascompat = rb_enc_asciicompat(enc);

    if (singlebyte) {
        while (s < send) {
            unsigned int c = *s++;
            if (c != save || (argc > 0 && !squeez[c])) {
                *t++ = save = c;
            }
        }
    }
    else {
        while (s < send) {
            unsigned int c;
            int clen;

            if (ascompat && (c = *s) < 0x80) {
                if (c != save || (argc > 0 && !squeez[c])) {
                    *t++ = save = c;
                }
                s++;
            }
            else {
                c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);

                if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
                    if (t != s) rb_enc_mbcput(c, t, enc);
                    save = c;
                    t += clen;
                }
                s += clen;
            }
        }
    }

    TERM_FILL((char *)t, TERM_LEN(str));
    if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
        STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
        modify = 1;
    }

    if (modify) return str;
    return Qnil;
}

#start_with?(*string_or_regexp) ⇒ Boolean

:include: doc/string/start_with_p.rdoc

Returns:

  • (Boolean)

11076
11077
11078
11079
11080
11081
11082
11083
11084
11085
11086
11087
11088
11089
11090
11091
11092
11093
11094
11095
11096
11097
11098
11099
11100
11101
11102
11103
11104
11105
11106
# File 'string.c', line 11076

static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
        VALUE tmp = argv[i];
        if (RB_TYPE_P(tmp, T_REGEXP)) {
            if (rb_reg_start_with_p(tmp, str))
                return Qtrue;
        }
        else {
            const char *p, *s, *e;
            long slen, tlen;
            rb_encoding *enc;

            StringValue(tmp);
            enc = rb_enc_check(str, tmp);
            if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
            if ((slen = RSTRING_LEN(str)) < tlen) continue;
            p = RSTRING_PTR(str);
            e = p + slen;
            s = p + tlen;
            if (!at_char_right_boundary(p, s, e, enc))
                continue;
            if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
                return Qtrue;
        }
    }
    return Qfalse;
}

#stripObject

Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:

whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s       # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.strip # => "abc"

Related: String#lstrip, String#rstrip.


10445
10446
10447
10448
10449
10450
10451
10452
10453
10454
10455
10456
10457
10458
# File 'string.c', line 10445

static VALUE
rb_str_strip(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, loffset, olen-loffset-roffset);
}

#strip!self?

Like String#strip, except that any modifications are made in self; returns self if any modification are made, nil otherwise.

Related: String#lstrip!, String#strip!.

Returns:

  • (self, nil)

10403
10404
10405
10406
10407
10408
10409
10410
10411
10412
10413
10414
10415
10416
10417
10418
10419
10420
10421
10422
10423
10424
10425
10426
10427
# File 'string.c', line 10403

static VALUE
rb_str_strip_bang(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset > 0 || roffset > 0) {
        long len = olen-roffset;
        if (loffset > 0) {
            len -= loffset;
            memmove(start, start + loffset, len);
        }
        STR_SET_LEN(str, len);
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
        return str;
    }
    return Qnil;
}

#sub(pattern, replacement) ⇒ Object #sub(pattern) {|match| ... } ⇒ Object

Returns a copy of self with only the first occurrence (not all occurrences) of the given pattern replaced.

See Substitution Methods.

Related: String#sub!, String#gsub, String#gsub!.

Overloads:

  • #sub(pattern) {|match| ... } ⇒ Object

    Yields:


6288
6289
6290
6291
6292
6293
6294
# File 'string.c', line 6288

static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_sub_bang(argc, argv, str);
    return str;
}

#sub!(pattern, replacement) ⇒ self? #sub!(pattern) {|match| ... } ⇒ self?

Replaces the first occurrence (not all occurrences) of the given pattern on self; returns self if a replacement occurred, nil otherwise.

See Substitution Methods.

Related: String#sub, String#gsub, String#gsub!.

Overloads:

  • #sub!(pattern, replacement) ⇒ self?

    Returns:

    • (self, nil)
  • #sub!(pattern) {|match| ... } ⇒ self?

    Yields:

    Returns:

    • (self, nil)

6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259
6260
6261
6262
6263
6264
6265
6266
6267
6268
6269
6270
6271
# File 'string.c', line 6163

static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE pat, repl, hash = Qnil;
    int iter = 0;
    long plen;
    int min_arity = rb_block_given_p() ? 1 : 2;
    long beg;

    rb_check_arity(argc, min_arity, 2);
    if (argc == 1) {
        iter = 1;
    }
    else {
        repl = argv[1];
        hash = rb_check_hash_type(argv[1]);
        if (NIL_P(hash)) {
            StringValue(repl);
        }
    }

    pat = get_pat_quoted(argv[0], 1);

    str_modifiable(str);
    beg = rb_pat_search(pat, str, 0, 1);
    if (beg >= 0) {
        rb_encoding *enc;
        int cr = ENC_CODERANGE(str);
        long beg0, end0;
        VALUE match, match0 = Qnil;
        struct re_registers *regs;
        char *p, *rp;
        long len, rlen;

        match = rb_backref_get();
        regs = RMATCH_REGS(match);
        if (RB_TYPE_P(pat, T_STRING)) {
            beg0 = beg;
            end0 = beg0 + RSTRING_LEN(pat);
            match0 = pat;
        }
        else {
            beg0 = BEG(0);
            end0 = END(0);
            if (iter) match0 = rb_reg_nth_match(0, match);
        }

        if (iter || !NIL_P(hash)) {
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);

            if (iter) {
                repl = rb_obj_as_string(rb_yield(match0));
            }
            else {
                repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
                repl = rb_obj_as_string(repl);
            }
            str_mod_check(str, p, len);
            rb_check_frozen(str);
        }
        else {
            repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
        }

        enc = rb_enc_compatible(str, repl);
        if (!enc) {
            rb_encoding *str_enc = STR_ENC_GET(str);
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);
            if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
                coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
                rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
                         rb_enc_inspect_name(str_enc),
                         rb_enc_inspect_name(STR_ENC_GET(repl)));
            }
            enc = STR_ENC_GET(repl);
        }
        rb_str_modify(str);
        rb_enc_associate(str, enc);
        if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
            int cr2 = ENC_CODERANGE(repl);
            if (cr2 == ENC_CODERANGE_BROKEN ||
                (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
                cr = ENC_CODERANGE_UNKNOWN;
            else
                cr = cr2;
        }
        plen = end0 - beg0;
        rlen = RSTRING_LEN(repl);
        len = RSTRING_LEN(str);
        if (rlen > plen) {
            RESIZE_CAPA(str, len + rlen - plen);
        }
        p = RSTRING_PTR(str);
        if (rlen != plen) {
            memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
        }
        rp = RSTRING_PTR(repl);
        memmove(p + beg0, rp, rlen);
        len += rlen - plen;
        STR_SET_LEN(str, len);
        TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
        ENC_CODERANGE_SET(str, cr);

        RB_GC_GUARD(match);

        return str;
    }
    return Qnil;
}

#succString

Returns the successor to self. The successor is calculated by incrementing characters.

The first character to be incremented is the rightmost alphanumeric: or, if no alphanumerics, the rightmost character:

'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'

The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:

'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"

The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:

'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"

The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:

s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"

Carrying can occur between and among mixtures of alphanumeric characters:

s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"

The successor to an empty String is a new empty String:

''.succ # => ""

Returns:


5271
5272
5273
5274
5275
5276
5277
5278
# File 'string.c', line 5271

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!self

Equivalent to String#succ, but modifies self in place; returns self.

Returns:

  • (self)

5375
5376
5377
5378
5379
5380
5381
# File 'string.c', line 5375

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#sum(n = 16) ⇒ Integer

:include: doc/string/sum.rdoc

Returns:


10796
10797
10798
10799
10800
10801
10802
10803
10804
10805
10806
10807
10808
10809
10810
10811
10812
10813
10814
10815
10816
10817
10818
10819
10820
10821
10822
10823
10824
10825
10826
10827
10828
10829
10830
10831
10832
10833
10834
10835
10836
10837
10838
10839
10840
10841
10842
10843
10844
10845
10846
10847
# File 'string.c', line 10796

static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
    int bits = 16;
    char *ptr, *p, *pend;
    long len;
    VALUE sum = INT2FIX(0);
    unsigned long sum0 = 0;

    if (rb_check_arity(argc, 0, 1) && (bits = NUM2INT(argv[0])) < 0) {
        bits = 0;
    }
    ptr = p = RSTRING_PTR(str);
    len = RSTRING_LEN(str);
    pend = p + len;

    while (p < pend) {
        if (FIXNUM_MAX - UCHAR_MAX < sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            str_mod_check(str, ptr, len);
            sum0 = 0;
        }
        sum0 += (unsigned char)*p;
        p++;
    }

    if (bits == 0) {
        if (sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
        }
    }
    else {
        if (sum == INT2FIX(0)) {
            if (bits < (int)sizeof(long)*CHAR_BIT) {
                sum0 &= (((unsigned long)1)<<bits)-1;
            }
            sum = LONG2FIX(sum0);
        }
        else {
            VALUE mod;

            if (sum0) {
                sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            }

            mod = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
            mod = rb_funcall(mod, '-', 1, INT2FIX(1));
            sum = rb_funcall(sum, '&', 1, mod);
        }
    }
    return sum;
}

#swapcase(*options) ⇒ String

Returns a string containing the characters in self, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:

s = 'Hello World!' # => "Hello World!"
s.swapcase         # => "hELLO wORLD!"

The casing may be affected by the given options; see Case Mapping.

Related: String#swapcase!.

Returns:


8251
8252
8253
8254
8255
8256
8257
8258
8259
8260
8261
8262
8263
8264
8265
8266
8267
8268
8269
# File 'string.c', line 8251

static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str_duplicate(rb_cString, str);
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#swapcase!(*options) ⇒ self?

Upcases each lowercase character in self; downcases uppercase character; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.swapcase!        # => "hELLO wORLD!"
s                  # => "hELLO wORLD!"
''.swapcase!       # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#swapcase.

Returns:

  • (self, nil)

8214
8215
8216
8217
8218
8219
8220
8221
8222
8223
8224
8225
8226
8227
8228
8229
8230
# File 'string.c', line 8214

static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#to_cObject

Returns self interpreted as a Complex object; leading whitespace and trailing garbage are ignored:

'9'.to_c                 # => (9+0i)
'2.5'.to_c               # => (2.5+0i)
'2.5/1'.to_c             # => ((5/2)+0i)
'-3/2'.to_c              # => ((-3/2)+0i)
'-i'.to_c                # => (0-1i)
'45i'.to_c               # => (0+45i)
'3-4i'.to_c              # => (3-4i)
'-4e2-4e-2i'.to_c        # => (-400.0-0.04i)
'-0.0-0.0i'.to_c         # => (-0.0-0.0i)
'1/2+3/4i'.to_c          # => ((1/2)+(3/4)*i)
'1.0@0'.to_c             # => (1+0.0i)
"1.0@#{Math::PI/2}".to_c # => (0.0+1i)
"1.0@#{Math::PI}".to_c   # => (-1+0.0i)

Returns Complex zero if the string cannot be converted:

'ruby'.to_c        # => (0+0i)

See Kernel#Complex.


2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
# File 'complex.c', line 2269

static VALUE
string_to_c(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    (void)parse_comp(rb_str_fill_terminator(self, 1), FALSE, &num);

    return num;
}

#to_fFloat

Returns the result of interpreting leading characters in self as a Float:

'3.14159'.to_f  # => 3.14159
'1.234e-2'.to_f # => 0.01234

Characters past a leading valid number (in the given base) are ignored:

'3.14 (pi to two places)'.to_f # => 3.14

Returns zero if there is no leading valid number:

'abcdef'.to_f # => 0.0

Returns:


7046
7047
7048
7049
7050
# File 'string.c', line 7046

static VALUE
rb_str_to_f(VALUE str)
{
    return DBL2NUM(rb_str_to_dbl(str, FALSE));
}

#to_i(base = 10) ⇒ Integer

Returns the result of interpreting leading characters in self as an integer in the given base (which must be in (0, 2..36)):

'123456'.to_i     # => 123456
'123def'.to_i(16) # => 1195503

With base zero, string object may contain leading characters to specify the actual base:

'123def'.to_i(0)   # => 123
'0123def'.to_i(0)  # => 83
'0b123def'.to_i(0) # => 1
'0o123def'.to_i(0) # => 83
'0d123def'.to_i(0) # => 123
'0x123def'.to_i(0) # => 1195503

Characters past a leading valid number (in the given base) are ignored:

'12.345'.to_i   # => 12
'12345'.to_i(2) # => 1

Returns zero if there is no leading valid number:

'abcdef'.to_i # => 0
'2'.to_i(2)   # => 0

Returns:


7015
7016
7017
7018
7019
7020
7021
7022
7023
7024
# File 'string.c', line 7015

static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    if (rb_check_arity(argc, 0, 1) && (base = NUM2INT(argv[0])) < 0) {
        rb_raise(rb_eArgError, "invalid radix %d", base);
    }
    return rb_str_to_inum(str, base, FALSE);
}

#to_rObject

Returns the result of interpreting leading characters in str as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str, zero is returned. This method never raises an exception.

'  2  '.to_r       #=> (2/1)
'300/2'.to_r       #=> (150/1)
'-9.2'.to_r        #=> (-46/5)
'-9.2e2'.to_r      #=> (-920/1)
'1_234_567'.to_r   #=> (1234567/1)
'21 June 09'.to_r  #=> (21/1)
'21/06/09'.to_r    #=> (7/2)
'BWV 1079'.to_r    #=> (0/1)

NOTE: “0.3”.to_r isn’t the same as 0.3.to_r. The former is equivalent to “3/10”.to_r, but the latter isn’t so.

"0.3".to_r == 3/10r  #=> true
0.3.to_r   == 3/10r  #=> false

See also Kernel#Rational.


2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
# File 'rational.c', line 2529

static VALUE
string_to_r(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    num = parse_rat(RSTRING_PTR(self), RSTRING_END(self), 0, TRUE);

    if (RB_FLOAT_TYPE_P(num) && !FLOAT_ZERO_P(num))
        rb_raise(rb_eFloatDomainError, "Infinity");
    return num;
}

#to_sself, String

Returns self if self is a String, or self converted to a String if self is a subclass of String.

Returns:


7061
7062
7063
7064
7065
7066
7067
7068
# File 'string.c', line 7061

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
        return str_duplicate(rb_cString, str);
    }
    return str;
}

#to_sself, String

Returns self if self is a String, or self converted to a String if self is a subclass of String.

Returns:


7061
7062
7063
7064
7065
7066
7067
7068
# File 'string.c', line 7061

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
        return str_duplicate(rb_cString, str);
    }
    return str;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"

877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
# File 'symbol.c', line 877

VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (sym) {
            // ok
        }
        else if (USE_SYMBOL_GC) {
            rb_encoding *enc = rb_enc_get(str);
            rb_encoding *ascii = rb_usascii_encoding();
            if (enc != ascii && sym_check_asciionly(str, false)) {
                str = rb_str_dup(str);
                rb_enc_associate(str, ascii);
                OBJ_FREEZE(str);
                enc = ascii;
            }
            else {
                str = rb_str_dup(str);
                OBJ_FREEZE(str);
            }
            str = rb_fstring(str);
            int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) type = ID_JUNK;
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
        }
        else {
            ID id = intern_str(str, 0);
            sym = ID2SYM(id);
        }
    }
    GLOBAL_SYMBOLS_LEAVE();
    return sym;
}

#tr(selector, replacements) ⇒ Object

Returns a copy of self with each character specified by string selector translated to the corresponding character in string replacements. The correspondence is positional:

  • Each occurrence of the first character specified by selector is translated to the first character in replacements.

  • Each occurrence of the second character specified by selector is translated to the second character in replacements.

  • And so on.

Example:

'hello'.tr('el', 'ip') #=> "hippo"

If replacements is shorter than selector, it is implicitly padded with its own last character:

'hello'.tr('aeiou', '-')   # => "h-ll-"
'hello'.tr('aeiou', 'AA-') # => "hAll-"

Arguments selector and replacements must be valid character selectors (see Character Selectors), and may use any of its valid forms, including negation, ranges, and escaping:

# Negation.
'hello'.tr('^aeiou', '-') # => "-e--o"
# Ranges.
'ibm'.tr('b-z', 'a-z') # => "hal"
# Escapes.
'hel^lo'.tr('\^aeiou', '-')     # => "h-l-l-"    # Escaped leading caret.
'i-b-m'.tr('b\-z', 'a-z')       # => "ibabm"     # Escaped embedded hyphen.
'foo\\bar'.tr('ab\\', 'XYZ')    # => "fooZYXr"   # Escaped backslash.

8654
8655
8656
8657
8658
8659
8660
# File 'string.c', line 8654

static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    str = str_duplicate(rb_cString, str);
    tr_trans(str, src, repl, 0);
    return str;
}

#tr!(selector, replacements) ⇒ self?

Like String#tr, but modifies self in place. Returns self if any changes were made, nil otherwise.

Returns:

  • (self, nil)

8608
8609
8610
8611
8612
# File 'string.c', line 8608

static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 0);
}

#tr_s(selector, replacements) ⇒ String

Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).

'hello'.tr_s('l', 'r')   #=> "hero"
'hello'.tr_s('el', '-')  #=> "h-o"
'hello'.tr_s('el', 'hx') #=> "hhxo"

Related: String#squeeze.

Returns:


8986
8987
8988
8989
8990
8991
8992
# File 'string.c', line 8986

static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    str = str_duplicate(rb_cString, str);
    tr_trans(str, src, repl, 1);
    return str;
}

#tr_s!(selector, replacements) ⇒ self?

Like String#tr_s, but modifies self in place. Returns self if any changes were made, nil otherwise.

Related: String#squeeze!.

Returns:

  • (self, nil)

8964
8965
8966
8967
8968
# File 'string.c', line 8964

static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 1);
}

#undumpString

Returns an unescaped version of self:

s_orig = "\f\x00\xff\\\""    # => "\f\u0000\xFF\\\""
s_dumped = s_orig.dump       # => "\"\\f\\x00\\xFF\\\\\\\"\""
s_undumped = s_dumped.undump # => "\f\u0000\xFF\\\""
s_undumped == s_orig         # => true

Related: String#dump (inverse of String#undump).

Returns:


7609
7610
7611
7612
7613
7614
7615
7616
7617
7618
7619
7620
7621
7622
7623
7624
7625
7626
7627
7628
7629
7630
7631
7632
7633
7634
7635
7636
7637
7638
7639
7640
7641
7642
7643
7644
7645
7646
7647
7648
7649
7650
7651
7652
7653
7654
7655
7656
7657
7658
7659
7660
7661
7662
7663
7664
7665
7666
7667
7668
7669
7670
7671
7672
7673
7674
7675
7676
7677
7678
7679
7680
7681
7682
7683
7684
7685
7686
7687
7688
7689
7690
7691
7692
7693
7694
7695
7696
7697
7698
# File 'string.c', line 7609

static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
        rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
        rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
        if (s >= s_end) {
            rb_raise(rb_eRuntimeError, "unterminated dumped string");
        }

        if (*s == '"') {
            /* epilogue */
            s++;
            if (s == s_end) {
                /* ascii compatible dumped string */
                break;
            }
            else {
                static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
                static const char dup_suffix[] = ".dup";
                const char *encname;
                int encidx;
                ptrdiff_t size;

                /* check separately for strings dumped by older versions */
                size = sizeof(dup_suffix) - 1;
                if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

                size = sizeof(force_encoding_suffix) - 1;
                if (s_end - s <= size) goto invalid_format;
                if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
                s += size;

                if (utf8) {
                    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
                }

                encname = s;
                s = memchr(s, '"', s_end-s);
                size = s - encname;
                if (!s) goto invalid_format;
                if (s_end - s != 2) goto invalid_format;
                if (s[0] != '"' || s[1] != ')') goto invalid_format;

                encidx = rb_enc_find_index2(encname, (long)size);
                if (encidx < 0) {
                    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
                }
                rb_enc_associate_index(undumped, encidx);
            }
            break;
        }

        if (*s == '\\') {
            s++;
            if (s >= s_end) {
                rb_raise(rb_eRuntimeError, "invalid escape");
            }
            undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
        }
        else {
            rb_str_cat(undumped, s++, 1);
        }
    }

    RB_GC_GUARD(str);

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}

#unicode_normalize(form = :nfc) ⇒ String

Returns a copy of self with Unicode normalization applied.

Argument form must be one of the following symbols (see Unicode normalization forms):

  • :nfc: Canonical decomposition, followed by canonical composition.

  • :nfd: Canonical decomposition.

  • :nfkc: Compatibility decomposition, followed by canonical composition.

  • :nfkd: Compatibility decomposition.

The encoding of self must be one of:

  • Encoding::UTF_8

  • Encoding::UTF_16BE

  • Encoding::UTF_16LE

  • Encoding::UTF_32BE

  • Encoding::UTF_32LE

  • Encoding::GB18030

  • Encoding::UCS_2BE

  • Encoding::UCS_4BE

Examples:

"a\u0300".unicode_normalize      # => "a"
"\u00E0".unicode_normalize(:nfd) # => "a "

Related: String#unicode_normalize!, String#unicode_normalized?.

Returns:


11857
11858
11859
11860
11861
# File 'string.c', line 11857

static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalize);
}

#unicode_normalize!(form = :nfc) ⇒ self

Like String#unicode_normalize, except that the normalization is performed on self.

Related String#unicode_normalized?.

Returns:

  • (self)

11873
11874
11875
11876
11877
# File 'string.c', line 11873

static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
}

#unicode_normalized?(form = :nfc) ⇒ Boolean

Returns true if self is in the given form of Unicode normalization, false otherwise. The form must be one of :nfc, :nfd, :nfkc, or :nfkd.

Examples:

"a\u0300".unicode_normalized?       # => false
"a\u0300".unicode_normalized?(:nfd) # => true
"\u00E0".unicode_normalized?        # => true
"\u00E0".unicode_normalized?(:nfd)  # => false

Raises an exception if self is not in a Unicode encoding:

s = "\xE0".force_encoding('ISO-8859-1')
s.unicode_normalized? # Raises Encoding::CompatibilityError.

Related: String#unicode_normalize, String#unicode_normalize!.

Returns:

  • (Boolean)

11902
11903
11904
11905
11906
# File 'string.c', line 11902

static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalized_p);
}

#upcase(*options) ⇒ String

Returns a string containing the upcased characters in self:

s = 'Hello World!' # => "Hello World!"
s.upcase           # => "HELLO WORLD!"

The casing may be affected by the given options; see Case Mapping.

Related: String#upcase!, String#downcase, String#downcase!.

Returns:


7987
7988
7989
7990
7991
7992
7993
7994
7995
7996
7997
7998
7999
8000
8001
8002
8003
8004
8005
8006
8007
8008
8009
8010
# File 'string.c', line 7987

static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        upcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#upcase!(*options) ⇒ self?

Upcases the characters in self; returns self if any changes were made, nil otherwise:

s = 'Hello World!' # => "Hello World!"
s.upcase!          # => "HELLO WORLD!"
s                  # => "HELLO WORLD!"
s.upcase!          # => nil

The casing may be affected by the given options; see Case Mapping.

Related: String#upcase, String#downcase, String#downcase!.

Returns:

  • (self, nil)

7948
7949
7950
7951
7952
7953
7954
7955
7956
7957
7958
7959
7960
7961
7962
7963
7964
7965
7966
7967
7968
# File 'string.c', line 7948

static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (upcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#upto(other_string, exclusive = false) {|string| ... } ⇒ self #upto(other_string, exclusive = false) ⇒ Object

With a block given, calls the block with each String value returned by successive calls to String#succ; the first value is self, the next is self.succ, and so on; the sequence terminates when value other_string is reached; returns self:

'a8'.upto('b6') {|s| print s, ' ' } # => "a8"

Output:

a8 a9 b0 b1 b2 b3 b4 b5 b6

If argument exclusive is given as a truthy object, the last value is omitted:

'a8'.upto('b6', true) {|s| print s, ' ' } # => "a8"

Output:

a8 a9 b0 b1 b2 b3 b4 b5

If other_string would not be reached, does not call the block:

'25'.upto('5') {|s| fail s }
'aa'.upto('a') {|s| fail s }

With no block given, returns a new Enumerator:

'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")>

Overloads:

  • #upto(other_string, exclusive = false) {|string| ... } ⇒ self

    Yields:

    • (string)

    Returns:

    • (self)

5435
5436
5437
5438
5439
5440
5441
5442
5443
# File 'string.c', line 5435

static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE end, exclusive;

    rb_scan_args(argc, argv, "11", &end, &exclusive);
    RETURN_ENUMERATOR(beg, argc, argv);
    return rb_str_upto_each(beg, end, RTEST(exclusive), str_upto_i, Qnil);
}

#valid_encoding?Boolean

Returns true if self is encoded correctly, false otherwise:

"\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
"\xc2".force_encoding("UTF-8").valid_encoding?     # => false
"\x80".force_encoding("UTF-8").valid_encoding?     # => false

Returns:

  • (Boolean)

11424
11425
11426
11427
11428
11429
11430
# File 'string.c', line 11424

static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return RBOOL(cr != ENC_CODERANGE_BROKEN);
}