Class: String
Direct Known Subclasses
Class Method Summary collapse
-
.new(*args) ⇒ Object
:nodoc:.
-
.try_convert(object) ⇒ Object?
If object is a String object, returns object.
Instance Method Summary collapse
-
#%(object) ⇒ Object
Returns the result of formatting object into the format specification self (see Kernel#sprintf for formatting details).
-
#*(integer) ⇒ Object
Returns a new String containing integer copies of self.
-
#+(other_string) ⇒ Object
Returns a new String containing other_string concatenated to self.
-
#+ ⇒ self
Returns self if self is not frozen and can be mutated without warning issuance.
-
#-@ ⇒ Object
(also: #dedup)
Returns a frozen, possibly pre-existing copy of the string.
-
#<<(object) ⇒ String
Concatenates object to self and returns self.
-
#<=>(other_string) ⇒ -1, ...
Compares self and other_string, returning -1, 0, 1, or nil.
-
#==(str2) ⇒ Object
Returns true if object has the same length and content as self; false otherwise.
-
#===(str2) ⇒ Object
Returns true if object has the same length and content as self; false otherwise.
-
#=~(y) ⇒ Object
Returns the Integer index of the first substring that matches the given regexp, or nil if no match is found.
-
#[](*args) ⇒ Object
Returns the substring of
self
specified by the arguments. -
#[]=(*args) ⇒ Object
Replaces all, some, or none of the contents of
self
; returnsnew_string
. -
#append_as_bytes(*objects) ⇒ String
Concatenates each object in
objects
intoself
without any encoding validation or conversion and returnsself
:. -
#ascii_only? ⇒ Boolean
Returns
true
ifself
contains only ASCII characters,false
otherwise:. -
#b ⇒ String
:include: doc/string/b.rdoc.
-
#byteindex(*args) ⇒ Object
Returns the Integer byte-based index of the first occurrence of the given
substring
, ornil
if none found:. -
#byterindex(*args) ⇒ Object
Returns the Integer byte-based index of the last occurrence of the given
substring
, ornil
if none found:. -
#bytes ⇒ Object
:include: doc/string/bytes.rdoc.
-
#bytesize ⇒ Integer
:include: doc/string/bytesize.rdoc.
-
#byteslice(*args) ⇒ Object
Returns a substring of
self
, ornil
if the substring cannot be constructed. -
#bytesplice(*args) ⇒ Object
Replaces some or all of the content of
self
withstr
, and returnsself
. -
#capitalize(*options) ⇒ String
Returns a string containing the characters in
self
; the first character is upcased; the remaining characters are downcased:. -
#capitalize!(*options) ⇒ self?
Upcases the first character in
self
; downcases the remaining characters; returnsself
if any changes were made,nil
otherwise:. -
#casecmp(other_string) ⇒ -1, ...
Compares
self.downcase
andother_string.downcase
; returns:. -
#casecmp?(other_string) ⇒ true, ...
Returns
true
ifself
andother_string
are equal after Unicode case folding, otherwisefalse
:. -
#center(size, pad_string = ' ') ⇒ Object
:include: doc/string/center.rdoc.
-
#chars ⇒ Object
:include: doc/string/chars.rdoc.
-
#chomp(line_sep = $/) ⇒ Object
:include: doc/string/chomp.rdoc.
-
#chomp!(line_sep = $/) ⇒ self?
Like String#chomp, but modifies
self
in place; returnsnil
if no modification made,self
otherwise. -
#chop ⇒ Object
:include: doc/string/chop.rdoc.
-
#chop! ⇒ self?
Like String#chop, but modifies
self
in place; returnsnil
ifself
is empty,self
otherwise. -
#chr ⇒ String
Returns a string containing the first character of
self
:. -
#clear ⇒ self
Removes the contents of
self
:. -
#codepoints ⇒ Object
:include: doc/string/codepoints.rdoc.
-
#concat(*objects) ⇒ String
Concatenates each object in
objects
toself
and returnsself
:. -
#count(*selectors) ⇒ Integer
Returns the total number of characters in
self
that are specified by the givenselectors
(see Multiple Character Selectors):. -
#crypt(salt_str) ⇒ Object
Returns the string generated by calling
crypt(3)
standard library function withstr
andsalt_str
, in this order, as its arguments. -
#delete(*selectors) ⇒ Object
Returns a copy of
self
with characters specified byselectors
removed (see Multiple Character Selectors):. -
#delete!(*selectors) ⇒ self?
Like String#delete, but modifies
self
in place. -
#delete_prefix(prefix) ⇒ Object
:include: doc/string/delete_prefix.rdoc.
-
#delete_prefix!(prefix) ⇒ self?
Like String#delete_prefix, except that
self
is modified in place. -
#delete_suffix(suffix) ⇒ Object
:include: doc/string/delete_suffix.rdoc.
-
#delete_suffix!(suffix) ⇒ self?
Like String#delete_suffix, except that
self
is modified in place. -
#downcase(*options) ⇒ String
Returns a string containing the downcased characters in
self
:. -
#downcase!(*options) ⇒ self?
Downcases the characters in
self
; returnsself
if any changes were made,nil
otherwise:. -
#dump ⇒ String
Returns a printable version of
self
, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:. -
#dup ⇒ Object
:nodoc:.
-
#each_byte ⇒ Object
:include: doc/string/each_byte.rdoc.
-
#each_char ⇒ Object
:include: doc/string/each_char.rdoc.
-
#each_codepoint ⇒ Object
:include: doc/string/each_codepoint.rdoc.
-
#each_grapheme_cluster ⇒ Object
:include: doc/string/each_grapheme_cluster.rdoc.
-
#each_line(*args) ⇒ Object
:include: doc/string/each_line.rdoc.
-
#empty? ⇒ Boolean
Returns
true
if the length ofself
is zero,false
otherwise:. -
#encode(*args) ⇒ Object
:include: doc/string/encode.rdoc.
-
#encode!(*args) ⇒ Object
Like #encode, but applies encoding changes to
self
; returnsself
. -
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
-
#end_with?(*strings) ⇒ Boolean
:include: doc/string/end_with_p.rdoc.
-
#eql?(object) ⇒ Boolean
Returns
true
ifobject
has the same length and content; asself
;false
otherwise:. -
#force_encoding(encoding) ⇒ self
:include: doc/string/force_encoding.rdoc.
-
#freeze ⇒ Object
:nodoc:.
-
#getbyte(index) ⇒ Integer?
Returns the byte at zero-based
index
as an integer, ornil
ifindex
is out of range:. -
#grapheme_clusters ⇒ Object
:include: doc/string/grapheme_clusters.rdoc.
-
#gsub(*args) ⇒ Object
Returns a copy of
self
with all occurrences of the givenpattern
replaced. -
#gsub!(*args) ⇒ Object
Performs the specified substring replacement(s) on
self
; returnsself
if any replacement occurred,nil
otherwise. -
#hash ⇒ Integer
Returns the integer hash value for
self
. -
#hex ⇒ Integer
Interprets the leading substring of
self
as a string of hexadecimal digits (with an optional sign and an optional0x
) and returns the corresponding number; returns zero if there is no such leading substring:. -
#include?(other_string) ⇒ Boolean
Returns
true
ifself
containsother_string
,false
otherwise:. -
#index(*args) ⇒ Object
:include: doc/string/index.rdoc.
-
#new(string = '', **opts) ⇒ Object
constructor
:include: doc/string/new.rdoc.
-
#replace(other_string) ⇒ self
Replaces the contents of
self
with the contents ofother_string
:. -
#insert(index, other_string) ⇒ self
Inserts the given
other_string
intoself
; returnsself
. -
#inspect ⇒ String
Returns a printable version of
self
, enclosed in double-quotes, and with special characters escaped:. -
#intern ⇒ Object
Returns the
Symbol
corresponding to str, creating the symbol if it did not previously exist. -
#length ⇒ Integer
:include: doc/string/length.rdoc.
-
#lines(line_sep = $/, chomp: false) ⇒ Object
Forms substrings (“lines”) of
self
according to the given arguments (see String#each_line for details); returns the lines in an array. -
#ljust(size, pad_string = ' ') ⇒ Object
:include: doc/string/ljust.rdoc.
-
#lstrip ⇒ Object
Returns a copy of
self
with leading whitespace removed; see Whitespace in Strings:. -
#lstrip! ⇒ self?
Like String#lstrip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#match(*args) ⇒ Object
Returns a MatchData object (or
nil
) based onself
and the givenpattern
. -
#match?(pattern, offset = 0) ⇒ Boolean
Returns
true
orfalse
based on whether a match is found forself
andpattern
. -
#succ ⇒ String
Returns the successor to
self
. -
#succ! ⇒ self
Equivalent to String#succ, but modifies
self
in place; returnsself
. -
#oct ⇒ Integer
Interprets the leading substring of
self
as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:. -
#ord ⇒ Integer
:include: doc/string/ord.rdoc.
-
#partition(string_or_regexp) ⇒ Array
:include: doc/string/partition.rdoc.
-
#prepend(*other_strings) ⇒ String
Prepends each string in
other_strings
toself
and returnsself
:. -
#replace(other_string) ⇒ self
Replaces the contents of
self
with the contents ofother_string
:. -
#reverse ⇒ String
Returns a new string with the characters from
self
in reverse order. -
#reverse! ⇒ self
Returns
self
with its characters reversed:. -
#rindex(*args) ⇒ Object
Returns the Integer index of the last occurrence of the given
substring
, ornil
if none found:. -
#rjust(size, pad_string = ' ') ⇒ Object
:include: doc/string/rjust.rdoc.
-
#rpartition(sep) ⇒ Array
:include: doc/string/rpartition.rdoc.
-
#rstrip ⇒ Object
Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:.
-
#rstrip! ⇒ self?
Like String#rstrip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#scan(pat) ⇒ Object
Matches a pattern against
self
; the pattern is:. -
#scrub(*args) ⇒ Object
:include: doc/string/scrub.rdoc.
-
#scrub!(*args) ⇒ Object
Like String#scrub, except that any replacements are made in
self
. -
#setbyte(index, integer) ⇒ Integer
Sets the byte at zero-based
index
tointeger
; returnsinteger
:. -
#length ⇒ Integer
:include: doc/string/length.rdoc.
-
#slice(*args) ⇒ Object
Returns the substring of
self
specified by the arguments. -
#slice!(*args) ⇒ Object
Removes and returns the substring of
self
specified by the arguments. -
#split(*args) ⇒ Object
:include: doc/string/split.rdoc.
-
#squeeze(*selectors) ⇒ Object
Returns a copy of
self
with characters specified byselectors
“squeezed” (see Multiple Character Selectors):. -
#squeeze!(*selectors) ⇒ self?
Like String#squeeze, but modifies
self
in place. -
#start_with?(*string_or_regexp) ⇒ Boolean
:include: doc/string/start_with_p.rdoc.
-
#strip ⇒ Object
Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:.
-
#strip! ⇒ self?
Like String#strip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#sub(*args) ⇒ Object
Returns a copy of
self
with only the first occurrence (not all occurrences) of the givenpattern
replaced. -
#sub!(*args) ⇒ Object
Replaces the first occurrence (not all occurrences) of the given
pattern
onself
; returnsself
if a replacement occurred,nil
otherwise. -
#succ ⇒ String
Returns the successor to
self
. -
#succ! ⇒ self
Equivalent to String#succ, but modifies
self
in place; returnsself
. -
#sum(n = 16) ⇒ Integer
:include: doc/string/sum.rdoc.
-
#swapcase(*options) ⇒ String
Returns a string containing the characters in
self
, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:. -
#swapcase!(*options) ⇒ self?
Upcases each lowercase character in self; downcases each uppercase character; returns self if any changes were made, nil otherwise.
-
#to_c ⇒ Object
Returns
self
interpreted as a Complex object; leading whitespace and trailing garbage are ignored:. -
#to_f ⇒ Float
Returns the result of interpreting leading characters in
self
as a Float:. -
#to_i(base = 10) ⇒ Integer
Returns the result of interpreting leading characters in
self
as an integer in the givenbase
(which must be in (0, 2..36)):. -
#to_r ⇒ Object
Returns the result of interpreting leading characters in
str
as a rational. -
#to_s ⇒ self, String
Returns
self
ifself
is aString
, orself
converted to aString
ifself
is a subclass ofString
. -
#to_s ⇒ self, String
Returns
self
ifself
is aString
, orself
converted to aString
ifself
is a subclass ofString
. -
#to_sym ⇒ Object
Returns the
Symbol
corresponding to str, creating the symbol if it did not previously exist. -
#tr(selector, replacements) ⇒ Object
Returns a copy of
self
with each character specified by stringselector
translated to the corresponding character in stringreplacements
. -
#tr!(selector, replacements) ⇒ self?
Like String#tr, but modifies
self
in place. -
#tr_s(selector, replacements) ⇒ String
Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).
-
#tr_s!(selector, replacements) ⇒ self?
Like String#tr_s, but modifies
self
in place. -
#undump ⇒ String
Returns an unescaped version of
self
:. -
#unicode_normalize(form = :nfc) ⇒ String
Returns a copy of
self
with Unicode normalization applied. -
#unicode_normalize!(form = :nfc) ⇒ self
Like String#unicode_normalize, except that the normalization is performed on
self
. -
#unicode_normalized?(form = :nfc) ⇒ Boolean
Returns
true
ifself
is in the givenform
of Unicode normalization,false
otherwise. -
#upcase(*options) ⇒ String
Returns a string containing the upcased characters in
self
:. -
#upcase!(*options) ⇒ self?
Upcases the characters in
self
; returnsself
if any changes were made,nil
otherwise:. -
#upto(*args) ⇒ Object
With a block given, calls the block with each
String
value returned by successive calls to String#succ; the first value isself
, the next isself.succ
, and so on; the sequence terminates when valueother_string
is reached; returnsself
:. -
#valid_encoding? ⇒ Boolean
Returns
true
ifself
is encoded correctly,false
otherwise:.
Methods included from Comparable
#<, #<=, #>, #>=, #between?, #clamp
Constructor Details
#new(string = '', **opts) ⇒ Object
:include: doc/string/new.rdoc
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 |
# File 'string.c', line 1979
/*
 * Initializer shown on this page as String#new(string = '', **opts).
 *
 * argc/argv: an optional positional source string plus an optional keyword
 *            hash (scan spec "01:").
 * str:       the receiver being initialized; it is also the return value.
 *
 * Two keywords are recognized: the encoding id and "capacity". A nil value
 * for either keyword is treated the same as omitting it. When a capacity is
 * given, the receiver is moved onto a heap buffer of at least that size;
 * otherwise the source string, if any, is copied in via rb_str_replace().
 */
static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
static ID keyword_ids[2];
VALUE orig, opt, venc, vcapa;
VALUE kwargs[2];
rb_encoding *enc = 0;
int n;
/* Intern the keyword ids once; they are cached in the static array. */
if (!keyword_ids[0]) {
keyword_ids[0] = rb_id_encoding();
CONST_ID(keyword_ids[1], "capacity");
}
/* n == 1 below means a positional source string was passed. */
n = rb_scan_args(argc, argv, "01:", &orig, &opt);
if (!NIL_P(opt)) {
/* Both keywords are optional (0 required, 2 optional). */
rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
venc = kwargs[0];
vcapa = kwargs[1];
if (!UNDEF_P(venc) && !NIL_P(venc)) {
enc = rb_to_encoding(venc);
}
if (!UNDEF_P(vcapa) && !NIL_P(vcapa)) {
long capa = NUM2LONG(vcapa);
long len = 0;
/* Terminator width: the encoding's minimum char length if one was given, else 1. */
int termlen = enc ? rb_enc_mbminlen(enc) : 1;
/* Never allocate less than the minimum buffer size. */
if (capa < STR_BUF_MIN_SIZE) {
capa = STR_BUF_MIN_SIZE;
}
if (n == 1) {
StringValue(orig);
len = RSTRING_LEN(orig);
/* The buffer must at least hold the source contents. */
if (capa < len) {
capa = len;
}
/* Initializing from itself: the bytes are carried over by the
 * reallocation below, so the copy step at the end is skipped. */
if (orig == str) n = 0;
}
/* NOTE(review): presumably raises when the receiver may not be modified — helper not visible here. */
str_modifiable(str);
if (STR_EMBED_P(str) || FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* make noembed always */
/* The receiver is embedded or flagged STR_SHARED/STR_NOFREE: allocate a
 * fresh heap buffer and carry over the existing bytes plus terminator,
 * bounded by the new buffer size. */
const size_t size = (size_t)capa + termlen;
const char *const old_ptr = RSTRING_PTR(str);
const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
char *new_ptr = ALLOC_N(char, size);
if (STR_EMBED_P(str)) RUBY_ASSERT((long)osize <= str_embed_capa(str));
memcpy(new_ptr, old_ptr, osize < size ? osize : size);
FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
/* Already on a heap buffer: resize only when the size actually differs. */
SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
(size_t)capa + termlen, STR_HEAP_SIZE(str));
}
/* Set the logical length and write the terminator right after the content. */
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
if (n == 1) {
/* Copy the source bytes and take over its encoding/coderange. */
memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
rb_enc_cr_str_exact_copy(str, orig);
}
/* Mark the string as heap-backed and record the capacity. */
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
}
else if (n == 1) {
/* No capacity requested: plain content replacement. */
rb_str_replace(str, orig);
}
if (enc) {
/* An explicit encoding overrides whatever was copied; the cached
 * coderange is dropped after the change. */
rb_enc_associate(str, enc);
ENC_CODERANGE_CLEAR(str);
}
}
else if (n == 1) {
/* No keywords at all: plain content replacement. */
rb_str_replace(str, orig);
}
return str;
}
|
Class Method Details
.new(*args) ⇒ Object
:nodoc:
2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 |
# File 'string.c', line 2057
/*
 * String.new fast path.
 *
 * klass: the class receiving .new. Anything other than String itself, and
 *        any call without a keyword hash, is delegated to
 *        rb_class_new_instance_pass_kw().
 * argc/argv: an optional positional source string plus an optional keyword
 *        hash (scan spec "01:"); recognized keywords are the encoding id and
 *        "capacity".
 *
 * Returns the newly created String.
 *
 * An explicit nil for either keyword is treated exactly like omitting the
 * keyword, matching rb_str_init (the path subclasses still go through).
 * Previously only "undef" was checked here, so String.new(capacity: nil) or
 * String.new(encoding: nil) reached NUM2LONG / rb_to_encoding with nil.
 */
static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
    /* Subclasses take the generic instantiation route. */
    if (klass != rb_cString) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    static ID keyword_ids[2];
    VALUE orig, opt, encoding = Qnil, capacity = Qnil;
    VALUE kwargs[2];
    rb_encoding *enc = NULL;

    int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (NIL_P(opt)) {
        /* No keywords: nothing for the fast path to do. */
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    keyword_ids[0] = rb_id_encoding();
    CONST_ID(keyword_ids[1], "capacity");
    rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);

    /* Fold "keyword absent" (undef) into nil so that an explicit nil and an
     * omitted keyword behave the same from here on. */
    encoding = UNDEF_P(kwargs[0]) ? Qnil : kwargs[0];
    capacity = UNDEF_P(kwargs[1]) ? Qnil : kwargs[1];

    if (n == 1) {
        orig = StringValue(orig);
    }
    else {
        orig = Qnil;
    }

    /* Without an explicit encoding, inherit the source string's encoding. */
    if (NIL_P(encoding) && !NIL_P(orig)) {
        encoding = rb_obj_encoding(orig);
    }

    if (!NIL_P(encoding)) {
        enc = rb_to_encoding(encoding);
    }

    /* Without a capacity we're basically just duping `orig`. */
    if (NIL_P(capacity)) {
        if (NIL_P(orig)) {
            VALUE empty_str = str_new(klass, "", 0);
            if (enc) {
                rb_enc_associate(empty_str, enc);
            }
            return empty_str;
        }
        /* orig is present here, so enc was resolved above. */
        VALUE copy = str_duplicate(klass, orig);
        rb_enc_associate(copy, enc);
        ENC_CODERANGE_CLEAR(copy);
        return copy;
    }

    long capa = NUM2LONG(capacity);
    if (capa < 0) {
        capa = 0;
    }

    /* Never allocate less room than the source string already has. */
    if (!NIL_P(orig)) {
        long orig_capa = rb_str_capacity(orig);
        if (orig_capa > capa) {
            capa = orig_capa;
        }
    }

    /* Allocate the buffer, then reset the string to empty and terminate it.
     * NOTE(review): the terminator fill uses rb_enc_mbmaxlen() here, whereas
     * rb_str_init uses rb_enc_mbminlen(); left unchanged, confirm intent. */
    VALUE str = str_enc_new(klass, NULL, capa, enc);
    STR_SET_LEN(str, 0);
    TERM_FILL(RSTRING_PTR(str), enc ? rb_enc_mbmaxlen(enc) : 1);

    if (!NIL_P(orig)) {
        rb_str_buf_append(str, orig);
    }

    return str;
}
|
.try_convert(object) ⇒ Object?
If object
is a String
object, returns object
.
Otherwise if object
responds to :to_str
, calls object.to_str
and returns the result.
Returns nil
if object
does not respond to :to_str
.
Raises an exception unless object.to_str
returns a String
object.
2869 2870 2871 2872 2873 |
# File 'string.c', line 2869
/*
 * String.try_convert(object): hands the argument to rb_check_string_type()
 * and returns whatever that check produces. The receiver (dummy) is unused.
 */
static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    VALUE converted = rb_check_string_type(str);

    return converted;
}
|
Instance Method Details
#%(object) ⇒ Object
Returns the result of formatting object
into the format specification self
(see Kernel#sprintf for formatting details):
"%05d" % 123 # => "00123"
If self
contains multiple substitutions, object
must be an Array or Hash containing the values to be substituted:
"%-5s: %016x" % [ "ID", self.object_id ] # => "ID : 00002b054ec93168"
"foo = %{foo}" % {foo: 'bar'} # => "foo = bar"
"foo = %{foo}, baz = %{baz}" % {foo: 'bar', baz: 'bat'} # => "foo = bar, baz = bat"
2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 |
# File 'string.c', line 2547
/*
 * String#%: formats arg according to the format specification str.
 *
 * When arg converts to an Array, its elements become the individual format
 * arguments; otherwise arg is passed as the single argument.
 */
static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE values = rb_check_array_type(arg);

    if (NIL_P(values)) {
        /* Not array-like: format with exactly one argument. */
        return rb_str_format(1, &arg, str);
    }
    return rb_str_format(RARRAY_LENINT(values), RARRAY_CONST_PTR(values), str);
}
|
#*(integer) ⇒ Object
Returns a new String
containing integer
copies of self
:
"Ho! " * 3 # => "Ho! Ho! Ho! "
"Ho! " * 0 # => ""
2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 |
# File 'string.c', line 2471
/*
 * Implements String#*: builds a new String holding `times` copies of `str`.
 *
 * Raises ArgumentError for a negative count ("negative argument") or when
 * the resulting byte length would not fit in a long ("argument too big").
 */
VALUE
rb_str_times(VALUE str, VALUE times)
{
VALUE str2;
long n, len;
char *ptr2;
int termlen;
/* Fast path: exactly one copy is a plain duplicate. */
if (times == INT2FIX(1)) {
return str_duplicate(rb_cString, str);
}
/* Fast path: zero copies is an empty string carrying str's encoding. */
if (times == INT2FIX(0)) {
str2 = str_alloc_embed(rb_cString, 0);
rb_enc_copy(str2, str);
return str2;
}
len = NUM2LONG(times);
if (len < 0) {
rb_raise(rb_eArgError, "negative argument");
}
/* Special case: str is a single NUL byte, so the result is all zero bytes
 * and can be produced by zero-filling instead of copying. */
if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
if (STR_EMBEDDABLE_P(len, 1)) {
str2 = str_alloc_embed(rb_cString, len + 1);
memset(RSTRING_PTR(str2), 0, len + 1);
}
else {
str2 = str_alloc_heap(rb_cString);
RSTRING(str2)->as.heap.aux.capa = len;
RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
}
STR_SET_LEN(str2, len);
rb_enc_copy(str2, str);
return str2;
}
/* Overflow guard for the multiplication below (skipped when len is 0 to
 * avoid dividing by zero). */
if (len && LONG_MAX/len < RSTRING_LEN(str)) {
rb_raise(rb_eArgError, "argument too big");
}
/* From here on len is the total byte length of the result. */
len *= RSTRING_LEN(str);
termlen = TERM_LEN(str);
str2 = str_enc_new(rb_cString, 0, len, STR_ENC_GET(str));
ptr2 = RSTRING_PTR(str2);
if (len) {
/* Seed the buffer with one copy, then keep duplicating the filled prefix
 * (n bytes) while doubling still fits; the last memcpy fills the rest. */
n = RSTRING_LEN(str);
memcpy(ptr2, RSTRING_PTR(str), n);
while (n <= len/2) {
memcpy(ptr2 + n, ptr2, n);
n *= 2;
}
memcpy(ptr2 + n, ptr2, len-n);
}
STR_SET_LEN(str2, len);
TERM_FILL(&ptr2[len], termlen);
rb_enc_cr_str_copy_for_substr(str2, str);
return str2;
}
|
#+(other_string) ⇒ Object
Returns a new String
containing other_string
concatenated to self
:
"Hello from " + self.to_s # => "Hello from main"
2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 |
# File 'string.c', line 2399
/*
 * Implements String#+: returns a new String made of str1's bytes followed
 * by str2's bytes.
 *
 * str2 is first coerced with StringValue(). The result encoding comes from
 * rb_enc_check_str(str1, str2). Raises ArgumentError ("string size too big")
 * when the combined length would overflow a long.
 */
VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
VALUE str3;
rb_encoding *enc;
char *ptr1, *ptr2, *ptr3;
long len1, len2;
int termlen;
StringValue(str2);
enc = rb_enc_check_str(str1, str2);
RSTRING_GETMEM(str1, ptr1, len1);
RSTRING_GETMEM(str2, ptr2, len2);
termlen = rb_enc_mbminlen(enc);
/* Guard len1 + len2 against signed overflow before allocating. */
if (len1 > LONG_MAX - len2) {
rb_raise(rb_eArgError, "string size too big");
}
str3 = str_enc_new(rb_cString, 0, len1+len2, enc);
ptr3 = RSTRING_PTR(str3);
memcpy(ptr3, ptr1, len1);
memcpy(ptr3+len1, ptr2, len2);
TERM_FILL(&ptr3[len1+len2], termlen);
/* The result's coderange is derived from both operands' coderanges. */
ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
/* ptr1/ptr2 are raw pointers into the operands and were used after an
 * allocation; NOTE(review): the guards presumably keep str1/str2 alive
 * until this point. */
RB_GC_GUARD(str1);
RB_GC_GUARD(str2);
return str3;
}
|
#+ ⇒ self
Returns self
if self
is not frozen and can be mutated without warning issuance.
Otherwise returns self.dup
, which is not frozen.
3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 |
# File 'string.c', line 3196
/*
 * Unary plus (+string): returns str itself when it is neither frozen nor
 * chilled; otherwise returns a duplicate produced by rb_str_dup().
 */
static VALUE
str_uplus(VALUE str)
{
    if (!OBJ_FROZEN(str) && !CHILLED_STRING_P(str)) {
        return str;
    }
    return rb_str_dup(str);
}
|
#- ⇒ Object #dedup ⇒ Object Also known as: dedup
Returns a frozen, possibly pre-existing copy of the string.
The returned String
will be deduplicated as long as it does not have any instance variables set on it and is not a String subclass.
Note that the -string
variant is more convenient for defining constants:
FILENAME = -'config/database.yml'
while dedup
is better suited to chains of calculations:
@url_list.concat(urls.map(&:dedup))
3228 3229 3230 3231 3232 3233 3234 3235 |
# File 'string.c', line 3228
/*
 * Unary minus (-string) / #dedup: returns the result of rb_fstring() for
 * the receiver. A receiver that is neither a bare String nor frozen is
 * duplicated first, and the duplicate is what gets passed to rb_fstring().
 */
static VALUE
str_uminus(VALUE str)
{
    VALUE candidate = str;

    if (!(BARE_STRING_P(str) || rb_obj_frozen_p(str))) {
        candidate = rb_str_dup(str);
    }
    return rb_fstring(candidate);
}
|
#<<(object) ⇒ String
Concatenates object
to self
and returns self
:
s = 'foo'
s << 'bar' # => "foobar"
s # => "foobar"
If object
is an Integer, the value is considered a codepoint and converted to a character before concatenation:
s = 'foo'
s << 33 # => "foo!"
If that codepoint is not representable in the encoding of string, RangeError is raised.
s = 'foo'
s.encoding # => <Encoding:UTF-8>
s << 0x00110000 # 1114112 out of char range (RangeError)
s = 'foo'.encode('EUC-JP')
s << 0x00800080 # invalid codepoint 0x800080 in EUC-JP (RangeError)
If the encoding is US-ASCII and the codepoint is in 0..0xff, the string is automatically promoted to ASCII-8BIT:
s = 'foo'.encode('US-ASCII')
s << 0xff
s.encoding # => #<Encoding:BINARY (ASCII-8BIT)>
Related: String#concat, which takes multiple arguments.
3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 |
# File 'string.c', line 3917
/*
 * Implements String#<<: appends str2 to str1 in place and returns str1.
 *
 * A non-Integer str2 is delegated to rb_str_append(). An Integer is taken
 * as a codepoint in str1's encoding and appended as one character.
 *
 * Raises RangeError when the integer does not fit an unsigned int, or is
 * not a valid codepoint in str1's encoding.
 */
VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
unsigned int code;
rb_encoding *enc = STR_ENC_GET(str1);
int encidx;
if (RB_INTEGER_TYPE_P(str2)) {
if (rb_num_to_uint(str2, &code) == 0) {
/* Conversion succeeded: `code` now holds the codepoint. */
}
else if (FIXNUM_P(str2)) {
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
}
else {
rb_raise(rb_eRangeError, "bignum out of char range");
}
}
else {
return rb_str_append(str1, str2);
}
/* A non-negative index selects the single-byte append path. */
encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
if (encidx >= 0) {
rb_str_buf_cat_byte(str1, (unsigned char)code);
}
else {
long pos = RSTRING_LEN(str1);
int cr = ENC_CODERANGE(str1);
int len;
char *buf;
/* rb_enc_codelen() yields the encoded length, or one of the error codes
 * handled here; both error branches raise and do not return. */
switch (len = rb_enc_codelen(code, enc)) {
case ONIGERR_INVALID_CODE_POINT_VALUE:
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
case 0:
rb_raise(rb_eRangeError, "%u out of char range", code);
break;
}
/* Encode into a scratch buffer first and verify the encoded length
 * before touching str1. */
buf = ALLOCA_N(char, len + 1);
rb_enc_mbcput(code, buf, enc);
if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
}
rb_str_resize(str1, pos+len);
memcpy(RSTRING_PTR(str1) + pos, buf, len);
/* Update the coderange captured before the append: a 7-bit string that
 * gains a codepoint above 127 becomes VALID; a BROKEN string becomes
 * UNKNOWN. */
if (cr == ENC_CODERANGE_7BIT && code > 127) {
cr = ENC_CODERANGE_VALID;
}
else if (cr == ENC_CODERANGE_BROKEN) {
cr = ENC_CODERANGE_UNKNOWN;
}
ENC_CODERANGE_SET(str1, cr);
}
return str1;
}
|
#<=>(other_string) ⇒ -1, ...
Compares self
and other_string
, returning:
-
-1 if
other_string
is larger. -
0 if the two are equal.
-
1 if
other_string
is smaller. -
nil
if the two are incomparable.
Examples:
'foo' <=> 'foo' # => 0
'foo' <=> 'food' # => -1
'food' <=> 'foo' # => 1
'FOO' <=> 'foo' # => -1
'foo' <=> 'FOO' # => 1
'foo' <=> 1 # => nil
4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 |
# File 'string.c', line 4207
/*
 * String#<=>: compares str1 with str2.
 *
 * When str2 can be treated as a String, returns the rb_str_cmp() result as
 * a Fixnum. Otherwise defers to rb_invcmp(str1, str2).
 */
static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        int order = rb_str_cmp(str1, other);
        return INT2FIX(order);
    }
    return rb_invcmp(str1, str2);
}
|
#==(object) ⇒ Boolean #===(object) ⇒ Boolean
Returns true if object has the same length and content as self; false otherwise:
s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false
Returns false
if the two strings’ encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false
If object
is not an instance of String
but responds to to_str
, then the two strings are compared using object.==
.
4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 |
# File 'string.c', line 4146
/*
 * String#== / String#===: equality test between str1 and str2.
 *
 * - The very same object compares equal immediately.
 * - Two Strings are compared with rb_str_eql_internal().
 * - A non-String that responds to to_str is asked instead, via
 *   rb_equal(str2, str1); any other object is not equal.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    return rb_respond_to(str2, idTo_str) ? rb_equal(str2, str1) : Qfalse;
}
|
#==(object) ⇒ Boolean #===(object) ⇒ Boolean
Returns true if object has the same length and content as self; false otherwise:
s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false
Returns false
if the two strings’ encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false
If object
is not an instance of String
but responds to to_str
, then the two strings are compared using object.==
.
4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 |
# File 'string.c', line 4146
/*
 * String#== / String#===: equality test between str1 and str2.
 *
 * - The very same object compares equal immediately.
 * - Two Strings are compared with rb_str_eql_internal().
 * - A non-String that responds to to_str is asked instead, via
 *   rb_equal(str2, str1); any other object is not equal.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    return rb_respond_to(str2, idTo_str) ? rb_equal(str2, str1) : Qfalse;
}
|
#=~(regexp) ⇒ Integer? #=~(object) ⇒ Integer?
Returns the Integer index of the first substring that matches the given regexp
, or nil
if no match found:
'foo' =~ /f/ # => 0
'foo' =~ /o/ # => 1
'foo' =~ /x/ # => nil
Note: also updates the regexp-related global variables (see the "Global Variables" section of Regexp).
If the given object
is not a Regexp, returns the value returned by object =~ self
.
Note that string =~ regexp
is different from regexp =~ string
(see Regexp#=~):
number= nil
"no. 9" =~ /(?<number>\d+)/
number # => nil (not assigned)
/(?<number>\d+)/ =~ "no. 9"
number #=> "9"
4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 |
# File 'string.c', line 4922
/*
 * String#=~: matches x (the receiver) against y.
 *
 * - y is a Regexp: returns rb_reg_match(y, x).
 * - y is a String: raises TypeError ("type mismatch: String given").
 * - anything else: calls y =~ x and returns its result.
 */
static VALUE
rb_str_match(VALUE x, VALUE y)
{
    const int type = OBJ_BUILTIN_TYPE(y);

    if (type == T_REGEXP) {
        return rb_reg_match(y, x);
    }
    if (type == T_STRING) {
        rb_raise(rb_eTypeError, "type mismatch: String given");
    }
    return rb_funcall(y, idEqTilde, 1, x);
}
|
#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil
Returns the substring of self
specified by the arguments. See examples at String Slices.
5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 |
# File 'string.c', line 5680
/*
 * String#[] / String#slice: returns the substring of str selected by the
 * arguments.
 *
 * With two arguments: (regexp, capture) goes to rb_str_subpat(), anything
 * else goes to rb_str_substr_two_fixnums(). With any other count, the
 * arity is checked (1..2) and the single argument goes to rb_str_aref().
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return rb_str_aref(str, argv[0]);
    }

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        return rb_str_subpat(str, argv[0], argv[1]);
    }
    return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
}
|
#[]=(index) ⇒ Object #[]=(start, length) ⇒ Object #[]=(range) ⇒ Object #[]=(regexp, capture = 0) ⇒ Object #[]=(substring) ⇒ Object
Replaces all, some, or none of the contents of self
; returns new_string
. See String Slices.
A few examples:
s = 'foo'
s[2] = 'rtune' # => "rtune"
s # => "fortune"
s[1, 5] = 'init' # => "init"
s # => "finite"
s[3..4] = 'al' # => "al"
s # => "finale"
s[/e$/] = 'ly' # => "ly"
s # => "finally"
s['lly'] = 'ncial' # => "ncial"
s # => "financial"
5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 |
# File 'string.c', line 5916
/*
 * String#[]=: replaces part of str and returns the assigned value.
 *
 * With three arguments: (regexp, capture, value) goes to
 * rb_str_subpat_set(), and (start, length, value) goes to rb_str_update();
 * the value (argv[2]) is returned. With any other count, the arity is
 * checked (2..3) and the call is forwarded to rb_str_aset().
 */
static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 3) {
        rb_check_arity(argc, 2, 3);
        return rb_str_aset(str, argv[0], argv[1]);
    }

    if (!RB_TYPE_P(argv[0], T_REGEXP)) {
        rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
    }
    else {
        rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
    }
    return argv[2];
}
|
#append_as_bytes(*objects) ⇒ String
Concatenates each object in objects
into self
without any encoding validation or conversion and returns self
:
s = 'foo'
s.append_as_bytes(" \xE2\x82") # => "foo \xE2\x82"
s.valid_encoding? # => false
s.append_as_bytes("\xAC 12")
s.valid_encoding? # => true
For each given object object
that is an Integer, the value is considered a byte. If the Integer is bigger than one byte, only the lower byte is considered, similar to String#setbyte:
s = ""
s.append_as_bytes(0, 257) # => "\u0000\u0001"
Related: String#<<, String#concat, which do an encoding aware concatenation.
3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 |
# File 'string.c', line 3771
/*
 * Implements String#append_as_bytes: appends every argument to str as raw
 * bytes, without encoding validation or conversion, and returns str.
 *
 * argc/argv: the objects to append. Each must be a String (its bytes are
 *            copied) or an Integer (its low byte is appended). Integer
 *            slots in argv are overwritten with their masked value.
 * str:       the receiver, modified in place.
 *
 * Raises TypeError for any other argument type, before str is modified.
 *
 * Coderange handling: the cached coderange is kept when the receiver is
 * 7-bit and every appended byte is known to be ASCII, or when the receiver
 * is a VALID ASCII-8BIT string; in every other case it is cleared.
 * Previously the successful 7-bit check fell through into the clearing
 * code, so the check could never preserve the coderange.
 */
VALUE
rb_str_append_as_bytes(int argc, VALUE *argv, VALUE str)
{
    long needed_capacity = 0;
    volatile VALUE t0;
    enum ruby_value_type *types = ALLOCV_N(enum ruby_value_type, t0, argc);

    /* Pass 1: validate argument types, remember them, and total the bytes
     * to append (one per Integer, RSTRING_LEN per String). */
    for (int index = 0; index < argc; index++) {
        VALUE obj = argv[index];
        enum ruby_value_type type = types[index] = rb_type(obj);
        switch (type) {
          case T_FIXNUM:
          case T_BIGNUM:
            needed_capacity++;
            break;
          case T_STRING:
            needed_capacity += RSTRING_LEN(obj);
            break;
          default:
            rb_raise(
                rb_eTypeError,
                "wrong argument type %"PRIsVALUE" (expected String or Integer)",
                rb_obj_class(obj)
            );
            break;
        }
    }

    str_ensure_available_capa(str, needed_capacity);
    char *sptr = RSTRING_END(str);

    /* Pass 2: write the bytes after the current end of str. */
    for (int index = 0; index < argc; index++) {
        VALUE obj = argv[index];
        enum ruby_value_type type = types[index];
        switch (type) {
          case T_FIXNUM:
          case T_BIGNUM: {
            /* Mask to the low byte first so NUM2INT is applied to a value
             * in 0..255; the masked value is stored back for the coderange
             * check below. */
            argv[index] = obj = rb_int_and(obj, INT2FIX(0xff));
            char byte = (char)(NUM2INT(obj) & 0xFF);
            *sptr = byte;
            sptr++;
            break;
          }
          case T_STRING: {
            const char *ptr;
            long len;
            RSTRING_GETMEM(obj, ptr, len);
            memcpy(sptr, ptr, len);
            sptr += len;
            break;
          }
          default:
            rb_bug("append_as_bytes arguments should have been validated");
        }
    }

    STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
    TERM_FILL(sptr, TERM_LEN(str)); /* sentinel */

    int cr = ENC_CODERANGE(str);
    switch (cr) {
      case ENC_CODERANGE_7BIT: {
        for (int index = 0; index < argc; index++) {
            VALUE obj = argv[index];
            enum ruby_value_type type = types[index];
            switch (type) {
              case T_FIXNUM:
              case T_BIGNUM: {
                if (!ISASCII(NUM2INT(obj))) {
                    goto clear_cr;
                }
                break;
              }
              case T_STRING: {
                if (ENC_CODERANGE(obj) != ENC_CODERANGE_7BIT) {
                    goto clear_cr;
                }
                break;
              }
              default:
                rb_bug("append_as_bytes arguments should have been validated");
            }
        }
        /* Everything appended is ASCII: the receiver is still 7-bit. */
        goto keep_cr;
      }
      case ENC_CODERANGE_VALID:
        if (ENCODING_GET_INLINED(str) == ENCINDEX_ASCII_8BIT) {
            goto keep_cr;
        }
        goto clear_cr;
      default:
        goto clear_cr;
    }

  clear_cr:
    // If no fast path was hit, we clear the coderange.
    // append_as_bytes is predominantly meant to be used in
    // buffering situation, hence it's likely the coderange
    // will never be scanned, so it's not worth spending time
    // precomputing the coderange except for simple and common
    // situations.
    ENC_CODERANGE_CLEAR(str);

  keep_cr:
    /* Reached on every path, after the last use of `types`. */
    RB_GC_GUARD(t0);
    return str;
}
|
#ascii_only? ⇒ Boolean
Returns true
if self
contains only ASCII characters, false
otherwise:
'abc'.ascii_only? # => true
"abc\u{6666}".ascii_only? # => false
11440 11441 11442 11443 11444 11445 11446 |
# File 'string.c', line 11440
/*
 * String#ascii_only?: true when the coderange reported by
 * rb_enc_str_coderange() for +str+ is ENC_CODERANGE_7BIT, false otherwise.
 */
static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    const int is_7bit = (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT);

    return RBOOL(is_7bit);
}
|
#b ⇒ String
:include: doc/string/b.rdoc
11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 |
# File 'string.c', line 11376
/*
 * String#b: builds a new rb_cString object that takes its content from
 * +str+ via str_replace_shared_without_enc() (i.e. without copying the
 * receiver's encoding) and, when possible, derives the result's coderange
 * from the receiver's.
 */
static VALUE
rb_str_b(VALUE str)
{
    /* Mirror the receiver's storage layout: embedded or heap. */
    VALUE copy = STR_EMBED_P(str)
        ? str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str))
        : str_alloc_heap(rb_cString);

    str_replace_shared_without_enc(copy, str);

    if (!rb_enc_asciicompat(STR_ENC_GET(str))) {
        return copy;
    }

    // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
    // So a known receiver code range fully determines the result's code range.
    const int src_cr = ENC_CODERANGE(str);
    if (src_cr == ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_SET(copy, ENC_CODERANGE_7BIT);
    }
    else if (src_cr == ENC_CODERANGE_BROKEN || src_cr == ENC_CODERANGE_VALID) {
        ENC_CODERANGE_SET(copy, ENC_CODERANGE_VALID);
    }
    else {
        ENC_CODERANGE_CLEAR(copy);
    }
    return copy;
}
|
#byteindex(substring, offset = 0) ⇒ Integer? #byteindex(regexp, offset = 0) ⇒ Integer?
Returns the Integer byte-based index of the first occurrence of the given substring
, or nil
if none found:
'foo'.byteindex('f') # => 0
'foo'.byteindex('o') # => 1
'foo'.byteindex('oo') # => 1
'foo'.byteindex('ooo') # => nil
Returns the Integer byte-based index of the first match for the given Regexp regexp
, or nil
if none found:
'foo'.byteindex(/f/) # => 0
'foo'.byteindex(/o/) # => 1
'foo'.byteindex(/oo/) # => 1
'foo'.byteindex(/ooo/) # => nil
Integer argument offset
, if given, specifies the byte-based position in the string to begin the search:
'foo'.byteindex('o', 1) # => 1
'foo'.byteindex('o', 2) # => 2
'foo'.byteindex('o', 3) # => nil
If offset
is negative, counts backward from the end of self
:
'foo'.byteindex('o', -1) # => 2
'foo'.byteindex('o', -2) # => 1
'foo'.byteindex('o', -3) # => 1
'foo'.byteindex('o', -4) # => nil
If offset
does not land on a character (codepoint) boundary, IndexError
is raised.
Related: String#index, String#byterindex.
4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 |
# File 'string.c', line 4541
/*
 * String#byteindex(substring_or_regexp, offset = 0).
 *
 * Returns the byte index of the first match at or after +offset+, or nil.
 * A negative offset is taken relative to the end of +str+; an offset that is
 * out of range yields nil (and, for a Regexp, resets the backref to nil).
 * str_ensure_byte_pos() is applied to the resolved offset before searching.
 */
static VALUE
rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub, initpos;
    long pos = 0;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        /* Length is sampled before the offset is converted. */
        const long slen = RSTRING_LEN(str);
        int out_of_range;

        pos = NUM2LONG(initpos);
        if (pos < 0) {
            pos += slen;
            out_of_range = (pos < 0);
        }
        else {
            out_of_range = (pos > slen);
        }

        if (out_of_range) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }

    str_ensure_byte_pos(str, pos);

    if (!RB_TYPE_P(sub, T_REGEXP)) {
        StringValue(sub);
        pos = rb_str_byteindex(str, sub, pos);
        return (pos >= 0) ? LONG2NUM(pos) : Qnil;
    }

    if (rb_reg_search(sub, str, pos, 0) < 0) {
        return Qnil;
    }

    /* Report where the match recorded in the backref begins. */
    VALUE match = rb_backref_get();
    struct re_registers *regs = RMATCH_REGS(match);
    pos = BEG(0);
    return LONG2NUM(pos);
}
|
#byterindex(substring, offset = self.bytesize) ⇒ Integer? #byterindex(regexp, offset = self.bytesize) ⇒ Integer?
Returns the Integer byte-based index of the last occurrence of the given substring
, or nil
if none found:
'foo'.byterindex('f') # => 0
'foo'.byterindex('o') # => 2
'foo'.byterindex('oo') # => 1
'foo'.byterindex('ooo') # => nil
Returns the Integer byte-based index of the last match for the given Regexp regexp
, or nil
if none found:
'foo'.byterindex(/f/) # => 0
'foo'.byterindex(/o/) # => 2
'foo'.byterindex(/oo/) # => 1
'foo'.byterindex(/ooo/) # => nil
The last match means starting at the possible last position, not the last of longest matches.
'foo'.byterindex(/o+/) # => 2
$~ #=> #<MatchData "o">
To get the last longest match, combine the pattern with a negative lookbehind.
'foo'.byterindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">
Or use String#byteindex with a negative lookahead.
'foo'.byteindex(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">
Integer argument offset
, if given and non-negative, specifies the maximum starting byte-based position in the string to end the search:
'foo'.byterindex('o', 0) # => nil
'foo'.byterindex('o', 1) # => 1
'foo'.byterindex('o', 2) # => 2
'foo'.byterindex('o', 3) # => 2
If offset
is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset
:
'foo'.byterindex('o', -1) # => 2
'foo'.byterindex('o', -2) # => 1
'foo'.byterindex('o', -3) # => nil
'foo'.byterindex('o', -4) # => nil
If offset
does not land on a character (codepoint) boundary, IndexError
is raised.
Related: String#byteindex.
4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 |
# File 'string.c', line 4855
/*
 * String#byterindex(substring_or_regexp, offset = bytesize).
 *
 * Returns the byte index of the last match starting at or before +offset+,
 * or nil.  A negative offset is taken relative to the end of +str+ (nil if
 * it still falls before the start; a Regexp argument also resets the backref
 * to nil); an offset past the end is clamped to the length.
 * str_ensure_byte_pos() is applied to the resolved offset before searching.
 */
static VALUE
rb_str_byterindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub, initpos;
    const long len = RSTRING_LEN(str);
    long pos = len;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0) {
            pos += len;
            if (pos < 0) {
                if (RB_TYPE_P(sub, T_REGEXP)) {
                    rb_backref_set(Qnil);
                }
                return Qnil;
            }
        }
        else if (pos > len) {
            pos = len;
        }
    }

    str_ensure_byte_pos(str, pos);

    if (!RB_TYPE_P(sub, T_REGEXP)) {
        StringValue(sub);
        pos = rb_str_byterindex(str, sub, pos);
        return (pos >= 0) ? LONG2NUM(pos) : Qnil;
    }

    /* Final argument 1 selects the reverse search. */
    if (rb_reg_search(sub, str, pos, 1) < 0) {
        return Qnil;
    }

    VALUE match = rb_backref_get();
    struct re_registers *regs = RMATCH_REGS(match);
    pos = BEG(0);
    return LONG2NUM(pos);
}
|
#bytes ⇒ Object
:include: doc/string/bytes.rdoc
9674 9675 9676 9677 9678 9679 |
# File 'string.c', line 9674
/*
 * String#bytes: hands +str+ to rb_str_enumerate_bytes() together with the
 * value produced by WANTARRAY("bytes", <byte length of str>).
 */
static VALUE
rb_str_bytes(VALUE str)
{
    return rb_str_enumerate_bytes(str, WANTARRAY("bytes", RSTRING_LEN(str)));
}
|
#bytesize ⇒ Integer
:include: doc/string/bytesize.rdoc
2365 2366 2367 2368 2369 |
# File 'string.c', line 2365
/*
 * String#bytesize: returns RSTRING_LEN(str) as a Ruby Integer.
 */
VALUE
rb_str_bytesize(VALUE str)
{
    const long nbytes = RSTRING_LEN(str);

    return LONG2NUM(nbytes);
}
|
#byteslice(index, length = 1) ⇒ String? #byteslice(range) ⇒ String?
Returns a substring of self
, or nil
if the substring cannot be constructed.
With integer arguments index
and length
given, returns the substring beginning at the given index
of the given length
(if possible), or nil
if length
is negative or index
falls outside of self
:
s = '0123456789' # => "0123456789"
s.byteslice(2) # => "2"
s.byteslice(200) # => nil
s.byteslice(4, 3) # => "456"
s.byteslice(4, 30) # => "456789"
s.byteslice(4, -1) # => nil
s.byteslice(40, 2) # => nil
In either case above, counts backwards from the end of self
if index
is negative:
s = '0123456789' # => "0123456789"
s.byteslice(-4) # => "6"
s.byteslice(-4, 3) # => "678"
With Range argument range
given, returns byteslice(range.begin, range.size)
:
s = '0123456789' # => "0123456789"
s.byteslice(4..6) # => "456"
s.byteslice(-6..-4) # => "456"
s.byteslice(5..2) # => "" # range.size is zero.
s.byteslice(40..42) # => nil
In all cases, a returned string has the same encoding as self
:
s.encoding # => #<Encoding:UTF-8>
s.byteslice(4).encoding # => #<Encoding:UTF-8>
6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 |
# File 'string.c', line 6736
/*
 * String#byteslice(index, length) / String#byteslice(index_or_range).
 *
 * With two arguments both are converted to long and passed to
 * str_byte_substr(); otherwise the arity is checked (1..2) and the single
 * argument is passed to str_byte_aref().
 */
static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return str_byte_aref(str, argv[0]);
    }

    long beg = NUM2LONG(argv[0]);
    long len = NUM2LONG(argv[1]);
    return str_byte_substr(str, beg, len, TRUE);
}
|
#bytesplice(index, length, str) ⇒ String #bytesplice(index, length, str, str_index, str_length) ⇒ String #bytesplice(range, str) ⇒ String #bytesplice(range, str, str_range) ⇒ String
Replaces some or all of the content of self
with str
, and returns self
. The portion of the string affected is determined using the same criteria as String#byteslice, except that length
cannot be omitted. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly.
If str_index
and str_length
, or str_range
are given, the content of self
is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); however the substring of str
is not allocated as a new string.
The forms that take an Integer raise an IndexError if the value is out of range; the Range forms raise a RangeError. If the beginning or ending offset does not land on a character (codepoint) boundary, an IndexError is raised.
6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 |
# File 'string.c', line 6792
/*
 * String#bytesplice.  Accepted call shapes (see the inline markers below):
 *
 *   bytesplice(range, str)
 *   bytesplice(range, str, str_range)
 *   bytesplice(index, length, str)
 *   bytesplice(index, length, str, str_index, str_length)
 *
 * (beg, len) select the bytes of +str+ to replace; (vbeg, vlen) select the
 * bytes of the replacement +val+ to insert.  Returns +str+.
 */
static VALUE
rb_str_bytesplice(int argc, VALUE *argv, VALUE str)
{
long beg, len, vbeg, vlen;
VALUE val;
int cr;
/* 2..5 arguments pass the generic check; 4 is then rejected explicitly. */
rb_check_arity(argc, 2, 5);
if (!(argc == 2 || argc == 3 || argc == 5)) {
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
}
/* Range forms: two arguments, or three whose first is not an Integer. */
if (argc == 2 || (argc == 3 && !RB_INTEGER_TYPE_P(argv[0]))) {
if (!rb_range_beg_len(argv[0], &beg, &len, RSTRING_LEN(str), 2)) {
rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
rb_builtin_class_name(argv[0]));
}
val = argv[1];
StringValue(val);
if (argc == 2) {
/* bytesplice(range, str) */
vbeg = 0;
vlen = RSTRING_LEN(val);
}
else {
/* bytesplice(range, str, str_range) */
if (!rb_range_beg_len(argv[2], &vbeg, &vlen, RSTRING_LEN(val), 2)) {
rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
rb_builtin_class_name(argv[2]));
}
}
}
/* Integer forms: (index, length, str[, str_index, str_length]). */
else {
beg = NUM2LONG(argv[0]);
len = NUM2LONG(argv[1]);
val = argv[2];
StringValue(val);
if (argc == 3) {
/* bytesplice(index, length, str) */
vbeg = 0;
vlen = RSTRING_LEN(val);
}
else {
/* bytesplice(index, length, str, str_index, str_length) */
vbeg = NUM2LONG(argv[3]);
vlen = NUM2LONG(argv[4]);
}
}
/* Both (offset, length) pairs are checked -- and possibly adjusted, since
 * they are passed by pointer -- before the receiver is touched. */
str_check_beg_len(str, &beg, &len);
str_check_beg_len(val, &vbeg, &vlen);
str_modify_keep_cr(str);
/* Only when the encoding indexes differ is the slower compatibility
 * check run and its result associated with the receiver. */
if (RB_UNLIKELY(ENCODING_GET_INLINED(str) != ENCODING_GET_INLINED(val))) {
rb_enc_associate(str, rb_enc_check(str, val));
}
rb_str_update_1(str, beg, len, val, vbeg, vlen);
/* Combine the two coderanges; the result is stored on the receiver unless
 * the combination is BROKEN. */
cr = ENC_CODERANGE_AND(ENC_CODERANGE(str), ENC_CODERANGE(val));
if (cr != ENC_CODERANGE_BROKEN)
ENC_CODERANGE_SET(str, cr);
return str;
}
|
#capitalize(*options) ⇒ String
Returns a string containing the characters in self
; the first character is upcased; the remaining characters are downcased:
s = 'hello World!' # => "hello World!"
s.capitalize # => "Hello world!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#capitalize!.
8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 |
# File 'string.c', line 8169
/*
 * String#capitalize(*options).
 *
 * Starts from the UPCASE|TITLECASE fold flags, lets check_case_options()
 * adjust them from the arguments, and returns the case-mapped string.  An
 * empty receiver is returned as-is.
 */
static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags =
        check_case_options(argc, argv, ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE);
    rb_encoding *enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return str;
    }

    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        return rb_str_casemap(str, &flags, enc);
    }

    /* ASCII-only mapping writes into a fresh string of the same byte length. */
    VALUE ret = rb_str_new(0, RSTRING_LEN(str));
    rb_str_ascii_casemap(str, ret, &flags, enc);
    return ret;
}
|
#capitalize!(*options) ⇒ self?
Upcases the first character in self
; downcases the remaining characters; returns self
if any changes were made, nil
otherwise:
s = 'hello World!' # => "hello World!"
s.capitalize! # => "Hello world!"
s # => "Hello world!"
s.capitalize! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#capitalize.
8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 |
# File 'string.c', line 8131
/*
 * String#capitalize!(*options).
 *
 * In-place variant of capitalize: returns +str+ when the case mapping set
 * ONIGENC_CASE_MODIFIED in the flags, nil otherwise (including for an empty
 * receiver).
 */
static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags =
        check_case_options(argc, argv, ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE);
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return Qnil;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        /* Source and destination are the same string. */
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
|
#casecmp(other_string) ⇒ -1, ...
Compares self.downcase
and other_string.downcase
; returns:
-
-1 if
other_string.downcase
is larger. -
0 if the two are equal.
-
1 if
other_string.downcase
is smaller. -
nil
if the two are incomparable.
Examples:
'foo'.casecmp('foo') # => 0
'foo'.casecmp('food') # => -1
'food'.casecmp('foo') # => 1
'FOO'.casecmp('foo') # => 0
'foo'.casecmp('FOO') # => 0
'foo'.casecmp(1) # => nil
See Case Mapping.
Related: String#casecmp?.
4248 4249 4250 4251 4252 4253 4254 4255 4256 |
# File 'string.c', line 4248
/*
 * String#casecmp(other_string).
 *
 * Returns nil when rb_check_string_type() cannot produce a String from
 * +str2+; otherwise returns the result of str_casecmp().
 */
static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    return NIL_P(other) ? Qnil : str_casecmp(str1, other);
}
|
#casecmp?(other_string) ⇒ true, ...
Returns true
if self
and other_string
are equal after Unicode case folding, otherwise false
:
'foo'.casecmp?('foo') # => true
'foo'.casecmp?('food') # => false
'food'.casecmp?('foo') # => false
'FOO'.casecmp?('foo') # => true
'foo'.casecmp?('FOO') # => true
Returns nil
if the two values are incomparable:
'foo'.casecmp?(1) # => nil
See Case Mapping.
Related: String#casecmp.
4338 4339 4340 4341 4342 4343 4344 4345 4346 |
# File 'string.c', line 4338
/*
 * String#casecmp?(other_string).
 *
 * Returns nil when rb_check_string_type() cannot produce a String from
 * +str2+; otherwise returns the result of str_casecmp_p().
 */
static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    return NIL_P(other) ? Qnil : str_casecmp_p(str1, other);
}
|
#center(size, pad_string = ' ') ⇒ Object
:include: doc/string/center.rdoc
Related: String#ljust, String#rjust.
10981 10982 10983 10984 10985 |
# File 'string.c', line 10981
/*
 * String#center(size, pad_string = ' '): delegates to rb_str_justify() with
 * the mode character 'c'.
 */
static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    const char mode = 'c';

    return rb_str_justify(argc, argv, str, mode);
}
|
#chars ⇒ Object
:include: doc/string/chars.rdoc
9743 9744 9745 9746 9747 9748 |
# File 'string.c', line 9743
/*
 * String#chars: hands +str+ to rb_str_enumerate_chars() together with the
 * value produced by WANTARRAY("chars", rb_str_strlen(str)).
 */
static VALUE
rb_str_chars(VALUE str)
{
    return rb_str_enumerate_chars(str, WANTARRAY("chars", rb_str_strlen(str)));
}
|
#chomp(line_sep = $/) ⇒ Object
:include: doc/string/chomp.rdoc
10207 10208 10209 10210 10211 10212 10213 |
# File 'string.c', line 10207
/*
 * String#chomp(line_sep = $/).
 *
 * chomp_rs() resolves the separator from the arguments.  A nil separator
 * yields a plain duplicate of +str+; otherwise the result is the leading
 * chompped_length(str, rs) bytes of +str+.
 */
static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);

    if (!NIL_P(rs)) {
        const long kept = chompped_length(str, rs);
        return rb_str_subseq(str, 0, kept);
    }
    return str_duplicate(rb_cString, str);
}
|
#chomp!(line_sep = $/) ⇒ self?
Like String#chomp, but modifies self
in place; returns nil
if no modification made, self
otherwise.
10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 |
# File 'string.c', line 10187
/*
 * String#chomp!(line_sep = $/).
 *
 * Requires a modifiable receiver.  Returns nil early for an empty receiver
 * (when fewer than two arguments were given) or when the resolved separator
 * is nil; otherwise returns whatever rb_str_chomp_string() returns.
 */
static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc < 2 && RSTRING_LEN(str) == 0) {
        return Qnil;
    }

    VALUE rs = chomp_rs(argc, argv);
    return NIL_P(rs) ? Qnil : rb_str_chomp_string(str, rs);
}
|
#chop ⇒ Object
:include: doc/string/chop.rdoc
10034 10035 10036 10037 10038 |
# File 'string.c', line 10034
/*
 * String#chop: returns the leading chopped_length(str) bytes of +str+ as a
 * new string.
 */
static VALUE
rb_str_chop(VALUE str)
{
    const long kept = chopped_length(str);

    return rb_str_subseq(str, 0, kept);
}
|
#chop! ⇒ self?
Like String#chop, but modifies self
in place; returns nil
if self
is empty, self
otherwise.
Related: String#chomp!.
10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 |
# File 'string.c', line 10008
/*
 * String#chop!.
 *
 * Truncates +str+ in place to chopped_length(str) bytes and re-terminates
 * it.  Returns +str+, or nil when the receiver is empty.
 */
static VALUE
rb_str_chop_bang(VALUE str)
{
    str_modify_keep_cr(str);

    if (RSTRING_LEN(str) <= 0) {
        return Qnil;
    }

    const long new_len = chopped_length(str);
    STR_SET_LEN(str, new_len);
    TERM_FILL(RSTRING_PTR(str) + new_len, TERM_LEN(str));

    /* A 7-bit string stays 7-bit after truncation; any other cached
     * coderange is dropped. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
|
#chr ⇒ String
Returns a string containing the first character of self
:
s = 'foo' # => "foo"
s.chr # => "f"
6522 6523 6524 6525 6526 |
# File 'string.c', line 6522
/*
 * String#chr: returns rb_str_substr(str, 0, 1), i.e. the substring of
 * length 1 starting at offset 0.
 */
static VALUE
rb_str_chr(VALUE str)
{
    const long start = 0;
    const long count = 1;

    return rb_str_substr(str, start, count);
}
|
#clear ⇒ self
Removes the contents of self
:
s = 'foo' # => "foo"
s.clear # => ""
6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 |
# File 'string.c', line 6497
/*
 * String#clear: empties +str+ in place and returns it.
 *
 * The current buffer is discarded, the string is switched to embedded
 * storage with length 0 and a zero byte at the start, and the coderange is
 * set to 7BIT for ASCII-compatible encodings or VALID otherwise.
 */
static VALUE
rb_str_clear(VALUE str)
{
    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_LEN(str, 0);
    *RSTRING_PTR(str) = 0;

    const int empty_cr = rb_enc_asciicompat(STR_ENC_GET(str))
        ? ENC_CODERANGE_7BIT
        : ENC_CODERANGE_VALID;
    ENC_CODERANGE_SET(str, empty_cr);

    return str;
}
|
#codepoints ⇒ Object
:include: doc/string/codepoints.rdoc
9803 9804 9805 9806 9807 9808 |
# File 'string.c', line 9803
/*
 * String#codepoints: hands +str+ to rb_str_enumerate_codepoints() together
 * with the value produced by WANTARRAY("codepoints", rb_str_strlen(str)).
 */
static VALUE
rb_str_codepoints(VALUE str)
{
    return rb_str_enumerate_codepoints(str, WANTARRAY("codepoints", rb_str_strlen(str)));
}
|
#concat(*objects) ⇒ String
Concatenates each object in objects
to self
and returns self
:
s = 'foo'
s.concat('bar', 'baz') # => "foobarbaz"
s # => "foobarbaz"
For each given object object
that is an Integer, the value is considered a codepoint and converted to a character before concatenation:
s = 'foo'
s.concat(32, 'bar', 32, 'baz') # => "foo bar baz"
Related: String#<<, which takes a single argument.
3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 |
# File 'string.c', line 3727
/*
 * String#concat(*objects).
 *
 * Requires a modifiable receiver.  With no arguments +str+ is returned
 * untouched; with one argument the work is delegated to rb_str_concat().
 * With several, the arguments are first concatenated into a temporary
 * string carrying the receiver's encoding, and that temporary is appended
 * to +str+ in a single step.
 */
static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc < 1) {
        return str;
    }
    if (argc == 1) {
        return rb_str_concat(str, argv[0]);
    }

    VALUE joined = rb_str_tmp_new(0);
    rb_enc_copy(joined, str);
    for (int idx = 0; idx < argc; idx++) {
        rb_str_concat(joined, argv[idx]);
    }
    rb_str_buf_append(str, joined);
    return str;
}
|
#count(*selectors) ⇒ Integer
Returns the total number of characters in self
that are specified by the given selectors
(see Multiple Character Selectors):
a = "hello world"
a.count "lo" #=> 5
a.count "lo", "o" #=> 2
a.count "hello", "^l" #=> 4
a.count "ej-m" #=> 4
"hello^world".count "\\^aeiou" #=> 4
"hello-world".count "a\\-eo" #=> 4
c = "hello world\\r\\n"
c.count "\\" #=> 2
c.count "\\A" #=> 0
c.count "X-\\w" #=> 3
9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 |
# File 'string.c', line 9014
/*
 * String#count(*selectors): returns the number of characters in +str+ that
 * are selected by the given selector strings.  At least one selector is
 * required.
 */
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
char table[TR_TABLE_SIZE];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0, tstr;
char *s, *send;
int i;
int ascompat;
size_t n = 0;
rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
tstr = argv[0];
StringValue(tstr);
enc = rb_enc_check(str, tstr);
/* Fast path: a single one-byte selector in an ASCII-compatible encoding,
 * accepted by ONIGENC_IS_ALLOWED_REVERSE_MATCH, against a receiver that is
 * not broken.  The receiver is then scanned byte by byte for that one
 * value, without building a selector table. */
if (argc == 1) {
const char *ptstr;
if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
(ptstr = RSTRING_PTR(tstr),
ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
!is_broken_string(str)) {
int clen;
unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
send = RSTRING_END(str);
while (s < send) {
if (*(unsigned char*)s++ == c) n++;
}
return SIZET2NUM(n);
}
}
/* General path: feed every selector to tr_setup_table(); the third argument
 * is TRUE for the first selector only. */
tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
for (i=1; i<argc; i++) {
tstr = argv[i];
StringValue(tstr);
enc = rb_enc_check(str, tstr);
tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
}
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
send = RSTRING_END(str);
ascompat = rb_enc_asciicompat(enc);
/* Walk the receiver one character at a time. */
while (s < send) {
unsigned int c;
/* Bytes below 0x80 in an ASCII-compatible encoding index the table
 * directly. */
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (table[c]) {
n++;
}
s++;
}
/* Anything else is decoded to a codepoint and looked up via tr_find(). */
else {
int clen;
c = rb_enc_codepoint_len(s, send, &clen, enc);
if (tr_find(c, table, del, nodel)) {
n++;
}
s += clen;
}
}
return SIZET2NUM(n);
}
|
#crypt(salt_str) ⇒ Object
Returns the string generated by calling crypt(3)
standard library function with str
and salt_str
, in this order, as its arguments. Please do not use this method any longer. It is legacy; provided only for backward compatibility with ruby scripts in earlier days. It is bad to use in contemporary programs for several reasons:
-
Behaviour of C’s
crypt(3)
depends on the OS on which it is run. The generated string lacks data portability. -
On some OSes such as Mac OS,
crypt(3)
never fails (i.e. silently ends up in unexpected results). -
On some OSes such as Mac OS,
crypt(3)
is not thread safe. -
So-called “traditional” usage of
crypt(3)
is very very very weak. According to its manpage, Linux’s traditional crypt(3)
output has only 2**56 variations; too easy to brute force today. And this is the default behaviour. -
In order to make things robust some OSes implement so-called “modular” usage. To go through, you have to do a complex build-up of the
salt_str
parameter, by hand. Failure in generation of a proper salt string tends not to yield any errors; typos in parameters are normally not detectable.-
For instance, in the following example, the second invocation of String#crypt is wrong; it has a typo in “round=” (lacks “s”). However the call does not fail and something unexpected is generated.
"foo".crypt("$5$rounds=1000$salt$") # OK, proper usage "foo".crypt("$5$round=1000$salt$") # Typo not detected
-
-
Even in the “modular” mode, some hash functions are considered archaic and no longer recommended at all; for instance module
$1$
is officially abandoned by its author: see phk.freebsd.dk/sagas/md5crypt_eol/ . For another instance module$3$
is considered completely broken: see the manpage of FreeBSD. -
On some OSes such as Mac OS, there is no modular mode. Yet, as written above,
crypt(3)
on Mac OS never fails. This means even if you build up a proper salt string it generates a traditional DES hash anyways, and there is no way for you to be aware of."foo".crypt("$5$rounds=1000$salt$") # => "$5fNPQMxC5j6."
If for some reason you cannot migrate to other secure contemporary password hashing algorithms, install the string-crypt gem and require 'string/crypt'
to continue using it.
10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 |
# File 'string.c', line 10711
/*
 * String#crypt(salt_str): passes +str+ and +salt+ to the C library's
 * crypt(3) (or crypt_r when HAVE_CRYPT_R is defined) and returns the result
 * as a new String.
 *
 * Raises ArgumentError when the salt is shorter than two bytes or either of
 * its first two bytes is NUL; a NULL result from the C call is reported
 * through rb_syserr_fail() with the errno saved right after the call.
 */
static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
/* CRYPT_END() is the cleanup step matching the variant in use: it releases
 * the crypt_data buffer with crypt_r, or unlocks crypt_mutex with crypt. */
#ifdef HAVE_CRYPT_R
VALUE databuf;
struct crypt_data *data;
# define CRYPT_END() ALLOCV_END(databuf)
#else
extern char *crypt(const char *, const char *);
# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
#endif
VALUE result;
const char *s, *saltp;
char *res;
#ifdef BROKEN_CRYPT
char salt_8bit_clean[3];
#endif
StringValue(salt);
mustnot_wchar(str);
mustnot_wchar(salt);
s = StringValueCStr(str);
saltp = RSTRING_PTR(salt);
if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
}
/* With BROKEN_CRYPT, if either of the first two salt bytes is non-ASCII,
 * a two-byte copy with the high bit masked off is used as the salt. */
#ifdef BROKEN_CRYPT
if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
salt_8bit_clean[0] = saltp[0] & 0x7f;
salt_8bit_clean[1] = saltp[1] & 0x7f;
salt_8bit_clean[2] = '\0';
saltp = salt_8bit_clean;
}
#endif
#ifdef HAVE_CRYPT_R
data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
data->initialized = 0;
# endif
res = crypt_r(s, saltp, data);
#else
/* Without crypt_r, the call to crypt() is made while holding crypt_mutex. */
crypt_mutex_initialize();
rb_nativethread_lock_lock(&crypt_mutex.lock);
res = crypt(s, saltp);
#endif
if (!res) {
/* errno is captured before the cleanup step runs -- presumably because
 * the cleanup could overwrite it. */
int err = errno;
CRYPT_END();
rb_syserr_fail(err, "crypt");
}
/* The result is copied into a Ruby String before the cleanup step. */
result = rb_str_new_cstr(res);
CRYPT_END();
return result;
}
|
#delete(*selectors) ⇒ Object
Returns a copy of self
with characters specified by selectors
removed (see Multiple Character Selectors):
"hello".delete "l","lo" #=> "heo"
"hello".delete "lo" #=> "he"
"hello".delete "aeiou", "^e" #=> "hell"
"hello".delete "ej-m" #=> "ho"
8831 8832 8833 8834 8835 8836 8837 |
# File 'string.c', line 8831
/*
 * String#delete(*selectors): duplicates +str+, applies
 * rb_str_delete_bang() to the duplicate, and returns the duplicate
 * regardless of what the in-place call returns.
 */
static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    VALUE copy = str_duplicate(rb_cString, str);

    rb_str_delete_bang(argc, argv, copy);
    return copy;
}
|
#delete!(*selectors) ⇒ self?
Like String#delete, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 |
# File 'string.c', line 8755
/*
 * String#delete!(*selectors): removes from +str+, in place, every character
 * selected by the selector strings.  Returns +str+ if anything was removed,
 * nil otherwise.
 */
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
char squeez[TR_TABLE_SIZE];
rb_encoding *enc = 0;
char *s, *send, *t;
VALUE del = 0, nodel = 0;
int modify = 0;
int i, ascompat, cr;
/* Note: an empty receiver returns nil before the arity check runs. */
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
/* Feed every selector to tr_setup_table(); the third argument is true for
 * the first selector only. */
for (i=0; i<argc; i++) {
VALUE s = argv[i];
StringValue(s);
enc = rb_enc_check(str, s);
tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
}
str_modify_keep_cr(str);
ascompat = rb_enc_asciicompat(enc);
/* s is the read cursor, t the write cursor; both start at the beginning of
 * the buffer and kept characters are compacted toward the front. */
s = t = RSTRING_PTR(str);
send = RSTRING_END(str);
/* Start from the narrowest coderange; it is widened to VALID below as soon
 * as a character taking the multibyte branch is kept. */
cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
while (s < send) {
unsigned int c;
int clen;
/* Bytes below 0x80 in an ASCII-compatible encoding index the table
 * directly. */
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (squeez[c]) {
modify = 1;
}
else {
/* Copy only when the cursors have diverged. */
if (t != s) *t = c;
t++;
}
s++;
}
/* Anything else is decoded to a codepoint and looked up via tr_find(). */
else {
c = rb_enc_codepoint_len(s, send, &clen, enc);
if (tr_find(c, squeez, del, nodel)) {
modify = 1;
}
else {
if (t != s) rb_enc_mbcput(c, t, enc);
t += clen;
if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
}
s += clen;
}
}
/* Terminate at the write cursor and shrink the string to what was kept. */
TERM_FILL(t, TERM_LEN(str));
STR_SET_LEN(str, t - RSTRING_PTR(str));
ENC_CODERANGE_SET(str, cr);
if (modify) return str;
return Qnil;
}
|
#delete_prefix(prefix) ⇒ Object
:include: doc/string/delete_prefix.rdoc
11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 |
# File 'string.c', line 11216
/*
 * String#delete_prefix(prefix).
 *
 * When deleted_prefix_length() reports a positive length, returns the rest
 * of +str+ after that many leading bytes; otherwise returns a duplicate of
 * +str+.
 */
static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    const long prefixlen = deleted_prefix_length(str, prefix);

    if (prefixlen > 0) {
        return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
    }
    return str_duplicate(rb_cString, str);
}
|
#delete_prefix!(prefix) ⇒ self?
Like String#delete_prefix, except that self
is modified in place. Returns self
if the prefix is removed, nil
otherwise.
11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 |
# File 'string.c', line 11196
/*
 * String#delete_prefix!(prefix).
 *
 * Prepares +str+ for modification, then drops the leading bytes reported by
 * deleted_prefix_length() via rb_str_drop_bytes().  Returns nil when that
 * length is not positive.
 */
static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    str_modify_keep_cr(str);

    const long prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen > 0) {
        return rb_str_drop_bytes(str, prefixlen);
    }
    return Qnil;
}
|
#delete_suffix(suffix) ⇒ Object
:include: doc/string/delete_suffix.rdoc
11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 |
# File 'string.c', line 11299
/*
 * String#delete_suffix(suffix).
 *
 * When deleted_suffix_length() reports a positive length, returns +str+
 * without that many trailing bytes; otherwise returns a duplicate of +str+.
 */
static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    const long suffixlen = deleted_suffix_length(str, suffix);

    if (suffixlen > 0) {
        return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
    }
    return str_duplicate(rb_cString, str);
}
|
#delete_suffix!(suffix) ⇒ self?
Like String#delete_suffix, except that self
is modified in place. Returns self
if the suffix is removed, nil
otherwise.
11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 |
# File 'string.c', line 11271
/*
 * String#delete_suffix!(suffix).
 *
 * Requires a modifiable receiver.  When deleted_suffix_length() reports a
 * positive length, truncates +str+ by that many bytes in place and returns
 * it; otherwise returns nil.
 */
static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    str_modifiable(str);

    const long suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) {
        return Qnil;
    }

    /* The original length is sampled before the buffer is prepared for
     * modification. */
    const long olen = RSTRING_LEN(str);
    str_modify_keep_cr(str);

    const long new_len = olen - suffixlen;
    STR_SET_LEN(str, new_len);
    TERM_FILL(RSTRING_PTR(str) + new_len, TERM_LEN(str));

    /* A 7-bit string stays 7-bit after truncation; any other cached
     * coderange is dropped. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
|
#downcase(*options) ⇒ String
Returns a string containing the downcased characters in self
:
s = 'Hello World!' # => "Hello World!"
s.downcase # => "hello world!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#downcase!, String#upcase, String#upcase!.
8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 |
# File 'string.c', line 8085
/*
 * String#downcase(*options).
 *
 * Returns a new downcased string, picking one of three strategies:
 *   - case_option_single_p() holds: copy the bytes and the encoding, then
 *     run downcase_single() on the copy;
 *   - ONIGENC_CASE_ASCII_ONLY is set: rb_str_ascii_casemap() into a fresh
 *     string of the same byte length;
 *   - otherwise: rb_str_casemap().
 */
static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_DOWNCASE);
    rb_encoding *enc = str_true_enc(str);
    VALUE ret;

    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        downcase_single(ret);
        return ret;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
        return ret;
    }

    return rb_str_casemap(str, &flags, enc);
}
|
#downcase!(*options) ⇒ self?
Downcases the characters in self
; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.downcase! # => "hello world!"
s # => "hello world!"
s.downcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#downcase, String#upcase, String#upcase!.
8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 |
# File 'string.c', line 8046
/*
 * String#downcase!(*options).
 *
 * In-place variant of downcase.  Returns +str+ when ONIGENC_CASE_MODIFIED
 * ends up set in the flags (set explicitly here when downcase_single()
 * reports a change), nil otherwise.
 */
static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_DOWNCASE);
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        /* Source and destination are the same string. */
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
|
#dump ⇒ String
Returns a printable version of self
, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:
"hello \n ''".dump # => "\"hello \\n ''\""
"\f\x00\xff\\\"".dump # => "\"\\f\\x00\\xFF\\\\\\\"\""
Related: String#undump (inverse of String#dump).
7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 |
# File 'string.c', line 7310
/*
 * Implements String#dump: builds a double-quoted, escaped rendering of str.
 *
 * Works in two passes over the bytes of str: the first pass computes the
 * exact output length, the second writes the escaped bytes into a freshly
 * allocated result string.
 *
 * Raises RuntimeError ("string size too big") when adding the next
 * escape length would push the total past LONG_MAX.
 */
VALUE
rb_str_dump(VALUE str)
{
int encidx = rb_enc_get_index(str);
rb_encoding *enc = rb_enc_from_index(encidx);
long len;
const char *p, *pend;
char *q, *qend;
VALUE result;
/* \u escapes are only produced when str is tagged with the UTF-8 encoding index. */
int u8 = (encidx == rb_utf8_encindex());
static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")";
len = 2; /* "" */
/* Non-ASCII-compatible encodings get the suffix appended, with "%s" replaced by the encoding name. */
if (!rb_enc_asciicompat(enc)) {
len += strlen(nonascii_suffix) - rb_strlen_lit("%s");
len += strlen(enc->name);
}
/* Pass 1: accumulate the escaped length of every byte/character. */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
while (p < pend) {
int clen;
unsigned char c = *p++;
switch (c) {
/* Characters written as a backslash plus one character. */
case '"': case '\\':
case '\n': case '\r':
case '\t': case '\f':
case '\013': case '\010': case '\007': case '\033':
clen = 2;
break;
/* '#' needs a backslash only when IS_EVSTR says so for what follows. */
case '#':
clen = IS_EVSTR(p, pend) ? 2 : 1;
break;
default:
if (ISPRINT(c)) {
clen = 1;
}
else {
if (u8 && c > 0x7F) { /* \u notation */
/* p was already advanced past c, hence p-1 for the character start. */
int n = rb_enc_precise_mbclen(p-1, pend, enc);
if (MBCLEN_CHARFOUND_P(n)) {
unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
if (cc <= 0xFFFF)
clen = 6; /* \uXXXX */
else if (cc <= 0xFFFFF)
clen = 9; /* \u{XXXXX} */
else
clen = 10; /* \u{XXXXXX} */
/* Skip the remaining bytes of this character. */
p += MBCLEN_CHARFOUND_LEN(n)-1;
break;
}
}
clen = 4; /* \xNN */
}
break;
}
/* Guard the running total against signed overflow before adding. */
if (clen > LONG_MAX - len) {
rb_raise(rb_eRuntimeError, "string size too big");
}
len += clen;
}
/* Pass 2: write the escaped form into a buffer of the computed length. */
result = rb_str_new(0, len);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
/* qend is one past len bytes; presumably this leaves room for snprintf's NUL -- TODO confirm. */
q = RSTRING_PTR(result); qend = q + len + 1;
*q++ = '"';
while (p < pend) {
unsigned char c = *p++;
if (c == '"' || c == '\\') {
*q++ = '\\';
*q++ = c;
}
else if (c == '#') {
if (IS_EVSTR(p, pend)) *q++ = '\\';
*q++ = '#';
}
else if (c == '\n') {
*q++ = '\\';
*q++ = 'n';
}
else if (c == '\r') {
*q++ = '\\';
*q++ = 'r';
}
else if (c == '\t') {
*q++ = '\\';
*q++ = 't';
}
else if (c == '\f') {
*q++ = '\\';
*q++ = 'f';
}
else if (c == '\013') {
*q++ = '\\';
*q++ = 'v';
}
else if (c == '\010') {
*q++ = '\\';
*q++ = 'b';
}
else if (c == '\007') {
*q++ = '\\';
*q++ = 'a';
}
else if (c == '\033') {
*q++ = '\\';
*q++ = 'e';
}
else if (ISPRINT(c)) {
*q++ = c;
}
else {
*q++ = '\\';
if (u8) {
/* NOTE(review): unlike pass 1 there is no c > 0x7F test here; n is the */
/* mbclen result minus one, so a one-byte character presumably gives n == 0 */
/* and falls through to the \xNN form -- confirm against MBCLEN_CHARFOUND_P. */
int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
p += n;
if (cc <= 0xFFFF)
snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */
else
snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */
/* Advance q by however many characters snprintf just wrote. */
q += strlen(q);
continue;
}
}
snprintf(q, qend-q, "x%02X", c);
/* 'x' plus two hex digits. */
q += 3;
}
}
*q++ = '"';
*q = '\0';
if (!rb_enc_asciicompat(enc)) {
snprintf(q, qend-q, nonascii_suffix, enc->name);
/* The result is then tagged with the ASCII-8BIT index instead of str's own. */
encidx = rb_ascii8bit_encindex();
}
/* result from dump is ASCII */
rb_enc_associate_index(result, encidx);
ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
return result;
}
|
#dup ⇒ Object
:nodoc:
1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 |
# File 'string.c', line 1922
/*
 * Implements String#dup.
 *
 * When BARE_STRING_P(str) holds (the expected case) the copy is made
 * by str_duplicate() using str's own class; any other receiver goes
 * through the generic rb_obj_dup().
 */
VALUE
rb_str_dup_m(VALUE str)
{
    if (!LIKELY(BARE_STRING_P(str))) {
        return rb_obj_dup(str);
    }
    return str_duplicate(rb_obj_class(str), str);
}
|
#each_byte {|byte| ... } ⇒ self #each_byte ⇒ Object
:include: doc/string/each_byte.rdoc
9659 9660 9661 9662 9663 9664 |
# File 'string.c', line 9659
/*
 * Implements String#each_byte.
 */
static VALUE
rb_str_each_byte(VALUE str)
{
/* Presumably returns a sized enumerator (size from rb_str_each_byte_size) when no block is given -- macro defined elsewhere, confirm. */
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
/* Second argument 0: no result array is supplied to the enumeration helper. */
return rb_str_enumerate_bytes(str, 0);
}
|
#each_char {|c| ... } ⇒ self #each_char ⇒ Object
:include: doc/string/each_char.rdoc
9728 9729 9730 9731 9732 9733 |
# File 'string.c', line 9728
/*
 * Implements String#each_char.
 */
static VALUE
rb_str_each_char(VALUE str)
{
/* Presumably returns a sized enumerator (size from rb_str_each_char_size) when no block is given -- macro defined elsewhere, confirm. */
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
/* Second argument 0: no result array is supplied to the enumeration helper. */
return rb_str_enumerate_chars(str, 0);
}
|
#each_codepoint {|integer| ... } ⇒ self #each_codepoint ⇒ Object
:include: doc/string/each_codepoint.rdoc
9788 9789 9790 9791 9792 9793 |
# File 'string.c', line 9788
/*
 * Implements String#each_codepoint.
 */
static VALUE
rb_str_each_codepoint(VALUE str)
{
/* The enumerator size callback is the character-count one (rb_str_each_char_size); */
/* the macro presumably returns that enumerator when no block is given -- confirm. */
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
/* Second argument 0: no result array is supplied to the enumeration helper. */
return rb_str_enumerate_codepoints(str, 0);
}
|
#each_grapheme_cluster {|gc| ... } ⇒ self #each_grapheme_cluster ⇒ Object
:include: doc/string/each_grapheme_cluster.rdoc
9958 9959 9960 9961 9962 9963 |
# File 'string.c', line 9958
/*
 * Implements String#each_grapheme_cluster.
 */
static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
/* Presumably returns a sized enumerator (size from rb_str_each_grapheme_cluster_size) when no block is given -- confirm. */
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
/* Second argument 0: no result array is supplied to the enumeration helper. */
return rb_str_enumerate_grapheme_clusters(str, 0);
}
|
#each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self #each_line(line_sep = $/, chomp: false) ⇒ Object
:include: doc/string/each_line.rdoc
9607 9608 9609 9610 9611 9612 |
# File 'string.c', line 9607
/*
 * Implements String#each_line(line_sep = $/, chomp: false).
 */
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
/* The call arguments are forwarded to the enumerator; the size callback is 0, i.e. none is provided. */
RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);
/* Last argument 0: no result array is supplied to the enumeration helper. */
return rb_str_enumerate_lines(argc, argv, str, 0);
}
|
#empty? ⇒ Boolean
Returns true
if the length of self
is zero, false
otherwise:
"hello".empty? # => false
" ".empty? # => false
"".empty? # => true
2383 2384 2385 2386 2387 |
# File 'string.c', line 2383
/*
 * Implements String#empty?: Qtrue when the byte length of str is zero,
 * Qfalse otherwise.
 */
static VALUE
rb_str_empty(VALUE str)
{
    const long byte_len = RSTRING_LEN(str);

    return RBOOL(byte_len == 0);
}
|
#encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String
:include: doc/string/encode.rdoc
2905 2906 2907 2908 2909 2910 2911 |
# File 'transcode.c', line 2905
/*
 * Implements String#encode.
 *
 * str_transcode() receives a pointer to a VALUE that starts out as str
 * and may be replaced by the transcoded string; encoded_dup() then
 * builds the return value from that result, the receiver, and the
 * encoding index reported by the transcoder.
 */
static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE converted = str;
    const int dst_idx = str_transcode(argc, argv, &converted);

    return encoded_dup(converted, str, dst_idx);
}
|
#encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self
Like #encode, but applies encoding changes to self
; returns self
.
2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 |
# File 'transcode.c', line 2874
/*
 * Implements String#encode!: like #encode, but applies the change to
 * str itself and returns str.
 *
 * A frozen receiver is rejected by rb_check_frozen() before any
 * transcoding is attempted.
 */
static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE converted = str;
    int dst_idx;

    rb_check_frozen(str);
    dst_idx = str_transcode(argc, argv, &converted);

    /* A negative index from the transcoder leaves str untouched. */
    if (dst_idx < 0) {
        return str;
    }

    if (converted != str) {
        /* A new string was produced: swap its contents into the receiver. */
        rb_str_shared_replace(str, converted);
        return str_encode_associate(str, dst_idx);
    }

    /* Same object came back: only the encoding tag needs updating. */
    rb_enc_associate_index(str, dst_idx);
    return str;
}
|
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
1162 1163 1164 1165 1166 1167 1168 1169 1170 |
# File 'encoding.c', line 1162
/*
 * Returns the Encoding object for obj's encoding index.
 *
 * Raises TypeError ("unknown encoding") when rb_enc_get_index()
 * reports a negative index.
 */
VALUE
rb_obj_encoding(VALUE obj)
{
    const int encidx = rb_enc_get_index(obj);

    if (encidx < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }

    /* Only the bits selected by ENC_INDEX_MASK are used for the lookup. */
    return rb_enc_from_encoding_index(encidx & ENC_INDEX_MASK);
}
|
#end_with?(*strings) ⇒ Boolean
:include: doc/string/end_with_p.rdoc
11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 |
# File 'string.c', line 11112
/*
 * Implements String#end_with?(*strings).
 *
 * Returns Qtrue as soon as one of the argc candidates in argv is a
 * suffix of str, and Qfalse when none is (including argc == 0).
 * An empty candidate always matches.
 */
static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    int idx = 0;

    while (idx < argc) {
        /* Work on a copy so StringValue() never writes back into argv. */
        VALUE suffix = argv[idx++];
        rb_encoding *enc;
        const char *head, *tail, *end;
        long str_len, suffix_len;

        StringValue(suffix);
        enc = rb_enc_check(str, suffix);

        suffix_len = RSTRING_LEN(suffix);
        if (suffix_len == 0) {
            return Qtrue;
        }

        str_len = RSTRING_LEN(str);
        if (str_len < suffix_len) {
            continue;
        }

        head = RSTRING_PTR(str);
        end = head + str_len;
        tail = end - suffix_len;

        /* The candidate must start on a character boundary and match byte for byte. */
        if (at_char_boundary(head, tail, end, enc) &&
            memcmp(tail, RSTRING_PTR(suffix), suffix_len) == 0) {
            return Qtrue;
        }
    }
    return Qfalse;
}
|
#eql?(object) ⇒ Boolean
Returns true
if object
has the same length and content
as self; false otherwise:
s = 'foo'
s.eql?('foo') # => true
s.eql?('food') # => false
s.eql?('FOO') # => false
Returns false if the two strings' encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false
4177 4178 4179 4180 4181 4182 4183 |
# File 'string.c', line 4177
/*
 * Implements String#eql?.
 *
 * Identical objects are equal without further checks; a non-String
 * str2 is never equal; everything else is decided by
 * rb_str_eql_internal().
 */
VALUE
rb_str_eql(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    return Qfalse;
}
|
#force_encoding(encoding) ⇒ self
:include: doc/string/force_encoding.rdoc
11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 |
# File 'string.c', line 11343
/*
 * Implements String#force_encoding(encoding): retags str with the
 * given encoding and returns str.
 *
 * str_modifiable() is called first, before the encoding argument is
 * even resolved.
 */
static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    str_modifiable(str);

    rb_encoding *target = rb_to_encoding(enc);
    int target_idx = rb_enc_to_index(target);

    /* Nothing to do when the string already carries the requested encoding. */
    if (ENCODING_GET(str) != target_idx) {
        rb_enc_associate_index(str, target_idx);

        /* A 7-bit coderange stays valid under any ASCII-compatible encoding;
         * in every other case the cached coderange is discarded. */
        int keep_cr = ENC_CODERANGE(str) == ENC_CODERANGE_7BIT &&
                      target && rb_enc_asciicompat(target);
        if (!keep_cr) {
            ENC_CODERANGE_CLEAR(str);
        }
    }
    return str;
}
|
#freeze ⇒ Object
:nodoc:
3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 |
# File 'string.c', line 3175
/*
 * Implements String#freeze.
 *
 * The STR_CHILLED flag is cleared first when present. An already
 * frozen string is returned as is; otherwise the string is resized to
 * its current length and then frozen via rb_obj_freeze().
 */
VALUE
rb_str_freeze(VALUE str)
{
    if (CHILLED_STRING_P(str)) {
        FL_UNSET_RAW(str, STR_CHILLED);
    }

    if (!OBJ_FROZEN(str)) {
        /* Resizing to the current length presumably trims spare capacity -- confirm. */
        rb_str_resize(str, RSTRING_LEN(str));
        return rb_obj_freeze(str);
    }
    return str;
}
|
#getbyte(index) ⇒ Integer?
Returns the byte at zero-based index
as an integer, or nil
if index
is out of range:
s = 'abcde' # => "abcde"
s.getbyte(0) # => 97
s.getbyte(-1) # => 101
s.getbyte(5) # => nil
Related: String#setbyte.
6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 |
# File 'string.c', line 6541
/*
 * Implements String#getbyte(index).
 *
 * A negative index counts from the end of the string. Returns the
 * byte at that position as a Fixnum in 0..255, or Qnil when the
 * position is outside the string.
 */
VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long offset = NUM2LONG(index);

    if (offset < 0) {
        offset += RSTRING_LEN(str);
    }

    if (offset >= 0 && offset < RSTRING_LEN(str)) {
        /* Go through unsigned char so high bytes do not come out negative. */
        return INT2FIX((unsigned char)RSTRING_PTR(str)[offset]);
    }
    return Qnil;
}
|
#grapheme_clusters ⇒ Object
:include: doc/string/grapheme_clusters.rdoc
9973 9974 9975 9976 9977 9978 |
# File 'string.c', line 9973
/*
 * Implements String#grapheme_clusters.
 */
static VALUE
rb_str_grapheme_clusters(VALUE str)
{
/* WANTARRAY is given the method name and rb_str_strlen(str); presumably it prepares the result array with that size hint -- confirm. */
VALUE ary = WANTARRAY("grapheme_clusters", rb_str_strlen(str));
/* Same helper as each_grapheme_cluster, but with ary passed as the second argument. */
return rb_str_enumerate_grapheme_clusters(str, ary);
}
|
#gsub(pattern, replacement) ⇒ Object #gsub(pattern) {|match| ... } ⇒ Object #gsub(pattern) ⇒ Object
Returns a copy of self
with all occurrences of the given pattern
replaced.
See Substitution Methods.
Returns an Enumerator if no replacement
and no block given.
Related: String#sub, String#sub!, String#gsub!.
6457 6458 6459 6460 6461 |
# File 'string.c', line 6457
/*
 * Implements String#gsub.
 *
 * Delegates to str_gsub() with its last argument set to 0
 * (rb_str_gsub_bang passes 1 instead).
 */
static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    VALUE result = str_gsub(argc, argv, str, 0);

    return result;
}
|
#gsub!(pattern, replacement) ⇒ self? #gsub!(pattern) {|match| ... } ⇒ self? #gsub!(pattern) ⇒ Object
Performs the specified substring replacement(s) on self
; returns self
if any replacement occurred, nil
otherwise.
See Substitution Methods.
Returns an Enumerator if no replacement
and no block given.
Related: String#sub, String#gsub, String#sub!.
6433 6434 6435 6436 6437 6438 |
# File 'string.c', line 6433
/*
 * Implements String#gsub!.
 *
 * str_modify_keep_cr() runs on the receiver first; the substitution
 * is then done by str_gsub() with its last argument set to 1
 * (rb_str_gsub passes 0 instead).
 */
static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE result;

    str_modify_keep_cr(str);
    result = str_gsub(argc, argv, str, 1);
    return result;
}
|
#hash ⇒ Integer
Returns the integer hash value for self
. The value is based on the length, content and encoding of self
.
Related: Object#hash.
4066 4067 4068 4069 4070 4071 |
# File 'string.c', line 4066
/*
 * Implements String#hash: converts the st_index_t value computed by
 * rb_str_hash() into a Ruby Integer with ST2FIX().
 */
static VALUE
rb_str_hash_m(VALUE str)
{
    return ST2FIX(rb_str_hash(str));
}
|
#hex ⇒ Integer
Interprets the leading substring of self
as a string of hexadecimal digits (with an optional sign and an optional 0x
) and returns the corresponding number; returns zero if there is no such leading substring:
'0x0a'.hex # => 10
'-1234'.hex # => -4660
'0'.hex # => 0
'non-numeric'.hex # => 0
Related: String#oct.
10604 10605 10606 10607 10608 |
# File 'string.c', line 10604
/*
 * Implements String#hex: parses str with rb_str_to_inum() in base 16.
 * The final FALSE argument is passed through unchanged; per the method
 * documentation an unparsable string yields zero rather than an error.
 */
static VALUE
rb_str_hex(VALUE str)
{
    const int base = 16;

    return rb_str_to_inum(str, base, FALSE);
}
|
#include?(other_string) ⇒ Boolean
Returns true
if self
contains other_string
, false
otherwise:
s = 'foo'
s.include?('f') # => true
s.include?('fo') # => true
s.include?('food') # => false
6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 |
# File 'string.c', line 6967
/*
 * Implements String#include?(other_string).
 *
 * arg is converted with StringValue() and searched for from offset 0;
 * the result is Qtrue unless rb_str_index() returns -1.
 */
VALUE
rb_str_include(VALUE str, VALUE arg)
{
    StringValue(arg);
    return RBOOL(rb_str_index(str, arg, 0) != -1);
}
|
#index(substring, offset = 0) ⇒ Integer? #index(regexp, offset = 0) ⇒ Integer?
:include: doc/string/index.rdoc
4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 |
# File 'string.c', line 4438
/*
 * Implements String#index(substring_or_regexp, offset = 0).
 *
 * Returns the position of the first match found from the starting
 * offset as an Integer, or Qnil when nothing matches or the offset is
 * out of range.
 */
static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
VALUE sub;
VALUE initpos;
rb_encoding *enc = STR_ENC_GET(str);
long pos;
/* "11": one mandatory argument (sub) and one optional argument (initpos). */
if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
long slen = str_strlen(str, enc); /* str's enc */
pos = NUM2LONG(initpos);
/* A negative offset is taken relative to slen. Give up when it is still */
/* negative afterwards, or when a non-negative offset is greater than slen. */
if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
/* For a Regexp search the backref is reset before reporting no match. */
if (RB_TYPE_P(sub, T_REGEXP)) {
rb_backref_set(Qnil);
}
return Qnil;
}
}
else {
pos = 0;
}
if (RB_TYPE_P(sub, T_REGEXP)) {
/* Presumably converts the character position into a byte offset for the regexp engine -- confirm. */
pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
enc, single_byte_optimizable(str));
if (rb_reg_search(sub, str, pos, 0) >= 0) {
VALUE match = rb_backref_get();
/* regs looks unused but is presumably read by the BEG() macro below; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
/* Presumably converts the match's byte offset back into a character index -- confirm. */
pos = rb_str_sublen(str, BEG(0));
return LONG2NUM(pos);
}
}
else {
StringValue(sub);
pos = rb_str_index(str, sub, pos);
if (pos >= 0) {
/* Same conversion as on the Regexp path before returning. */
pos = rb_str_sublen(str, pos);
return LONG2NUM(pos);
}
}
return Qnil;
}
|
#replace(other_string) ⇒ self
Replaces the contents of self
with the contents of other_string
:
s = 'foo' # => "foo"
s.replace('bar') # => "bar"
6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 |
# File 'string.c', line 6475
/*
 * Replaces the contents of str with those of str2 and returns the
 * result of str_replace().
 *
 * str_modifiable() runs before anything else, so it is applied even
 * when str and str2 are the same object (in which case str is
 * returned unchanged).
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        /* The old contents are dropped before the new ones are installed. */
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
|
#insert(index, other_string) ⇒ self
Inserts the given other_string
into self
; returns self
.
If the Integer index
is positive, inserts other_string
at offset index
:
'foo'.insert(1, 'bar') # => "fbaroo"
If the Integer index
is negative, counts backward from the end of self
and inserts other_string
at offset index+1
(that is, after self[index]
):
'foo'.insert(-2, 'bar') # => "fobaro"
5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 |
# File 'string.c', line 5950
/*
 * Implements String#insert(index, other_string); returns str.
 *
 * An index of -1 is a plain append. Any other negative index is
 * shifted by one so that the insertion lands after the addressed
 * character rather than before it.
 */
static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long at = NUM2LONG(idx);

    if (at == -1) {
        return rb_str_append(str, str2);
    }
    if (at < 0) {
        at++;
    }

    /* Replace a zero-length span at `at`, i.e. insert without removing anything. */
    rb_str_update(str, at, 0, str2);
    return str;
}
|
#inspect ⇒ String
Returns a printable version of self
, enclosed in double-quotes, and with special characters escaped:
s = "foo\tbar\tbaz\n"
s.inspect
# => "\"foo\\tbar\\tbaz\\n\""
7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 |
# File 'string.c', line 7196
/*
 * Implements String#inspect: returns a new double-quoted string in which
 * special and non-printable characters of str are escaped.
 *
 * The loop keeps a pending run [prev, p) of bytes that can be copied
 * verbatim; the run is flushed into result whenever an escape has to be
 * emitted, and once more after the loop.
 */
VALUE
rb_str_inspect(VALUE str)
{
int encidx = ENCODING_GET(str);
rb_encoding *enc = rb_enc_from_index(encidx);
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
VALUE result = rb_str_buf_new(0);
rb_encoding *resenc = rb_default_internal_encoding();
int unicode_p = rb_enc_unicode_p(enc);
int asciicompat = rb_enc_asciicompat(enc);
/* Result encoding: default internal, else default external, else US-ASCII when that one is not ASCII-compatible. */
if (resenc == NULL) resenc = rb_default_external_encoding();
if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
rb_enc_associate(result, resenc);
str_buf_cat2(result, "\"");
p = RSTRING_PTR(str); pend = RSTRING_END(str);
prev = p;
while (p < pend) {
unsigned int c, cc;
int n;
n = rb_enc_precise_mbclen(p, pend, enc);
/* No valid character here: flush the pending run and emit \xNN per byte, */
/* for the encoding's minimum character length clipped to the bytes left. */
if (!MBCLEN_CHARFOUND_P(n)) {
if (p > prev) str_buf_cat(result, prev, p - prev);
n = rb_enc_mbminlen(enc);
if (pend < p + n)
n = (int)(pend - p);
while (n--) {
/* NOTE(review): the bound is CHAR_ESC_LEN while buf holds CHAR_ESC_LEN + 1 bytes; */
/* "\xNN" needs 5 bytes with the NUL, so this is harmless if CHAR_ESC_LEN >= 5 -- confirm. */
snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
str_buf_cat(result, buf, strlen(buf));
prev = ++p;
}
continue;
}
n = MBCLEN_CHARFOUND_LEN(n);
c = rb_enc_mbc_to_codepoint(p, pend, enc);
p += n;
/* A backslash goes in front of '"', '\\', and of '#' when the next character is '$', '@' or '{'. */
if ((asciicompat || unicode_p) &&
(c == '"'|| c == '\\' ||
(c == '#' &&
p < pend &&
MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
(cc = rb_enc_codepoint(p,pend,enc),
(cc == '$' || cc == '@' || cc == '{'))))) {
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
str_buf_cat2(result, "\\");
/* Restart the pending run at the character itself so it is copied verbatim later; */
/* otherwise fall through and let the code below render the character. */
if (asciicompat || enc == resenc) {
prev = p - n;
continue;
}
}
/* Control characters that have a one-letter escape. */
switch (c) {
case '\n': cc = 'n'; break;
case '\r': cc = 'r'; break;
case '\t': cc = 't'; break;
case '\f': cc = 'f'; break;
case '\013': cc = 'v'; break;
case '\010': cc = 'b'; break;
case '\007': cc = 'a'; break;
case 033: cc = 'e'; break;
default: cc = 0; break;
}
if (cc) {
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
buf[0] = '\\';
buf[1] = (char)cc;
str_buf_cat(result, buf, 2);
prev = p;
continue;
}
/* The special casing of 0x85 (NEXT_LINE) here is because
* Oniguruma historically treats it as printable, but it
* doesn't match the print POSIX bracket class or character
* property in regexps.
*
* See Ruby Bug #16842 for details:
* https://bugs.ruby-lang.org/issues/16842
*/
/* Printable characters stay in the pending run and are copied unchanged. */
if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
(asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
continue;
}
else {
/* Everything else is flushed and written through the generic escaper. */
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
rb_str_buf_cat_escaped_char(result, c, unicode_p);
prev = p;
continue;
}
}
/* Flush whatever verbatim run is still pending, then close the quote. */
if (p > prev) str_buf_cat(result, prev, p - prev);
str_buf_cat2(result, "\"");
return result;
}
|
#intern ⇒ Object #to_sym ⇒ Object
Returns the Symbol
corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.
"Koala".intern #=> :Koala
s = 'cat'.to_sym #=> :cat
s == :cat #=> true
s = '@cat'.to_sym #=> :@cat
s == :@cat #=> true
This can also be used to create symbols that cannot be represented using the :xxx
notation.
'cat and dog'.to_sym #=> :"cat and dog"
877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 |
# File 'symbol.c', line 877
/*
 * Implements String#intern / String#to_sym: returns the Symbol for str,
 * creating it when the lookup finds none.
 *
 * The lookup and the creation both happen between GLOBAL_SYMBOLS_ENTER
 * and GLOBAL_SYMBOLS_LEAVE.
 */
VALUE
rb_str_intern(VALUE str)
{
VALUE sym;
/* The macro argument names the `symbols` handle used below; presumably the macro declares it -- confirm. */
GLOBAL_SYMBOLS_ENTER(symbols);
{
sym = lookup_str_sym_with_lock(symbols, str);
if (sym) {
// ok
}
else if (USE_SYMBOL_GC) {
rb_encoding *enc = rb_enc_get(str);
rb_encoding *ascii = rb_usascii_encoding();
/* When sym_check_asciionly() holds for a non-US-ASCII string, the copy is retagged as US-ASCII. */
if (enc != ascii && sym_check_asciionly(str, false)) {
str = rb_str_dup(str);
rb_enc_associate(str, ascii);
OBJ_FREEZE(str);
enc = ascii;
}
else {
/* Otherwise the frozen copy keeps the original encoding. */
str = rb_str_dup(str);
OBJ_FREEZE(str);
}
/* Either way the caller's string is never modified: only the frozen copy goes on. */
str = rb_fstring(str);
int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
/* A negative type from the name classifier is mapped to ID_JUNK. */
if (type < 0) type = ID_JUNK;
sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
}
else {
/* Without symbol GC the symbol is derived from an interned ID. */
ID id = intern_str(str, 0);
sym = ID2SYM(id);
}
}
GLOBAL_SYMBOLS_LEAVE();
return sym;
}
|
#length ⇒ Integer
:include: doc/string/length.rdoc
2351 2352 2353 2354 2355 |
# File 'string.c', line 2351
/*
 * Implements String#length: returns the value of str_strlen(str, NULL)
 * as a Ruby Integer. NULL is passed as the encoding argument, so the
 * helper is left to pick the encoding itself.
 */
VALUE
rb_str_length(VALUE str)
{
    const long count = str_strlen(str, NULL);

    return LONG2NUM(count);
}
|
#lines(line_sep = $/, chomp: false) ⇒ Object
Forms substrings (“lines”) of self
according to the given arguments (see String#each_line for details); returns the lines in an array.
9623 9624 9625 9626 9627 9628 |
# File 'string.c', line 9623
/*
 * Implements String#lines(line_sep = $/, chomp: false): returns the
 * lines of str in an array.
 */
static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
/* WANTARRAY is given the method name and 0; presumably it prepares the result array with no size hint -- confirm. */
VALUE ary = WANTARRAY("lines", 0);
/* Same helper as each_line, but with ary passed as the last argument. */
return rb_str_enumerate_lines(argc, argv, str, ary);
}
|
#ljust(size, pad_string = ' ') ⇒ Object
:include: doc/string/ljust.rdoc
Related: String#rjust, String#center.
10948 10949 10950 10951 10952 |
# File 'string.c', line 10948
/*
 * Implements String#ljust(size, pad_string = ' ').
 *
 * Delegates to rb_str_justify() with mode 'l' (rb_str_rjust passes
 * 'r' to the same helper).
 */
static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    const char mode = 'l';

    return rb_str_justify(argc, argv, str, mode);
}
|
#lstrip ⇒ Object
Returns a copy of self
with leading whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.lstrip # => "abc\u0000\t\n\v\f\r "
Related: String#rstrip, String#strip.
10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 |
# File 'string.c', line 10286
/*
 * Implements String#lstrip: returns a copy of str without its leading
 * whitespace.
 *
 * When lstrip_offset() reports nothing to strip, a plain duplicate is
 * returned; otherwise the result is the subsequence that starts after
 * the stripped prefix.
 */
static VALUE
rb_str_lstrip(VALUE str)
{
    char *head;
    long total, skip;

    RSTRING_GETMEM(str, head, total);
    skip = lstrip_offset(str, head, head + total, STR_ENC_GET(str));

    if (skip > 0) {
        return rb_str_subseq(str, skip, total - skip);
    }
    return str_duplicate(rb_cString, str);
}
|
#lstrip! ⇒ self?
Like String#lstrip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#rstrip!, String#strip!.
10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 |
# File 'string.c', line 10248
/*
 * Implements String#lstrip!: removes leading whitespace from str in
 * place.
 *
 * Returns str when something was removed, Qnil when lstrip_offset()
 * found nothing to strip.
 */
static VALUE
rb_str_lstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *base;
    long old_len, skip, new_len;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, base, old_len);

    skip = lstrip_offset(str, base, base + old_len, enc);
    if (skip <= 0) {
        return Qnil;
    }

    /* Slide the kept tail to the front; the regions overlap, hence memmove. */
    new_len = old_len - skip;
    memmove(base, base + skip, new_len);
    STR_SET_LEN(str, new_len);

    /* Re-terminate after the new end, using the encoding's minimum character width. */
    TERM_FILL(base + new_len, rb_enc_mbminlen(enc));
    return str;
}
|
#match(pattern, offset = 0) ⇒ MatchData? #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object
Returns a MatchData object (or nil
) based on self
and the given pattern
.
Note: also updates the Regexp global variables.
-
Computes
regexp
by converting pattern
(if not already a Regexp):
regexp = Regexp.new(pattern)
-
Computes
matchdata
, which will be either a MatchData object or nil
(see Regexp#match):
matchdata = regexp.match(self)
With no block given, returns the computed matchdata
:
'foo'.match('f') # => #<MatchData "f">
'foo'.match('o') # => #<MatchData "o">
'foo'.match('x') # => nil
If Integer argument offset
is given, the search begins at index offset
:
'foo'.match('f', 1) # => nil
'foo'.match('o', 1) # => #<MatchData "o">
With a block given, calls the block with the computed matchdata
and returns the block’s return value:
'foo'.match(/o/) {|matchdata| matchdata } # => #<MatchData "o">
'foo'.match(/x/) {|matchdata| matchdata } # => nil
'foo'.match(/f/, 1) {|matchdata| matchdata } # => nil
4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 |
# File 'string.c', line 4976
/*
 * Implements String#match(pattern, offset = 0).
 *
 * The pattern is converted with get_pat() and its "match" method is
 * called with str followed by any remaining arguments. With a block
 * and a non-nil result, the block's value is returned; otherwise the
 * result itself is returned.
 */
static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    VALUE pattern, matched;

    /* Only the lower bound is enforced here; extra arguments are simply forwarded. */
    if (argc < 1) {
        rb_check_arity(argc, 1, 2);
    }

    /* Reuse the first argv slot to carry str as the first call argument. */
    pattern = argv[0];
    argv[0] = str;
    matched = rb_funcallv(get_pat(pattern), rb_intern("match"), argc, argv);

    if (NIL_P(matched) || !rb_block_given_p()) {
        return matched;
    }
    return rb_yield(matched);
}
|
#match?(pattern, offset = 0) ⇒ Boolean
Returns true
or false
based on whether a match is found for self
and pattern
.
Note: does not update the Regexp global variables.
Computes regexp
by converting pattern
(if not already a Regexp).
regexp = Regexp.new(pattern)
Returns true
if self.match(regexp)
returns a MatchData object, false
otherwise:
'foo'.match?(/o/) # => true
'foo'.match?('o') # => true
'foo'.match?(/x/) # => false
If Integer argument offset
is given, the search begins at index offset
:
'foo'.match?('f', 1) # => false
'foo'.match?('o', 1) # => true
5015 5016 5017 5018 5019 5020 5021 5022 |
# File 'string.c', line 5015
/*
 * Implements String#match?(pattern, offset = 0).
 *
 * Accepts one or two arguments. The pattern is converted with
 * get_pat() and the answer comes from rb_reg_match_p(); the start
 * offset is 0 unless a second argument is given.
 */
static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    VALUE pattern;
    long start = 0;

    rb_check_arity(argc, 1, 2);
    pattern = get_pat(argv[0]);
    if (argc > 1) {
        start = NUM2LONG(argv[1]);
    }
    return rb_reg_match_p(pattern, str, start);
}
|
#succ ⇒ String
Returns the successor to self
. The successor is calculated by incrementing characters.
The first character to be incremented is the rightmost alphanumeric or, if there are no alphanumerics, the rightmost character:
'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'
The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:
'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"
The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:
'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"
The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:
s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"
The successor to an empty String
is a new empty String
:
''.succ # => ""
5267 5268 5269 5270 5271 5272 5273 5274 |
# File 'string.c', line 5267
/*
 * Implements String#succ: returns the successor of orig as a new
 * string, leaving orig untouched.
 *
 * A byte-for-byte copy is made first, encoding and coderange
 * information is carried over from orig, and str_succ() then does the
 * increment on the copy.
 */
VALUE
rb_str_succ(VALUE orig)
{
    VALUE copy = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));

    rb_enc_cr_str_copy_for_substr(copy, orig);
    return str_succ(copy);
}
|
#succ! ⇒ self
Equivalent to String#succ, but modifies self
in place; returns self
.
5371 5372 5373 5374 5375 5376 5377 |
# File 'string.c', line 5371
/*
 * Implements String#succ!: increments str in place and returns str.
 *
 * rb_str_modify() runs on str before the in-place increment. The
 * return value of str_succ() is deliberately ignored; the receiver is
 * what gets returned.
 */
static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    (void)str_succ(str);

    return str;
}
|
#oct ⇒ Integer
Interprets the leading substring of self
as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:
'123'.oct # => 83
'-377'.oct # => -255
'0377non-numeric'.oct # => 255
'non-numeric'.oct # => 0
If self
starts with 0
, radix indicators are honored; see Kernel#Integer.
Related: String#hex.
10631 10632 10633 10634 10635 |
# File 'string.c', line 10631
/*
 * Implements String#oct: parses str with rb_str_to_inum().
 *
 * The base is passed as -8 rather than 8; per the method
 * documentation, radix indicators after a leading 0 are honored, which
 * is presumably what the negative base selects -- confirm against
 * rb_str_to_inum().
 */
static VALUE
rb_str_oct(VALUE str)
{
    const int base = -8;

    return rb_str_to_inum(str, base, FALSE);
}
|
#ord ⇒ Integer
:include: doc/string/ord.rdoc
10776 10777 10778 10779 10780 10781 10782 10783 |
# File 'string.c', line 10776
/*
 * Implements String#ord: returns, as a Ruby Integer, the codepoint
 * that rb_enc_codepoint() decodes at the start of s in s's encoding.
 */
static VALUE
rb_str_ord(VALUE s)
{
    return UINT2NUM(rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s)));
}
|
#partition(string_or_regexp) ⇒ Array
:include: doc/string/partition.rdoc
10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 |
# File 'string.c', line 10995
/*
 * Implements String#partition(string_or_regexp).
 *
 * Returns a 3-element array [before, separator, after] built around the
 * first match of sep in str. When there is no match the array is
 * [copy of str, empty string, empty string].
 */
static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
long pos;
/* Normalizes sep; afterwards it is tested for being a Regexp, else used as a String. */
sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
if (rb_reg_search(sep, str, 0, 0) < 0) {
goto failed;
}
VALUE match = rb_backref_get();
/* regs looks unused but is presumably read by the BEG()/END() macros below; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
pos = BEG(0);
/* From here on sep is the matched text itself, so its length can be used below. */
sep = rb_str_subseq(str, pos, END(0) - pos);
}
else {
pos = rb_str_index(str, sep, 0);
if (pos < 0) goto failed;
}
/* pos is used directly as an offset for rb_str_subseq together with RSTRING_LEN. */
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
failed:
return rb_ary_new3(3, str_duplicate(rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
}
|
#prepend(*other_strings) ⇒ String
Prepends each string in other_strings
to self
and returns self
:
s = 'foo'
s.prepend('bar', 'baz') # => "barbazfoo"
s # => "barbazfoo"
Related: String#concat.
4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 |
# File 'string.c', line 4009
/*
 * Implements String#prepend(*other_strings): inserts the given strings,
 * in order, at the front of str and returns str.
 *
 * With no arguments only the modifiability check runs.
 */
static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    /* Single argument: insert it directly, no temporary needed. */
    if (argc == 1) {
        rb_str_update(str, 0L, 0L, argv[0]);
        return str;
    }

    if (argc > 1) {
        /* Join all arguments into one temporary that shares str's encoding,
         * then insert the whole thing at the front in one step. */
        VALUE joined = rb_str_tmp_new(0);
        int n;

        rb_enc_copy(joined, str);
        for (n = 0; n < argc; n++) {
            rb_str_append(joined, argv[n]);
        }
        rb_str_update(str, 0L, 0L, joined);
    }
    return str;
}
|
#replace(other_string) ⇒ self
Replaces the contents of self
with the contents of other_string
:
s = 'foo' # => "foo"
s.replace('bar') # => "bar"
6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 |
# File 'string.c', line 6475
/*
 * Implements String#replace(other_string): replaces the contents of
 * str with those of str2 and returns the result of str_replace().
 *
 * str_modifiable() runs before anything else, so it is applied even
 * when str and str2 are the same object (in which case str is
 * returned unchanged).
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        /* The old contents are dropped before the new ones are installed. */
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
|
#reverse ⇒ String
Returns a new string with the characters from self
in reverse order.
'stressed'.reverse # => "desserts"
6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 |
# File 'string.c', line 6864
/*
 * Implements String#reverse: returns a new string holding the characters
 * of str in reverse order.
 *
 * The destination is filled from its end towards its start while the
 * source is read front to back, one character at a time.
 */
static VALUE
rb_str_reverse(VALUE str)
{
rb_encoding *enc;
VALUE rev;
char *s, *e, *p;
int cr;
/* Strings of at most one byte are their own reverse: return a plain duplicate. */
if (RSTRING_LEN(str) <= 1) return str_duplicate(rb_cString, str);
enc = STR_ENC_GET(str);
rev = rb_str_new(0, RSTRING_LEN(str));
s = RSTRING_PTR(str); e = RSTRING_END(str);
/* p is the write cursor; it starts at the end of rev and moves backwards. */
p = RSTRING_END(rev);
cr = ENC_CODERANGE(str);
/* NOTE(review): this guard looks always true after the early return above -- confirm before simplifying. */
if (RSTRING_LEN(str) > 1) {
if (single_byte_optimizable(str)) {
/* One byte per character: plain byte-wise reversal. */
while (s < e) {
*--p = *s++;
}
}
else if (cr == ENC_CODERANGE_VALID) {
/* Coderange already known to be valid: use the fast character-length lookup. */
while (s < e) {
int clen = rb_enc_fast_mbclen(s, e, enc);
p -= clen;
memcpy(p, s, clen);
s += clen;
}
}
else {
/* Otherwise recompute cr on the way: start from 7BIT (ASCII-compatible) or VALID, */
/* and drop to UNKNOWN as soon as a multibyte character or a high-bit byte is seen. */
cr = rb_enc_asciicompat(enc) ?
ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
while (s < e) {
int clen = rb_enc_mbclen(s, e, enc);
if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
p -= clen;
memcpy(p, s, clen);
s += clen;
}
}
}
STR_SET_LEN(rev, RSTRING_LEN(str));
/* The result takes str's encoding and the coderange determined above. */
str_enc_copy_direct(rev, str);
ENC_CODERANGE_SET(rev, cr);
return rev;
}
|
#reverse! ⇒ self
Returns self
with its characters reversed:
s = 'stressed'
s.reverse! # => "desserts"
s # => "desserts"
6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 |
# File 'string.c', line 6927
/*
 * Implements String#reverse!: reverses the characters of str in place
 * and returns str.
 *
 * Strings of at most one byte only go through the modifiability
 * check. Single-byte-optimizable strings are reversed by swapping
 * bytes from both ends; all others are replaced by the result of
 * rb_str_reverse().
 */
static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) <= 1) {
        str_modify_keep_cr(str);
        return str;
    }

    if (!single_byte_optimizable(str)) {
        str_shared_replace(str, rb_str_reverse(str));
        return str;
    }

    str_modify_keep_cr(str);

    /* Fetch the pointers only after the string has been made modifiable. */
    char *lo = RSTRING_PTR(str);
    char *hi = RSTRING_END(str) - 1;

    for (; lo < hi; lo++, hi--) {
        char tmp = *lo;

        *lo = *hi;
        *hi = tmp;
    }
    return str;
}
|
#rindex(substring, offset = self.length) ⇒ Integer? #rindex(regexp, offset = self.length) ⇒ Integer?
Returns the Integer index of the last occurrence of the given substring
, or nil
if none found:
'foo'.rindex('f') # => 0
'foo'.rindex('o') # => 2
'foo'.rindex('oo') # => 1
'foo'.rindex('ooo') # => nil
Returns the Integer index of the last match for the given Regexp regexp
, or nil
if none found:
'foo'.rindex(/f/) # => 0
'foo'.rindex(/o/) # => 2
'foo'.rindex(/oo/) # => 1
'foo'.rindex(/ooo/) # => nil
The last match is the match that starts at the last possible position, not the last of the longest matches:
'foo'.rindex(/o+/) # => 2
$~ #=> #<MatchData "o">
To get the last longest match, combine the pattern with a negative lookbehind:
'foo'.rindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">
Or use String#index with a negative lookahead:
'foo'.index(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">
Integer argument offset
, if given and non-negative, specifies the maximum starting position in the string to end the search:
'foo'.rindex('o', 0) # => nil
'foo'.rindex('o', 1) # => 1
'foo'.rindex('o', 2) # => 2
'foo'.rindex('o', 3) # => 2
If offset
is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset
:
'foo'.rindex('o', -1) # => 2
'foo'.rindex('o', -2) # => 1
'foo'.rindex('o', -3) # => nil
'foo'.rindex('o', -4) # => nil
Related: String#index.
4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 |
# File 'string.c', line 4719
/*
 * Implements String#rindex(substring_or_regexp, offset = self.length).
 *
 * Returns the position of the last match that starts at or before the
 * given offset as an Integer, or Qnil when nothing matches or a negative
 * offset reaches before the start of the string.
 */
static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
VALUE sub;
VALUE initpos;
rb_encoding *enc = STR_ENC_GET(str);
long pos, len = str_strlen(str, enc); /* str's enc */
/* "11": one mandatory argument (sub) and one optional argument (initpos). */
if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
pos = NUM2LONG(initpos);
/* A negative offset is taken relative to len; give up when it is still negative afterwards. */
if (pos < 0 && (pos += len) < 0) {
/* For a Regexp search the backref is reset before reporting no match. */
if (RB_TYPE_P(sub, T_REGEXP)) {
rb_backref_set(Qnil);
}
return Qnil;
}
/* Unlike a too-small offset, a too-large one is clamped to len rather than rejected. */
if (pos > len) pos = len;
}
else {
/* Default: start searching from the end of the string. */
pos = len;
}
if (RB_TYPE_P(sub, T_REGEXP)) {
/* enc = rb_enc_check(str, sub); */
/* Presumably converts the character position into a byte offset for the regexp engine -- confirm. */
pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
enc, single_byte_optimizable(str));
/* Last argument 1 here, where rb_str_index_m passes 0: selects the reverse search. */
if (rb_reg_search(sub, str, pos, 1) >= 0) {
VALUE match = rb_backref_get();
/* regs looks unused but is presumably read by the BEG() macro below; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
/* Presumably converts the match's byte offset back into a character index -- confirm. */
pos = rb_str_sublen(str, BEG(0));
return LONG2NUM(pos);
}
}
else {
StringValue(sub);
pos = rb_str_rindex(str, sub, pos);
if (pos >= 0) {
/* Same conversion as on the Regexp path before returning. */
pos = rb_str_sublen(str, pos);
return LONG2NUM(pos);
}
}
return Qnil;
}
|
#rjust(size, pad_string = ' ') ⇒ Object
:include: doc/string/rjust.rdoc
Related: String#ljust, String#center.
10964 10965 10966 10967 10968 |
# File 'string.c', line 10964
/*
 * Backs String#rjust: forwards the arguments unchanged to
 * rb_str_justify() with the 'r' mode flag and returns its result.
 */
static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    const char mode = 'r';

    return rb_str_justify(argc, argv, str, mode);
}
|
#rpartition(sep) ⇒ Array
:include: doc/string/rpartition.rdoc
11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 |
# File 'string.c', line 11032
/*
 * Backs String#rpartition(sep).
 *
 * Searches backward from the end of +str+ for +sep+ (a Regexp, or a value
 * converted by get_pat_quoted()).  On success returns a 3-element array
 * [before, separator, after]; for a Regexp the separator element is the
 * matched text.  When nothing matches, returns two new empty strings
 * followed by a duplicate of +str+.
 */
static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
    long pos = RSTRING_LEN(str);
    int found = 0;

    sep = get_pat_quoted(sep, 0);
    if (!RB_TYPE_P(sep, T_REGEXP)) {
        /* String separator: rb_str_rindex() takes the rb_str_sublen() value. */
        pos = rb_str_sublen(str, pos);
        pos = rb_str_rindex(str, sep, pos);
        found = (pos >= 0);
    }
    else if (rb_reg_search(sep, str, pos, 1) >= 0) {
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
        found = 1;
    }

    if (!found) {
        return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(rb_cString, str));
    }

    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                       sep,
                       rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                     RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
}
|
#rstrip ⇒ Object
Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.rstrip # => "\u0000\t\n\v\f\r abc"
Related: String#lstrip, String#strip.
10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 |
# File 'string.c', line 10373
/*
 * Backs String#rstrip.
 *
 * Asks rstrip_offset() how many trailing bytes to drop.  Returns a
 * subsequence of +str+ without them, or a duplicate of +str+ when the
 * offset is not positive.
 */
static VALUE
rb_str_rstrip(VALUE str)
{
    char *head;
    long bytes, trailing;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, head, bytes);
    trailing = rstrip_offset(str, head, head + bytes, enc);
    if (trailing > 0) {
        return rb_str_subseq(str, 0, bytes - trailing);
    }
    return str_duplicate(rb_cString, str);
}
|
#rstrip! ⇒ self?
Like String#rstrip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#lstrip!, String#strip!.
10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 |
# File 'string.c', line 10336
/*
 * Backs String#rstrip!.
 *
 * Shortens +str+ in place by the trailing byte count reported by
 * rstrip_offset() and rewrites the terminator.  Returns +str+ when bytes
 * were removed, Qnil otherwise.
 */
static VALUE
rb_str_rstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *head;
    long bytes, trailing, kept;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, head, bytes);
    trailing = rstrip_offset(str, head, head + bytes, enc);
    if (trailing <= 0) {
        return Qnil;
    }

    kept = bytes - trailing;
    STR_SET_LEN(str, kept);
    TERM_FILL(head + kept, rb_enc_mbminlen(enc));
    return str;
}
|
#scan(string_or_regexp) ⇒ Array #scan(string_or_regexp) {|matches| ... } ⇒ self
Matches a pattern against self
; the pattern is:
-
string_or_regexp
itself, if it is a Regexp. -
Regexp.quote(string_or_regexp)
, if string_or_regexp
is a string.
Iterates through self
, generating a collection of matching results:
-
If the pattern contains no groups, each result is the matched string,
$&
. -
If the pattern contains groups, each result is an array containing one entry per group.
With no block given, returns an array of the results:
s = 'cruel world'
s.scan(/\w+/) # => ["cruel", "world"]
s.scan(/.../) # => ["cru", "el ", "wor"]
s.scan(/(...)/) # => [["cru"], ["el "], ["wor"]]
s.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]]
With a block given, calls the block with each result; returns self
:
s.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
s.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"
Output:
<<cruel>> <<world>>
rceu lowlr
10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 |
# File 'string.c', line 10552
/*
 * Backs String#scan(pattern).
 *
 * Calls scan_once() repeatedly, advancing a cursor through +str+.
 * Without a block the results are collected into a new array, which is
 * returned.  With a block each result is yielded, str_mod_check() is run
 * after every yield, and +str+ itself is returned.
 *
 * After the loop the pattern is searched once more from the offset at
 * which the final successful scan began (presumably so the backref
 * reflects the last match -- confirm).  In the no-block case with zero
 * matches the backref is set to nil instead.
 */
static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE item;
    long cursor = 0;
    long final_start = -1; /* cursor value before the last successful scan */
    long newest = 0;       /* cursor value after the last successful scan */
    char *orig_ptr = RSTRING_PTR(str);
    long orig_len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);

    if (rb_block_given_p()) {
        for (;;) {
            item = scan_once(str, pat, &cursor, 1);
            if (NIL_P(item)) break;
            final_start = newest;
            newest = cursor;
            rb_yield(item);
            str_mod_check(str, orig_ptr, orig_len);
        }
        if (final_start >= 0) {
            rb_pat_search(pat, str, final_start, 1);
        }
        return str;
    }
    else {
        VALUE ary = rb_ary_new();

        for (;;) {
            item = scan_once(str, pat, &cursor, 0);
            if (NIL_P(item)) break;
            final_start = newest;
            newest = cursor;
            rb_ary_push(ary, item);
        }
        if (final_start >= 0) {
            rb_pat_search(pat, str, final_start, 1);
        }
        else {
            rb_backref_set(Qnil);
        }
        return ary;
    }
}
|
#scrub(replacement_string = default_replacement) ⇒ Object #scrub {|bytes| ... } ⇒ Object
:include: doc/string/scrub.rdoc
11775 11776 11777 11778 11779 11780 11781 |
# File 'string.c', line 11775
/*
 * Backs String#scrub.
 *
 * Passes the optional replacement (argv[0], or Qnil when no argument was
 * given) to rb_str_scrub().  Returns its result, or a duplicate of +str+
 * when rb_str_scrub() returns nil.
 */
static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = Qnil;
    VALUE scrubbed;

    if (argc) {
        rb_check_arity(argc, 0, 1);
        repl = argv[0];
    }
    scrubbed = rb_str_scrub(str, repl);
    if (NIL_P(scrubbed)) {
        return str_duplicate(rb_cString, str);
    }
    return scrubbed;
}
|
#scrub! ⇒ self #scrub!(replacement_string = default_replacement) ⇒ self #scrub! {|bytes| ... } ⇒ self
Like String#scrub, except that any replacements are made in self
.
11792 11793 11794 11795 11796 11797 11798 11799 |
# File 'string.c', line 11792
/*
 * Backs String#scrub!.
 *
 * Runs rb_str_scrub() with the optional replacement (argv[0], or Qnil)
 * and, when it returns a non-nil string, replaces the contents of +str+
 * with it.  Always returns +str+.
 */
static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = Qnil;
    VALUE scrubbed;

    if (argc) {
        rb_check_arity(argc, 0, 1);
        repl = argv[0];
    }
    scrubbed = rb_str_scrub(str, repl);
    if (!NIL_P(scrubbed)) {
        rb_str_replace(str, scrubbed);
    }
    return str;
}
|
#setbyte(index, integer) ⇒ Integer
Sets the byte at zero-based index
to integer
; returns integer
:
s = 'abcde' # => "abcde"
s.setbyte(0, 98) # => 98
s # => "bbcde"
Related: String#getbyte.
6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 |
# File 'string.c', line 6566
/*
 * Backs String#setbyte(index, integer).
 *
 * Overwrites one byte of +str+ with the low 8 bits of +value+ and returns
 * +value+ unchanged.
 *
 * index: converted with NUM2LONG(); a negative index has the byte length
 *        added to it.  Raises IndexError when outside [-len, len).
 * value: converted with rb_to_int() and masked with 0xff.
 *
 * After the write, the string's cached code range is updated or cleared
 * according to what the new byte did to the character containing it.
 */
VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
long pos = NUM2LONG(index);
long len = RSTRING_LEN(str);
char *ptr, *head, *left = 0;
rb_encoding *enc;
int cr = ENC_CODERANGE_UNKNOWN, width, nlen;
/* Bounds check happens before the negative index is normalized. */
if (pos < -len || len <= pos)
rb_raise(rb_eIndexError, "index %ld out of string", pos);
if (pos < 0)
pos += len;
/* Reduce the value to a single byte: to_int, then & 0xff. */
VALUE v = rb_to_int(value);
VALUE w = rb_int_and(v, INT2FIX(0xff));
char byte = (char)(NUM2INT(w) & 0xFF);
/* Make sure the buffer is not shared before writing into it. */
if (!str_independent(str))
str_make_independent(str);
enc = STR_ENC_GET(str);
/* Pointers are taken only after the string has been made independent. */
head = RSTRING_PTR(str);
ptr = &head[pos];
/* Embedded strings skip the analysis and just clear the code range below. */
if (!STR_EMBED_P(str)) {
cr = ENC_CODERANGE(str);
switch (cr) {
case ENC_CODERANGE_7BIT:
left = ptr;
*ptr = byte;
/* An ASCII byte leaves the cached 7BIT code range untouched. */
if (ISASCII(byte)) goto end;
/* Otherwise re-measure the character at the written position. */
nlen = rb_enc_precise_mbclen(left, head+len, enc);
if (!MBCLEN_CHARFOUND_P(nlen))
ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
else
ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
goto end;
case ENC_CODERANGE_VALID:
/* Find the head of the character containing ptr; measure before and after. */
left = rb_enc_left_char_head(head, ptr, head+len, enc);
width = rb_enc_precise_mbclen(left, head+len, enc);
*ptr = byte;
nlen = rb_enc_precise_mbclen(left, head+len, enc);
if (!MBCLEN_CHARFOUND_P(nlen))
ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
/* Width changed or an ASCII byte was written: code range must be recomputed. */
else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
ENC_CODERANGE_CLEAR(str);
/* Otherwise the cached VALID code range is kept. */
goto end;
}
}
/* Reached for embedded strings and for any code range not handled above. */
ENC_CODERANGE_CLEAR(str);
*ptr = byte;
end:
/* Returns the caller's original value, not the masked byte. */
return value;
}
|
#length ⇒ Integer
:include: doc/string/length.rdoc
2351 2352 2353 2354 2355 |
# File 'string.c', line 2351
/*
 * Backs String#length: returns the value of str_strlen(str, NULL)
 * converted to an Integer.
 */
VALUE
rb_str_length(VALUE str)
{
    long count = str_strlen(str, NULL);

    return LONG2NUM(count);
}
|
#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil
Returns the substring of self
specified by the arguments. See examples at String Slices.
5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 |
# File 'string.c', line 5680
/*
 * Backs String#[] (and String#slice).
 *
 * Two arguments: (regexp, capture) goes to rb_str_subpat(); any other
 * pair goes to rb_str_substr_two_fixnums().
 * Otherwise the argument count is validated with rb_check_arity() and the
 * single argument is handed to rb_str_aref().
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 2) {
        /* argc == 2 is excluded, so only argc == 1 passes this check. */
        rb_check_arity(argc, 1, 2);
        return rb_str_aref(str, argv[0]);
    }

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        return rb_str_subpat(str, argv[0], argv[1]);
    }
    return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
}
|
#slice!(index) ⇒ nil #slice!(start, length) ⇒ nil #slice!(range) ⇒ nil #slice!(regexp, capture = 0) ⇒ nil #slice!(substring) ⇒ nil
Removes and returns the substring of self
specified by the arguments. See String Slices.
A few examples:
string = "This is a string"
string.slice!(2) #=> "i"
string.slice!(3..6) #=> " is "
string.slice!(/s.*t/) #=> "sa st"
string.slice!("r") #=> "r"
string #=> "Thing"
5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 |
# File 'string.c', line 5988
/*
 * Backs String#slice!(...).
 *
 * Removes the selected portion of +str+ in place and returns it as a new
 * string, or returns Qnil when nothing is selected.
 *
 * The first argument selects the dispatch:
 *   - Regexp (with optional capture in argv[1])
 *   - two arguments: start and length
 *   - Fixnum index
 *   - String (removes its first occurrence)
 *   - anything else: tried as a range via rb_range_beg_len(), falling
 *     back to NUM2LONG() when that returns Qfalse.
 *
 * All paths converge on the labels at the bottom:
 *   num_index: resolve (beg, len) to a byte position with rb_str_subpos()
 *   subseq:    copy the bytes [beg, beg+len) into +result+
 *   squash:    close the gap in +str+
 */
static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
VALUE result = Qnil;
VALUE indx;
long beg, len = 1;
char *p;
rb_check_arity(argc, 1, 2);
str_modify_keep_cr(str);
indx = argv[0];
if (RB_TYPE_P(indx, T_REGEXP)) {
if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
VALUE match = rb_backref_get();
struct re_registers *regs = RMATCH_REGS(match);
int nth = 0;
/* A negative capture number has the register count added to it. */
if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
if ((nth += regs->num_regs) <= 0) return Qnil;
}
else if (nth >= regs->num_regs) return Qnil;
/* beg/len are taken straight from the match registers; skip rb_str_subpos(). */
beg = BEG(nth);
len = END(nth) - beg;
goto subseq;
}
else if (argc == 2) {
beg = NUM2LONG(indx);
len = NUM2LONG(argv[1]);
goto num_index;
}
else if (FIXNUM_P(indx)) {
/* Single index: len keeps its initial value of 1. */
beg = FIX2LONG(indx);
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
if (!len) return Qnil;
beg = p - RSTRING_PTR(str);
goto subseq;
}
else if (RB_TYPE_P(indx, T_STRING)) {
beg = rb_str_index(str, indx, 0);
if (beg == -1) return Qnil;
len = RSTRING_LEN(indx);
/* The result is a duplicate of the argument, not a copy out of str. */
result = str_duplicate(rb_cString, indx);
goto squash;
}
else {
switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
case Qnil:
return Qnil;
case Qfalse:
/* Qfalse from rb_range_beg_len(): handle indx like a single integer index. */
beg = NUM2LONG(indx);
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
if (!len) return Qnil;
beg = p - RSTRING_PTR(str);
goto subseq;
default:
goto num_index;
}
}
num_index:
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
beg = p - RSTRING_PTR(str);
subseq:
result = rb_str_new(RSTRING_PTR(str)+beg, len);
rb_enc_cr_str_copy_for_substr(result, str);
squash:
if (len > 0) {
if (beg == 0) {
/* Removing a prefix is delegated to rb_str_drop_bytes(). */
rb_str_drop_bytes(str, len);
}
else {
char *sptr = RSTRING_PTR(str);
long slen = RSTRING_LEN(str);
if (beg + len > slen) /* pathological check */
len = slen - beg;
/* Shift the tail down over the removed bytes, then fix length and terminator. */
memmove(sptr + beg,
sptr + beg + len,
slen - (beg + len));
slen -= len;
STR_SET_LEN(str, slen);
TERM_FILL(&sptr[slen], TERM_LEN(str));
}
}
return result;
}
|
#split(field_sep = $;, limit = 0) ⇒ Array #split(field_sep = $;, limit = 0) {|substring| ... } ⇒ self
:include: doc/string/split.rdoc
9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 |
# File 'string.c', line 9181
/*
 * Backs String#split(field_sep = $;, limit = 0).
 *
 * Splits +str+ into substrings, emitting each one through the local
 * SPLIT_STR() macro (a wrapper around split_string()).  Without a block
 * the substrings are collected and the array is returned; with a block
 * +str+ itself is returned.
 *
 * One of four strategies is chosen from the separator:
 *   SPLIT_TYPE_AWK    - split on runs of whitespace
 *   SPLIT_TYPE_STRING - split on a literal byte sequence
 *   SPLIT_TYPE_CHARS  - split into individual characters
 *   SPLIT_TYPE_REGEXP - split on regexp matches (captures are emitted too)
 *
 * Raises TypeError when no separator is given and $; is neither nil, a
 * String, nor a Regexp.
 */
static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
VALUE spat;
VALUE limit;
split_type_t split_type;
long beg, end, i = 0, empty_count = -1;
int lim = 0;
VALUE result, tmp;
/*
 * result doubles as a mode flag: the tests below (`result ? ...`,
 * `if (!result)`) treat Qfalse as "block given" and Qnil as "collect
 * into an array".
 */
result = rb_block_given_p() ? Qfalse : Qnil;
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
lim = NUM2INT(limit);
if (lim <= 0) limit = Qnil;
else if (lim == 1) {
/* limit == 1: the whole string is the only field (none if empty). */
if (RSTRING_LEN(str) == 0)
return result ? rb_ary_new2(0) : str;
tmp = str_duplicate(rb_cString, str);
if (!result) {
rb_yield(tmp);
return str;
}
return rb_ary_new3(1, tmp);
}
i = 1;
}
/* NOTE(review): empty_count is passed to split_string(); 0 presumably turns on
 * trailing-empty-field handling when no limit was given -- confirm there. */
if (NIL_P(limit) && !lim) empty_count = 0;
enc = STR_ENC_GET(str);
split_type = SPLIT_TYPE_REGEXP;
/* Resolve the separator: explicit argument, else $; (rb_fs), else awk mode. */
if (!NIL_P(spat)) {
spat = get_pat_quoted(spat, 0);
}
else if (NIL_P(spat = rb_fs)) {
split_type = SPLIT_TYPE_AWK;
}
else if (!(spat = rb_fs_check(spat))) {
rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
}
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "$; is set to non-nil value");
}
if (split_type != SPLIT_TYPE_AWK) {
switch (BUILTIN_TYPE(spat)) {
case T_REGEXP:
rb_reg_options(spat); /* check if uninitialized */
tmp = RREGEXP_SRC(spat);
split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
/* If the regexp source classifies as AWK, split on that source as a literal string instead. */
if (split_type == SPLIT_TYPE_AWK) {
spat = tmp;
split_type = SPLIT_TYPE_STRING;
}
break;
case T_STRING:
mustnot_broken(spat);
split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
break;
default:
UNREACHABLE_RETURN(Qnil);
}
}
/* Emit one field (byte offset, byte length) and carry empty_count forward. */
#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
beg = 0;
char *ptr = RSTRING_PTR(str);
char *eptr = RSTRING_END(str);
if (split_type == SPLIT_TYPE_AWK) {
/*
 * Whitespace splitting.  `skip` is 1 while between fields; beg/end are
 * byte offsets of the current field relative to bptr.
 */
char *bptr = ptr;
int skip = 1;
unsigned int c;
if (result) result = rb_ary_new();
end = beg;
if (is_ascii_string(str)) {
/* Byte-at-a-time scan for ASCII strings. */
while (ptr < eptr) {
c = (unsigned char)*ptr++;
if (skip) {
if (ascii_isspace(c)) {
beg = ptr - bptr;
}
else {
end = ptr - bptr;
skip = 0;
/* Limit reached: stop here; the remainder is emitted after the branches. */
if (!NIL_P(limit) && lim <= i) break;
}
}
else if (ascii_isspace(c)) {
SPLIT_STR(beg, end-beg);
skip = 1;
beg = ptr - bptr;
if (!NIL_P(limit)) ++i;
}
else {
end = ptr - bptr;
}
}
}
else {
/* Same state machine, decoding one codepoint at a time. */
while (ptr < eptr) {
int n;
c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
ptr += n;
if (skip) {
if (rb_isspace(c)) {
beg = ptr - bptr;
}
else {
end = ptr - bptr;
skip = 0;
if (!NIL_P(limit) && lim <= i) break;
}
}
else if (rb_isspace(c)) {
SPLIT_STR(beg, end-beg);
skip = 1;
beg = ptr - bptr;
if (!NIL_P(limit)) ++i;
}
else {
end = ptr - bptr;
}
}
}
}
else if (split_type == SPLIT_TYPE_STRING) {
/* Literal separator: repeated rb_memsearch() over the remaining bytes. */
char *str_start = ptr;
char *substr_start = ptr;
char *sptr = RSTRING_PTR(spat);
long slen = RSTRING_LEN(spat);
if (result) result = rb_ary_new();
mustnot_broken(str);
enc = rb_enc_check(str, spat);
while (ptr < eptr &&
(end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
/* Check we are at the start of a char */
char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
if (t != ptr + end) {
/* Hit is not on a character boundary: resume searching from the next character head. */
ptr = t;
continue;
}
SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
ptr += end + slen;
substr_start = ptr;
if (!NIL_P(limit) && lim <= ++i) break;
}
beg = ptr - str_start;
}
else if (split_type == SPLIT_TYPE_CHARS) {
/* One field per character, sized by rb_enc_precise_mbclen(). */
char *str_start = ptr;
int n;
if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
mustnot_broken(str);
enc = rb_enc_get(str);
while (ptr < eptr &&
(n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
SPLIT_STR(ptr - str_start, n);
ptr += n;
if (!NIL_P(limit) && lim <= ++i) break;
}
beg = ptr - str_start;
}
else {
/*
 * Regexp separator.  `start` is where the next search begins, `beg` is
 * where the current field begins.  last_null records that the previous
 * iteration saw an empty match at `start`.
 */
if (result) result = rb_ary_new();
long len = RSTRING_LEN(str);
long start = beg;
long idx;
int last_null = 0;
struct re_registers *regs;
VALUE match = 0;
/* The loop's step expression un-busies the match and restores it as the backref. */
for (; rb_reg_search(spat, str, start, 0) >= 0;
(match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
match = rb_backref_get();
/* With a block (result is Qfalse) the match is marked busy while fields are emitted. */
if (!result) rb_match_busy(match);
regs = RMATCH_REGS(match);
end = BEG(0);
if (start == end && BEG(0) == END(0)) {
/* Empty match exactly at the search start. */
if (!ptr) {
SPLIT_STR(0, 0);
break;
}
else if (last_null == 1) {
/* Second empty match in a row: emit a single character as the field. */
SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
beg = start;
}
else {
/* First empty match: advance one character and search again. */
if (start == len)
start++;
else
start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
last_null = 1;
continue;
}
}
else {
SPLIT_STR(beg, end-beg);
beg = start = END(0);
}
last_null = 0;
/* Capture groups that participated in the match are emitted as fields too. */
for (idx=1; idx < regs->num_regs; idx++) {
if (BEG(idx) == -1) continue;
SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
}
if (!NIL_P(limit) && lim <= ++i) break;
}
if (match) rb_match_unbusy(match);
}
/* Emit whatever is left after the last separator (subject to the limit rules). */
if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
SPLIT_STR(beg, RSTRING_LEN(str)-beg);
}
return result ? result : str;
}
|
#squeeze(*selectors) ⇒ Object
Returns a copy of self
with characters specified by selectors
“squeezed” (see Multiple Character Selectors):
“Squeezed” means that each multiple-character run of a selected character is squeezed down to a single character; with no arguments given, squeezes all characters:
"yellow moon".squeeze #=> "yelow mon"
" now is the".squeeze(" ") #=> " now is the"
"putters shoot balls".squeeze("m-z") #=> "puters shot balls"
8941 8942 8943 8944 8945 8946 8947 |
# File 'string.c', line 8941
static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
str = str_duplicate(rb_cString, str);
rb_str_squeeze_bang(argc, argv, str);
return str;
}
|
#squeeze!(*selectors) ⇒ self?
Like String#squeeze, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 |
# File 'string.c', line 8848
static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
char squeez[TR_TABLE_SIZE];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0;
unsigned char *s, *send, *t;
int i, modify = 0;
int ascompat, singlebyte = single_byte_optimizable(str);
unsigned int save;
if (argc == 0) {
enc = STR_ENC_GET(str);
}
else {
for (i=0; i<argc; i++) {
VALUE s = argv[i];
StringValue(s);
enc = rb_enc_check(str, s);
if (singlebyte && !single_byte_optimizable(s))
singlebyte = 0;
tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
}
}
str_modify_keep_cr(str);
s = t = (unsigned char *)RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
send = (unsigned char *)RSTRING_END(str);
save = -1;
ascompat = rb_enc_asciicompat(enc);
if (singlebyte) {
while (s < send) {
unsigned int c = *s++;
if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
}
}
else {
while (s < send) {
unsigned int c;
int clen;
if (ascompat && (c = *s) < 0x80) {
if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
s++;
}
else {
c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
if (t != s) rb_enc_mbcput(c, t, enc);
save = c;
t += clen;
}
s += clen;
}
}
}
TERM_FILL((char *)t, TERM_LEN(str));
if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
modify = 1;
}
if (modify) return str;
return Qnil;
}
|
#start_with?(*string_or_regexp) ⇒ Boolean
:include: doc/string/start_with_p.rdoc
11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 |
# File 'string.c', line 11072
static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
int i;
for (i=0; i<argc; i++) {
VALUE tmp = argv[i];
if (RB_TYPE_P(tmp, T_REGEXP)) {
if (rb_reg_start_with_p(tmp, str))
return Qtrue;
}
else {
const char *p, *s, *e;
long slen, tlen;
rb_encoding *enc;
StringValue(tmp);
enc = rb_enc_check(str, tmp);
if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
if ((slen = RSTRING_LEN(str)) < tlen) continue;
p = RSTRING_PTR(str);
e = p + slen;
s = p + tlen;
if (!at_char_right_boundary(p, s, e, enc))
continue;
if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
return Qtrue;
}
}
return Qfalse;
}
|
#strip ⇒ Object
Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.strip # => "abc"
Related: String#lstrip, String#rstrip.
10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 |
# File 'string.c', line 10441
static VALUE
rb_str_strip(VALUE str)
{
char *start;
long olen, loffset, roffset;
rb_encoding *enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
loffset = lstrip_offset(str, start, start+olen, enc);
roffset = rstrip_offset(str, start+loffset, start+olen, enc);
if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str);
return rb_str_subseq(str, loffset, olen-loffset-roffset);
}
|
#strip! ⇒ self?
Like String#strip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#lstrip!, String#rstrip!.
10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 |
# File 'string.c', line 10399
static VALUE
rb_str_strip_bang(VALUE str)
{
char *start;
long olen, loffset, roffset;
rb_encoding *enc;
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
loffset = lstrip_offset(str, start, start+olen, enc);
roffset = rstrip_offset(str, start+loffset, start+olen, enc);
if (loffset > 0 || roffset > 0) {
long len = olen-roffset;
if (loffset > 0) {
len -= loffset;
memmove(start, start + loffset, len);
}
STR_SET_LEN(str, len);
TERM_FILL(start+len, rb_enc_mbminlen(enc));
return str;
}
return Qnil;
}
|
#sub(pattern, replacement) ⇒ Object #sub(pattern) {|match| ... } ⇒ Object
Returns a copy of self
with only the first occurrence (not all occurrences) of the given pattern
replaced.
See Substitution Methods.
Related: String#sub!, String#gsub, String#gsub!.
6284 6285 6286 6287 6288 6289 6290 |
# File 'string.c', line 6284
static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
str = str_duplicate(rb_cString, str);
rb_str_sub_bang(argc, argv, str);
return str;
}
|
#sub!(pattern, replacement) ⇒ self? #sub!(pattern) {|match| ... } ⇒ self?
Replaces the first occurrence (not all occurrences) of the given pattern
on self
; returns self
if a replacement occurred, nil
otherwise.
See Substitution Methods.
Related: String#sub, String#gsub, String#gsub!.
6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 |
# File 'string.c', line 6159
static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
VALUE pat, repl, hash = Qnil;
int iter = 0;
long plen;
int min_arity = rb_block_given_p() ? 1 : 2;
long beg;
rb_check_arity(argc, min_arity, 2);
if (argc == 1) {
iter = 1;
}
else {
repl = argv[1];
hash = rb_check_hash_type(argv[1]);
if (NIL_P(hash)) {
StringValue(repl);
}
}
pat = get_pat_quoted(argv[0], 1);
str_modifiable(str);
beg = rb_pat_search(pat, str, 0, 1);
if (beg >= 0) {
rb_encoding *enc;
int cr = ENC_CODERANGE(str);
long beg0, end0;
VALUE match, match0 = Qnil;
struct re_registers *regs;
char *p, *rp;
long len, rlen;
match = rb_backref_get();
regs = RMATCH_REGS(match);
if (RB_TYPE_P(pat, T_STRING)) {
beg0 = beg;
end0 = beg0 + RSTRING_LEN(pat);
match0 = pat;
}
else {
beg0 = BEG(0);
end0 = END(0);
if (iter) match0 = rb_reg_nth_match(0, match);
}
if (iter || !NIL_P(hash)) {
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (iter) {
repl = rb_obj_as_string(rb_yield(match0));
}
else {
repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
repl = rb_obj_as_string(repl);
}
str_mod_check(str, p, len);
rb_check_frozen(str);
}
else {
repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
}
enc = rb_enc_compatible(str, repl);
if (!enc) {
rb_encoding *str_enc = STR_ENC_GET(str);
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_inspect_name(str_enc),
rb_enc_inspect_name(STR_ENC_GET(repl)));
}
enc = STR_ENC_GET(repl);
}
rb_str_modify(str);
rb_enc_associate(str, enc);
if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
int cr2 = ENC_CODERANGE(repl);
if (cr2 == ENC_CODERANGE_BROKEN ||
(cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
cr = ENC_CODERANGE_UNKNOWN;
else
cr = cr2;
}
plen = end0 - beg0;
rlen = RSTRING_LEN(repl);
len = RSTRING_LEN(str);
if (rlen > plen) {
RESIZE_CAPA(str, len + rlen - plen);
}
p = RSTRING_PTR(str);
if (rlen != plen) {
memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
}
rp = RSTRING_PTR(repl);
memmove(p + beg0, rp, rlen);
len += rlen - plen;
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
ENC_CODERANGE_SET(str, cr);
RB_GC_GUARD(match);
return str;
}
return Qnil;
}
|
#succ ⇒ String
Returns the successor to self
. The successor is calculated by incrementing characters.
The first character to be incremented is the rightmost alphanumeric or, if there are no alphanumerics, the rightmost character:
'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'
The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:
'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"
The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:
'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"
The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:
s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"
The successor to an empty String
is a new empty String
:
''.succ # => ""
5267 5268 5269 5270 5271 5272 5273 5274 |
# File 'string.c', line 5267
/*
 * Backs String#succ.
 *
 * Builds a new string from the bytes of +orig+, copies encoding and code
 * range information over with rb_enc_cr_str_copy_for_substr(), and
 * returns the result of str_succ() on that copy.
 */
VALUE
rb_str_succ(VALUE orig)
{
    VALUE copy = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));

    rb_enc_cr_str_copy_for_substr(copy, orig);
    return str_succ(copy);
}
|
#succ! ⇒ self
Equivalent to String#succ, but modifies self
in place; returns self
.
5371 5372 5373 5374 5375 5376 5377 |
# File 'string.c', line 5371
static VALUE
rb_str_succ_bang(VALUE str)
{
rb_str_modify(str);
str_succ(str);
return str;
}
|
#sum(n = 16) ⇒ Integer
:include: doc/string/sum.rdoc
10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 |
# File 'string.c', line 10792
static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
int bits = 16;
char *ptr, *p, *pend;
long len;
VALUE sum = INT2FIX(0);
unsigned long sum0 = 0;
if (rb_check_arity(argc, 0, 1) && (bits = NUM2INT(argv[0])) < 0) {
bits = 0;
}
ptr = p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
pend = p + len;
while (p < pend) {
if (FIXNUM_MAX - UCHAR_MAX < sum0) {
sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
str_mod_check(str, ptr, len);
sum0 = 0;
}
sum0 += (unsigned char)*p;
p++;
}
if (bits == 0) {
if (sum0) {
sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
}
}
else {
if (sum == INT2FIX(0)) {
if (bits < (int)sizeof(long)*CHAR_BIT) {
sum0 &= (((unsigned long)1)<<bits)-1;
}
sum = LONG2FIX(sum0);
}
else {
VALUE mod;
if (sum0) {
sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
}
mod = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
mod = rb_funcall(mod, '-', 1, INT2FIX(1));
sum = rb_funcall(sum, '&', 1, mod);
}
}
return sum;
}
|
#swapcase(*options) ⇒ String
Returns a string containing the characters in self
, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:
s = 'Hello World!' # => "Hello World!"
s.swapcase # => "hELLO wORLD!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#swapcase!.
8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 |
# File 'string.c', line 8247
static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
VALUE ret;
flags = check_case_options(argc, argv, flags);
enc = str_true_enc(str);
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str_duplicate(rb_cString, str);
if (flags&ONIGENC_CASE_ASCII_ONLY) {
ret = rb_str_new(0, RSTRING_LEN(str));
rb_str_ascii_casemap(str, ret, &flags, enc);
}
else {
ret = rb_str_casemap(str, &flags, enc);
}
return ret;
}
|
#swapcase!(*options) ⇒ self?
Upcases each lowercase character in self
; downcases each uppercase character; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.swapcase! # => "hELLO wORLD!"
s # => "hELLO wORLD!"
''.swapcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#swapcase.
8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 |
# File 'string.c', line 8210
static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
flags = check_case_options(argc, argv, flags);
str_modify_keep_cr(str);
enc = str_true_enc(str);
if (flags&ONIGENC_CASE_ASCII_ONLY)
rb_str_ascii_casemap(str, str, &flags, enc);
else
str_shared_replace(str, rb_str_casemap(str, &flags, enc));
if (ONIGENC_CASE_MODIFIED&flags) return str;
return Qnil;
}
|
#to_c ⇒ Object
Returns self
interpreted as a Complex object; leading whitespace and trailing garbage are ignored:
'9'.to_c # => (9+0i)
'2.5'.to_c # => (2.5+0i)
'2.5/1'.to_c # => ((5/2)+0i)
'-3/2'.to_c # => ((-3/2)+0i)
'-i'.to_c # => (0-1i)
'45i'.to_c # => (0+45i)
'3-4i'.to_c # => (3-4i)
'-4e2-4e-2i'.to_c # => (-400.0-0.04i)
'-0.0-0.0i'.to_c # => (-0.0-0.0i)
'1/2+3/4i'.to_c # => ((1/2)+(3/4)*i)
'1.0@0'.to_c # => (1+0.0i)
"1.0@#{Math::PI/2}".to_c # => (0.0+1i)
"1.0@#{Math::PI}".to_c # => (-1+0.0i)
Returns Complex zero if the string cannot be converted:
'ruby'.to_c # => (0+0i)
See Kernel#Complex.
2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 |
# File 'complex.c', line 2269
static VALUE
string_to_c(VALUE self)
{
VALUE num;
rb_must_asciicompat(self);
(void)parse_comp(rb_str_fill_terminator(self, 1), FALSE, &num);
return num;
}
|
#to_f ⇒ Float
Returns the result of interpreting leading characters in self
as a Float:
'3.14159'.to_f # => 3.14159
'1.234e-2'.to_f # => 0.01234
Characters past a leading valid number are ignored:
'3.14 (pi to two places)'.to_f # => 3.14
Returns zero if there is no leading valid number:
'abcdef'.to_f # => 0.0
7042 7043 7044 7045 7046 |
# File 'string.c', line 7042
/*
 * Implementation of String#to_f.
 *
 * Converts +str+ to a C double via rb_str_to_dbl (second argument FALSE)
 * and wraps the result in a Ruby Float.
 */
static VALUE
rb_str_to_f(VALUE str)
{
    const double value = rb_str_to_dbl(str, FALSE);

    return DBL2NUM(value);
}
|
#to_i(base = 10) ⇒ Integer
Returns the result of interpreting leading characters in self
as an integer in the given base
(which must be in (0, 2..36)):
'123456'.to_i # => 123456
'123def'.to_i(16) # => 1195503
With base
zero, self
may contain leading characters to specify the actual base:
'123def'.to_i(0) # => 123
'0123def'.to_i(0) # => 83
'0b123def'.to_i(0) # => 1
'0o123def'.to_i(0) # => 83
'0d123def'.to_i(0) # => 123
'0x123def'.to_i(0) # => 1195503
Characters past a leading valid number (in the given base
) are ignored:
'12.345'.to_i # => 12
'12345'.to_i(2) # => 1
Returns zero if there is no leading valid number:
'abcdef'.to_i # => 0
'2'.to_i(2) # => 0
7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 |
# File 'string.c', line 7011
/*
 * Implementation of String#to_i.
 *
 * Accepts zero or one argument.  Without an argument the base is 10;
 * with one, it is converted with NUM2INT and rejected with ArgumentError
 * ("invalid radix") when negative.  The conversion itself is delegated
 * to rb_str_to_inum (third argument FALSE).
 */
static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    /* rb_check_arity's result is non-zero only when a base was supplied */
    if (rb_check_arity(argc, 0, 1) != 0) {
        base = NUM2INT(argv[0]);
        if (base < 0) {
            rb_raise(rb_eArgError, "invalid radix %d", base);
        }
    }

    return rb_str_to_inum(str, base, FALSE);
}
|
#to_r ⇒ Object
Returns the result of interpreting leading characters in str
as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str
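, zero is returned. Input that merely fails to parse never raises an exception; the implementation raises FloatDomainError only when the parser yields a non-zero Float instead of a rational.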
' 2 '.to_r #=> (2/1)
'300/2'.to_r #=> (150/1)
'-9.2'.to_r #=> (-46/5)
'-9.2e2'.to_r #=> (-920/1)
'1_234_567'.to_r #=> (1234567/1)
'21 June 09'.to_r #=> (21/1)
'21/06/09'.to_r #=> (7/2)
'BWV 1079'.to_r #=> (0/1)
NOTE: "0.3".to_r is not the same as 0.3.to_r. The former is equivalent to "3/10".to_r, but the latter is not.
"0.3".to_r == 3/10r #=> true
0.3.to_r == 3/10r #=> false
See also Kernel#Rational.
2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 |
# File 'rational.c', line 2529
/*
 * Implementation of String#to_r.
 *
 * Requires an ASCII-compatible encoding and parses the bytes of +self+
 * with parse_rat (third argument 0, fourth TRUE).
 *
 * Raises FloatDomainError ("Infinity") when parse_rat hands back a Float
 * that is not zero; otherwise returns the parsed value unchanged.
 */
static VALUE
string_to_r(VALUE self)
{
    const char *head;
    const char *tail;
    VALUE parsed;

    rb_must_asciicompat(self);

    head = RSTRING_PTR(self);
    tail = RSTRING_END(self);
    parsed = parse_rat(head, tail, 0, TRUE);

    if (RB_FLOAT_TYPE_P(parsed) && !FLOAT_ZERO_P(parsed)) {
        rb_raise(rb_eFloatDomainError, "Infinity");
    }

    return parsed;
}
|
#to_s ⇒ self, String
Returns self
if self
is a String
, or self
converted to a String
if self
is a subclass of String
.
7057 7058 7059 7060 7061 7062 7063 7064 |
# File 'string.c', line 7057
/*
 * Implementation of String#to_s.
 *
 * An object whose class is exactly String is returned as-is; an instance
 * of any other class is duplicated as a plain String.
 */
static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) == rb_cString) {
        return str;
    }

    return str_duplicate(rb_cString, str);
}
|
#to_s ⇒ self, String
Returns self
if self
is a String
, or self
converted to a String
if self
is a subclass of String
.
7057 7058 7059 7060 7061 7062 7063 7064 |
# File 'string.c', line 7057
/*
 * Returns +str+ itself when its class is exactly String; otherwise
 * returns a duplicate of it whose class is String.
 */
static VALUE
rb_str_to_s(VALUE str)
{
    const int is_plain_string = (rb_obj_class(str) == rb_cString);

    return is_plain_string ? str : str_duplicate(rb_cString, str);
}
|
#intern ⇒ Object #to_sym ⇒ Object
Returns the Symbol
corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.
"Koala".intern #=> :Koala
s = 'cat'.to_sym #=> :cat
s == :cat #=> true
s = '@cat'.to_sym #=> :@cat
s == :@cat #=> true
This can also be used to create symbols that cannot be represented using the :xxx
notation.
'cat and dog'.to_sym #=> :"cat and dog"
877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 |
# File 'symbol.c', line 877
/*
 * Implementation of String#intern / String#to_sym: returns the Symbol for
 * +str+, creating it when the lookup finds none.
 *
 * All work happens between GLOBAL_SYMBOLS_ENTER and GLOBAL_SYMBOLS_LEAVE.
 * NOTE(review): presumably this pair holds the global symbol-table lock
 * and binds +symbols+ -- confirm against the macro definitions.
 */
VALUE
rb_str_intern(VALUE str)
{
VALUE sym;
GLOBAL_SYMBOLS_ENTER(symbols);
{
/* Fast path: reuse a symbol already registered for this string. */
sym = lookup_str_sym_with_lock(symbols, str);
if (sym) {
// found an existing symbol; nothing more to do
}
else if (USE_SYMBOL_GC) {
rb_encoding *enc = rb_enc_get(str);
rb_encoding *ascii = rb_usascii_encoding();
/* Work on a frozen private copy so the caller's string is untouched.
 * When the string is not already US-ASCII but sym_check_asciionly()
 * accepts it, the copy is re-tagged as US-ASCII and that encoding is
 * the one passed to dsymbol_alloc() below. */
if (enc != ascii && sym_check_asciionly(str, false)) {
str = rb_str_dup(str);
rb_enc_associate(str, ascii);
OBJ_FREEZE(str);
enc = ascii;
}
else {
str = rb_str_dup(str);
OBJ_FREEZE(str);
}
/* Replace the copy with the value returned by rb_fstring(). */
str = rb_fstring(str);
/* A negative name type is mapped to ID_JUNK. */
int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
if (type < 0) type = ID_JUNK;
sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
}
else {
/* Without symbol GC: intern to an ID and wrap it as a Symbol. */
ID id = intern_str(str, 0);
sym = ID2SYM(id);
}
}
GLOBAL_SYMBOLS_LEAVE();
return sym;
}
|
#tr(selector, replacements) ⇒ Object
Returns a copy of self
with each character specified by string selector
translated to the corresponding character in string replacements
. The correspondence is positional:
-
Each occurrence of the first character specified by
selector
is translated to the first character in replacements
. -
Each occurrence of the second character specified by
selector
is translated to the second character in replacements
. -
And so on.
Example:
'hello'.tr('el', 'ip') #=> "hippo"
If replacements
is shorter than selector
, it is implicitly padded with its own last character:
'hello'.tr('aeiou', '-') # => "h-ll-"
'hello'.tr('aeiou', 'AA-') # => "hAll-"
Arguments selector
and replacements
must be valid character selectors (see Character Selectors), and may use any of its valid forms, including negation, ranges, and escaping:
# Negation.
'hello'.tr('^aeiou', '-') # => "-e--o"
# Ranges.
'ibm'.tr('b-z', 'a-z') # => "hal"
# Escapes.
'hel^lo'.tr('\^aeiou', '-') # => "h-l-l-" # Escaped leading caret.
'i-b-m'.tr('b\-z', 'a-z') # => "ibabm" # Escaped embedded hyphen.
'foo\\bar'.tr('ab\\', 'XYZ') # => "fooZYXr" # Escaped backslash.
8650 8651 8652 8653 8654 8655 8656 |
# File 'string.c', line 8650
/*
 * Implementation of String#tr.
 *
 * Duplicates +str+ as a plain String, runs tr_trans on the duplicate
 * (last argument 0) and returns the duplicate, regardless of what
 * tr_trans itself returned.
 */
static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    VALUE copy = str_duplicate(rb_cString, str);

    tr_trans(copy, src, repl, 0);

    return copy;
}
|
#tr!(selector, replacements) ⇒ self?
Like String#tr, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8604 8605 8606 8607 8608 |
# File 'string.c', line 8604
/*
 * Implementation of String#tr!.
 *
 * Runs tr_trans directly on +str+ (last argument 0) and returns its
 * result unchanged.
 */
static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE outcome = tr_trans(str, src, repl, 0);

    return outcome;
}
|
#tr_s(selector, replacements) ⇒ String
Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).
'hello'.tr_s('l', 'r') #=> "hero"
'hello'.tr_s('el', '-') #=> "h-o"
'hello'.tr_s('el', 'hx') #=> "hhxo"
Related: String#squeeze.
8982 8983 8984 8985 8986 8987 8988 |
# File 'string.c', line 8982
/*
 * Implementation of String#tr_s.
 *
 * Duplicates +str+ as a plain String, runs tr_trans on the duplicate
 * with last argument 1 (String#tr passes 0 there) and returns the
 * duplicate, regardless of what tr_trans itself returned.
 */
static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    VALUE copy = str_duplicate(rb_cString, str);

    tr_trans(copy, src, repl, 1);

    return copy;
}
|
#tr_s!(selector, replacements) ⇒ self?
Like String#tr_s, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
Related: String#squeeze!.
8960 8961 8962 8963 8964 |
# File 'string.c', line 8960
/*
 * Implementation of String#tr_s!.
 *
 * Runs tr_trans directly on +str+ with last argument 1 (String#tr!
 * passes 0 there) and returns its result unchanged.
 */
static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE outcome = tr_trans(str, src, repl, 1);

    return outcome;
}
|
#undump ⇒ String
Returns an unescaped version of self
:
s_orig = "\f\x00\xff\\\"" # => "\f\u0000\xFF\\\""
s_dumped = s_orig.dump # => "\"\\f\\x00\\xFF\\\\\\\"\""
s_undumped = s_dumped.undump # => "\f\u0000\xFF\\\""
s_undumped == s_orig # => true
Related: String#dump (inverse of String#undump).
7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 |
# File 'string.c', line 7605
/*
 * Implementation of String#undump: parses +str+ as text in the form
 * produced by String#dump and returns the unescaped string.
 *
 * Accepted shapes:
 *   "..."                               -- plain quoted body
 *   "...".force_encoding("NAME")        -- body plus an encoding name
 *   "...".dup.force_encoding("NAME")    -- as emitted by older versions
 *
 * The body is copied byte by byte into a new string that starts out with
 * the encoding of +str+; every backslash sequence is handed to
 * undump_after_backslash().  A force_encoding suffix re-associates the
 * result with the named encoding.
 *
 * Raises RuntimeError when +str+ contains a non-ASCII character or a null
 * byte, is not wrapped in one of the shapes above, is unterminated, ends
 * in a dangling backslash, combines a Unicode escape with force_encoding,
 * or names an unknown encoding.
 */
static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
        rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
        rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
        if (s >= s_end) {
            rb_raise(rb_eRuntimeError, "unterminated dumped string");
        }

        if (*s == '"') {
            /* epilogue */
            s++;
            if (s == s_end) {
                /* ascii compatible dumped string */
                break;
            }
            else {
                static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
                static const char dup_suffix[] = ".dup";
                const char *encname;
                int encidx;
                ptrdiff_t size;

                /* check separately for strings dumped by older versions */
                size = sizeof(dup_suffix) - 1;
                if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

                size = sizeof(force_encoding_suffix) - 1;
                if (s_end - s <= size) goto invalid_format;
                if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
                s += size;

                if (utf8) {
                    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
                }

                encname = s;
                s = memchr(s, '"', s_end-s);
                /* Reject a missing closing quote BEFORE measuring the name:
                 * subtracting from a null pointer is undefined behavior. */
                if (!s) goto invalid_format;
                size = s - encname;
                /* exactly `")` must remain after the encoding name */
                if (s_end - s != 2) goto invalid_format;
                if (s[0] != '"' || s[1] != ')') goto invalid_format;

                encidx = rb_enc_find_index2(encname, (long)size);
                if (encidx < 0) {
                    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
                }
                rb_enc_associate_index(undumped, encidx);
            }
            break;
        }

        if (*s == '\\') {
            s++;
            if (s >= s_end) {
                rb_raise(rb_eRuntimeError, "invalid escape");
            }
            undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
        }
        else {
            /* ordinary byte: copy it through unchanged */
            rb_str_cat(undumped, s++, 1);
        }
    }

    /* s and s_end point into str's buffer; keep str alive up to here */
    RB_GC_GUARD(str);

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}
|
#unicode_normalize(form = :nfc) ⇒ String
Returns a copy of self
with Unicode normalization applied.
Argument form
must be one of the following symbols (see Unicode normalization forms):
-
:nfc
: Canonical decomposition, followed by canonical composition. -
:nfd
: Canonical decomposition. -
:nfkc
: Compatibility decomposition, followed by canonical composition. -
:nfkd
: Compatibility decomposition.
The encoding of self
must be one of:
-
Encoding::UTF_8
-
Encoding::UTF_16BE
-
Encoding::UTF_16LE
-
Encoding::UTF_32BE
-
Encoding::UTF_32LE
-
Encoding::GB18030
-
Encoding::UCS_2BE
-
Encoding::UCS_4BE
Examples:
"a\u0300".unicode_normalize # => "à" (single code point U+00E0)
"\u00E0".unicode_normalize(:nfd) # => "à" (U+0061 followed by combining U+0300)
Related: String#unicode_normalize!, String#unicode_normalized?.
11853 11854 11855 11856 11857 |
# File 'string.c', line 11853
/*
 * Implementation of String#unicode_normalize.
 *
 * Forwards the arguments and the receiver to unicode_normalize_common
 * with id_normalize and returns its result.
 */
static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return normalized;
}
|
#unicode_normalize!(form = :nfc) ⇒ self
Like String#unicode_normalize, except that the normalization is performed on self
.
Related: String#unicode_normalized?.
11869 11870 11871 11872 11873 |
# File 'string.c', line 11869
/*
 * Implementation of String#unicode_normalize!.
 *
 * Obtains the normalized form from unicode_normalize_common (with
 * id_normalize) and replaces the contents of +str+ with it via
 * rb_str_replace, whose result is returned.
 */
static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return rb_str_replace(str, normalized);
}
|
#unicode_normalized?(form = :nfc) ⇒ Boolean
Returns true
if self
is in the given form
of Unicode normalization, false
otherwise. The form
must be one of :nfc
, :nfd
, :nfkc
, or :nfkd
.
Examples:
"a\u0300".unicode_normalized? # => false
"a\u0300".unicode_normalized?(:nfd) # => true
"\u00E0".unicode_normalized? # => true
"\u00E0".unicode_normalized?(:nfd) # => false
Raises an exception if self
is not in a Unicode encoding:
s = "\xE0".force_encoding('ISO-8859-1')
s.unicode_normalized? # Raises Encoding::CompatibilityError.
Related: String#unicode_normalize, String#unicode_normalize!.
11898 11899 11900 11901 11902 |
# File 'string.c', line 11898
/*
 * Implementation of String#unicode_normalized?.
 *
 * Forwards the arguments and the receiver to unicode_normalize_common
 * with id_normalized_p and returns its result.
 */
static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    VALUE verdict = unicode_normalize_common(argc, argv, str, id_normalized_p);

    return verdict;
}
|
#upcase(*options) ⇒ String
Returns a string containing the upcased characters in self
:
s = 'Hello World!' # => "Hello World!"
s.upcase # => "HELLO WORLD!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#upcase!, String#downcase, String#downcase!.
7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 |
# File 'string.c', line 7983
/*
 * Implementation of String#upcase: returns a new, upcased string and
 * leaves +str+ untouched.
 *
 * Three strategies, chosen from the parsed options and the encoding:
 *   - case_option_single_p() true: copy the bytes and the encoding of
 *     +str+, then upcase the copy with upcase_single();
 *   - ONIGENC_CASE_ASCII_ONLY set: allocate a string of the same length
 *     and let rb_str_ascii_casemap() fill it from +str+;
 *   - otherwise: return what rb_str_casemap() builds.
 */
static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_UPCASE);
    rb_encoding *enc = str_true_enc(str);
    VALUE result;

    if (case_option_single_p(flags, enc, str)) {
        result = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(result, str);
        upcase_single(result);
        return result;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        result = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, result, &flags, enc);
        return result;
    }

    return rb_str_casemap(str, &flags, enc);
}
|
#upcase!(*options) ⇒ self?
Upcases the characters in self
; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.upcase! # => "HELLO WORLD!"
s # => "HELLO WORLD!"
s.upcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#upcase, String#downcase, String#downcase!.
7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 |
# File 'string.c', line 7944
/*
 * Implementation of String#upcase!: upcases +str+ in place.
 *
 * Uses upcase_single() when case_option_single_p() allows it, the
 * ASCII-only mapper when ONIGENC_CASE_ASCII_ONLY is set, and the general
 * case mapper otherwise.
 *
 * Returns +str+ when ONIGENC_CASE_MODIFIED ends up set in the flags,
 * Qnil otherwise.
 */
static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_UPCASE);
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        /* upcase_single() reports a change through its return value,
         * so the MODIFIED flag is recorded by hand here */
        if (upcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
|
#upto(other_string, exclusive = false) {|string| ... } ⇒ self #upto(other_string, exclusive = false) ⇒ Object
With a block given, calls the block with each String
value returned by successive calls to String#succ; the first value is self
, the next is self.succ
, and so on; the sequence terminates when value other_string
is reached; returns self
:
'a8'.upto('b6') {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5 b6
If argument exclusive
is given as a truthy object, the last value is omitted:
'a8'.upto('b6', true) {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5
If other_string
would not be reached, does not call the block:
'25'.upto('5') {|s| fail s }
'aa'.upto('a') {|s| fail s }
With no block given, returns a new Enumerator:
'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")>
5431 5432 5433 5434 5435 5436 5437 5438 5439 |
# File 'string.c', line 5431
/*
 * Implementation of String#upto.
 *
 * Takes one required argument (the end string) and one optional argument
 * (the exclusive flag).  RETURN_ENUMERATOR is invoked before iterating;
 * the iteration itself is delegated to rb_str_upto_each with str_upto_i
 * as the per-element callback and Qnil as its extra argument.
 */
static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE last;
    VALUE excl_arg;

    rb_scan_args(argc, argv, "11", &last, &excl_arg);
    RETURN_ENUMERATOR(beg, argc, argv);

    return rb_str_upto_each(beg, last, RTEST(excl_arg), str_upto_i, Qnil);
}
|
#valid_encoding? ⇒ Boolean
Returns true
if self
is encoded correctly, false
otherwise:
"\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
"\xc2".force_encoding("UTF-8").valid_encoding? # => false
"\x80".force_encoding("UTF-8").valid_encoding? # => false
11420 11421 11422 11423 11424 11425 11426 |
# File 'string.c', line 11420
/*
 * Implementation of String#valid_encoding?.
 *
 * Returns Qtrue unless the code range computed for +str+ is
 * ENC_CODERANGE_BROKEN, in which case it returns Qfalse.
 */
static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    const int coderange = rb_enc_str_coderange(str);
    const int is_broken = (coderange == ENC_CODERANGE_BROKEN);

    return RBOOL(!is_broken);
}
|