Class: String
Direct Known Subclasses
Class Method Summary collapse
-
.new(*args) ⇒ Object
:nodoc:.
-
.try_convert(object) ⇒ Object?
If object is a String object, returns object.
Instance Method Summary collapse
-
#%(object) ⇒ Object
Returns the result of formatting object into the format specification self (see Kernel#sprintf for formatting details).
-
#*(integer) ⇒ Object
Returns a new String containing integer copies of self.
-
#+(other_string) ⇒ Object
Returns a new String containing other_string concatenated to self.
-
#+ ⇒ self
Returns self if self is not frozen.
-
#-@ ⇒ Object
(also: #dedup)
Returns a frozen, possibly pre-existing copy of the string.
-
#<<(object) ⇒ String
Concatenates object to self and returns self.
-
#<=>(other_string) ⇒ -1, ...
Compares self and other_string, returning -1, 0, 1, or nil.
-
#==(str2) ⇒ Object
Returns true if object has the same length and content as self; false otherwise.
-
#===(str2) ⇒ Object
Returns true if object has the same length and content as self; false otherwise.
-
#=~(y) ⇒ Object
Returns the Integer index of the first substring that matches the given regexp, or nil if no match found.
-
#[](*args) ⇒ Object
Returns the substring of
self
specified by the arguments. -
#[]=(*args) ⇒ Object
Replaces all, some, or none of the contents of self; returns new_string.
-
#ascii_only? ⇒ Boolean
Returns true if self contains only ASCII characters, false otherwise.
-
#b ⇒ String
:include: doc/string/b.rdoc.
-
#byteindex(*args) ⇒ Object
Returns the Integer byte-based index of the first occurrence of the given substring, or nil if none found.
-
#byterindex(*args) ⇒ Object
Returns the Integer byte-based index of the last occurrence of the given substring, or nil if none found.
-
#bytes ⇒ Object
:include: doc/string/bytes.rdoc.
-
#bytesize ⇒ Integer
:include: doc/string/bytesize.rdoc.
-
#byteslice(*args) ⇒ Object
Returns a substring of self, or nil if the substring cannot be constructed.
-
#bytesplice(*args) ⇒ Object
Replaces some or all of the content of self with str, and returns self.
-
#capitalize(*options) ⇒ String
Returns a string containing the characters in
self
; the first character is upcased; the remaining characters are downcased:. -
#capitalize!(*options) ⇒ self?
Upcases the first character in self; downcases the remaining characters; returns self if any changes were made, nil otherwise.
-
#casecmp(other_string) ⇒ -1, ...
Compares self.downcase and other_string.downcase; returns -1, 0, 1, or nil.
-
#casecmp?(other_string) ⇒ true, ...
Returns true if self and other_string are equal after Unicode case folding, otherwise false.
-
#center(size, pad_string = ' ') ⇒ Object
:include: doc/string/center.rdoc.
-
#chars ⇒ Object
:include: doc/string/chars.rdoc.
-
#chomp(line_sep = $/) ⇒ Object
:include: doc/string/chomp.rdoc.
-
#chomp!(line_sep = $/) ⇒ self?
Like String#chomp, but modifies self in place; returns nil if no modification was made, self otherwise.
-
#chop ⇒ Object
:include: doc/string/chop.rdoc.
-
#chop! ⇒ self?
Like String#chop, but modifies self in place; returns nil if self is empty, self otherwise.
-
#chr ⇒ String
Returns a string containing the first character of
self
:. -
#clear ⇒ self
Removes the contents of
self
:. -
#codepoints ⇒ Object
:include: doc/string/codepoints.rdoc.
-
#concat(*objects) ⇒ String
Concatenates each object in objects to self and returns self.
-
#count(*selectors) ⇒ Integer
Returns the total number of characters in self that are specified by the given selectors (see Multiple Character Selectors).
-
#crypt(salt_str) ⇒ Object
Returns the string generated by calling the crypt(3) standard library function with str and salt_str, in this order, as its arguments.
-
#delete(*selectors) ⇒ Object
Returns a copy of self with characters specified by selectors removed (see Multiple Character Selectors).
-
#delete!(*selectors) ⇒ self?
Like String#delete, but modifies
self
in place. -
#delete_prefix(prefix) ⇒ Object
:include: doc/string/delete_prefix.rdoc.
-
#delete_prefix!(prefix) ⇒ self?
Like String#delete_prefix, except that
self
is modified in place. -
#delete_suffix(suffix) ⇒ Object
:include: doc/string/delete_suffix.rdoc.
-
#delete_suffix!(suffix) ⇒ self?
Like String#delete_suffix, except that
self
is modified in place. -
#downcase(*options) ⇒ String
Returns a string containing the downcased characters in
self
:. -
#downcase!(*options) ⇒ self?
Downcases the characters in
self
; returnsself
if any changes were made,nil
otherwise:. -
#dump ⇒ String
Returns a printable version of
self
, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:. -
#dup ⇒ Object
:nodoc:.
-
#each_byte ⇒ Object
:include: doc/string/each_byte.rdoc.
-
#each_char ⇒ Object
:include: doc/string/each_char.rdoc.
-
#each_codepoint ⇒ Object
:include: doc/string/each_codepoint.rdoc.
-
#each_grapheme_cluster ⇒ Object
:include: doc/string/each_grapheme_cluster.rdoc.
-
#each_line(*args) ⇒ Object
:include: doc/string/each_line.rdoc.
-
#empty? ⇒ Boolean
Returns true if the length of self is zero, false otherwise.
-
#encode(*args) ⇒ Object
:include: doc/string/encode.rdoc.
-
#encode!(*args) ⇒ Object
Like #encode, but applies encoding changes to self; returns self.
-
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
-
#end_with?(*strings) ⇒ Boolean
:include: doc/string/end_with_p.rdoc.
-
#eql?(object) ⇒ Boolean
Returns true if object has the same length and content as self; false otherwise.
-
#force_encoding(encoding) ⇒ self
:include: doc/string/force_encoding.rdoc.
-
#freeze ⇒ Object
:nodoc:.
-
#getbyte(index) ⇒ Integer?
Returns the byte at zero-based index as an integer, or nil if index is out of range.
-
#grapheme_clusters ⇒ Object
:include: doc/string/grapheme_clusters.rdoc.
-
#gsub(*args) ⇒ Object
Returns a copy of self with all occurrences of the given pattern replaced.
-
#gsub!(*args) ⇒ Object
Performs the specified substring replacement(s) on self; returns self if any replacement occurred, nil otherwise.
-
#hash ⇒ Integer
Returns the integer hash value for
self
. -
#hex ⇒ Integer
Interprets the leading substring of self as a string of hexadecimal digits (with an optional sign and an optional 0x) and returns the corresponding number; returns zero if there is no such leading substring.
-
#include?(other_string) ⇒ Boolean
Returns true if self contains other_string, false otherwise.
-
#index(*args) ⇒ Object
:include: doc/string/index.rdoc.
-
#new(string = '', **opts) ⇒ Object
constructor
:include: doc/string/new.rdoc.
-
#replace(other_string) ⇒ self
Replaces the contents of self with the contents of other_string.
-
#insert(index, other_string) ⇒ self
Inserts the given other_string into self; returns self.
-
#inspect ⇒ String
Returns a printable version of
self
, enclosed in double-quotes, and with special characters escaped:. -
#intern ⇒ Object
Returns the Symbol corresponding to str, creating the symbol if it did not previously exist.
-
#length ⇒ Integer
:include: doc/string/length.rdoc.
-
#lines(line_sep = $/, chomp: false) ⇒ Object
Forms substrings (“lines”) of
self
according to the given arguments (see String#each_line for details); returns the lines in an array. -
#ljust(size, pad_string = ' ') ⇒ Object
:include: doc/string/ljust.rdoc.
-
#lstrip ⇒ Object
Returns a copy of
self
with leading whitespace removed; see Whitespace in Strings:. -
#lstrip! ⇒ self?
Like String#lstrip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#match(*args) ⇒ Object
Returns a MatchData object (or nil) based on self and the given pattern.
-
#match?(pattern, offset = 0) ⇒ Boolean
Returns true or false based on whether a match is found for self and pattern.
-
#succ ⇒ String
Returns the successor to
self
. -
#succ! ⇒ self
Equivalent to String#succ, but modifies self in place; returns self.
-
#oct ⇒ Integer
Interprets the leading substring of
self
as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:. -
#ord ⇒ Integer
:include: doc/string/ord.rdoc.
-
#partition(string_or_regexp) ⇒ Array
:include: doc/string/partition.rdoc.
-
#prepend(*other_strings) ⇒ String
Prepends each string in other_strings to self and returns self.
-
#replace(other_string) ⇒ self
Replaces the contents of self with the contents of other_string.
-
#reverse ⇒ String
Returns a new string with the characters from
self
in reverse order. -
#reverse! ⇒ self
Returns
self
with its characters reversed:. -
#rindex(*args) ⇒ Object
Returns the Integer index of the last occurrence of the given substring, or nil if none found.
-
#rjust(size, pad_string = ' ') ⇒ Object
:include: doc/string/rjust.rdoc.
-
#rpartition(sep) ⇒ Array
:include: doc/string/rpartition.rdoc.
-
#rstrip ⇒ Object
Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:.
-
#rstrip! ⇒ self?
Like String#rstrip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#scan(pat) ⇒ Object
Matches a pattern against
self
; the pattern is:. -
#scrub(*args) ⇒ Object
:include: doc/string/scrub.rdoc.
-
#scrub!(*args) ⇒ Object
Like String#scrub, except that any replacements are made in
self
. -
#setbyte(index, integer) ⇒ Integer
Sets the byte at zero-based index to integer; returns integer.
-
#length ⇒ Integer
:include: doc/string/length.rdoc.
-
#slice(*args) ⇒ Object
Returns the substring of
self
specified by the arguments. -
#slice!(*args) ⇒ Object
Removes and returns the substring of
self
specified by the arguments. -
#split(*args) ⇒ Object
:include: doc/string/split.rdoc.
-
#squeeze(*selectors) ⇒ Object
Returns a copy of self with characters specified by selectors “squeezed” (see Multiple Character Selectors).
-
#squeeze!(*selectors) ⇒ self?
Like String#squeeze, but modifies
self
in place. -
#start_with?(*string_or_regexp) ⇒ Boolean
:include: doc/string/start_with_p.rdoc.
-
#strip ⇒ Object
Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:.
-
#strip! ⇒ self?
Like String#strip, except that any modifications are made in self; returns self if any modifications are made, nil otherwise.
-
#sub(*args) ⇒ Object
Returns a copy of self with only the first occurrence (not all occurrences) of the given pattern replaced.
-
#sub!(*args) ⇒ Object
Returns self with only the first occurrence (not all occurrences) of the given pattern replaced.
-
#succ ⇒ String
Returns the successor to
self
. -
#succ! ⇒ self
Equivalent to String#succ, but modifies self in place; returns self.
-
#sum(n = 16) ⇒ Integer
:include: doc/string/sum.rdoc.
-
#swapcase(*options) ⇒ String
Returns a string containing the characters in
self
, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:. -
#swapcase!(*options) ⇒ self?
Upcases each lowercase character in self; downcases each uppercase character; returns self if any changes were made, nil otherwise.
-
#to_c ⇒ Object
Returns
self
interpreted as a Complex object; leading whitespace and trailing garbage are ignored:. -
#to_f ⇒ Float
Returns the result of interpreting leading characters in
self
as a Float:. -
#to_i(base = 10) ⇒ Integer
Returns the result of interpreting leading characters in self as an integer in the given base (which must be in (0, 2..36)).
-
#to_r ⇒ Object
Returns the result of interpreting leading characters in
str
as a rational. -
#to_s ⇒ self, String
Returns self if self is a String, or self converted to a String if self is a subclass of String.
-
#to_s ⇒ self, String
Returns self if self is a String, or self converted to a String if self is a subclass of String.
-
#to_sym ⇒ Object
Returns the Symbol corresponding to str, creating the symbol if it did not previously exist.
-
#tr(selector, replacements) ⇒ Object
Returns a copy of self with each character specified by string selector translated to the corresponding character in string replacements.
-
#tr!(selector, replacements) ⇒ self?
Like String#tr, but modifies
self
in place. -
#tr_s(selector, replacements) ⇒ String
Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).
-
#tr_s!(selector, replacements) ⇒ self?
Like String#tr_s, but modifies
self
in place. -
#undump ⇒ String
Returns an unescaped version of
self
:. -
#unicode_normalize(form = :nfc) ⇒ String
Returns a copy of
self
with Unicode normalization applied. -
#unicode_normalize!(form = :nfc) ⇒ self
Like String#unicode_normalize, except that the normalization is performed on
self
. -
#unicode_normalized?(form = :nfc) ⇒ Boolean
Returns true if self is in the given form of Unicode normalization, false otherwise.
-
#upcase(*options) ⇒ String
Returns a string containing the upcased characters in
self
:. -
#upcase!(*options) ⇒ self?
Upcases the characters in self; returns self if any changes were made, nil otherwise.
-
#upto(*args) ⇒ Object
With a block given, calls the block with each String value returned by successive calls to String#succ; the first value is self, the next is self.succ, and so on; the sequence terminates when value other_string is reached; returns self.
-
#valid_encoding? ⇒ Boolean
Returns true if self is encoded correctly, false otherwise.
Methods included from Comparable
#<, #<=, #>, #>=, #between?, #clamp
Constructor Details
#new(string = '', **opts) ⇒ Object
:include: doc/string/new.rdoc
1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 |
# File 'string.c', line 1828
/*
 * Initializer behind String.new(string = '', **opts): fills the receiver
 * +str+ from an optional source string and the optional +encoding:+ and
 * +capacity:+ keywords.
 *
 * argc/argv - at most one positional argument (the source string) plus an
 *             optional keyword hash, as parsed by rb_scan_args("01:").
 * str       - the receiver being initialized.
 *
 * Returns +str+.
 */
static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
static ID keyword_ids[2];
VALUE orig, opt, venc, vcapa;
VALUE kwargs[2];
rb_encoding *enc = 0;
int n;
/* Look up the keyword IDs only once; the static array keeps them across calls. */
if (!keyword_ids[0]) {
keyword_ids[0] = rb_id_encoding();
CONST_ID(keyword_ids[1], "capacity");
}
/* n is compared against 1 below to tell whether a source string was passed. */
n = rb_scan_args(argc, argv, "01:", &orig, &opt);
if (!NIL_P(opt)) {
rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
venc = kwargs[0];
vcapa = kwargs[1];
/* encoding: is honored only when it was given and is not nil. */
if (!UNDEF_P(venc) && !NIL_P(venc)) {
enc = rb_to_encoding(venc);
}
/* capacity: given and not nil: force a heap buffer of at least that size. */
if (!UNDEF_P(vcapa) && !NIL_P(vcapa)) {
long capa = NUM2LONG(vcapa);
long len = 0;
/* Terminator width: the encoding's minimum character length, or 1 when no encoding was requested. */
int termlen = enc ? rb_enc_mbminlen(enc) : 1;
/* Clamp the requested capacity up to the minimum buffer size. */
if (capa < STR_BUF_MIN_SIZE) {
capa = STR_BUF_MIN_SIZE;
}
if (n == 1) {
StringValue(orig);
len = RSTRING_LEN(orig);
/* The buffer must be able to hold the whole source string. */
if (capa < len) {
capa = len;
}
/* Source is the receiver itself: clear n so the copy step below is skipped. */
if (orig == str) n = 0;
}
/* NOTE(review): presumably raises when str may not be modified (e.g. frozen) - helper not shown here. */
str_modifiable(str);
if (STR_EMBED_P(str) || FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* make noembed always */
/* Allocate a fresh buffer and carry over as many old bytes (terminator included) as fit. */
const size_t size = (size_t)capa + termlen;
const char *const old_ptr = RSTRING_PTR(str);
const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
char *new_ptr = ALLOC_N(char, size);
if (STR_EMBED_P(str)) RUBY_ASSERT(osize <= str_embed_capa(str));
memcpy(new_ptr, old_ptr, osize < size ? osize : size);
FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
RSTRING(str)->as.heap.ptr = new_ptr;
}
/* Neither embedded nor shared/no-free: resize the existing buffer only if its size differs. */
else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
(size_t)capa + termlen, STR_HEAP_SIZE(str));
}
/* Set the new length and write the terminator right after the contents. */
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
if (n == 1) {
/* Copy the source bytes, then its encoding/coderange via rb_enc_cr_str_exact_copy. */
memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
rb_enc_cr_str_exact_copy(str, orig);
}
/* Flag the string as non-embedded and record the capacity. */
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
}
/* No usable capacity: keyword: plain replace when a source string was given. */
else if (n == 1) {
rb_str_replace(str, orig);
}
/* Apply the requested encoding last and clear the coderange flag on str. */
if (enc) {
rb_enc_associate(str, enc);
ENC_CODERANGE_CLEAR(str);
}
}
/* No keywords at all: plain replace when a source string was given. */
else if (n == 1) {
rb_str_replace(str, orig);
}
return str;
}
|
Class Method Details
.new(*args) ⇒ Object
:nodoc:
1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 |
# File 'string.c', line 1906
/*
 * Class-level String.new. Calls on a subclass, and calls without keyword
 * arguments, are delegated to rb_class_new_instance_pass_kw. Otherwise the
 * string is built here from the optional source string and the +encoding:+
 * and +capacity:+ keywords.
 *
 * argc/argv - at most one positional argument plus an optional keyword hash.
 * klass     - the class String.new was called on.
 *
 * Returns the newly created string.
 */
static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
/* Only String itself takes the direct path below. */
if (klass != rb_cString) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
static ID keyword_ids[2];
VALUE orig, opt, encoding = Qnil, capacity = Qnil;
VALUE kwargs[2];
rb_encoding *enc = NULL;
int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
/* No keyword hash: nothing special to do, use the generic instantiation path. */
if (NIL_P(opt)) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
keyword_ids[0] = rb_id_encoding();
CONST_ID(keyword_ids[1], "capacity");
rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
encoding = kwargs[0];
capacity = kwargs[1];
/* Terminator width; overridden below once an encoding is known. */
int termlen = 1;
/* Normalize orig: a String when a positional argument was given, nil otherwise. */
if (n == 1) {
orig = StringValue(orig);
}
else {
orig = Qnil;
}
/* encoding: not given: fall back to the source string's encoding, if there is a source. */
if (UNDEF_P(encoding)) {
if (!NIL_P(orig)) {
encoding = rb_obj_encoding(orig);
}
}
if (!UNDEF_P(encoding)) {
enc = rb_to_encoding(encoding);
termlen = rb_enc_mbminlen(enc);
}
// If capacity was not given (undef), we're basically just duping `orig`.
if (UNDEF_P(capacity)) {
if (NIL_P(orig)) {
VALUE empty_str = str_new(klass, "", 0);
if (enc) {
rb_enc_associate(empty_str, enc);
}
return empty_str;
}
/* With orig present, encoding was either passed or taken from orig, so enc was assigned above. */
VALUE copy = str_duplicate(klass, orig);
rb_enc_associate(copy, enc);
ENC_CODERANGE_CLEAR(copy);
return copy;
}
long capa = 0;
capa = NUM2LONG(capacity);
/* Negative capacities are treated as zero. */
if (capa < 0) {
capa = 0;
}
/* Use the larger of the requested capacity and the source string's capacity. */
if (!NIL_P(orig)) {
long orig_capa = rb_str_capacity(orig);
if (orig_capa > capa) {
capa = orig_capa;
}
}
/* Allocate room for capa bytes, then reset the string to empty and terminate it. */
VALUE str = str_new0(klass, NULL, capa, termlen);
STR_SET_LEN(str, 0);
TERM_FILL(RSTRING_PTR(str), termlen);
if (enc) {
rb_enc_associate(str, enc);
}
/* Append the source contents to the (empty) new string. */
if (!NIL_P(orig)) {
rb_str_buf_append(str, orig);
}
return str;
}
|
.try_convert(object) ⇒ Object?
If object
is a String object, returns object
.
Otherwise if object
responds to :to_str
, calls object.to_str
and returns the result.
Returns nil
if object
does not respond to :to_str
.
Raises an exception unless object.to_str
returns a String object.
2704 2705 2706 2707 2708 |
# File 'string.c', line 2704
/*
 * String.try_convert(object): hands the argument to rb_check_string_type
 * and returns whatever that yields. The receiver (+dummy+) is unused.
 */
static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    VALUE converted = rb_check_string_type(str);

    return converted;
}
|
Instance Method Details
#%(object) ⇒ Object
Returns the result of formatting object
into the format specification self
(see Kernel#sprintf for formatting details):
"%05d" % 123 # => "00123"
If self
contains multiple substitutions, object
must be an Array or Hash containing the values to be substituted:
"%-5s: %016x" % [ "ID", self.object_id ] # => "ID : 00002b054ec93168"
"foo = %{foo}" % {foo: 'bar'} # => "foo = bar"
"foo = %{foo}, baz = %{baz}" % {foo: 'bar', baz: 'bat'} # => "foo = bar, baz = bat"
2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 |
# File 'string.c', line 2403
/*
 * String#%: formats +arg+ using +str+ as the format specification.
 *
 * When +arg+ converts to an Array, its elements become the format
 * arguments; otherwise +arg+ is passed as the single format argument.
 */
static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE ary = rb_check_array_type(arg);

    if (NIL_P(ary)) {
        /* Not array-like: the object itself is the only argument. */
        return rb_str_format(1, &arg, str);
    }

    return rb_str_format(RARRAY_LENINT(ary), RARRAY_CONST_PTR(ary), str);
}
|
#*(integer) ⇒ Object
Returns a new String containing integer
copies of self
:
"Ho! " * 3 # => "Ho! Ho! Ho! "
"Ho! " * 0 # => ""
2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 |
# File 'string.c', line 2327
/*
 * String#*: returns a new String made of +times+ copies of +str+.
 *
 * Raises ArgumentError when the count is negative or when the resulting
 * byte length would not fit in a long.
 */
VALUE
rb_str_times(VALUE str, VALUE times)
{
VALUE str2;
long n, len;
char *ptr2;
int termlen;
/* Fast path: exactly one copy. */
if (times == INT2FIX(1)) {
return str_duplicate(rb_cString, str);
}
/* Fast path: zero copies gives an empty string carrying str's encoding. */
if (times == INT2FIX(0)) {
str2 = str_alloc_embed(rb_cString, 0);
rb_enc_copy(str2, str);
return str2;
}
len = NUM2LONG(times);
if (len < 0) {
rb_raise(rb_eArgError, "negative argument");
}
/* Fast path: str is a single NUL byte, so the result is simply len zero bytes. */
if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
if (STR_EMBEDDABLE_P(len, 1)) {
str2 = str_alloc_embed(rb_cString, len + 1);
memset(RSTRING_PTR(str2), 0, len + 1);
}
else {
str2 = str_alloc_heap(rb_cString);
RSTRING(str2)->as.heap.aux.capa = len;
/* Zeroed buffer: len data bytes plus one terminator byte. */
RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
}
STR_SET_LEN(str2, len);
rb_enc_copy(str2, str);
return str2;
}
/* Reject counts whose product with the source length would overflow a long. */
if (len && LONG_MAX/len < RSTRING_LEN(str)) {
rb_raise(rb_eArgError, "argument too big");
}
/* From here on, len is the total byte length of the result. */
len *= RSTRING_LEN(str);
termlen = TERM_LEN(str);
str2 = str_new0(rb_cString, 0, len, termlen);
ptr2 = RSTRING_PTR(str2);
if (len) {
/* Seed the buffer with one copy, then keep doubling the filled prefix. */
n = RSTRING_LEN(str);
memcpy(ptr2, RSTRING_PTR(str), n);
while (n <= len/2) {
memcpy(ptr2 + n, ptr2, n);
n *= 2;
}
/* Fill whatever remains after the last full doubling. */
memcpy(ptr2 + n, ptr2, len-n);
}
STR_SET_LEN(str2, len);
TERM_FILL(&ptr2[len], termlen);
rb_enc_cr_str_copy_for_substr(str2, str);
return str2;
}
|
#+(other_string) ⇒ Object
Returns a new String containing other_string
concatenated to self
:
"Hello from " + self.to_s # => "Hello from main"
2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 |
# File 'string.c', line 2255
/*
 * String#+: returns a new String holding the bytes of +str1+ followed by
 * the bytes of +str2+.
 *
 * +str2+ is converted with StringValue, and the two encodings are checked
 * with rb_enc_check_str. Raises ArgumentError ("string size too big")
 * when the combined length would overflow a long.
 */
VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
    char *lhs, *rhs, *dst;
    long lhs_len, rhs_len, total;
    rb_encoding *enc;
    int termlen;
    VALUE result;

    StringValue(str2);
    enc = rb_enc_check_str(str1, str2);
    RSTRING_GETMEM(str1, lhs, lhs_len);
    RSTRING_GETMEM(str2, rhs, rhs_len);
    termlen = rb_enc_mbminlen(enc);

    /* Guard the addition below against signed overflow. */
    if (lhs_len > LONG_MAX - rhs_len) {
        rb_raise(rb_eArgError, "string size too big");
    }
    total = lhs_len + rhs_len;

    result = str_new0(rb_cString, 0, total, termlen);
    dst = RSTRING_PTR(result);
    memcpy(dst, lhs, lhs_len);
    memcpy(dst + lhs_len, rhs, rhs_len);
    TERM_FILL(&dst[total], termlen);

    /* Result coderange is the combination of both operands' coderanges. */
    ENCODING_CODERANGE_SET(result, rb_enc_to_index(enc),
                           ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));

    /* The raw pointers taken above were used until here; keep both operands alive. */
    RB_GC_GUARD(str1);
    RB_GC_GUARD(str2);
    return result;
}
|
#+ ⇒ self
Returns self
if self
is not frozen.
Otherwise returns self.dup
, which is not frozen.
3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 |
# File 'string.c', line 3021
/*
 * String#+@: returns the receiver itself unless it is frozen, in which
 * case a duplicate made by rb_str_dup is returned instead.
 */
static VALUE
str_uplus(VALUE str)
{
    return OBJ_FROZEN(str) ? rb_str_dup(str) : str;
}
|
#- ⇒ Object #dedup ⇒ Object Also known as: dedup
Returns a frozen, possibly pre-existing copy of the string.
The returned String will be deduplicated as long as it does not have any instance variables set on it and is not a String subclass.
Note that the -string variant is more convenient for defining constants:
FILENAME = -'config/database.yml'
while dedup is better suited for use in chains of calculations:
@url_list.concat(urls.map(&:dedup))
3053 3054 3055 3056 3057 3058 3059 3060 |
# File 'string.c', line 3053
/*
 * String#-@ (also #dedup): returns the rb_fstring() form of the receiver.
 *
 * A receiver that is neither a bare String nor frozen is duplicated first,
 * and the duplicate is what gets passed to rb_fstring.
 */
static VALUE
str_uminus(VALUE str)
{
    VALUE target = str;

    if (!(BARE_STRING_P(str) || rb_obj_frozen_p(str))) {
        target = rb_str_dup(str);
    }
    return rb_fstring(target);
}
|
#<<(object) ⇒ String
Concatenates object
to self
and returns self
:
s = 'foo'
s << 'bar' # => "foobar"
s # => "foobar"
If object
is an Integer, the value is considered a codepoint and converted to a character before concatenation:
s = 'foo'
s << 33 # => "foo!"
Related: String#concat, which takes multiple arguments.
3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 |
# File 'string.c', line 3505
/*
 * String#<<: appends +str2+ to +str1+ in place and returns +str1+.
 *
 * A non-Integer argument is handed to rb_str_append. An Integer argument
 * is treated as a codepoint and appended as a character in str1's
 * encoding. Raises RangeError when the Integer does not fit an unsigned
 * int or is not a valid codepoint in that encoding.
 */
VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
unsigned int code;
rb_encoding *enc = STR_ENC_GET(str1);
int encidx;
if (RB_INTEGER_TYPE_P(str2)) {
/* A zero return means code now holds the value; nothing more to do in that branch. */
if (rb_num_to_uint(str2, &code) == 0) {
}
else if (FIXNUM_P(str2)) {
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
}
else {
rb_raise(rb_eRangeError, "bignum out of char range");
}
}
else {
/* Not an Integer: ordinary string append. */
return rb_str_append(str1, str2);
}
/* A non-negative index means the codepoint can be appended as one raw byte. */
encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
if (encidx >= 0) {
char buf[1];
buf[0] = (char)code;
rb_str_cat(str1, buf, 1);
/* The returned index differs from str1's encoding: switch to it and mark the string valid. */
if (encidx != rb_enc_to_index(enc)) {
rb_enc_associate_index(str1, encidx);
ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
}
}
else {
long pos = RSTRING_LEN(str1);
int cr = ENC_CODERANGE(str1);
int len;
char *buf;
/* Byte length of the codepoint in enc; the listed values signal an unencodable codepoint. */
switch (len = rb_enc_codelen(code, enc)) {
case ONIGERR_INVALID_CODE_POINT_VALUE:
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
case 0:
rb_raise(rb_eRangeError, "%u out of char range", code);
break;
}
/* Encode into a stack buffer, then re-check that the bytes form exactly one character of length len. */
buf = ALLOCA_N(char, len + 1);
rb_enc_mbcput(code, buf, enc);
if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
}
/* Grow str1 by len bytes and copy the encoded character to the old end. */
rb_str_resize(str1, pos+len);
memcpy(RSTRING_PTR(str1) + pos, buf, len);
/* Update the coderange: 7bit plus a codepoint above 127 becomes valid; broken becomes unknown. */
if (cr == ENC_CODERANGE_7BIT && code > 127) {
cr = ENC_CODERANGE_VALID;
}
else if (cr == ENC_CODERANGE_BROKEN) {
cr = ENC_CODERANGE_UNKNOWN;
}
ENC_CODERANGE_SET(str1, cr);
}
return str1;
}
|
#<=>(other_string) ⇒ -1, ...
Compares self
and other_string
, returning:
- -1 if other_string is larger.
- 0 if the two are equal.
- 1 if other_string is smaller.
- nil if the two are incomparable.
Examples:
'foo' <=> 'foo' # => 0
'foo' <=> 'food' # => -1
'food' <=> 'foo' # => 1
'FOO' <=> 'foo' # => -1
'foo' <=> 'FOO' # => 1
'foo' <=> 1 # => nil
3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 |
# File 'string.c', line 3797
/*
 * String#<=>: compares +str1+ with +str2+.
 *
 * When +str2+ converts to a String, returns the rb_str_cmp result as a
 * Fixnum; otherwise returns whatever rb_invcmp(str1, str2) yields.
 */
static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        int order = rb_str_cmp(str1, other);
        return INT2FIX(order);
    }
    return rb_invcmp(str1, str2);
}
|
#==(object) ⇒ Boolean #===(object) ⇒ Boolean
Returns true if object has the same length and content as self; false otherwise:
s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false
Returns false
if the two strings’ encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false
If object
is not an instance of String but responds to to_str
, then the two strings are compared using object.==
.
3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 |
# File 'string.c', line 3736
/*
 * String#== / String#===: equality test between +str1+ and +str2+.
 *
 * Identical objects are equal. A String argument is compared with
 * rb_str_eql_internal. Any other object is compared via its own ==
 * (rb_equal(str2, str1)), but only if it responds to to_str; otherwise
 * the result is false.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    return rb_respond_to(str2, idTo_str) ? rb_equal(str2, str1) : Qfalse;
}
|
#==(object) ⇒ Boolean #===(object) ⇒ Boolean
Returns true if object has the same length and content as self; false otherwise:
s = 'foo'
s == 'foo' # => true
s == 'food' # => false
s == 'FOO' # => false
Returns false
if the two strings’ encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false
If object
is not an instance of String but responds to to_str
, then the two strings are compared using object.==
.
3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 |
# File 'string.c', line 3736
/*
 * String#== / String#===: equality test between +str1+ and +str2+.
 *
 * Identical objects are equal. A String argument is compared with
 * rb_str_eql_internal. Any other object is compared via its own ==
 * (rb_equal(str2, str1)), but only if it responds to to_str; otherwise
 * the result is false.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    return rb_respond_to(str2, idTo_str) ? rb_equal(str2, str1) : Qfalse;
}
|
#=~(regexp) ⇒ Integer? #=~(object) ⇒ Integer?
Returns the Integer index of the first substring that matches the given regexp
, or nil
if no match found:
'foo' =~ /f/ # => 0
'foo' =~ /o/ # => 1
'foo' =~ /x/ # => nil
Note: also updates the Regexp global variables (see Regexp, section Global Variables).
If the given object
is not a Regexp, returns the value returned by object =~ self
.
Note that string =~ regexp
is different from regexp =~ string
(see Regexp#=~):
number= nil
"no. 9" =~ /(?<number>\d+)/
number # => nil (not assigned)
/(?<number>\d+)/ =~ "no. 9"
number #=> "9"
4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 |
# File 'string.c', line 4517
/*
 * String#=~: matches +x+ (the receiver) against +y+.
 *
 * A String +y+ raises TypeError. A Regexp +y+ is matched with
 * rb_reg_match(y, x). Anything else is sent =~ with +x+ as the argument.
 */
static VALUE
rb_str_match(VALUE x, VALUE y)
{
    const int kind = OBJ_BUILTIN_TYPE(y);

    if (kind == T_STRING) {
        rb_raise(rb_eTypeError, "type mismatch: String given");
    }
    if (kind == T_REGEXP) {
        return rb_reg_match(y, x);
    }
    return rb_funcall(y, idEqTilde, 1, x);
}
|
#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil
Returns the substring of self
specified by the arguments. See examples at String Slices.
5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 |
# File 'string.c', line 5273
/*
 * String#[] / String#slice: dispatches on the number and kind of arguments.
 *
 *   one argument          -> rb_str_aref(str, arg)
 *   (regexp, capture)     -> rb_str_subpat
 *   (start, length)       -> rb_str_substr, both converted with NUM2LONG
 *
 * Any other argument count is rejected by rb_check_arity(argc, 1, 2).
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return rb_str_aref(str, argv[0]);
    }

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        return rb_str_subpat(str, argv[0], argv[1]);
    }

    long start = NUM2LONG(argv[0]);
    long count = NUM2LONG(argv[1]);
    return rb_str_substr(str, start, count);
}
|
#[]=(index) ⇒ Object #[]=(start, length) ⇒ Object #[]=(range) ⇒ Object #[]=(regexp, capture = 0) ⇒ Object #[]=(substring) ⇒ Object
Replaces all, some, or none of the contents of self
; returns new_string
. See String Slices.
A few examples:
s = 'foo'
s[2] = 'rtune' # => "rtune"
s # => "fortune"
s[1, 5] = 'init' # => "init"
s # => "finite"
s[3..4] = 'al' # => "al"
s # => "finale"
s[/e$/] = 'ly' # => "ly"
s # => "finally"
s['lly'] = 'ncial' # => "ncial"
s # => "financial"
5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 |
# File 'string.c', line 5510
/*
 * String#[]=: dispatches on the number and kind of arguments.
 *
 *   (key, value)               -> rb_str_aset(str, key, value)
 *   (regexp, capture, value)   -> rb_str_subpat_set
 *   (start, length, value)     -> rb_str_update
 *
 * The three-argument forms return the assigned value (argv[2]). Any other
 * argument count is rejected by rb_check_arity(argc, 2, 3).
 */
static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 3) {
        rb_check_arity(argc, 2, 3);
        return rb_str_aset(str, argv[0], argv[1]);
    }

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
    }
    else {
        rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
    }
    return argv[2];
}
|
#ascii_only? ⇒ Boolean
Returns true
if self
contains only ASCII characters, false
otherwise:
'abc'.ascii_only? # => true
"abc\u{6666}".ascii_only? # => false
11010 11011 11012 11013 11014 11015 11016 |
# File 'string.c', line 11010
/*
 * String#ascii_only?: true exactly when the coderange reported by
 * rb_enc_str_coderange for +str+ is ENC_CODERANGE_7BIT.
 */
static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    return RBOOL(rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT);
}
|
#b ⇒ String
:include: doc/string/b.rdoc
10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 |
# File 'string.c', line 10946
/*
 * String#b: returns a new String populated from +str+ via
 * str_replace_shared_without_enc, i.e. without taking over its encoding.
 *
 * For an ASCII-compatible receiver the result's coderange is derived from
 * the receiver's cached coderange.
 */
static VALUE
rb_str_b(VALUE str)
{
    VALUE bin = STR_EMBED_P(str)
        ? str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str))
        : str_alloc_heap(rb_cString);

    str_replace_shared_without_enc(bin, str);

    if (!rb_enc_asciicompat(STR_ENC_GET(str))) {
        return bin;
    }

    // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
    // A known coderange on the receiver therefore fixes the result's coderange.
    const int cr = ENC_CODERANGE(str);
    if (cr == ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_SET(bin, ENC_CODERANGE_7BIT);
    }
    else if (cr == ENC_CODERANGE_BROKEN || cr == ENC_CODERANGE_VALID) {
        ENC_CODERANGE_SET(bin, ENC_CODERANGE_VALID);
    }
    else {
        ENC_CODERANGE_CLEAR(bin);
    }
    return bin;
}
|
#byteindex(substring, offset = 0) ⇒ Integer? #byteindex(regexp, offset = 0) ⇒ Integer?
Returns the Integer byte-based index of the first occurrence of the given substring
, or nil
if none found:
'foo'.byteindex('f') # => 0
'foo'.byteindex('o') # => 1
'foo'.byteindex('oo') # => 1
'foo'.byteindex('ooo') # => nil
Returns the Integer byte-based index of the first match for the given Regexp regexp
, or nil
if none found:
'foo'.byteindex(/f/) # => 0
'foo'.byteindex(/o/) # => 1
'foo'.byteindex(/oo/) # => 1
'foo'.byteindex(/ooo/) # => nil
Integer argument offset
, if given, specifies the byte-based position in the string to begin the search:
'foo'.byteindex('o', 1) # => 1
'foo'.byteindex('o', 2) # => 2
'foo'.byteindex('o', 3) # => nil
If offset
is negative, counts backward from the end of self
:
'foo'.byteindex('o', -1) # => 2
'foo'.byteindex('o', -2) # => 1
'foo'.byteindex('o', -3) # => 1
'foo'.byteindex('o', -4) # => nil
If offset does not land on a character (codepoint) boundary, IndexError is raised.
Related: String#index, String#byterindex.
4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 |
# File 'string.c', line 4129
/*
 * Implements String#byteindex(sub [, offset]).
 *
 * Returns the byte index of the first match of +sub+ (a Regexp or an object
 * converted with StringValue) at or after the byte offset, or nil when there
 * is none.  A negative offset counts from the end of +str+; an offset that
 * falls outside the string yields nil and, for a Regexp, resets the backref.
 */
static VALUE
rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos = 0;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        const long slen = RSTRING_LEN(str);
        int out_of_range;

        pos = NUM2LONG(initpos);
        if (pos < 0) {
            pos += slen;
            out_of_range = (pos < 0);
        }
        else {
            out_of_range = (pos > slen);
        }
        if (out_of_range) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }

    str_ensure_byte_pos(str, pos);

    if (!RB_TYPE_P(sub, T_REGEXP)) {
        StringValue(sub);
        pos = rb_str_byteindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
        return Qnil;
    }

    if (rb_reg_search(sub, str, pos, 0) >= 0) {
        VALUE match = rb_backref_get();
        /* BEG() reads the local named `regs`. */
        struct re_registers *regs = RMATCH_REGS(match);
        pos = BEG(0);
        return LONG2NUM(pos);
    }
    return Qnil;
}
|
#byterindex(substring, offset = self.bytesize) ⇒ Integer? #byterindex(regexp, offset = self.bytesize) ⇒ Integer?
Returns the Integer byte-based index of the last occurrence of the given substring
, or nil
if none found:
'foo'.byterindex('f') # => 0
'foo'.byterindex('o') # => 2
'foo'.byterindex('oo') # => 1
'foo'.byterindex('ooo') # => nil
Returns the Integer byte-based index of the last match for the given Regexp regexp
, or nil
if none found:
'foo'.byterindex(/f/) # => 0
'foo'.byterindex(/o/) # => 2
'foo'.byterindex(/oo/) # => 1
'foo'.byterindex(/ooo/) # => nil
The last match means starting at the possible last position, not the last of longest matches.
'foo'.byterindex(/o+/) # => 2
$~ #=> #<MatchData "o">
To get the last longest match, combine the regexp with a negative lookbehind:
'foo'.byterindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">
Or use String#byteindex with a negative lookahead:
'foo'.byteindex(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">
Integer argument offset
, if given and non-negative, specifies the maximum starting byte-based position in the
string to _end_ the search:
'foo'.byterindex('o', 0) # => nil
'foo'.byterindex('o', 1) # => 1
'foo'.byterindex('o', 2) # => 2
'foo'.byterindex('o', 3) # => 2
If offset
is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset
:
'foo'.byterindex('o', -1) # => 2
'foo'.byterindex('o', -2) # => 1
'foo'.byterindex('o', -3) # => nil
'foo'.byterindex('o', -4) # => nil
If offset
does not land on a character (codepoint) boundary, IndexError
is raised.
Related: String#byteindex.
4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 |
# File 'string.c', line 4450
/*
 * Implements String#byterindex(sub [, offset]).
 *
 * Searches backwards from the byte offset (default: the byte length of
 * +str+) and returns the byte index of the match, or nil.  A negative offset
 * counts from the end; if it still lies before the start, nil is returned
 * (resetting the backref for a Regexp).  An offset past the end is clamped
 * to the byte length.
 */
static VALUE
rb_str_byterindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    const long len = RSTRING_LEN(str);
    long pos = len;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
        if (pos < 0) {
            pos += len;
            if (pos < 0) {
                if (RB_TYPE_P(sub, T_REGEXP)) {
                    rb_backref_set(Qnil);
                }
                return Qnil;
            }
        }
        else if (pos > len) {
            pos = len;
        }
    }

    str_ensure_byte_pos(str, pos);

    if (!RB_TYPE_P(sub, T_REGEXP)) {
        StringValue(sub);
        pos = rb_str_byterindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
        return Qnil;
    }

    if (rb_reg_search(sub, str, pos, 1) >= 0) {
        VALUE match = rb_backref_get();
        /* BEG() reads the local named `regs`. */
        struct re_registers *regs = RMATCH_REGS(match);
        pos = BEG(0);
        return LONG2NUM(pos);
    }
    return Qnil;
}
|
#bytes ⇒ Object
:include: doc/string/bytes.rdoc
9244 9245 9246 9247 9248 9249 |
# File 'string.c', line 9244
/*
 * Implements String#bytes: hands rb_str_enumerate_bytes the value produced
 * by WANTARRAY("bytes", <byte length>) and returns its result.
 */
static VALUE
rb_str_bytes(VALUE str)
{
    return rb_str_enumerate_bytes(str, WANTARRAY("bytes", RSTRING_LEN(str)));
}
|
#bytesize ⇒ Integer
:include: doc/string/bytesize.rdoc
2221 2222 2223 2224 2225 |
# File 'string.c', line 2221
/*
 * Implements String#bytesize: returns RSTRING_LEN(str) as a Ruby Integer.
 */
VALUE
rb_str_bytesize(VALUE str)
{
    const long nbytes = RSTRING_LEN(str);

    return LONG2NUM(nbytes);
}
|
#byteslice(index, length = 1) ⇒ String? #byteslice(range) ⇒ String?
Returns a substring of self
, or nil
if the substring cannot be constructed.
With integer arguments index
and length
given, returns the substring beginning at the given index
of the given length
(if possible), or nil
if length
is negative or index
falls outside of self
:
s = '0123456789' # => "0123456789"
s.byteslice(2) # => "2"
s.byteslice(200) # => nil
s.byteslice(4, 3) # => "456"
s.byteslice(4, 30) # => "456789"
s.byteslice(4, -1) # => nil
s.byteslice(40, 2) # => nil
In either case above, counts backwards from the end of self
if index
is negative:
s = '0123456789' # => "0123456789"
s.byteslice(-4) # => "6"
s.byteslice(-4, 3) # => "678"
With Range argument range
given, returns byteslice(range.begin, range.size)
:
s = '0123456789' # => "0123456789"
s.byteslice(4..6) # => "456"
s.byteslice(-6..-4) # => "456"
s.byteslice(5..2) # => "" # range.size is zero.
s.byteslice(40..42) # => nil
In all cases, a returned string has the same encoding as self
:
s.encoding # => #<Encoding:UTF-8>
s.byteslice(4).encoding # => #<Encoding:UTF-8>
6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 |
# File 'string.c', line 6324
/*
 * Implements String#byteslice.
 *
 * With two arguments (index, length) both are converted with NUM2LONG, in
 * that order, and passed to str_byte_substr.  Otherwise the arity is checked
 * (1..2) and the single argument is handed to str_byte_aref.
 */
static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    long beg, len;

    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return str_byte_aref(str, argv[0]);
    }

    /* Convert index before length so conversion errors surface in order. */
    beg = NUM2LONG(argv[0]);
    len = NUM2LONG(argv[1]);
    return str_byte_substr(str, beg, len, TRUE);
}
|
#bytesplice(index, length, str) ⇒ String #bytesplice(index, length, str, str_index, str_length) ⇒ String #bytesplice(range, str) ⇒ String #bytesplice(range, str, str_range) ⇒ String
Replaces some or all of the content of self
with str
, and returns self
. The portion of the string affected is determined using the same criteria as String#byteslice, except that length
cannot be omitted. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly.
If str_index
and str_length
, or str_range
are given, the content of self
is replaced by str.byteslice(str_index, str_length) or str.byteslice(str_range); however the substring of str
is not allocated as a new string.
The forms that take Integer arguments raise an IndexError if a value is out of range; the Range forms raise a RangeError. If the beginning or ending offset does not land on a character (codepoint) boundary, an IndexError is raised.
6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 |
# File 'string.c', line 6379
/*
 * Implements String#bytesplice.
 *
 * Accepted call shapes (2, 3 or 5 arguments):
 *   bytesplice(range, str)
 *   bytesplice(range, str, str_range)
 *   bytesplice(index, length, str)
 *   bytesplice(index, length, str, str_index, str_length)
 *
 * The target span (beg, len) and source span (vbeg, vlen) are validated with
 * str_check_beg_len, the encodings are checked for compatibility, and the
 * bytes are replaced in place.  Returns +str+.
 */
static VALUE
rb_str_bytesplice(int argc, VALUE *argv, VALUE str)
{
    long beg, len, vbeg, vlen;
    VALUE val;
    rb_encoding *enc;
    int cr;
    int range_form;

    rb_check_arity(argc, 2, 5);
    if (argc != 2 && argc != 3 && argc != 5) {
        rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
    }

    /* Three arguments are ambiguous: a non-Integer first one selects the Range form. */
    range_form = (argc == 2) || (argc == 3 && !RB_INTEGER_TYPE_P(argv[0]));

    if (range_form) {
        if (!rb_range_beg_len(argv[0], &beg, &len, RSTRING_LEN(str), 2)) {
            rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                     rb_builtin_class_name(argv[0]));
        }
        val = argv[1];
        StringValue(val);
        if (argc == 3) {
            /* bytesplice(range, str, str_range) */
            if (!rb_range_beg_len(argv[2], &vbeg, &vlen, RSTRING_LEN(val), 2)) {
                rb_raise(rb_eTypeError, "wrong argument type %s (expected Range)",
                         rb_builtin_class_name(argv[2]));
            }
        }
        else {
            /* bytesplice(range, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
    }
    else {
        beg = NUM2LONG(argv[0]);
        len = NUM2LONG(argv[1]);
        val = argv[2];
        StringValue(val);
        if (argc == 5) {
            /* bytesplice(index, length, str, str_index, str_length) */
            vbeg = NUM2LONG(argv[3]);
            vlen = NUM2LONG(argv[4]);
        }
        else {
            /* bytesplice(index, length, str) */
            vbeg = 0;
            vlen = RSTRING_LEN(val);
        }
    }

    str_check_beg_len(str, &beg, &len);
    str_check_beg_len(val, &vbeg, &vlen);
    enc = rb_enc_check(str, val);
    str_modify_keep_cr(str);
    rb_str_update_1(str, beg, len, val, vbeg, vlen);
    rb_enc_associate(str, enc);

    /* Only record the combined code range when it is not BROKEN. */
    cr = ENC_CODERANGE_AND(ENC_CODERANGE(str), ENC_CODERANGE(val));
    if (cr != ENC_CODERANGE_BROKEN) {
        ENC_CODERANGE_SET(str, cr);
    }
    return str;
}
|
#capitalize(*options) ⇒ String
Returns a string containing the characters in self
; the first character is upcased; the remaining characters are downcased:
s = 'hello World!' # => "hello World!"
s.capitalize # => "Hello world!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#capitalize!.
7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 |
# File 'string.c', line 7754
/*
 * Implements String#capitalize.
 *
 * Parses the case-mapping options, then returns a new string produced by
 * the ASCII-only mapper or the general case mapper.  When +str+ is empty (or
 * has no buffer) +str+ itself is returned.
 */
static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    rb_encoding *enc;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return str;
    }
    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        return rb_str_casemap(str, &flags, enc);
    }

    ret = rb_str_new(0, RSTRING_LEN(str));
    rb_str_ascii_casemap(str, ret, &flags, enc);
    return ret;
}
|
#capitalize!(*options) ⇒ self?
Upcases the first character in self
; downcases the remaining characters; returns self
if any changes were made, nil
otherwise:
s = 'hello World!' # => "hello World!"
s.capitalize! # => "Hello world!"
s # => "Hello world!"
s.capitalize! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#capitalize.
7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 |
# File 'string.c', line 7716
/*
 * Implements String#capitalize!.
 *
 * Applies the capitalize mapping to +str+ in place.  Returns +str+ when the
 * mapper reported a modification (ONIGENC_CASE_MODIFIED set in flags), nil
 * otherwise, including when +str+ is empty.
 */
static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    rb_encoding *enc;

    /* Options are validated before the modifiability check. */
    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return Qnil;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }

    if (flags & ONIGENC_CASE_MODIFIED) {
        return str;
    }
    return Qnil;
}
|
#casecmp(other_string) ⇒ -1, ...
Compares self.downcase
and other_string.downcase
; returns:
-
-1 if
other_string.downcase
is larger. -
0 if the two are equal.
-
1 if
other_string.downcase
is smaller. -
nil
if the two are incomparable.
Examples:
'foo'.casecmp('foo') # => 0
'foo'.casecmp('food') # => -1
'food'.casecmp('foo') # => 1
'FOO'.casecmp('foo') # => 0
'foo'.casecmp('FOO') # => 0
'foo'.casecmp(1) # => nil
See Case Mapping.
Related: String#casecmp?.
3838 3839 3840 3841 3842 3843 3844 3845 3846 |
# File 'string.c', line 3838
/*
 * Implements String#casecmp.  Returns nil when +str2+ cannot be converted
 * by rb_check_string_type; otherwise returns the result of str_casecmp.
 */
static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        return str_casecmp(str1, other);
    }
    return Qnil;
}
|
#casecmp?(other_string) ⇒ true, ...
Returns true
if self
and other_string
are equal after Unicode case folding, otherwise false
:
'foo'.casecmp?('foo') # => true
'foo'.casecmp?('food') # => false
'food'.casecmp?('foo') # => false
'FOO'.casecmp?('foo') # => true
'foo'.casecmp?('FOO') # => true
Returns nil
if the two values are incomparable:
'foo'.casecmp?(1) # => nil
See Case Mapping.
Related: String#casecmp.
3928 3929 3930 3931 3932 3933 3934 3935 3936 |
# File 'string.c', line 3928
/*
 * Implements String#casecmp?.  Returns nil when +str2+ cannot be converted
 * by rb_check_string_type; otherwise returns the result of str_casecmp_p.
 */
static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        return str_casecmp_p(str1, other);
    }
    return Qnil;
}
|
#center(size, pad_string = ' ') ⇒ Object
:include: doc/string/center.rdoc
Related: String#ljust, String#rjust.
10551 10552 10553 10554 10555 |
# File 'string.c', line 10551
/*
 * Implements String#center by delegating to rb_str_justify with the 'c'
 * justification flag.
 */
static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    const char justify_flag = 'c';

    return rb_str_justify(argc, argv, str, justify_flag);
}
|
#chars ⇒ Object
:include: doc/string/chars.rdoc
9313 9314 9315 9316 9317 9318 |
# File 'string.c', line 9313
/*
 * Implements String#chars: hands rb_str_enumerate_chars the value produced
 * by WANTARRAY("chars", <character length>) and returns its result.
 */
static VALUE
rb_str_chars(VALUE str)
{
    return rb_str_enumerate_chars(str, WANTARRAY("chars", rb_str_strlen(str)));
}
|
#chomp(line_sep = $/) ⇒ Object
:include: doc/string/chomp.rdoc
9777 9778 9779 9780 9781 9782 9783 |
# File 'string.c', line 9777
/*
 * Implements String#chomp.  With a nil record separator a plain duplicate
 * of +str+ is returned; otherwise the leading chompped_length(str, rs)
 * bytes are returned as a new string.
 */
static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);

    if (!NIL_P(rs)) {
        return rb_str_subseq(str, 0, chompped_length(str, rs));
    }
    return str_duplicate(rb_cString, str);
}
|
#chomp!(line_sep = $/) ⇒ self?
Like String#chomp, but modifies self
in place; returns nil
if no modification made, self
otherwise.
9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 |
# File 'string.c', line 9757
/*
 * Implements String#chomp!.  Returns nil when nothing is to be done (empty
 * receiver called with fewer than two arguments, or a nil record
 * separator); otherwise returns the result of rb_str_chomp_string.
 */
static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE rs;

    str_modifiable(str);
    if (argc < 2 && RSTRING_LEN(str) == 0) {
        return Qnil;
    }

    rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) {
        return Qnil;
    }
    return rb_str_chomp_string(str, rs);
}
|
#chop ⇒ Object
:include: doc/string/chop.rdoc
9604 9605 9606 9607 9608 |
# File 'string.c', line 9604
/*
 * Implements String#chop: returns the leading chopped_length(str) bytes of
 * +str+ as a new string.
 */
static VALUE
rb_str_chop(VALUE str)
{
    const long kept = chopped_length(str);

    return rb_str_subseq(str, 0, kept);
}
|
#chop! ⇒ self?
Like String#chop, but modifies self
in place; returns nil
if self
is empty, self
otherwise.
Related: String#chomp!.
9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 |
# File 'string.c', line 9578
/*
 * Implements String#chop!.  Truncates +str+ in place to
 * chopped_length(str) bytes and returns +str+; returns nil when +str+ is
 * empty.
 */
static VALUE
rb_str_chop_bang(VALUE str)
{
    long len;

    str_modify_keep_cr(str);
    if (RSTRING_LEN(str) <= 0) {
        return Qnil;
    }

    len = chopped_length(str);
    STR_SET_LEN(str, len);
    TERM_FILL(RSTRING_PTR(str) + len, TERM_LEN(str));

    /* A 7-bit code range is kept; any other cached range is discarded. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
|
#chr ⇒ String
Returns a string containing the first character of self
:
s = 'foo' # => "foo"
s.chr # => "f"
6116 6117 6118 6119 6120 |
# File 'string.c', line 6116
/*
 * Implements String#chr: returns rb_str_substr(str, 0, 1), i.e. the first
 * character of +str+.
 */
static VALUE
rb_str_chr(VALUE str)
{
    const long start = 0;
    const long count = 1;

    return rb_str_substr(str, start, count);
}
|
#clear ⇒ self
Removes the contents of self
:
s = 'foo' # => "foo"
s.clear # => ""
6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 |
# File 'string.c', line 6091
/*
 * Implements String#clear.  Discards the current buffer, switches +str+ to
 * the embedded representation with length 0 and a zero first byte, and sets
 * the code range to 7BIT for ASCII-compatible encodings or VALID otherwise.
 * Returns +str+.
 */
static VALUE
rb_str_clear(VALUE str)
{
    int cr;

    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_LEN(str, 0);
    *RSTRING_PTR(str) = 0;

    cr = rb_enc_asciicompat(STR_ENC_GET(str)) ? ENC_CODERANGE_7BIT
                                              : ENC_CODERANGE_VALID;
    ENC_CODERANGE_SET(str, cr);
    return str;
}
|
#codepoints ⇒ Object
:include: doc/string/codepoints.rdoc
9373 9374 9375 9376 9377 9378 |
# File 'string.c', line 9373
/*
 * Implements String#codepoints: hands rb_str_enumerate_codepoints the value
 * produced by WANTARRAY("codepoints", <character length>) and returns its
 * result.
 */
static VALUE
rb_str_codepoints(VALUE str)
{
    return rb_str_enumerate_codepoints(str, WANTARRAY("codepoints", rb_str_strlen(str)));
}
|
#concat(*objects) ⇒ String
Concatenates each object in objects
to self
and returns self
:
s = 'foo'
s.concat('bar', 'baz') # => "foobarbaz"
s # => "foobarbaz"
For each given object object
that is an Integer, the value is considered a codepoint and converted to a character before concatenation:
s = 'foo'
s.concat(32, 'bar', 32, 'baz') # => "foo bar baz"
Related: String#<<, which takes a single argument.
3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 |
# File 'string.c', line 3466
/*
 * Implements String#concat(*objects).
 *
 * A single argument is forwarded straight to rb_str_concat.  Several
 * arguments are first accumulated into a temporary string carrying the
 * receiver's encoding, which is then appended to +str+ in one step.  With no
 * arguments +str+ is returned untouched (after the modifiability check).
 */
static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        return rb_str_concat(str, argv[0]);
    }
    if (argc > 1) {
        VALUE arg_str = rb_str_tmp_new(0);
        VALUE *arg = argv;
        VALUE *const arg_end = argv + argc;

        rb_enc_copy(arg_str, str);
        while (arg < arg_end) {
            rb_str_concat(arg_str, *arg);
            arg++;
        }
        rb_str_buf_append(str, arg_str);
    }
    return str;
}
|
#count(*selectors) ⇒ Integer
Returns the total number of characters in self
that are specified by the given selectors
(see Multiple Character Selectors):
a = "hello world"
a.count "lo" #=> 5
a.count "lo", "o" #=> 2
a.count "hello", "^l" #=> 4
a.count "ej-m" #=> 4
"hello^world".count "\\^aeiou" #=> 4
"hello-world".count "a\\-eo" #=> 4
c = "hello world\\r\\n"
c.count "\\" #=> 2
c.count "\\A" #=> 0
c.count "X-\\w" #=> 3
8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 |
# File 'string.c', line 8584
/*
 * Implements String#count(*selectors).
 *
 * Returns, as a Ruby Integer, the number of characters in +str+ that are
 * selected by the given selector strings.  At least one selector is
 * required; each is converted with StringValue and checked for encoding
 * compatibility with +str+.
 */
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
char table[TR_TABLE_SIZE];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0, tstr;
char *s, *send;
int i;
int ascompat;
size_t n = 0;
rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
tstr = argv[0];
StringValue(tstr);
enc = rb_enc_check(str, tstr);
if (argc == 1) {
const char *ptstr;
/*
 * Fast path: exactly one selector that is a single byte, in an
 * ASCII-compatible encoding, accepted by
 * ONIGENC_IS_ALLOWED_REVERSE_MATCH, and a receiver that is not broken.
 * The receiver is then scanned byte by byte without building a table.
 */
if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
(ptstr = RSTRING_PTR(tstr),
ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
!is_broken_string(str)) {
int clen;
unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
send = RSTRING_END(str);
while (s < send) {
if (*(unsigned char*)s++ == c) n++;
}
return SIZET2NUM(n);
}
}
/*
 * General path: the first selector initialises the table (third argument
 * TRUE); each further selector is applied with FALSE.
 * NOTE(review): presumably the later calls narrow the set to the
 * intersection of all selectors -- confirm in tr_setup_table.
 */
tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
for (i=1; i<argc; i++) {
tstr = argv[i];
StringValue(tstr);
enc = rb_enc_check(str, tstr);
tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
}
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
send = RSTRING_END(str);
ascompat = rb_enc_asciicompat(enc);
while (s < send) {
unsigned int c;
/* Bytes below 0x80 in an ASCII-compatible encoding index table directly. */
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (table[c]) {
n++;
}
s++;
}
else {
/* Otherwise decode one codepoint and test it with tr_find. */
int clen;
c = rb_enc_codepoint_len(s, send, &clen, enc);
if (tr_find(c, table, del, nodel)) {
n++;
}
s += clen;
}
}
return SIZET2NUM(n);
}
|
#crypt(salt_str) ⇒ Object
Returns the string generated by calling crypt(3)
standard library function with str
and salt_str
, in this order, as its arguments. Please do not use this method any longer. It is legacy; provided only for backward compatibility with ruby scripts in earlier days. It is bad to use in contemporary programs for several reasons:
-
Behaviour of C’s
crypt(3)
depends on the OS on which it is run. The generated string lacks data portability. -
On some OSes such as Mac OS,
crypt(3)
never fails (i.e. silently ends up in unexpected results). -
On some OSes such as Mac OS,
crypt(3)
is not thread safe. -
So-called “traditional” usage of
crypt(3)
is very very very weak. According to its manpage, Linux’s traditional crypt(3)
output has only 2**56 variations; too easy to brute force today. And this is the default behaviour. -
In order to make things robust some OSes implement so-called “modular” usage. To go through, you have to do a complex build-up of the
salt_str
parameter, by hand. Failure in generation of a proper salt string tends not to yield any errors; typos in parameters are normally not detectable.-
For instance, in the following example, the second invocation of String#crypt is wrong; it has a typo in “round=” (lacks “s”). However the call does not fail and something unexpected is generated.
"foo".crypt("$5$rounds=1000$salt$") # OK, proper usage "foo".crypt("$5$round=1000$salt$") # Typo not detected
-
-
Even in the “modular” mode, some hash functions are considered archaic and no longer recommended at all; for instance module
$1$
is officially abandoned by its author: see phk.freebsd.dk/sagas/md5crypt_eol/. For another instance, module $3$
is considered completely broken: see the manpage of FreeBSD. -
On some OS such as Mac OS, there is no modular mode. Yet, as written above,
crypt(3)
on Mac OS never fails. This means even if you build up a proper salt string it generates a traditional DES hash anyways, and there is no way for you to be aware of it. "foo".crypt("$5$rounds=1000$salt$") # => "$5fNPQMxC5j6."
If for some reason you cannot migrate to other secure contemporary password hashing algorithms, install the string-crypt gem and require 'string/crypt'
to continue using it.
10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 |
# File 'string.c', line 10281
/*
 * Implements String#crypt(salt_str).
 *
 * Calls crypt_r(3) when HAVE_CRYPT_R is defined, otherwise crypt(3) while
 * holding crypt_mutex.lock, with +str+ and +salt+ as arguments, and returns
 * the C result copied into a new Ruby String.
 *
 * Raises ArgumentError when the salt is shorter than two bytes or either of
 * its first two bytes is NUL, and a system-call error (via rb_syserr_fail)
 * when the C function returns NULL.
 */
static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
/*
 * CRYPT_END() undoes whatever the selected variant set up: it ends the
 * ALLOCV buffer for crypt_r, or unlocks crypt_mutex.lock for plain crypt.
 */
#ifdef HAVE_CRYPT_R
VALUE databuf;
struct crypt_data *data;
# define CRYPT_END() ALLOCV_END(databuf)
#else
extern char *crypt(const char *, const char *);
# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
#endif
VALUE result;
const char *s, *saltp;
char *res;
#ifdef BROKEN_CRYPT
char salt_8bit_clean[3];
#endif
StringValue(salt);
mustnot_wchar(str);
mustnot_wchar(salt);
s = StringValueCStr(str);
saltp = RSTRING_PTR(salt);
if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
}
#ifdef BROKEN_CRYPT
/*
 * If either of the first two salt bytes is non-ASCII, use a local
 * two-byte copy of the salt with the high bit of each byte masked off.
 */
if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
salt_8bit_clean[0] = saltp[0] & 0x7f;
salt_8bit_clean[1] = saltp[1] & 0x7f;
salt_8bit_clean[2] = '\0';
saltp = salt_8bit_clean;
}
#endif
#ifdef HAVE_CRYPT_R
data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
data->initialized = 0;
# endif
res = crypt_r(s, saltp, data);
#else
crypt_mutex_initialize();
rb_nativethread_lock_lock(&crypt_mutex.lock);
res = crypt(s, saltp);
#endif
if (!res) {
/* errno is captured before cleanup and reported afterwards
 * (presumably because CRYPT_END() could overwrite it). */
int err = errno;
CRYPT_END();
rb_syserr_fail(err, "crypt");
}
/* Copy the C result into a Ruby String before running the cleanup. */
result = rb_str_new_cstr(res);
CRYPT_END();
return result;
}
|
#delete(*selectors) ⇒ Object
Returns a copy of self
with characters specified by selectors
removed (see Multiple Character Selectors):
"hello".delete "l","lo" #=> "heo"
"hello".delete "lo" #=> "he"
"hello".delete "aeiou", "^e" #=> "hell"
"hello".delete "ej-m" #=> "ho"
8401 8402 8403 8404 8405 8406 8407 |
# File 'string.c', line 8401
/*
 * Implements String#delete: duplicates +str+ as an rb_cString, runs
 * rb_str_delete_bang on the duplicate, and returns the duplicate.
 */
static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    VALUE copy = str_duplicate(rb_cString, str);

    rb_str_delete_bang(argc, argv, copy);
    return copy;
}
|
#delete!(*selectors) ⇒ self?
Like String#delete, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 |
# File 'string.c', line 8325
/*
 * Implements String#delete!(*selectors).
 *
 * Removes from +str+, in place, every character selected by the given
 * selector strings.  Returns +str+ if at least one character was removed,
 * nil otherwise.  An empty receiver returns nil before the arity check.
 */
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
char squeez[TR_TABLE_SIZE];
rb_encoding *enc = 0;
char *s, *send, *t;
VALUE del = 0, nodel = 0;
int modify = 0;
int i, ascompat, cr;
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
/* Build the selector table; the first selector initialises it (i==0). */
for (i=0; i<argc; i++) {
/* Note: this VALUE s shadows the outer char *s inside the loop body. */
VALUE s = argv[i];
StringValue(s);
enc = rb_enc_check(str, s);
tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
}
str_modify_keep_cr(str);
ascompat = rb_enc_asciicompat(enc);
/* s is the read cursor, t the write cursor; kept characters are
 * compacted toward the front of the same buffer. */
s = t = RSTRING_PTR(str);
send = RSTRING_END(str);
/* Start from the narrowest code range and widen it to VALID once a
 * character handled by the multibyte branch is kept. */
cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
while (s < send) {
unsigned int c;
int clen;
/* Bytes below 0x80 in an ASCII-compatible encoding index squeez directly. */
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (squeez[c]) {
modify = 1;
}
else {
/* Copy only when something earlier was dropped (t lags behind s). */
if (t != s) *t = c;
t++;
}
s++;
}
else {
/* Otherwise decode one codepoint and test it with tr_find. */
c = rb_enc_codepoint_len(s, send, &clen, enc);
if (tr_find(c, squeez, del, nodel)) {
modify = 1;
}
else {
if (t != s) rb_enc_mbcput(c, t, enc);
t += clen;
if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
}
s += clen;
}
}
/* Terminate at the write cursor and shrink the length to match. */
TERM_FILL(t, TERM_LEN(str));
STR_SET_LEN(str, t - RSTRING_PTR(str));
ENC_CODERANGE_SET(str, cr);
if (modify) return str;
return Qnil;
}
|
#delete_prefix(prefix) ⇒ Object
:include: doc/string/delete_prefix.rdoc
10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 |
# File 'string.c', line 10786
/*
 * Implements String#delete_prefix.  When deleted_prefix_length reports a
 * positive length, returns the remainder of +str+ after that many bytes;
 * otherwise returns a duplicate of +str+.
 */
static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    const long prefixlen = deleted_prefix_length(str, prefix);

    if (prefixlen > 0) {
        return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
    }
    return str_duplicate(rb_cString, str);
}
|
#delete_prefix!(prefix) ⇒ self?
Like String#delete_prefix, except that self
is modified in place. Returns self
if the prefix is removed, nil
otherwise.
10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 |
# File 'string.c', line 10766
/*
 * Implements String#delete_prefix!.  Drops the leading
 * deleted_prefix_length(str, prefix) bytes from +str+ in place and returns
 * the result of rb_str_drop_bytes; returns nil when that length is not
 * positive.
 */
static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    long prefixlen;

    str_modify_keep_cr(str);
    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen > 0) {
        return rb_str_drop_bytes(str, prefixlen);
    }
    return Qnil;
}
|
#delete_suffix(suffix) ⇒ Object
:include: doc/string/delete_suffix.rdoc
10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 |
# File 'string.c', line 10869
/*
 * Implements String#delete_suffix.  When deleted_suffix_length reports a
 * positive length, returns +str+ without that many trailing bytes;
 * otherwise returns a duplicate of +str+.
 */
static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    const long suffixlen = deleted_suffix_length(str, suffix);

    if (suffixlen > 0) {
        return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
    }
    return str_duplicate(rb_cString, str);
}
|
#delete_suffix!(suffix) ⇒ self?
Like String#delete_suffix, except that self
is modified in place. Returns self
if the suffix is removed, nil
otherwise.
10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 |
# File 'string.c', line 10841
/*
 * Implements String#delete_suffix!.  Truncates +str+ in place by
 * deleted_suffix_length(str, suffix) bytes and returns +str+; returns nil
 * when that length is not positive.
 */
static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    long suffixlen, len;

    str_modifiable(str);
    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) {
        return Qnil;
    }

    /* The new length is taken from the byte length read before str_modify_keep_cr. */
    len = RSTRING_LEN(str) - suffixlen;
    str_modify_keep_cr(str);
    STR_SET_LEN(str, len);
    TERM_FILL(RSTRING_PTR(str) + len, TERM_LEN(str));

    /* A 7-bit code range is kept; any other cached range is discarded. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
|
#downcase(*options) ⇒ String
Returns a string containing the downcased characters in self
:
s = 'Hello World!' # => "Hello World!"
s.downcase # => "hello world!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#downcase!, String#upcase, String#upcase!.
7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 |
# File 'string.c', line 7670
/*
 * Implements String#downcase.
 *
 * Picks one of three strategies after parsing the options: the
 * single-byte routine (when case_option_single_p holds), the ASCII-only
 * mapper, or the general case mapper.  Always returns a new string.
 */
static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    rb_encoding *enc;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        /* Copy the bytes and encoding, then downcase the copy in place. */
        ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy_direct(ret, str);
        downcase_single(ret);
        return ret;
    }
    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        return rb_str_casemap(str, &flags, enc);
    }

    ret = rb_str_new(0, RSTRING_LEN(str));
    rb_str_ascii_casemap(str, ret, &flags, enc);
    return ret;
}
|
#downcase!(*options) ⇒ self?
Downcases the characters in self
; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.downcase! # => "hello world!"
s # => "hello world!"
s.downcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#downcase, String#upcase, String#upcase!.
7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 |
# File 'string.c', line 7631
/*
 * Implements String#downcase!.
 *
 * Downcases +str+ in place using the single-byte routine, the ASCII-only
 * mapper, or the general case mapper.  Returns +str+ when a modification was
 * recorded (ONIGENC_CASE_MODIFIED set in flags), nil otherwise.
 */
static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    rb_encoding *enc;

    /* Options are validated before the modifiability check. */
    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }

    if (flags & ONIGENC_CASE_MODIFIED) {
        return str;
    }
    return Qnil;
}
|
#dump ⇒ String
Returns a printable version of self
, enclosed in double-quotes, with special characters escaped, and with non-printing characters replaced by hexadecimal notation:
"hello \n ''".dump # => "\"hello \\n ''\""
"\f\x00\xff\\\"".dump # => "\"\\f\\x00\\xFF\\\\\\\"\""
Related: String#undump (inverse of String#dump).
6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 |
# File 'string.c', line 6895
/*
 * Implements String#dump.
 *
 * Returns a new string holding a double-quoted, escaped rendering of +str+.
 * The work is done in two passes over the bytes of +str+: the first computes
 * the length of the output, the second writes it.  The two passes must
 * classify every byte identically or the buffer size will not match.
 *
 * For encodings that are not ASCII-compatible, the text
 * .dup.force_encoding("<name>") is appended after the closing quote and the
 * result is tagged with the ASCII-8BIT encoding index; otherwise the result
 * keeps the encoding index of +str+.
 *
 * Raises RuntimeError ("string size too big") if the computed length would
 * exceed LONG_MAX.
 */
VALUE
rb_str_dump(VALUE str)
{
int encidx = rb_enc_get_index(str);
rb_encoding *enc = rb_enc_from_index(encidx);
long len;
const char *p, *pend;
char *q, *qend;
VALUE result;
int u8 = (encidx == rb_utf8_encindex());
static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")";
/* ---- Pass 1: compute the output length. ---- */
len = 2; /* "" */
if (!rb_enc_asciicompat(enc)) {
/* Suffix length with the "%s" placeholder replaced by the encoding name. */
len += strlen(nonascii_suffix) - rb_strlen_lit("%s");
len += strlen(enc->name);
}
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
while (p < pend) {
int clen;
unsigned char c = *p++;
switch (c) {
/* Characters written as a backslash plus one character. */
case '"': case '\\':
case '\n': case '\r':
case '\t': case '\f':
case '\013': case '\010': case '\007': case '\033':
clen = 2;
break;
case '#':
/* '#' is escaped only when IS_EVSTR holds for what follows. */
clen = IS_EVSTR(p, pend) ? 2 : 1;
break;
default:
if (ISPRINT(c)) {
clen = 1;
}
else {
if (u8 && c > 0x7F) { /* \u notation */
int n = rb_enc_precise_mbclen(p-1, pend, enc);
if (MBCLEN_CHARFOUND_P(n)) {
unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
if (cc <= 0xFFFF)
clen = 6; /* \uXXXX */
else if (cc <= 0xFFFFF)
clen = 9; /* \u{XXXXX} */
else
clen = 10; /* \u{XXXXXX} */
/* Skip the remaining bytes of this character (first byte already consumed). */
p += MBCLEN_CHARFOUND_LEN(n)-1;
break;
}
}
clen = 4; /* \xNN */
}
break;
}
/* Guard the running total against overflowing a long. */
if (clen > LONG_MAX - len) {
rb_raise(rb_eRuntimeError, "string size too big");
}
len += clen;
}
/* ---- Pass 2: write the escaped form into a buffer of that length. ---- */
result = rb_str_new(0, len);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
/* qend is the bound handed to snprintf below.
 * NOTE(review): the +1 presumably covers the terminator slot after len bytes -- confirm. */
q = RSTRING_PTR(result); qend = q + len + 1;
*q++ = '"';
while (p < pend) {
unsigned char c = *p++;
if (c == '"' || c == '\\') {
*q++ = '\\';
*q++ = c;
}
else if (c == '#') {
if (IS_EVSTR(p, pend)) *q++ = '\\';
*q++ = '#';
}
else if (c == '\n') {
*q++ = '\\';
*q++ = 'n';
}
else if (c == '\r') {
*q++ = '\\';
*q++ = 'r';
}
else if (c == '\t') {
*q++ = '\\';
*q++ = 't';
}
else if (c == '\f') {
*q++ = '\\';
*q++ = 'f';
}
else if (c == '\013') {
*q++ = '\\';
*q++ = 'v';
}
else if (c == '\010') {
*q++ = '\\';
*q++ = 'b';
}
else if (c == '\007') {
*q++ = '\\';
*q++ = 'a';
}
else if (c == '\033') {
*q++ = '\\';
*q++ = 'e';
}
else if (ISPRINT(c)) {
*q++ = c;
}
else {
/* Non-printable byte: the backslash is written here, the rest below. */
*q++ = '\\';
if (u8) {
/* n is the number of bytes of this character still unread
 * (the first byte was consumed by *p++ above). */
int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
p += n;
if (cc <= 0xFFFF)
snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */
else
snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */
q += strlen(q);
continue;
}
}
snprintf(q, qend-q, "x%02X", c);
/* Advance past "xNN" (the backslash was already counted). */
q += 3;
}
}
*q++ = '"';
*q = '\0';
if (!rb_enc_asciicompat(enc)) {
/* Append the force_encoding suffix and tag the result as ASCII-8BIT. */
snprintf(q, qend-q, nonascii_suffix, enc->name);
encidx = rb_ascii8bit_encindex();
}
/* result from dump is ASCII */
rb_enc_associate_index(result, encidx);
ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
return result;
}
|
#dup ⇒ Object
:nodoc:
1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 |
# File 'string.c', line 1794
/*
 * Implements String#dup.  When BARE_STRING_P(str) holds (the expected
 * case), +str+ is duplicated directly under its own class; otherwise the
 * generic rb_obj_dup is used.
 */
VALUE
rb_str_dup_m(VALUE str)
{
    if (LIKELY(BARE_STRING_P(str))) {
        VALUE klass = rb_obj_class(str);

        return str_duplicate(klass, str);
    }
    return rb_obj_dup(str);
}
|
#each_byte {|byte| ... } ⇒ self #each_byte ⇒ Object
:include: doc/string/each_byte.rdoc
9229 9230 9231 9232 9233 9234 |
# File 'string.c', line 9229
/*
 * Implements String#each_byte.  RETURN_SIZED_ENUMERATOR handles the
 * enumerator case (sized by rb_str_each_byte_size); otherwise
 * rb_str_enumerate_bytes is called with 0 as its array argument.
 */
static VALUE
rb_str_each_byte(VALUE str)
{
    const VALUE no_array = 0;

    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
    return rb_str_enumerate_bytes(str, no_array);
}
|
#each_char {|c| ... } ⇒ self #each_char ⇒ Object
:include: doc/string/each_char.rdoc
9298 9299 9300 9301 9302 9303 |
# File 'string.c', line 9298
/*
 * Implements String#each_char.  RETURN_SIZED_ENUMERATOR handles the
 * enumerator case (sized by rb_str_each_char_size); otherwise
 * rb_str_enumerate_chars is called with 0 as its array argument.
 */
static VALUE
rb_str_each_char(VALUE str)
{
    const VALUE no_array = 0;

    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_chars(str, no_array);
}
|
#each_codepoint {|integer| ... } ⇒ self #each_codepoint ⇒ Object
:include: doc/string/each_codepoint.rdoc
9358 9359 9360 9361 9362 9363 |
# File 'string.c', line 9358
/*
 * Implements String#each_codepoint.
 *
 * The enumerator size callback is rb_str_each_char_size, i.e. the same
 * one used by each_char.
 * NOTE(review): RETURN_SIZED_ENUMERATOR presumably returns early with an
 * enumerator when no block is given; confirm against the macro definition.
 */
static VALUE
rb_str_each_codepoint(VALUE str)
{
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
/* Delegate the iteration itself to the shared codepoint enumerator. */
return rb_str_enumerate_codepoints(str, 0);
}
|
#each_grapheme_cluster {|gc| ... } ⇒ self #each_grapheme_cluster ⇒ Object
:include: doc/string/each_grapheme_cluster.rdoc
9528 9529 9530 9531 9532 9533 |
# File 'string.c', line 9528
/*
 * Implements String#each_grapheme_cluster.
 *
 * NOTE(review): RETURN_SIZED_ENUMERATOR presumably returns early with an
 * enumerator (sized by rb_str_each_grapheme_cluster_size) when no block
 * is given; confirm against the macro definition.
 */
static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
/* 0 is passed where rb_str_grapheme_clusters passes its result array. */
return rb_str_enumerate_grapheme_clusters(str, 0);
}
|
#each_line(line_sep = $/, chomp: false) {|substring| ... } ⇒ self #each_line(line_sep = $/, chomp: false) ⇒ Object
:include: doc/string/each_line.rdoc
9177 9178 9179 9180 9181 9182 |
# File 'string.c', line 9177
/*
 * Implements String#each_line.
 *
 * argc/argv carry the optional arguments of the Ruby method and are
 * forwarded unchanged. No size callback is supplied (last macro
 * argument is 0).
 * NOTE(review): RETURN_SIZED_ENUMERATOR presumably returns early with an
 * enumerator when no block is given; confirm against the macro definition.
 */
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);
/* 0 is passed where rb_str_lines passes its result array. */
return rb_str_enumerate_lines(argc, argv, str, 0);
}
|
#empty? ⇒ Boolean
Returns true
if the length of self
is zero, false
otherwise:
"hello".empty? # => false
" ".empty? # => false
"".empty? # => true
2239 2240 2241 2242 2243 |
# File 'string.c', line 2239
/*
 * Implements String#empty?.
 *
 * Returns Qtrue when RSTRING_LEN(str) is zero, Qfalse otherwise.
 */
static VALUE
rb_str_empty(VALUE str)
{
    const long byte_len = RSTRING_LEN(str);

    return RBOOL(byte_len == 0);
}
|
#encode(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ String #encode(dst_encoding, src_encoding, **enc_opts) ⇒ String
:include: doc/string/encode.rdoc
2905 2906 2907 2908 2909 2910 2911 |
# File 'transcode.c', line 2905
/*
 * Implements String#encode.
 *
 * str_transcode receives the Ruby-level arguments plus a pointer to a
 * VALUE that starts out as str; the (possibly replaced) VALUE and the
 * returned encoding index are then handed to encoded_dup together with
 * the original receiver.
 */
static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE converted = str;
    const int encidx = str_transcode(argc, argv, &converted);

    return encoded_dup(converted, str, encidx);
}
|
#encode!(dst_encoding = Encoding.default_internal, **enc_opts) ⇒ self #encode!(dst_encoding, src_encoding, **enc_opts) ⇒ self
Like #encode, but applies encoding changes to self
; returns self
.
2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 |
# File 'transcode.c', line 2874
/*
 * Implements String#encode!.
 *
 * Raises through rb_check_frozen if str is frozen. Always returns str
 * (directly, or via str_encode_associate after the contents have been
 * replaced).
 */
static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    rb_check_frozen(str);

    VALUE converted = str;
    const int encidx = str_transcode(argc, argv, &converted);

    /* A negative index from str_transcode leaves the receiver untouched. */
    if (encidx < 0) return str;

    if (converted != str) {
        /* A different string came back: move its contents into str. */
        rb_str_shared_replace(str, converted);
        return str_encode_associate(str, encidx);
    }

    /* Same object came back: only the encoding index needs updating. */
    rb_enc_associate_index(str, encidx);
    return str;
}
|
#encoding ⇒ Encoding
Returns the Encoding object that represents the encoding of obj.
1146 1147 1148 1149 1150 1151 1152 1153 1154 |
# File 'encoding.c', line 1146
/*
 * Returns the Encoding object for obj.
 *
 * Raises TypeError ("unknown encoding") when rb_enc_get_index reports a
 * negative index. Otherwise the index is masked with ENC_INDEX_MASK
 * before being turned into an Encoding object.
 */
VALUE
rb_obj_encoding(VALUE obj)
{
    const int encidx = rb_enc_get_index(obj);

    if (encidx < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }

    const int masked = encidx & ENC_INDEX_MASK;
    return rb_enc_from_encoding_index(masked);
}
|
#end_with?(*strings) ⇒ Boolean
:include: doc/string/end_with_p.rdoc
10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 |
# File 'string.c', line 10682
/*
 * Implements String#end_with?.
 *
 * Each argument is converted with StringValue and checked for encoding
 * compatibility with str via rb_enc_check (in that order). Returns Qtrue
 * as soon as one argument is empty or byte-matches the tail of str at a
 * character boundary; returns Qfalse when no argument matches.
 */
static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    for (int idx = 0; idx < argc; idx++) {
        VALUE suffix = argv[idx];

        StringValue(suffix);
        rb_encoding *enc = rb_enc_check(str, suffix);

        /* An empty suffix matches any string. */
        const long suffix_len = RSTRING_LEN(suffix);
        if (suffix_len == 0) return Qtrue;

        /* A suffix longer than the receiver cannot match. */
        const long str_len = RSTRING_LEN(str);
        if (str_len < suffix_len) continue;

        const char *head = RSTRING_PTR(str);
        const char *tail = head + str_len;
        const char *cand = tail - suffix_len;

        /* The candidate must start on a character boundary and match byte-for-byte. */
        if (at_char_boundary(head, cand, tail, enc) &&
            memcmp(cand, RSTRING_PTR(suffix), suffix_len) == 0) {
            return Qtrue;
        }
    }
    return Qfalse;
}
|
#eql?(object) ⇒ Boolean
Returns true
if object
has the same length and content
as self; false otherwise:
s = 'foo'
s.eql?('foo') # => true
s.eql?('food') # => false
s.eql?('FOO') # => false
Returns false if the two strings' encodings are not compatible:
"\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false
3767 3768 3769 3770 3771 3772 3773 |
# File 'string.c', line 3767
/*
 * Implements String#eql?.
 *
 * Identical objects compare equal immediately; a non-String str2 is
 * never equal; anything else is decided by rb_str_eql_internal.
 */
VALUE
rb_str_eql(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    return RB_TYPE_P(str2, T_STRING) ? rb_str_eql_internal(str1, str2) : Qfalse;
}
|
#force_encoding(encoding) ⇒ self
:include: doc/string/force_encoding.rdoc
10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 |
# File 'string.c', line 10913
/*
 * Implements String#force_encoding.
 *
 * Associates str with the encoding designated by enc and returns str.
 * str_modifiable is called first, before the encoding is resolved.
 */
static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    str_modifiable(str);

    rb_encoding *target = rb_to_encoding(enc);
    const int target_idx = rb_enc_to_index(target);

    /* Nothing to do when the string already carries the requested encoding. */
    if (ENCODING_GET(str) != target_idx) {
        rb_enc_associate_index(str, target_idx);

        /* A 7bit coderange stays valid under any ASCII-compatible encoding;
         * in every other case the cached coderange is dropped. */
        const int keep_cr = ENC_CODERANGE(str) == ENC_CODERANGE_7BIT &&
                            target && rb_enc_asciicompat(target);
        if (!keep_cr) {
            ENC_CODERANGE_CLEAR(str);
        }
    }
    return str;
}
|
#freeze ⇒ Object
:nodoc:
3004 3005 3006 3007 3008 3009 3010 |
# File 'string.c', line 3004
/*
 * Implements String#freeze.
 *
 * An already-frozen string is returned untouched. Otherwise the string
 * is resized to its current length and then frozen via rb_obj_freeze.
 * NOTE(review): the resize to the current length presumably releases
 * unused capacity before freezing; confirm against rb_str_resize.
 */
VALUE
rb_str_freeze(VALUE str)
{
    if (!OBJ_FROZEN(str)) {
        rb_str_resize(str, RSTRING_LEN(str));
        return rb_obj_freeze(str);
    }
    return str;
}
|
#getbyte(index) ⇒ Integer?
Returns the byte at zero-based index
as an integer, or nil
if index
is out of range:
s = 'abcde' # => "abcde"
s.getbyte(0) # => 97
s.getbyte(-1) # => 101
s.getbyte(5) # => nil
Related: String#setbyte.
6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 |
# File 'string.c', line 6135
/*
 * Implements String#getbyte.
 *
 * index is converted with NUM2LONG; a negative index counts from the
 * end of the string. Returns the byte as a Fixnum (0..255), or Qnil
 * when the resolved position is outside the string.
 */
VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long pos = NUM2LONG(index);
    const long len = RSTRING_LEN(str);

    if (pos < 0) {
        pos += len;
    }
    if (pos < 0 || pos >= len) {
        return Qnil;
    }

    /* Read as unsigned so bytes >= 0x80 do not come out negative. */
    const unsigned char byte = (unsigned char)RSTRING_PTR(str)[pos];
    return INT2FIX(byte);
}
|
#grapheme_clusters ⇒ Object
:include: doc/string/grapheme_clusters.rdoc
9543 9544 9545 9546 9547 9548 |
# File 'string.c', line 9543
/*
 * Implements String#grapheme_clusters.
 *
 * NOTE(review): WANTARRAY presumably yields the array that will collect
 * the results, with rb_str_strlen(str) as a capacity hint; confirm
 * against the macro definition.
 */
static VALUE
rb_str_grapheme_clusters(VALUE str)
{
VALUE ary = WANTARRAY("grapheme_clusters", rb_str_strlen(str));
/* Same enumerator as each_grapheme_cluster, but with ary passed in. */
return rb_str_enumerate_grapheme_clusters(str, ary);
}
|
#gsub(pattern, replacement) ⇒ Object #gsub(pattern) {|match| ... } ⇒ Object #gsub(pattern) ⇒ Object
Returns a copy of self
with all occurrences of the given pattern
replaced.
See Substitution Methods.
Returns an Enumerator if no replacement
and no block given.
Related: String#sub, String#sub!, String#gsub!.
6051 6052 6053 6054 6055 |
# File 'string.c', line 6051
/*
 * Implements String#gsub by delegating to str_gsub with a final
 * argument of 0 (rb_str_gsub_bang passes 1 for the same parameter).
 */
static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    const int bang = 0;

    return str_gsub(argc, argv, str, bang);
}
|
#gsub!(pattern, replacement) ⇒ self? #gsub!(pattern) {|match| ... } ⇒ self? #gsub!(pattern) ⇒ Object
Performs the specified substring replacement(s) on self
; returns self
if any replacement occurred, nil
otherwise.
See Substitution Methods.
Returns an Enumerator if no replacement
and no block given.
Related: String#sub, String#gsub, String#sub!.
6027 6028 6029 6030 6031 6032 |
# File 'string.c', line 6027
/*
 * Implements String#gsub!.
 *
 * Calls str_modify_keep_cr on the receiver first, then delegates to
 * str_gsub with a final argument of 1 (rb_str_gsub passes 0 for the
 * same parameter).
 */
static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    const int bang = 1;

    str_modify_keep_cr(str);
    return str_gsub(argc, argv, str, bang);
}
|
#hash ⇒ Integer
Returns the integer hash value for self
. The value is based on the length, content and encoding of self
.
Related: Object#hash.
3656 3657 3658 3659 3660 3661 |
# File 'string.c', line 3656
/*
 * Implements String#hash: converts the st_index_t produced by
 * rb_str_hash into a Ruby Integer via ST2FIX.
 */
static VALUE
rb_str_hash_m(VALUE str)
{
    const st_index_t raw_hash = rb_str_hash(str);

    return ST2FIX(raw_hash);
}
|
#hex ⇒ Integer
Interprets the leading substring of self
as a string of hexadecimal digits (with an optional sign and an optional 0x
) and returns the corresponding number; returns zero if there is no such leading substring:
'0x0a'.hex # => 10
'-1234'.hex # => -4660
'0'.hex # => 0
'non-numeric'.hex # => 0
Related: String#oct.
10174 10175 10176 10177 10178 |
# File 'string.c', line 10174
/*
 * Implements String#hex: parses str with rb_str_to_inum in base 16.
 * NOTE(review): the trailing FALSE presumably disables strict validation
 * so that non-numeric input yields 0 instead of raising; confirm against
 * rb_str_to_inum.
 */
static VALUE
rb_str_hex(VALUE str)
{
    const int radix = 16;

    return rb_str_to_inum(str, radix, FALSE);
}
|
#include?(other_string) ⇒ Boolean
Returns true
if self
contains other_string
, false
otherwise:
s = 'foo'
s.include?('f') # => true
s.include?('fo') # => true
s.include?('food') # => false
6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 |
# File 'string.c', line 6552
/*
 * Implements String#include?.
 *
 * arg is converted with StringValue; the result is Qtrue unless
 * rb_str_index (searching from offset 0) reports -1.
 */
VALUE
rb_str_include(VALUE str, VALUE arg)
{
    StringValue(arg);

    const long found_at = rb_str_index(str, arg, 0);
    return RBOOL(found_at != -1);
}
|
#index(substring, offset = 0) ⇒ Integer? #index(regexp, offset = 0) ⇒ Integer?
:include: doc/string/index.rdoc
4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 |
# File 'string.c', line 4028
/*
 * Implements String#index.
 *
 * Accepts one required argument (substring or Regexp) and one optional
 * start offset ("11" in rb_scan_args). Returns the character index of
 * the first match as a Ruby Integer, or Qnil when nothing matches or the
 * offset is out of range.
 */
static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
VALUE sub;
VALUE initpos;
rb_encoding *enc = STR_ENC_GET(str);
long pos;
if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
long slen = str_strlen(str, enc); /* str's enc */
pos = NUM2LONG(initpos);
/* A negative offset counts from the end; reject offsets that still
 * fall before the start or lie past the end of the string. */
if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
if (RB_TYPE_P(sub, T_REGEXP)) {
/* For a Regexp argument the backref is reset on this failure path. */
rb_backref_set(Qnil);
}
return Qnil;
}
}
else {
pos = 0;
}
if (RB_TYPE_P(sub, T_REGEXP)) {
/* Convert the character offset with str_offset before searching. */
pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
enc, single_byte_optimizable(str));
if (rb_reg_search(sub, str, pos, 0) >= 0) {
VALUE match = rb_backref_get();
/* NOTE(review): BEG() appears to read the local named `regs`; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
pos = rb_str_sublen(str, BEG(0));
return LONG2NUM(pos);
}
}
else {
StringValue(sub);
pos = rb_str_index(str, sub, pos);
if (pos >= 0) {
/* Map the position returned by rb_str_index through rb_str_sublen. */
pos = rb_str_sublen(str, pos);
return LONG2NUM(pos);
}
}
return Qnil;
}
|
#replace(other_string) ⇒ self
Replaces the contents of self
with the contents of other_string
:
s = 'foo' # => "foo"
s.replace('bar') # => "bar"
6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 |
# File 'string.c', line 6069
/*
 * Implements String#replace.
 *
 * str_modifiable is checked first. Replacing a string with itself is a
 * no-op that returns str; otherwise str2 is converted with StringValue,
 * the old contents are discarded and str_replace installs the new ones.
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
|
#insert(index, other_string) ⇒ self
Inserts the given other_string
into self
; returns self
.
If the Integer index
is positive, inserts other_string
at offset index
:
'foo'.insert(1, 'bar') # => "fbaroo"
If the Integer index
is negative, counts backward from the end of self
and inserts other_string
at offset index+1
(that is, after self[index]
):
'foo'.insert(-2, 'bar') # => "fobaro"
5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 |
# File 'string.c', line 5544
/*
 * Implements String#insert.
 *
 * An index of -1 appends str2. Any other negative index is shifted by
 * one before the update. Returns str (or the result of rb_str_append in
 * the append case).
 */
static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long pos = NUM2LONG(idx);

    if (pos == -1) {
        return rb_str_append(str, str2);
    }
    if (pos < 0) {
        pos++;
    }

    /* Zero-length replacement at pos, i.e. a pure insertion. */
    rb_str_update(str, pos, 0, str2);
    return str;
}
|
#inspect ⇒ String
Returns a printable version of self
, enclosed in double-quotes, and with special characters escaped:
s = "foo\tbar\tbaz\n"
s.inspect
# => "\"foo\\tbar\\tbaz\\n\""
6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 |
# File 'string.c', line 6781
/*
 * Implements String#inspect.
 *
 * Builds and returns a new string holding a double-quoted, escaped
 * rendering of str. The result's encoding is the default internal
 * encoding, falling back to the default external encoding, and to
 * US-ASCII when that choice is not ASCII-compatible.
 *
 * Throughout the loop, `prev` marks the start of the pending run of
 * source bytes that can be copied to the result verbatim; the run is
 * flushed whenever an escape has to be emitted.
 */
VALUE
rb_str_inspect(VALUE str)
{
int encidx = ENCODING_GET(str);
rb_encoding *enc = rb_enc_from_index(encidx);
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
VALUE result = rb_str_buf_new(0);
rb_encoding *resenc = rb_default_internal_encoding();
int unicode_p = rb_enc_unicode_p(enc);
int asciicompat = rb_enc_asciicompat(enc);
if (resenc == NULL) resenc = rb_default_external_encoding();
if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
rb_enc_associate(result, resenc);
str_buf_cat2(result, "\"");
p = RSTRING_PTR(str); pend = RSTRING_END(str);
prev = p;
while (p < pend) {
unsigned int c, cc;
int n;
n = rb_enc_precise_mbclen(p, pend, enc);
if (!MBCLEN_CHARFOUND_P(n)) {
/* No valid character here: flush the pending run, then emit up to
 * mbminlen raw bytes (clamped to the end of the string) as \xHH. */
if (p > prev) str_buf_cat(result, prev, p - prev);
n = rb_enc_mbminlen(enc);
if (pend < p + n)
n = (int)(pend - p);
while (n--) {
snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
str_buf_cat(result, buf, strlen(buf));
prev = ++p;
}
continue;
}
n = MBCLEN_CHARFOUND_LEN(n);
c = rb_enc_mbc_to_codepoint(p, pend, enc);
p += n;
/* Backslash-escape '"', '\\', and a '#' that is followed by '$', '@' or '{'. */
if ((asciicompat || unicode_p) &&
(c == '"'|| c == '\\' ||
(c == '#' &&
p < pend &&
MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
(cc = rb_enc_codepoint(p,pend,enc),
(cc == '$' || cc == '@' || cc == '{'))))) {
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
str_buf_cat2(result, "\\");
if (asciicompat || enc == resenc) {
/* The character itself stays in the pending run, right after the backslash. */
prev = p - n;
continue;
}
}
/* Map control characters that have a short backslash escape. */
switch (c) {
case '\n': cc = 'n'; break;
case '\r': cc = 'r'; break;
case '\t': cc = 't'; break;
case '\f': cc = 'f'; break;
case '\013': cc = 'v'; break;
case '\010': cc = 'b'; break;
case '\007': cc = 'a'; break;
case 033: cc = 'e'; break;
default: cc = 0; break;
}
if (cc) {
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
buf[0] = '\\';
buf[1] = (char)cc;
str_buf_cat(result, buf, 2);
prev = p;
continue;
}
/* The special casing of 0x85 (NEXT_LINE) here is because
* Oniguruma historically treats it as printable, but it
* doesn't match the print POSIX bracket class or character
* property in regexps.
*
* See Ruby Bug #16842 for details:
* https://bugs.ruby-lang.org/issues/16842
*/
if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
(asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
/* Printable: leave it in the pending verbatim run. */
continue;
}
else {
/* Everything else is flushed and written through rb_str_buf_cat_escaped_char. */
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
rb_str_buf_cat_escaped_char(result, c, unicode_p);
prev = p;
continue;
}
}
/* Flush whatever verbatim run is still pending, then close the quote. */
if (p > prev) str_buf_cat(result, prev, p - prev);
str_buf_cat2(result, "\"");
return result;
}
|
#intern ⇒ Object #to_sym ⇒ Object
Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.
"Koala".intern #=> :Koala
s = 'cat'.to_sym #=> :cat
s == :cat #=> true
s = '@cat'.to_sym #=> :@cat
s == :@cat #=> true
This can also be used to create symbols that cannot be represented using the :xxx
notation.
'cat and dog'.to_sym #=> :"cat and dog"
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 |
# File 'symbol.c', line 860
/*
 * Implements String#intern / String#to_sym.
 *
 * Looks str up in the global symbol table and returns the existing
 * Symbol when there is one; otherwise creates a new symbol. All of the
 * work happens between GLOBAL_SYMBOLS_ENTER and GLOBAL_SYMBOLS_LEAVE.
 * NOTE(review): those macros presumably take and release the global
 * symbol table lock and bring `symbols` into scope; confirm against
 * their definitions.
 */
VALUE
rb_str_intern(VALUE str)
{
VALUE sym;
GLOBAL_SYMBOLS_ENTER(symbols);
{
sym = lookup_str_sym_with_lock(symbols, str);
if (sym) {
// ok
}
else if (USE_SYMBOL_GC) {
rb_encoding *enc = rb_enc_get(str);
rb_encoding *ascii = rb_usascii_encoding();
/* A string that sym_check_asciionly accepts is re-tagged as US-ASCII
 * on a frozen private copy; the caller's string is never modified. */
if (enc != ascii && sym_check_asciionly(str, false)) {
str = rb_str_dup(str);
rb_enc_associate(str, ascii);
OBJ_FREEZE(str);
enc = ascii;
}
else {
str = rb_str_dup(str);
OBJ_FREEZE(str);
}
/* Pass the frozen copy through rb_fstring before allocating the symbol. */
str = rb_fstring(str);
int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
/* A negative type from rb_str_symname_type is recorded as ID_JUNK. */
if (type < 0) type = ID_JUNK;
sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
}
else {
/* Without symbol GC, intern the string as an ID and wrap it. */
ID id = intern_str(str, 0);
sym = ID2SYM(id);
}
}
GLOBAL_SYMBOLS_LEAVE();
return sym;
}
|
#length ⇒ Integer
:include: doc/string/length.rdoc
2207 2208 2209 2210 2211 |
# File 'string.c', line 2207
/*
 * Implements String#length: returns the value computed by
 * str_strlen(str, NULL) as a Ruby Integer.
 */
VALUE
rb_str_length(VALUE str)
{
    const long nchars = str_strlen(str, NULL);

    return LONG2NUM(nchars);
}
|
#lines(line_sep = $/, chomp: false) ⇒ Object
Forms substrings (“lines”) of self
according to the given arguments (see String#each_line for details); returns the lines in an array.
9193 9194 9195 9196 9197 9198 |
# File 'string.c', line 9193
/*
 * Implements String#lines.
 *
 * NOTE(review): WANTARRAY presumably yields the array that will collect
 * the lines (0 being a capacity hint); confirm against the macro
 * definition.
 */
static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
VALUE ary = WANTARRAY("lines", 0);
/* Same enumerator as each_line, but with ary passed in. */
return rb_str_enumerate_lines(argc, argv, str, ary);
}
|
#ljust(size, pad_string = ' ') ⇒ Object
:include: doc/string/ljust.rdoc
Related: String#rjust, String#center.
10518 10519 10520 10521 10522 |
# File 'string.c', line 10518
/*
 * Implements String#ljust by delegating to rb_str_justify with the
 * mode character 'l' (rb_str_rjust passes 'r').
 */
static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    const char jflag = 'l';

    return rb_str_justify(argc, argv, str, jflag);
}
|
#lstrip ⇒ Object
Returns a copy of self
with leading whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.lstrip # => "abc\u0000\t\n\v\f\r "
Related: String#rstrip, String#strip.
9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 |
# File 'string.c', line 9856
/*
 * Implements String#lstrip.
 *
 * lstrip_offset reports how many leading bytes to drop. When it is zero
 * (or negative) a plain duplicate of str is returned; otherwise the
 * remainder of the string is returned via rb_str_subseq.
 */
static VALUE
rb_str_lstrip(VALUE str)
{
    char *start;
    long len;

    RSTRING_GETMEM(str, start, len);

    const long skip = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
    if (skip > 0) {
        return rb_str_subseq(str, skip, len - skip);
    }
    return str_duplicate(rb_cString, str);
}
|
#lstrip! ⇒ self?
Like String#lstrip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#rstrip!, String#strip!.
9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 |
# File 'string.c', line 9818
/*
 * Implements String#lstrip!.
 *
 * Returns Qnil when lstrip_offset finds nothing to remove. Otherwise the
 * remaining bytes are shifted to the front of the buffer, the length is
 * updated, the terminator is rewritten and str is returned.
 */
static VALUE
rb_str_lstrip_bang(VALUE str)
{
    str_modify_keep_cr(str);

    rb_encoding *enc = STR_ENC_GET(str);
    char *start;
    long olen;
    RSTRING_GETMEM(str, start, olen);

    const long loffset = lstrip_offset(str, start, start+olen, enc);
    if (loffset <= 0) {
        return Qnil;
    }

    const long len = olen - loffset;
    /* Source and destination overlap, hence memmove rather than memcpy. */
    memmove(start, start + loffset, len);
    STR_SET_LEN(str, len);
    TERM_FILL(start+len, rb_enc_mbminlen(enc));
    return str;
}
|
#match(pattern, offset = 0) ⇒ MatchData? #match(pattern, offset = 0) {|matchdata| ... } ⇒ Object
Returns a MatchData object (or nil
) based on self
and the given pattern
.
Note: also updates the Regexp global variables (see Regexp, section “Global Variables”).
-
Computes
regexp
by converting pattern
(if not already a Regexp): regexp = Regexp.new(pattern)
-
Computes
matchdata
, which will be either a MatchData object or nil
(see Regexp#match): matchdata = regexp.match(self)
With no block given, returns the computed matchdata
:
'foo'.match('f') # => #<MatchData "f">
'foo'.match('o') # => #<MatchData "o">
'foo'.match('x') # => nil
If Integer argument offset
is given, the search begins at index offset
:
'foo'.match('f', 1) # => nil
'foo'.match('o', 1) # => #<MatchData "o">
With a block given, calls the block with the computed matchdata
and returns the block’s return value:
'foo'.match(/o/) {|matchdata| matchdata } # => #<MatchData "o">
'foo'.match(/x/) {|matchdata| matchdata } # => nil
'foo'.match(/f/, 1) {|matchdata| matchdata } # => nil
4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 |
# File 'string.c', line 4571
/*
 * Implements String#match.
 *
 * The first argument is turned into a pattern with get_pat and sent
 * `match`, with the receiver string substituted into argv[0] and any
 * further arguments forwarded unchanged. When a block is given and the
 * result is not nil, the result is yielded and the block's value is
 * returned; otherwise the result itself is returned.
 */
static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    /* Only the "no arguments" case is checked here. */
    if (argc < 1) {
        rb_check_arity(argc, 1, 2);
    }

    VALUE pattern = argv[0];
    argv[0] = str;      /* argv is reused as the argument list of Regexp#match */

    VALUE result = rb_funcallv(get_pat(pattern), rb_intern("match"), argc, argv);
    if (NIL_P(result) || !rb_block_given_p()) {
        return result;
    }
    return rb_yield(result);
}
|
#match?(pattern, offset = 0) ⇒ Boolean
Returns true
or false
based on whether a match is found for self
and pattern
.
Note: does not update the Regexp global variables (see Regexp, section “Global Variables”).
Computes regexp
by converting pattern
(if not already a Regexp).
regexp = Regexp.new(pattern)
Returns true
if self.match(regexp)
returns a MatchData object, false
otherwise:
'foo'.match?(/o/) # => true
'foo'.match?('o') # => true
'foo'.match?(/x/) # => false
If Integer argument offset
is given, the search begins at index offset
:
'foo'.match?('f', 1) # => false
'foo'.match?('o', 1) # => true
4610 4611 4612 4613 4614 4615 4616 4617 |
# File 'string.c', line 4610
/*
 * Implements String#match?.
 *
 * Requires one or two arguments. The first is converted to a pattern
 * with get_pat; the optional second is the start offset (default 0).
 * The answer comes from rb_reg_match_p.
 */
static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    rb_check_arity(argc, 1, 2);

    VALUE re = get_pat(argv[0]);
    long start = 0;
    if (argc > 1) {
        start = NUM2LONG(argv[1]);
    }
    return rb_reg_match_p(re, str, start);
}
|
#succ ⇒ String
Returns the successor to self
. The successor is calculated by incrementing characters.
The first character to be incremented is the rightmost alphanumeric or, if there are no alphanumerics, the rightmost character:
'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'
The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:
'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"
The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:
'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"
The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:
s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"
The successor to an empty String is a new empty String:
''.succ # => ""
4862 4863 4864 4865 4866 4867 4868 4869 |
# File 'string.c', line 4862
/*
 * Implements String#succ.
 *
 * Builds a fresh string from orig's bytes, copies encoding/coderange
 * information onto it with rb_enc_cr_str_copy_for_substr, and returns
 * whatever str_succ produces for that copy. orig itself is not passed
 * to str_succ.
 */
VALUE
rb_str_succ(VALUE orig)
{
    VALUE copy = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));

    rb_enc_cr_str_copy_for_substr(copy, orig);
    return str_succ(copy);
}
|
#succ! ⇒ self
Equivalent to String#succ, but modifies self
in place; returns self
.
4966 4967 4968 4969 4970 4971 4972 |
# File 'string.c', line 4966
/*
 * Implements String#succ!.
 *
 * Calls rb_str_modify on the receiver, applies str_succ to it, and
 * always returns the receiver (the value returned by str_succ is
 * ignored).
 */
static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    (void)str_succ(str);

    return str;
}
|
#oct ⇒ Integer
Interprets the leading substring of self
as a string of octal digits (with an optional sign) and returns the corresponding number; returns zero if there is no such leading substring:
'123'.oct # => 83
'-377'.oct # => -255
'0377non-numeric'.oct # => 255
'non-numeric'.oct # => 0
If self
starts with 0
, radix indicators are honored; see Kernel#Integer.
Related: String#hex.
10201 10202 10203 10204 10205 |
# File 'string.c', line 10201
/*
 * Implements String#oct: parses str with rb_str_to_inum using base -8.
 * NOTE(review): the negative base presumably tells rb_str_to_inum to
 * default to octal while still honoring radix prefixes (the method
 * documentation says radix indicators are honored); confirm against
 * rb_str_to_inum.
 */
static VALUE
rb_str_oct(VALUE str)
{
    const int radix = -8;

    return rb_str_to_inum(str, radix, FALSE);
}
|
#ord ⇒ Integer
:include: doc/string/ord.rdoc
10346 10347 10348 10349 10350 10351 10352 10353 |
# File 'string.c', line 10346
/*
 * Implements String#ord: returns, as a Ruby Integer, the codepoint that
 * rb_enc_codepoint reads at the start of s in s's own encoding.
 */
static VALUE
rb_str_ord(VALUE s)
{
    const char *head = RSTRING_PTR(s);
    const char *tail = RSTRING_END(s);
    const unsigned int codepoint = rb_enc_codepoint(head, tail, STR_ENC_GET(s));

    return UINT2NUM(codepoint);
}
|
#partition(string_or_regexp) ⇒ Array
:include: doc/string/partition.rdoc
10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 |
# File 'string.c', line 10565
/*
 * Implements String#partition.
 *
 * Returns a three-element array [before, separator, after] built around
 * the first match of sep in str. When nothing matches, returns
 * [copy of str, empty string, empty string].
 */
static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
long pos;
sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
if (rb_reg_search(sep, str, 0, 0) < 0) {
goto failed;
}
VALUE match = rb_backref_get();
/* NOTE(review): BEG()/END() appear to read the local named `regs`; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
pos = BEG(0);
/* From here on `sep` is the matched text, not the pattern. */
sep = rb_str_subseq(str, pos, END(0) - pos);
}
else {
pos = rb_str_index(str, sep, 0);
if (pos < 0) goto failed;
}
/* Slice [0, pos), the separator, and everything after the separator. */
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
failed:
return rb_ary_new3(3, str_duplicate(rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
}
|
#prepend(*other_strings) ⇒ String
Prepends each string in other_strings
to self
and returns self
:
s = 'foo'
s.prepend('bar', 'baz') # => "barbazfoo"
s # => "barbazfoo"
Related: String#concat.
3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 |
# File 'string.c', line 3602
/*
 * Implements String#prepend.
 *
 * With one argument it is spliced in at offset 0 directly. With several
 * arguments they are first concatenated, in order, into a temporary
 * string that carries str's encoding, and that temporary is spliced in
 * at offset 0. With no arguments str is returned unchanged (after the
 * str_modifiable check).
 */
static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
        rb_str_update(str, 0L, 0L, argv[0]);
        return str;
    }

    if (argc > 1) {
        VALUE joined = rb_str_tmp_new(0);

        rb_enc_copy(joined, str);
        for (int k = 0; k < argc; k++) {
            rb_str_append(joined, argv[k]);
        }
        rb_str_update(str, 0L, 0L, joined);
    }
    return str;
}
|
#replace(other_string) ⇒ self
Replaces the contents of self
with the contents of other_string
:
s = 'foo' # => "foo"
s.replace('bar') # => "bar"
6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 |
# File 'string.c', line 6069
/*
 * Implements String#replace.
 *
 * str_modifiable is checked first. Replacing a string with itself is a
 * no-op that returns str; otherwise str2 is converted with StringValue,
 * the old contents are discarded and str_replace installs the new ones.
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
|
#reverse ⇒ String
Returns a new string with the characters from self
in reverse order.
'stressed'.reverse # => "desserts"
6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 |
# File 'string.c', line 6449
/*
 * Implements String#reverse.
 *
 * Strings of at most one byte are simply duplicated. Otherwise a new
 * string of the same byte length is filled from its end towards its
 * start, one character of the source at a time, and receives str's
 * encoding plus the coderange computed below.
 */
static VALUE
rb_str_reverse(VALUE str)
{
    const long len = RSTRING_LEN(str);
    if (len <= 1) return str_duplicate(rb_cString, str);

    rb_encoding *enc = STR_ENC_GET(str);
    VALUE rev = rb_str_new(0, len);
    char *src = RSTRING_PTR(str);
    char *src_end = RSTRING_END(str);
    char *dst = RSTRING_END(rev);   /* write cursor, moves backwards */
    int cr = ENC_CODERANGE(str);

    if (single_byte_optimizable(str)) {
        /* One byte per character: a plain byte reversal. */
        for (; src < src_end; src++) {
            *--dst = *src;
        }
    }
    else if (cr == ENC_CODERANGE_VALID) {
        /* Known-valid multibyte data: the fast length routine is used. */
        while (src < src_end) {
            const int clen = rb_enc_fast_mbclen(src, src_end, enc);

            dst -= clen;
            memcpy(dst, src, clen);
            src += clen;
        }
    }
    else {
        /* Start optimistic and downgrade to UNKNOWN on the first
         * multibyte character or byte with the high bit set. */
        cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
        while (src < src_end) {
            const int clen = rb_enc_mbclen(src, src_end, enc);

            if (clen > 1 || (*src & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
            dst -= clen;
            memcpy(dst, src, clen);
            src += clen;
        }
    }

    STR_SET_LEN(rev, len);
    str_enc_copy_direct(rev, str);
    ENC_CODERANGE_SET(rev, cr);
    return rev;
}
|
#reverse! ⇒ self
Returns self
with its characters reversed:
s = 'stressed'
s.reverse! # => "desserts"
s # => "desserts"
6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 |
# File 'string.c', line 6512
/*
 * Implements String#reverse!.
 *
 * Strings of at most one byte only go through str_modify_keep_cr.
 * Single-byte-optimizable strings are reversed in place by swapping
 * bytes from both ends; all others are replaced by the result of
 * rb_str_reverse. Always returns str.
 */
static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) <= 1) {
        str_modify_keep_cr(str);
        return str;
    }

    if (!single_byte_optimizable(str)) {
        str_shared_replace(str, rb_str_reverse(str));
        return str;
    }

    str_modify_keep_cr(str);

    /* Two cursors walking towards each other, swapping as they go. */
    char *lo = RSTRING_PTR(str);
    char *hi = RSTRING_END(str) - 1;
    while (lo < hi) {
        const char tmp = *lo;

        *lo++ = *hi;
        *hi-- = tmp;
    }
    return str;
}
|
#rindex(substring, offset = self.length) ⇒ Integer? #rindex(regexp, offset = self.length) ⇒ Integer?
Returns the Integer index of the last occurrence of the given substring
, or nil
if none found:
'foo'.rindex('f') # => 0
'foo'.rindex('o') # => 2
'foo'.rindex('oo') # => 1
'foo'.rindex('ooo') # => nil
Returns the Integer index of the last match for the given Regexp regexp
, or nil
if none found:
'foo'.rindex(/f/) # => 0
'foo'.rindex(/o/) # => 2
'foo'.rindex(/oo/) # => 1
'foo'.rindex(/ooo/) # => nil
The last match means starting at the possible last position, not the last of longest matches.
'foo'.rindex(/o+/) # => 2
$~ #=> #<MatchData "o">
To get the last longest match, combine the pattern with a negative lookbehind:
'foo'.rindex(/(?<!o)o+/) # => 1
$~ #=> #<MatchData "oo">
Or use String#index with a negative lookahead:
'foo'.index(/o+(?!.*o)/) # => 1
$~ #=> #<MatchData "oo">
Integer argument offset
, if given and non-negative, specifies the maximum starting position in the
string at which to end the search:
'foo'.rindex('o', 0) # => nil
'foo'.rindex('o', 1) # => 1
'foo'.rindex('o', 2) # => 2
'foo'.rindex('o', 3) # => 2
If offset
is a negative Integer, the maximum starting position in the string to end the search is the sum of the string’s length and offset
:
'foo'.rindex('o', -1) # => 2
'foo'.rindex('o', -2) # => 1
'foo'.rindex('o', -3) # => nil
'foo'.rindex('o', -4) # => nil
Related: String#index.
4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 |
# File 'string.c', line 4314
/*
 * Implements String#rindex.
 *
 * Accepts one required argument (substring or Regexp) and one optional
 * offset ("11" in rb_scan_args). Returns the character index of the last
 * match starting at or before the offset as a Ruby Integer, or Qnil when
 * nothing matches.
 */
static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
VALUE sub;
VALUE initpos;
rb_encoding *enc = STR_ENC_GET(str);
long pos, len = str_strlen(str, enc); /* str's enc */
if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
pos = NUM2LONG(initpos);
/* A negative offset counts from the end; fail if it still falls before the start. */
if (pos < 0 && (pos += len) < 0) {
if (RB_TYPE_P(sub, T_REGEXP)) {
/* For a Regexp argument the backref is reset on this failure path. */
rb_backref_set(Qnil);
}
return Qnil;
}
/* Offsets past the end are clamped to the string length. */
if (pos > len) pos = len;
}
else {
pos = len;
}
if (RB_TYPE_P(sub, T_REGEXP)) {
/* enc = rb_enc_check(str, sub); */
/* Convert the character offset with str_offset before searching. */
pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
enc, single_byte_optimizable(str));
/* Final argument 1 here, where rb_str_index_m passes 0. */
if (rb_reg_search(sub, str, pos, 1) >= 0) {
VALUE match = rb_backref_get();
/* NOTE(review): BEG() appears to read the local named `regs`; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
pos = rb_str_sublen(str, BEG(0));
return LONG2NUM(pos);
}
}
else {
StringValue(sub);
pos = rb_str_rindex(str, sub, pos);
if (pos >= 0) {
/* Map the position returned by rb_str_rindex through rb_str_sublen. */
pos = rb_str_sublen(str, pos);
return LONG2NUM(pos);
}
}
return Qnil;
}
|
#rjust(size, pad_string = ' ') ⇒ Object
:include: doc/string/rjust.rdoc
Related: String#ljust, String#center.
10534 10535 10536 10537 10538 |
# File 'string.c', line 10534
/*
 * Implements String#rjust by delegating to rb_str_justify with the
 * mode character 'r' (rb_str_ljust passes 'l').
 */
static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    const char jflag = 'r';

    return rb_str_justify(argc, argv, str, jflag);
}
|
#rpartition(sep) ⇒ Array
:include: doc/string/rpartition.rdoc
10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 |
# File 'string.c', line 10602
/*
 * Implements String#rpartition.
 *
 * Returns a three-element array [before, separator, after] built around
 * the last match of sep in str. When nothing matches, returns
 * [empty string, empty string, copy of str].
 */
static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
/* Start the backward search from the end of the string. */
long pos = RSTRING_LEN(str);
sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
if (rb_reg_search(sep, str, pos, 1) < 0) {
goto failed;
}
VALUE match = rb_backref_get();
/* NOTE(review): BEG()/END() appear to read the local named `regs`; do not rename it. */
struct re_registers *regs = RMATCH_REGS(match);
pos = BEG(0);
/* From here on `sep` is the matched text, not the pattern. */
sep = rb_str_subseq(str, pos, END(0) - pos);
}
else {
/* The starting position is mapped through rb_str_sublen before rb_str_rindex is called. */
pos = rb_str_sublen(str, pos);
pos = rb_str_rindex(str, sep, pos);
if (pos < 0) {
goto failed;
}
}
/* Slice [0, pos), the separator, and everything after the separator. */
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
failed:
return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(rb_cString, str));
}
|
#rstrip ⇒ Object
Returns a copy of the receiver with trailing whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.rstrip # => "\u0000\t\n\v\f\r abc"
Related: String#lstrip, String#strip.
9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 |
# File 'string.c', line 9943
/*
 * Implements String#rstrip.
 *
 * rstrip_offset reports how many trailing bytes to drop. When it is
 * zero (or negative) a plain duplicate of str is returned; otherwise
 * the leading part of the string is returned via rb_str_subseq.
 */
static VALUE
rb_str_rstrip(VALUE str)
{
    rb_encoding *enc = STR_ENC_GET(str);
    char *start;
    long olen;

    RSTRING_GETMEM(str, start, olen);

    const long drop = rstrip_offset(str, start, start+olen, enc);
    if (drop > 0) {
        return rb_str_subseq(str, 0, olen-drop);
    }
    return str_duplicate(rb_cString, str);
}
|
#rstrip! ⇒ self?
Like String#rstrip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#lstrip!, String#strip!.
9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 |
# File 'string.c', line 9906
/*
 * Implements String#rstrip!.
 *
 * Returns Qnil when rstrip_offset finds nothing to remove. Otherwise
 * the length is shortened in place, the terminator is rewritten at the
 * new end and str is returned.
 */
static VALUE
rb_str_rstrip_bang(VALUE str)
{
    str_modify_keep_cr(str);

    rb_encoding *enc = STR_ENC_GET(str);
    char *start;
    long olen;
    RSTRING_GETMEM(str, start, olen);

    const long roffset = rstrip_offset(str, start, start+olen, enc);
    if (roffset <= 0) {
        return Qnil;
    }

    const long len = olen - roffset;
    STR_SET_LEN(str, len);
    TERM_FILL(start+len, rb_enc_mbminlen(enc));
    return str;
}
|
#scan(string_or_regexp) ⇒ Array #scan(string_or_regexp) {|matches| ... } ⇒ self
Matches a pattern against self
; the pattern is:
-
string_or_regexp
itself, if it is a Regexp.
-
Regexp.quote(string_or_regexp)
, if string_or_regexp
is a string.
Iterates through self
, generating a collection of matching results:
-
If the pattern contains no groups, each result is the matched string,
$&
.
-
If the pattern contains groups, each result is an array containing one entry per group.
With no block given, returns an array of the results:
s = 'cruel world'
s.scan(/\w+/) # => ["cruel", "world"]
s.scan(/.../) # => ["cru", "el ", "wor"]
s.scan(/(...)/) # => [["cru"], ["el "], ["wor"]]
s.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]]
With a block given, calls the block with each result; returns self
:
s.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
s.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"
Output:
<<cruel>> <<world>>
rceu lowlr
10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 |
# File 'string.c', line 10122
/*
 * String#scan: repeatedly applies +pat+ to +str+ via scan_once().
 *
 * Without a block, every result is collected into a new Array, which is
 * returned.  With a block, each result is yielded and +str+ is returned.
 *
 * After the loop the pattern is searched once more, starting from the
 * offset at which the final successful scan began, so that the backref
 * is left describing the last match.  In the array form the backref is
 * reset to nil when nothing matched.
 */
static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE hit;
    long pos = 0;
    long final_from = -1, latest = 0;
    /* captured up front so str_mod_check() can compare after each yield */
    char *orig_ptr = RSTRING_PTR(str);
    long orig_len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);

    if (rb_block_given_p()) {
        for (;;) {
            hit = scan_once(str, pat, &pos, 1);
            if (NIL_P(hit)) break;
            final_from = latest;
            latest = pos;
            rb_yield(hit);
            str_mod_check(str, orig_ptr, orig_len);
        }
        if (final_from >= 0) rb_pat_search(pat, str, final_from, 1);
        return str;
    }

    VALUE collected = rb_ary_new();
    for (;;) {
        hit = scan_once(str, pat, &pos, 0);
        if (NIL_P(hit)) break;
        final_from = latest;
        latest = pos;
        rb_ary_push(collected, hit);
    }
    if (final_from < 0) {
        rb_backref_set(Qnil);
    }
    else {
        rb_pat_search(pat, str, final_from, 1);
    }
    return collected;
}
|
#scrub(replacement_string = default_replacement) ⇒ Object #scrub {|bytes| ... } ⇒ Object
:include: doc/string/scrub.rdoc
11345 11346 11347 11348 11349 11350 11351 |
# File 'string.c', line 11345
/*
 * String#scrub: accepts zero or one argument (the replacement).
 *
 * Delegates to rb_str_scrub(); when that returns nil, a duplicate of
 * +str+ is returned instead.
 */
static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = Qnil;

    if (argc) {
        rb_check_arity(argc, 0, 1);
        repl = argv[0];
    }

    VALUE scrubbed = rb_str_scrub(str, repl);
    if (NIL_P(scrubbed)) {
        return str_duplicate(rb_cString, str);
    }
    return scrubbed;
}
|
#scrub! ⇒ self #scrub!(replacement_string = default_replacement) ⇒ self #scrub! {|bytes| ... } ⇒ self
Like String#scrub, except that any replacements are made in self
.
11362 11363 11364 11365 11366 11367 11368 11369 |
# File 'string.c', line 11362
/*
 * String#scrub!: accepts zero or one argument (the replacement).
 *
 * Delegates to rb_str_scrub(); when that yields a non-nil string, the
 * contents of +str+ are replaced with it.  Always returns +str+.
 */
static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = Qnil;

    if (argc) {
        rb_check_arity(argc, 0, 1);
        repl = argv[0];
    }

    VALUE scrubbed = rb_str_scrub(str, repl);
    if (NIL_P(scrubbed)) {
        return str;
    }
    rb_str_replace(str, scrubbed);
    return str;
}
|
#setbyte(index, integer) ⇒ Integer
Sets the byte at zero-based index
to integer
; returns integer
:
s = 'abcde' # => "abcde"
s.setbyte(0, 98) # => 98
s # => "bbcde"
Related: String#getbyte.
6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 |
# File 'string.c', line 6160
/*
 * String#setbyte: stores the low 8 bits of +value+ at byte offset +index+
 * of +str+ and returns the original +value+ argument.
 *
 * A negative +index+ counts from the end of the string.  Raises IndexError
 * when +index+ lies outside -len...len.
 *
 * For non-embedded strings whose cached coderange is 7BIT or VALID, the
 * coderange is adjusted by re-examining only the character containing the
 * written byte; in every other case the cached coderange is cleared.
 */
static VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
long pos = NUM2LONG(index);
long len = RSTRING_LEN(str);
char *ptr, *head, *left = 0;
rb_encoding *enc;
int cr = ENC_CODERANGE_UNKNOWN, width, nlen;
if (pos < -len || len <= pos)
rb_raise(rb_eIndexError, "index %ld out of string", pos);
/* negative index: offset from the end */
if (pos < 0)
pos += len;
/* convert to Integer and keep only the low 8 bits */
VALUE v = rb_to_int(value);
VALUE w = rb_int_and(v, INT2FIX(0xff));
char byte = (char)(NUM2INT(w) & 0xFF);
if (!str_independent(str))
str_make_independent(str);
enc = STR_ENC_GET(str);
head = RSTRING_PTR(str);
ptr = &head[pos];
if (!STR_EMBED_P(str)) {
cr = ENC_CODERANGE(str);
switch (cr) {
case ENC_CODERANGE_7BIT:
left = ptr;
*ptr = byte;
/* an ASCII byte leaves the cached 7BIT coderange untouched */
if (ISASCII(byte)) goto end;
/* non-ASCII byte: re-check the character that starts at the written position */
nlen = rb_enc_precise_mbclen(left, head+len, enc);
if (!MBCLEN_CHARFOUND_P(nlen))
ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
else
ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
goto end;
case ENC_CODERANGE_VALID:
/* measure the enclosing character before and after the write */
left = rb_enc_left_char_head(head, ptr, head+len, enc);
width = rb_enc_precise_mbclen(left, head+len, enc);
*ptr = byte;
nlen = rb_enc_precise_mbclen(left, head+len, enc);
if (!MBCLEN_CHARFOUND_P(nlen))
ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
/* character length changed, or an ASCII byte was written: drop the cached coderange */
else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
ENC_CODERANGE_CLEAR(str);
goto end;
}
}
/* embedded string, or a coderange not handled above: clear the cache and write */
ENC_CODERANGE_CLEAR(str);
*ptr = byte;
end:
return value;
}
|
#length ⇒ Integer
:include: doc/string/length.rdoc
2207 2208 2209 2210 2211 |
# File 'string.c', line 2207
/*
 * String#length: returns the value computed by str_strlen() for +str+
 * (called with a NULL encoding argument), converted to a Ruby Integer.
 */
VALUE
rb_str_length(VALUE str)
{
    const long count = str_strlen(str, NULL);

    return LONG2NUM(count);
}
|
#[](index) ⇒ nil #[](start, length) ⇒ nil #[](range) ⇒ nil #[](regexp, capture = 0) ⇒ nil #[](substring) ⇒ nil
Returns the substring of self
specified by the arguments. See examples at String Slices.
5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 |
# File 'string.c', line 5273
/*
 * String#[] / String#slice: argument dispatcher.
 *
 * One argument:  forwarded to rb_str_aref().
 * Two arguments: (regexp, capture) goes to rb_str_subpat(); anything else
 *                is treated as (start, length), both converted with
 *                NUM2LONG, and goes to rb_str_substr().
 * Any other argument count is rejected by rb_check_arity().
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return rb_str_aref(str, argv[0]);
    }

    VALUE first = argv[0];
    if (RB_TYPE_P(first, T_REGEXP)) {
        return rb_str_subpat(str, first, argv[1]);
    }

    long beg = NUM2LONG(first);
    long len = NUM2LONG(argv[1]);
    return rb_str_substr(str, beg, len);
}
|
#slice!(index) ⇒ nil #slice!(start, length) ⇒ nil #slice!(range) ⇒ nil #slice!(regexp, capture = 0) ⇒ nil #slice!(substring) ⇒ nil
Removes and returns the substring of self
specified by the arguments. See String Slices.
A few examples:
string = "This is a string"
string.slice!(2) #=> "i"
string.slice!(3..6) #=> " is "
string.slice!(/s.*t/) #=> "sa st"
string.slice!("r") #=> "r"
string #=> "Thing"
5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 |
# File 'string.c', line 5582
/*
 * String#slice!: removes the selected portion from +str+ and returns it,
 * or returns nil when the arguments select nothing.
 *
 * Accepts one or two arguments.  The first argument decides how the byte
 * range (beg, len) is found; every path then funnels into the shared
 * labels at the bottom:
 *   num_index - turn a (beg, len) pair into a byte offset via rb_str_subpos()
 *   subseq    - build the result string from the byte range
 *   squash    - delete the byte range from +str+
 */
static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
VALUE result = Qnil;
VALUE indx;
long beg, len = 1;
char *p;
rb_check_arity(argc, 1, 2);
str_modify_keep_cr(str);
indx = argv[0];
if (RB_TYPE_P(indx, T_REGEXP)) {
/* regexp (optionally with a capture reference in argv[1]) */
if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
VALUE match = rb_backref_get();
struct re_registers *regs = RMATCH_REGS(match);
int nth = 0;
if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
/* negative capture number: count back from the number of registers */
if ((nth += regs->num_regs) <= 0) return Qnil;
}
else if (nth >= regs->num_regs) return Qnil;
beg = BEG(nth);
len = END(nth) - beg;
goto subseq;
}
else if (argc == 2) {
/* (start, length) */
beg = NUM2LONG(indx);
len = NUM2LONG(argv[1]);
goto num_index;
}
else if (FIXNUM_P(indx)) {
/* single Fixnum index; len keeps its initial value of 1 */
beg = FIX2LONG(indx);
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
if (!len) return Qnil;
beg = p - RSTRING_PTR(str);
goto subseq;
}
else if (RB_TYPE_P(indx, T_STRING)) {
/* substring: the result is a duplicate of the argument itself */
beg = rb_str_index(str, indx, 0);
if (beg == -1) return Qnil;
len = RSTRING_LEN(indx);
result = str_duplicate(rb_cString, indx);
goto squash;
}
else {
/* try the argument as a Range first */
switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
case Qnil:
return Qnil;
case Qfalse:
/* Qfalse result: fall back to treating the argument as a numeric index */
beg = NUM2LONG(indx);
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
if (!len) return Qnil;
beg = p - RSTRING_PTR(str);
goto subseq;
default:
goto num_index;
}
}
num_index:
if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
beg = p - RSTRING_PTR(str);
subseq:
/* from here on beg and len are byte offsets into str */
result = rb_str_new(RSTRING_PTR(str)+beg, len);
rb_enc_cr_str_copy_for_substr(result, str);
squash:
if (len > 0) {
if (beg == 0) {
/* removing a prefix */
rb_str_drop_bytes(str, len);
}
else {
char *sptr = RSTRING_PTR(str);
long slen = RSTRING_LEN(str);
if (beg + len > slen) /* pathological check */
len = slen - beg;
/* slide the tail down over the removed range, then shrink and re-terminate */
memmove(sptr + beg,
sptr + beg + len,
slen - (beg + len));
slen -= len;
STR_SET_LEN(str, slen);
TERM_FILL(&sptr[slen], TERM_LEN(str));
}
}
return result;
}
|
#split(field_sep = $;, limit = nil) ⇒ Array #split(field_sep = $;, limit = nil) {|substring| ... } ⇒ self
:include: doc/string/split.rdoc
8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 |
# File 'string.c', line 8751
/*
 * String#split: splits +str+ on an optional separator with an optional
 * limit.  Without a block the pieces are returned in a new Array; with a
 * block, +str+ is returned (pieces are handed to split_string() together
 * with +result+).
 *
 * The separator is classified into one of four strategies:
 *   SPLIT_TYPE_AWK    - runs of whitespace (no separator and $; is nil)
 *   SPLIT_TYPE_STRING - literal string search
 *   SPLIT_TYPE_CHARS  - one piece per character
 *   SPLIT_TYPE_REGEXP - regexp search (captures are emitted as pieces too)
 */
static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
VALUE spat;
VALUE limit;
split_type_t split_type;
long beg, end, i = 0, empty_count = -1;
int lim = 0;
VALUE result, tmp;
/*
 * Qfalse when a block is given, Qnil otherwise.  The later `if (result)`
 * and `!result` tests rely on Qfalse being C-false and Qnil being C-true.
 */
result = rb_block_given_p() ? Qfalse : Qnil;
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
lim = NUM2INT(limit);
/* a non-positive limit is treated as "no limit" (lim itself keeps its value) */
if (lim <= 0) limit = Qnil;
else if (lim == 1) {
/* limit 1: at most one piece, which is a duplicate of the whole string */
if (RSTRING_LEN(str) == 0)
return result ? rb_ary_new2(0) : str;
tmp = str_duplicate(rb_cString, str);
if (!result) {
rb_yield(tmp);
return str;
}
return rb_ary_new3(1, tmp);
}
i = 1;
}
/* NOTE(review): empty_count is threaded through split_string(); presumably 0 enables trailing-empty-piece handling there -- confirm against split_string */
if (NIL_P(limit) && !lim) empty_count = 0;
enc = STR_ENC_GET(str);
split_type = SPLIT_TYPE_REGEXP;
if (!NIL_P(spat)) {
spat = get_pat_quoted(spat, 0);
}
else if (NIL_P(spat = rb_fs)) {
/* no separator argument and rb_fs ($;) is nil */
split_type = SPLIT_TYPE_AWK;
}
else if (!(spat = rb_fs_check(spat))) {
rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
}
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "$; is set to non-nil value");
}
if (split_type != SPLIT_TYPE_AWK) {
switch (BUILTIN_TYPE(spat)) {
case T_REGEXP:
rb_reg_options(spat); /* check if uninitialized */
tmp = RREGEXP_SRC(spat);
split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
if (split_type == SPLIT_TYPE_AWK) {
/* AWK classification of a regexp source: split on the source string literally instead */
spat = tmp;
split_type = SPLIT_TYPE_STRING;
}
break;
case T_STRING:
mustnot_broken(spat);
split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
break;
default:
UNREACHABLE_RETURN(Qnil);
}
}
/* emit the piece str[beg, len] (byte offsets) and carry empty_count forward */
#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
beg = 0;
char *ptr = RSTRING_PTR(str);
char *eptr = RSTRING_END(str);
if (split_type == SPLIT_TYPE_AWK) {
char *bptr = ptr;
/* skip == 1 while consuming whitespace before the next piece */
int skip = 1;
unsigned int c;
if (result) result = rb_ary_new();
end = beg;
if (is_ascii_string(str)) {
/* byte-at-a-time scan */
while (ptr < eptr) {
c = (unsigned char)*ptr++;
if (skip) {
if (ascii_isspace(c)) {
beg = ptr - bptr;
}
else {
end = ptr - bptr;
skip = 0;
if (!NIL_P(limit) && lim <= i) break;
}
}
else if (ascii_isspace(c)) {
SPLIT_STR(beg, end-beg);
skip = 1;
beg = ptr - bptr;
if (!NIL_P(limit)) ++i;
}
else {
end = ptr - bptr;
}
}
}
else {
/* same state machine, advancing one codepoint at a time */
while (ptr < eptr) {
int n;
c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
ptr += n;
if (skip) {
if (rb_isspace(c)) {
beg = ptr - bptr;
}
else {
end = ptr - bptr;
skip = 0;
if (!NIL_P(limit) && lim <= i) break;
}
}
else if (rb_isspace(c)) {
SPLIT_STR(beg, end-beg);
skip = 1;
beg = ptr - bptr;
if (!NIL_P(limit)) ++i;
}
else {
end = ptr - bptr;
}
}
}
}
else if (split_type == SPLIT_TYPE_STRING) {
char *str_start = ptr;
char *substr_start = ptr;
char *sptr = RSTRING_PTR(spat);
long slen = RSTRING_LEN(spat);
if (result) result = rb_ary_new();
mustnot_broken(str);
enc = rb_enc_check(str, spat);
while (ptr < eptr &&
(end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
/* Check we are at the start of a char */
char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
if (t != ptr + end) {
/* hit is inside a character: resume the search at the next character head */
ptr = t;
continue;
}
SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
ptr += end + slen;
substr_start = ptr;
if (!NIL_P(limit) && lim <= ++i) break;
}
beg = ptr - str_start;
}
else if (split_type == SPLIT_TYPE_CHARS) {
char *str_start = ptr;
int n;
if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
mustnot_broken(str);
enc = rb_enc_get(str);
/* one piece per character of n bytes */
while (ptr < eptr &&
(n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
SPLIT_STR(ptr - str_start, n);
ptr += n;
if (!NIL_P(limit) && lim <= ++i) break;
}
beg = ptr - str_start;
}
else {
if (result) result = rb_ary_new();
long len = RSTRING_LEN(str);
long start = beg;
long idx;
/* set after skipping past a zero-width match at the search position */
int last_null = 0;
struct re_registers *regs;
VALUE match = 0;
/* the loop's step expression un-busies the match and restores it as the backref */
for (; rb_reg_search(spat, str, start, 0) >= 0;
(match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
match = rb_backref_get();
if (!result) rb_match_busy(match);
regs = RMATCH_REGS(match);
end = BEG(0);
if (start == end && BEG(0) == END(0)) {
/* zero-width match exactly at the search position */
if (!ptr) {
SPLIT_STR(0, 0);
break;
}
else if (last_null == 1) {
/* second pass: emit the single character at beg */
SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
beg = start;
}
else {
/* first pass: step the search position forward by one character and retry */
if (start == len)
start++;
else
start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
last_null = 1;
continue;
}
}
else {
SPLIT_STR(beg, end-beg);
beg = start = END(0);
}
last_null = 0;
/* each participating capture group is emitted as its own piece */
for (idx=1; idx < regs->num_regs; idx++) {
if (BEG(idx) == -1) continue;
SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
}
if (!NIL_P(limit) && lim <= ++i) break;
}
if (match) rb_match_unbusy(match);
}
/* emit whatever remains after the last separator */
if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
SPLIT_STR(beg, RSTRING_LEN(str)-beg);
}
return result ? result : str;
}
|
#squeeze(*selectors) ⇒ Object
Returns a copy of self
with characters specified by selectors
“squeezed” (see Multiple Character Selectors):
“Squeezed” means that each multiple-character run of a selected character is squeezed down to a single character; with no arguments given, squeezes all characters:
"yellow moon".squeeze #=> "yelow mon"
" now is the".squeeze(" ") #=> " now is the"
"putters shoot balls".squeeze("m-z") #=> "puters shot balls"
8511 8512 8513 8514 8515 8516 8517 |
# File 'string.c', line 8511
/*
 * String#squeeze: non-destructive form.  Duplicates +str+, applies
 * rb_str_squeeze_bang() to the duplicate and returns the duplicate
 * (whether or not anything was squeezed).
 */
static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
    VALUE copy = str_duplicate(rb_cString, str);

    rb_str_squeeze_bang(argc, argv, copy);
    return copy;
}
|
#squeeze!(*selectors) ⇒ self?
Like String#squeeze, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 |
# File 'string.c', line 8418
/*
 * String#squeeze!: collapses runs of the same character in +str+ in place.
 *
 * With no arguments every character is eligible; otherwise each argument
 * is fed to tr_setup_table() and only characters selected by the resulting
 * table(s) are collapsed.
 *
 * Returns +str+ when its length changed, nil otherwise (including when
 * +str+ is empty).
 */
static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
char squeez[TR_TABLE_SIZE];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0;
/* s: read cursor, send: end of input, t: write cursor (t <= s throughout) */
unsigned char *s, *send, *t;
int i, modify = 0;
int ascompat, singlebyte = single_byte_optimizable(str);
unsigned int save;
if (argc == 0) {
enc = STR_ENC_GET(str);
}
else {
for (i=0; i<argc; i++) {
VALUE s = argv[i];
StringValue(s);
enc = rb_enc_check(str, s);
/* the byte-wise fast path needs every selector to be single-byte optimizable too */
if (singlebyte && !single_byte_optimizable(s))
singlebyte = 0;
tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
}
}
str_modify_keep_cr(str);
s = t = (unsigned char *)RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
send = (unsigned char *)RSTRING_END(str);
/* save holds the previously kept character; -1 wraps to a value used as "none yet" */
save = -1;
ascompat = rb_enc_asciicompat(enc);
if (singlebyte) {
while (s < send) {
unsigned int c = *s++;
/* keep c unless it repeats the previous character and is selected for squeezing */
if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
}
}
else {
while (s < send) {
unsigned int c;
int clen;
if (ascompat && (c = *s) < 0x80) {
/* single ASCII byte in an ASCII-compatible encoding */
if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
s++;
}
else {
c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
/* only re-encode when the character actually has to move */
if (t != s) rb_enc_mbcput(c, t, enc);
save = c;
t += clen;
}
s += clen;
}
}
}
TERM_FILL((char *)t, TERM_LEN(str));
/* a shorter result means at least one character was dropped */
if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
modify = 1;
}
if (modify) return str;
return Qnil;
}
|
#start_with?(*string_or_regexp) ⇒ Boolean
:include: doc/string/start_with_p.rdoc
10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 |
# File 'string.c', line 10642
/*
 * String#start_with?: returns true as soon as any argument matches the
 * beginning of +str+, false when none does.
 *
 * Regexp arguments are tested with rb_reg_start_with_p().  Every other
 * argument is converted with StringValue, checked for encoding
 * compatibility, and compared bytewise against the head of +str+; an
 * empty prefix matches immediately, and a prefix that would end in the
 * middle of a character of +str+ is rejected.
 */
static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
    for (int idx = 0; idx < argc; idx++) {
        VALUE prefix = argv[idx];

        if (RB_TYPE_P(prefix, T_REGEXP)) {
            if (rb_reg_start_with_p(prefix, str)) return Qtrue;
            continue;
        }

        StringValue(prefix);
        rb_encoding *enc = rb_enc_check(str, prefix);

        long prefix_len = RSTRING_LEN(prefix);
        if (prefix_len == 0) return Qtrue;

        long str_len = RSTRING_LEN(str);
        if (str_len < prefix_len) continue;

        const char *head = RSTRING_PTR(str);
        const char *tail = head + str_len;
        const char *cut = head + prefix_len;

        /* the prefix must end on a character boundary of str */
        if (!at_char_right_boundary(head, cut, tail, enc)) continue;

        if (memcmp(head, RSTRING_PTR(prefix), prefix_len) == 0) return Qtrue;
    }
    return Qfalse;
}
|
#strip ⇒ Object
Returns a copy of the receiver with leading and trailing whitespace removed; see Whitespace in Strings:
whitespace = "\x00\t\n\v\f\r "
s = whitespace + 'abc' + whitespace
s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
s.strip # => "abc"
Related: String#lstrip, String#rstrip.
10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 |
# File 'string.c', line 10011
/*
 * String#strip: returns a new string without the leading and trailing
 * bytes reported by lstrip_offset() and rstrip_offset().
 *
 * When neither offset is positive a plain duplicate of +str+ is returned;
 * otherwise the result is the subsequence between the two offsets.
 */
static VALUE
rb_str_strip(VALUE str)
{
    char *head;
    long total;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, head, total);

    long lead = lstrip_offset(str, head, head + total, enc);
    /* the right-hand scan starts after the leading part already found */
    long trail = rstrip_offset(str, head + lead, head + total, enc);

    if (lead > 0 || trail > 0) {
        return rb_str_subseq(str, lead, total - lead - trail);
    }
    return str_duplicate(rb_cString, str);
}
|
#strip! ⇒ self?
Like String#strip, except that any modifications are made in self
; returns self
if any modifications are made, nil
otherwise.
Related: String#lstrip!, String#rstrip!.
9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 |
# File 'string.c', line 9969
/*
 * String#strip!: in-place variant of String#strip.
 *
 * Uses lstrip_offset() / rstrip_offset() to find how many bytes to drop
 * at each end.  When either count is positive, the kept bytes are moved
 * to the front (if needed), the string is shortened and re-terminated,
 * and +str+ is returned.  Otherwise nil is returned.
 */
static VALUE
rb_str_strip_bang(VALUE str)
{
    char *head;
    long total;

    str_modify_keep_cr(str);
    rb_encoding *enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, head, total);

    long lead = lstrip_offset(str, head, head + total, enc);
    long trail = rstrip_offset(str, head + lead, head + total, enc);

    if (lead <= 0 && trail <= 0) {
        /* nothing to strip on either side */
        return Qnil;
    }

    long kept = total - trail;
    if (lead > 0) {
        kept -= lead;
        /* source and destination overlap, hence memmove */
        memmove(head, head + lead, kept);
    }
    STR_SET_LEN(str, kept);
    TERM_FILL(head + kept, rb_enc_mbminlen(enc));
    return str;
}
|
#sub(pattern, replacement) ⇒ Object #sub(pattern) {|match| ... } ⇒ Object
Returns a copy of self
with only the first occurrence (not all occurrences) of the given pattern
replaced.
See Substitution Methods.
Related: String#sub!, String#gsub, String#gsub!.
5878 5879 5880 5881 5882 5883 5884 |
# File 'string.c', line 5878
/*
 * String#sub: non-destructive form.  Duplicates +str+, applies
 * rb_str_sub_bang() to the duplicate with the same arguments and returns
 * the duplicate (whether or not a substitution happened).
 */
static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
    VALUE copy = str_duplicate(rb_cString, str);

    rb_str_sub_bang(argc, argv, copy);
    return copy;
}
|
#sub!(pattern, replacement) ⇒ self? #sub!(pattern) {|match| ... } ⇒ self?
Returns self
with only the first occurrence (not all occurrences) of the given pattern
replaced.
See Substitution Methods.
Related: String#sub, String#gsub, String#gsub!.
5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 |
# File 'string.c', line 5753
/*
 * String#sub!: replaces the first match of the pattern in +str+ in place.
 *
 * Call forms:
 *   sub!(pattern, replacement)  - replacement is a String or a Hash
 *   sub!(pattern) { |match| }   - replacement comes from the block
 *
 * Returns +str+ when a match was found and replaced, nil when the pattern
 * does not match.  Raises Encoding::CompatibilityError when the
 * replacement's encoding cannot be combined with the parts of +str+ that
 * are kept.
 */
static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
VALUE pat, repl, hash = Qnil;
int iter = 0;
long plen;
/* with a block a single argument is enough; without one, two are required */
int min_arity = rb_block_given_p() ? 1 : 2;
long beg;
rb_check_arity(argc, min_arity, 2);
if (argc == 1) {
iter = 1;
}
else {
repl = argv[1];
/* a Hash replacement is looked up per match; anything else must be string-like */
hash = rb_check_hash_type(argv[1]);
if (NIL_P(hash)) {
StringValue(repl);
}
}
pat = get_pat_quoted(argv[0], 1);
str_modifiable(str);
beg = rb_pat_search(pat, str, 0, 1);
if (beg >= 0) {
rb_encoding *enc;
int cr = ENC_CODERANGE(str);
long beg0, end0;
VALUE match, match0 = Qnil;
struct re_registers *regs;
char *p, *rp;
long len, rlen;
match = rb_backref_get();
regs = RMATCH_REGS(match);
/* beg0/end0: byte range of the matched text within str */
if (RB_TYPE_P(pat, T_STRING)) {
beg0 = beg;
end0 = beg0 + RSTRING_LEN(pat);
match0 = pat;
}
else {
beg0 = BEG(0);
end0 = END(0);
if (iter) match0 = rb_reg_nth_match(0, match);
}
if (iter || !NIL_P(hash)) {
/* remember pointer and length so str_mod_check() can compare them after user code has run */
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (iter) {
repl = rb_obj_as_string(rb_yield(match0));
}
else {
repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
repl = rb_obj_as_string(repl);
}
str_mod_check(str, p, len);
rb_check_frozen(str);
}
else {
/* plain string replacement: processed by rb_reg_regsub() against the match registers */
repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
}
enc = rb_enc_compatible(str, repl);
if (!enc) {
/*
 * Encodings are not directly compatible: still acceptable when the
 * text before and after the match is all 7-bit, in which case the
 * replacement's encoding is adopted.
 */
rb_encoding *str_enc = STR_ENC_GET(str);
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(str_enc),
rb_enc_name(STR_ENC_GET(repl)));
}
enc = STR_ENC_GET(repl);
}
rb_str_modify(str);
rb_enc_associate(str, enc);
/* combine the coderange saved before the edit with the replacement's coderange */
if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
int cr2 = ENC_CODERANGE(repl);
if (cr2 == ENC_CODERANGE_BROKEN ||
(cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
cr = ENC_CODERANGE_UNKNOWN;
else
cr = cr2;
}
plen = end0 - beg0;
rlen = RSTRING_LEN(repl);
len = RSTRING_LEN(str);
/* grow the buffer first when the replacement is longer than the match */
if (rlen > plen) {
RESIZE_CAPA(str, len + rlen - plen);
}
p = RSTRING_PTR(str);
/* shift the tail to its final position, then copy the replacement into the gap */
if (rlen != plen) {
memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
}
rp = RSTRING_PTR(repl);
memmove(p + beg0, rp, rlen);
len += rlen - plen;
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
ENC_CODERANGE_SET(str, cr);
RB_GC_GUARD(match);
return str;
}
return Qnil;
}
|
#succ ⇒ String
Returns the successor to self
. The successor is calculated by incrementing characters.
The first character to be incremented is the rightmost alphanumeric; or, if no alphanumerics, the rightmost character:
'THX1138'.succ # => "THX1139"
'<<koala>>'.succ # => "<<koalb>>"
'***'.succ # => '**+'
The successor to a digit is another digit, “carrying” to the next-left character for a “rollover” from 9 to 0, and prepending another digit if necessary:
'00'.succ # => "01"
'09'.succ # => "10"
'99'.succ # => "100"
The successor to a letter is another letter of the same case, carrying to the next-left character for a rollover, and prepending another same-case letter if necessary:
'aa'.succ # => "ab"
'az'.succ # => "ba"
'zz'.succ # => "aaa"
'AA'.succ # => "AB"
'AZ'.succ # => "BA"
'ZZ'.succ # => "AAA"
The successor to a non-alphanumeric character is the next character in the underlying character set’s collating sequence, carrying to the next-left character for a rollover, and prepending another character if necessary:
s = 0.chr * 3
s # => "\x00\x00\x00"
s.succ # => "\x00\x00\x01"
s = 255.chr * 3
s # => "\xFF\xFF\xFF"
s.succ # => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s = 'zz99zz99'
s.succ # => "aaa00aa00"
s = '99zz99zz'
s.succ # => "100aa00aa"
The successor to an empty String is a new empty String:
''.succ # => ""
4862 4863 4864 4865 4866 4867 4868 4869 |
# File 'string.c', line 4862
/*
 * String#succ: builds a fresh string from the bytes of +orig+, copies
 * encoding/coderange information onto it with
 * rb_enc_cr_str_copy_for_substr(), and returns the result of str_succ()
 * applied to that copy.  +orig+ itself is not modified here.
 */
VALUE
rb_str_succ(VALUE orig)
{
    const char *bytes = RSTRING_PTR(orig);
    long nbytes = RSTRING_LEN(orig);
    VALUE copy = rb_str_new(bytes, nbytes);

    rb_enc_cr_str_copy_for_substr(copy, orig);
    return str_succ(copy);
}
|
#succ! ⇒ self
Equivalent to String#succ, but modifies self
in place; returns self
.
4966 4967 4968 4969 4970 4971 4972 |
# File 'string.c', line 4966
/*
 * String#succ!: in-place variant of String#succ.  Prepares +str+ for
 * modification, runs str_succ() on it (its return value is not used)
 * and returns +str+.
 */
static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    (void)str_succ(str);

    return str;
}
|
#sum(n = 16) ⇒ Integer
:include: doc/string/sum.rdoc
10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 |
# File 'string.c', line 10362
/*
 * String#sum: adds up the bytes of +str+ (as unsigned values) and returns
 * the total reduced to its low +bits+ bits.
 *
 * +bits+ defaults to 16.  A negative argument is treated as 0, and 0
 * means the total is returned without masking.
 *
 * Bytes are accumulated in a C unsigned long.  Whenever that accumulator
 * gets within UCHAR_MAX of FIXNUM_MAX it is folded into a Ruby Integer
 * with `+`, so arbitrarily long strings cannot overflow it.
 */
static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
    int bits = 16;
    VALUE total = INT2FIX(0);
    unsigned long partial = 0;

    if (rb_check_arity(argc, 0, 1)) {
        bits = NUM2INT(argv[0]);
        if (bits < 0) bits = 0;
    }

    char *base = RSTRING_PTR(str);
    long len = RSTRING_LEN(str);

    for (long k = 0; k < len; k++) {
        if (FIXNUM_MAX - UCHAR_MAX < partial) {
            /* fold into the Ruby Integer before the C accumulator can overflow */
            total = rb_funcall(total, '+', 1, LONG2FIX(partial));
            str_mod_check(str, base, len);
            partial = 0;
        }
        partial += (unsigned char)base[k];
    }

    if (bits == 0) {
        /* unmasked: just add whatever is still pending */
        if (partial) {
            total = rb_funcall(total, '+', 1, LONG2FIX(partial));
        }
        return total;
    }

    if (total == INT2FIX(0)) {
        /* nothing was folded: mask directly in C when the shift is in range */
        if (bits < (int)sizeof(long)*CHAR_BIT) {
            partial &= (((unsigned long)1)<<bits)-1;
        }
        return LONG2FIX(partial);
    }

    if (partial) {
        total = rb_funcall(total, '+', 1, LONG2FIX(partial));
    }
    /* mask = (1 << bits) - 1, computed with Ruby Integer arithmetic */
    VALUE mask = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
    mask = rb_funcall(mask, '-', 1, INT2FIX(1));
    return rb_funcall(total, '&', 1, mask);
}
|
#swapcase(*options) ⇒ String
Returns a string containing the characters in self
, with cases reversed; each uppercase character is downcased; each lowercase character is upcased:
s = 'Hello World!' # => "Hello World!"
s.swapcase # => "hELLO wORLD!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#swapcase!.
7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 |
# File 'string.c', line 7832
/*
 * String#swapcase: returns a new string with the case of each character
 * reversed, subject to the case-mapping options in +argv+.
 *
 * An empty +str+ yields a plain duplicate.  When the options select
 * ASCII-only mapping, a new string of the same byte length is filled by
 * rb_str_ascii_casemap(); otherwise rb_str_casemap() produces the result.
 */
static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags =
        check_case_options(argc, argv, ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE);
    rb_encoding *enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return str_duplicate(rb_cString, str);
    }

    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        return rb_str_casemap(str, &flags, enc);
    }

    VALUE swapped = rb_str_new(0, RSTRING_LEN(str));
    rb_str_ascii_casemap(str, swapped, &flags, enc);
    return swapped;
}
|
#swapcase!(*options) ⇒ self?
Upcases each lowercase character in self
; downcases each uppercase character; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.swapcase! # => "hELLO wORLD!"
s # => "hELLO wORLD!"
''.swapcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#swapcase.
7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 |
# File 'string.c', line 7795
/*
 * String#swapcase!: in-place variant of String#swapcase.
 *
 * With the ASCII-only option the mapping is written straight back into
 * +str+; otherwise +str+ takes over the string built by rb_str_casemap().
 * Returns +str+ when the mapping reports a modification
 * (ONIGENC_CASE_MODIFIED set in the flags), nil otherwise.
 */
static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags =
        check_case_options(argc, argv, ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE);

    str_modify_keep_cr(str);
    rb_encoding *enc = str_true_enc(str);

    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }
    else {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
|
#to_c ⇒ Object
Returns self
interpreted as a Complex object; leading whitespace and trailing garbage are ignored:
'9'.to_c # => (9+0i)
'2.5'.to_c # => (2.5+0i)
'2.5/1'.to_c # => ((5/2)+0i)
'-3/2'.to_c # => ((-3/2)+0i)
'-i'.to_c # => (0-1i)
'45i'.to_c # => (0+45i)
'3-4i'.to_c # => (3-4i)
'-4e2-4e-2i'.to_c # => (-400.0-0.04i)
'-0.0-0.0i'.to_c # => (-0.0-0.0i)
'1/2+3/4i'.to_c # => ((1/2)+(3/4)*i)
'1.0@0'.to_c # => (1+0.0i)
"1.0@#{Math::PI/2}".to_c # => (0.0+1i)
"1.0@#{Math::PI}".to_c # => (-1+0.0i)
Returns Complex zero if the string cannot be converted:
'ruby'.to_c # => (0+0i)
See Kernel#Complex.
2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 |
# File 'complex.c', line 2260
/*
 * String#to_c: parses +self+ as a Complex.
 *
 * Requires an ASCII-compatible encoding.  The text is handed to
 * parse_comp() after rb_str_fill_terminator(); parse_comp()'s own return
 * value is deliberately ignored and whatever it stored in the out
 * parameter is returned.
 */
static VALUE
string_to_c(VALUE self)
{
    VALUE parsed;

    rb_must_asciicompat(self);

    const char *text = rb_str_fill_terminator(self, 1);
    (void)parse_comp(text, FALSE, &parsed);

    return parsed;
}
|
#to_f ⇒ Float
Returns the result of interpreting leading characters in self
as a Float:
'3.14159'.to_f # => 3.14159
'1.234e-2'.to_f # => 0.01234
Characters past a leading valid number are ignored:
'3.14 (pi to two places)'.to_f # => 3.14
Returns zero if there is no leading valid number:
'abcdef'.to_f # => 0.0
6627 6628 6629 6630 6631 |
# File 'string.c', line 6627
/*
 * String#to_f: converts +str+ with rb_str_to_dbl() (second argument
 * FALSE) and wraps the resulting double as a Ruby Float.
 */
static VALUE
rb_str_to_f(VALUE str)
{
    const double parsed = rb_str_to_dbl(str, FALSE);

    return DBL2NUM(parsed);
}
|
#to_i(base = 10) ⇒ Integer
Returns the result of interpreting leading characters in self
as an integer in the given base
(which must be in (0, 2..36)):
'123456'.to_i # => 123456
'123def'.to_i(16) # => 1195503
With base
zero, string object
may contain leading characters to specify the actual base:
'123def'.to_i(0) # => 123
'0123def'.to_i(0) # => 83
'0b123def'.to_i(0) # => 1
'0o123def'.to_i(0) # => 83
'0d123def'.to_i(0) # => 123
'0x123def'.to_i(0) # => 1195503
Characters past a leading valid number (in the given base
) are ignored:
'12.345'.to_i # => 12
'12345'.to_i(2) # => 1
Returns zero if there is no leading valid number:
'abcdef'.to_i # => 0
'2'.to_i(2) # => 0
6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 |
# File 'string.c', line 6596
/*
 * String#to_i: converts +str+ to an Integer via rb_str_to_inum().
 *
 * Takes an optional base (default 10).  A negative base raises
 * ArgumentError here; any further validation of the base is left to
 * rb_str_to_inum().
 */
static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    if (rb_check_arity(argc, 0, 1)) {
        base = NUM2INT(argv[0]);
        if (base < 0) {
            rb_raise(rb_eArgError, "invalid radix %d", base);
        }
    }

    return rb_str_to_inum(str, base, FALSE);
}
|
#to_r ⇒ Object
Returns the result of interpreting leading characters in str
as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str
, zero is returned. This method never raises an exception.
' 2 '.to_r #=> (2/1)
'300/2'.to_r #=> (150/1)
'-9.2'.to_r #=> (-46/5)
'-9.2e2'.to_r #=> (-920/1)
'1_234_567'.to_r #=> (1234567/1)
'21 June 09'.to_r #=> (21/1)
'21/06/09'.to_r #=> (7/2)
'BWV 1079'.to_r #=> (0/1)
NOTE: “0.3”.to_r isn’t the same as 0.3.to_r. The former is equivalent to “3/10”.to_r, but the latter isn’t so.
"0.3".to_r == 3/10r #=> true
0.3.to_r == 3/10r #=> false
See also Kernel#Rational.
2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 |
# File 'rational.c', line 2524
/*
 * String#to_r: parses the bytes of +self+ with parse_rat() and returns
 * the result.
 *
 * Requires an ASCII-compatible encoding.  If parse_rat() hands back a
 * Float that is not zero, FloatDomainError ("Infinity") is raised.
 */
static VALUE
string_to_r(VALUE self)
{
    rb_must_asciicompat(self);

    const char *head = RSTRING_PTR(self);
    const char *tail = RSTRING_END(self);
    VALUE parsed = parse_rat(head, tail, 0, TRUE);

    if (RB_FLOAT_TYPE_P(parsed)) {
        if (!FLOAT_ZERO_P(parsed)) {
            rb_raise(rb_eFloatDomainError, "Infinity");
        }
    }
    return parsed;
}
|
#to_s ⇒ self, String
Returns self
if self
is a String, or self
converted to a String if self
is a subclass of String.
6642 6643 6644 6645 6646 6647 6648 6649 |
# File 'string.c', line 6642
/*
 * String#to_s: returns +str+ itself when its class is exactly String;
 * for any other class, returns a duplicate whose class is String.
 */
static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) == rb_cString) {
        return str;
    }

    return str_duplicate(rb_cString, str);
}
|
#to_s ⇒ self, String
Returns self
if self
is a String, or self
converted to a String if self
is a subclass of String.
6642 6643 6644 6645 6646 6647 6648 6649 |
# File 'string.c', line 6642
/*
 * String#to_s: returns +str+ itself when its class is exactly String;
 * for any other class, returns a duplicate whose class is String.
 */
static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) == rb_cString) {
        return str;
    }

    return str_duplicate(rb_cString, str);
}
|
#intern ⇒ Object #to_sym ⇒ Object
Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.
"Koala".intern #=> :Koala
s = 'cat'.to_sym #=> :cat
s == :cat #=> true
s = '@cat'.to_sym #=> :@cat
s == :@cat #=> true
This can also be used to create symbols that cannot be represented using the :xxx
notation.
'cat and dog'.to_sym #=> :"cat and dog"
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 |
# File 'symbol.c', line 860
/*
 * String#intern / String#to_sym: returns the Symbol for +str+, creating
 * it when the lookup finds none.
 *
 * The lookup and any creation happen between GLOBAL_SYMBOLS_ENTER and
 * GLOBAL_SYMBOLS_LEAVE.
 * NOTE(review): those macros presumably hold the global symbol-table lock
 * and may open/close a C scope, so do not return from inside the bracketed
 * region without confirming their definition.
 */
VALUE
rb_str_intern(VALUE str)
{
VALUE sym;
GLOBAL_SYMBOLS_ENTER(symbols);
{
sym = lookup_str_sym_with_lock(symbols, str);
if (sym) {
// ok
}
else if (USE_SYMBOL_GC) {
/* create a dynamic symbol backed by a frozen copy of the string */
rb_encoding *enc = rb_enc_get(str);
rb_encoding *ascii = rb_usascii_encoding();
if (enc != ascii && sym_check_asciionly(str, false)) {
/* passes sym_check_asciionly(): re-tag the copy as US-ASCII */
str = rb_str_dup(str);
rb_enc_associate(str, ascii);
OBJ_FREEZE(str);
enc = ascii;
}
else {
str = rb_str_dup(str);
OBJ_FREEZE(str);
}
str = rb_fstring(str);
int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
/* a negative type from rb_str_symname_type() is mapped to ID_JUNK */
if (type < 0) type = ID_JUNK;
sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
}
else {
/* symbol GC disabled: intern to an ID and wrap it */
ID id = intern_str(str, 0);
sym = ID2SYM(id);
}
}
GLOBAL_SYMBOLS_LEAVE();
return sym;
}
|
#tr(selector, replacements) ⇒ Object
Returns a copy of self
with each character specified by string selector
translated to the corresponding character in string replacements
. The correspondence is positional:
- Each occurrence of the first character specified by selector is translated to the first character in replacements.
- Each occurrence of the second character specified by selector is translated to the second character in replacements.
- And so on.
Example:
'hello'.tr('el', 'ip') #=> "hippo"
If replacements
is shorter than selector
, it is implicitly padded with its own last character:
'hello'.tr('aeiou', '-') # => "h-ll-"
'hello'.tr('aeiou', 'AA-') # => "hAll-"
Arguments selector
and replacements
must be valid character selectors (see Character Selectors), and may use any of its valid forms, including negation, ranges, and escaping:
# Negation.
'hello'.tr('^aeiou', '-') # => "-e--o"
# Ranges.
'ibm'.tr('b-z', 'a-z') # => "hal"
# Escapes.
'hel^lo'.tr('\^aeiou', '-') # => "h-l-l-" # Escaped leading caret.
'i-b-m'.tr('b\-z', 'a-z') # => "ibabm" # Escaped embedded hyphen.
'foo\\bar'.tr('ab\\', 'XYZ') # => "fooZYXr" # Escaped backslash.
8220 8221 8222 8223 8224 8225 8226 |
# File 'string.c', line 8220
/*
 * Implements String#tr: returns a translated copy of +str+, leaving the
 * receiver untouched.
 *
 * The copy is made with str_duplicate(rb_cString, str) and then translated
 * in place by tr_trans(). The return value of tr_trans() is ignored; the
 * copy is returned whether or not anything changed.
 * NOTE(review): the final argument 0 is presumably the "squeeze" flag
 * (rb_str_tr_s passes 1) -- confirm against tr_trans.
 */
static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    VALUE translated = str_duplicate(rb_cString, str);

    tr_trans(translated, src, repl, 0);
    return translated;
}
|
#tr!(selector, replacements) ⇒ self?
Like String#tr, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
8174 8175 8176 8177 8178 |
# File 'string.c', line 8174
/*
 * Implements String#tr!: translates +str+ in place.
 *
 * Thin wrapper that forwards to tr_trans() with a final argument of 0 and
 * returns whatever tr_trans() returns.
 * NOTE(review): per the String#tr! documentation that is self when changes
 * were made and nil otherwise -- the logic lives in tr_trans.
 */
static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE outcome = tr_trans(str, src, repl, 0);

    return outcome;
}
|
#tr_s(selector, replacements) ⇒ String
Like String#tr, but also squeezes the modified portions of the translated string; returns a new string (translated and squeezed).
'hello'.tr_s('l', 'r') #=> "hero"
'hello'.tr_s('el', '-') #=> "h-o"
'hello'.tr_s('el', 'hx') #=> "hhxo"
Related: String#squeeze.
8552 8553 8554 8555 8556 8557 8558 |
# File 'string.c', line 8552
/*
 * Implements String#tr_s: returns a translated copy of +str+, leaving the
 * receiver untouched.
 *
 * The copy is made with str_duplicate(rb_cString, str) and then processed
 * in place by tr_trans() with a final argument of 1. The return value of
 * tr_trans() is ignored; the copy is returned whether or not it changed.
 * NOTE(review): the final argument 1 presumably enables squeezing of the
 * translated runs (rb_str_tr passes 0) -- confirm against tr_trans.
 */
static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    VALUE translated = str_duplicate(rb_cString, str);

    tr_trans(translated, src, repl, 1);
    return translated;
}
|
#tr_s!(selector, replacements) ⇒ self?
Like String#tr_s, but modifies self
in place. Returns self
if any changes were made, nil
otherwise.
Related: String#squeeze!.
8530 8531 8532 8533 8534 |
# File 'string.c', line 8530
/*
 * Implements String#tr_s!: the in-place counterpart of String#tr_s.
 *
 * Thin wrapper that forwards to tr_trans() with a final argument of 1 and
 * returns whatever tr_trans() returns.
 * NOTE(review): per the String#tr_s! documentation that is self when changes
 * were made and nil otherwise -- the logic lives in tr_trans.
 */
static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE outcome = tr_trans(str, src, repl, 1);

    return outcome;
}
|
#undump ⇒ String
Returns an unescaped version of self
:
s_orig = "\f\x00\xff\\\"" # => "\f\u0000\xFF\\\""
s_dumped = s_orig.dump # => "\"\\f\\x00\\xFF\\\\\\\"\""
s_undumped = s_dumped.undump # => "\f\u0000\xFF\\\""
s_undumped == s_orig # => true
Related: String#dump (inverse of String#undump).
7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 |
# File 'string.c', line 7190
/*
 * Implements String#undump: parses +str+, which must be in the format
 * produced by String#dump, and returns the unescaped string.
 *
 * Accepted layouts:
 *   "..."
 *   "...".force_encoding("ENCNAME")
 *   "...".dup.force_encoding("ENCNAME")     (older dump output)
 *
 * Raises RuntimeError when:
 *  - +str+ contains a non-ASCII character or a null byte;
 *  - the closing '"' is missing ("unterminated dumped string");
 *  - a backslash is the last character ("invalid escape");
 *  - a Unicode escape was seen and a force_encoding suffix is also present;
 *  - the encoding name in the suffix is unknown;
 *  - the overall layout matches none of the accepted forms.
 * rb_must_asciicompat() additionally rejects receivers in a
 * non-ASCII-compatible encoding.
 */
static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    /* Start from an empty string in the receiver's encoding and append to it. */
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
        rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
        rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    /* The shortest valid input is the two quotes of an empty dumped string. */
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
        if (s >= s_end) {
            rb_raise(rb_eRuntimeError, "unterminated dumped string");
        }

        if (*s == '"') {
            /* epilogue */
            s++;
            if (s == s_end) {
                /* ascii compatible dumped string */
                break;
            }
            else {
                static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
                static const char dup_suffix[] = ".dup";
                const char *encname;
                int encidx;
                ptrdiff_t size;

                /* check separately for strings dumped by older versions */
                size = sizeof(dup_suffix) - 1;
                if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

                size = sizeof(force_encoding_suffix) - 1;
                if (s_end - s <= size) goto invalid_format;
                if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
                s += size;

                if (utf8) {
                    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
                }

                encname = s;
                s = memchr(s, '"', s_end-s);
                /*
                 * Check for a missing closing quote BEFORE doing pointer
                 * arithmetic: subtracting from a null pointer is undefined
                 * behaviour.
                 */
                if (!s) goto invalid_format;
                size = s - encname;
                /* Exactly the two characters `")` must remain. */
                if (s_end - s != 2) goto invalid_format;
                if (s[0] != '"' || s[1] != ')') goto invalid_format;

                encidx = rb_enc_find_index2(encname, (long)size);
                if (encidx < 0) {
                    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
                }
                rb_enc_associate_index(undumped, encidx);
            }
            break;
        }

        if (*s == '\\') {
            s++;
            if (s >= s_end) {
                rb_raise(rb_eRuntimeError, "invalid escape");
            }
            /*
             * The helper receives s, enc, utf8 and binary by pointer, so it
             * advances the cursor itself and may update the encoding state.
             */
            undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
        }
        else {
            /* Ordinary character: copy it through unchanged. */
            rb_str_cat(undumped, s++, 1);
        }
    }

    /* Keep str reachable while raw pointers into its buffer are in use. */
    RB_GC_GUARD(str);

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}
|
#unicode_normalize(form = :nfc) ⇒ String
Returns a copy of self
with Unicode normalization applied.
Argument form
must be one of the following symbols (see Unicode normalization forms):
-
:nfc
: Canonical decomposition, followed by canonical composition. -
:nfd
: Canonical decomposition. -
:nfkc
: Compatibility decomposition, followed by canonical composition. -
:nfkd
: Compatibility decomposition.
The encoding of self
must be one of:
-
Encoding::UTF_8
-
Encoding::UTF_16BE
-
Encoding::UTF_16LE
-
Encoding::UTF_32BE
-
Encoding::UTF_32LE
-
Encoding::GB18030
-
Encoding::UCS_2BE
-
Encoding::UCS_4BE
Examples:
"a\u0300".unicode_normalize # => "à" (composed: U+00E0)
"\u00E0".unicode_normalize(:nfd) # => "à" (decomposed: U+0061 U+0300)
Related: String#unicode_normalize!, String#unicode_normalized?.
11423 11424 11425 11426 11427 |
# File 'string.c', line 11423
/*
 * Implements String#unicode_normalize(form = :nfc).
 *
 * Forwards the raw argument vector and the receiver to
 * unicode_normalize_common() with id_normalize and returns its result.
 * Argument checking is left entirely to that helper.
 */
static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return normalized;
}
|
#unicode_normalize!(form = :nfc) ⇒ self
Like String#unicode_normalize, except that the normalization is performed on self
.
Related: String#unicode_normalized?.
11439 11440 11441 11442 11443 |
# File 'string.c', line 11439
/*
 * Implements String#unicode_normalize!(form = :nfc).
 *
 * Computes the normalized string exactly as String#unicode_normalize does
 * (unicode_normalize_common() with id_normalize), then overwrites the
 * receiver's contents via rb_str_replace() and returns that call's result.
 */
static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return rb_str_replace(str, normalized);
}
|
#unicode_normalized?(form = :nfc) ⇒ Boolean
Returns true
if self
is in the given form
of Unicode normalization, false
otherwise. The form
must be one of :nfc
, :nfd
, :nfkc
, or :nfkd
.
Examples:
"a\u0300".unicode_normalized? # => false
"a\u0300".unicode_normalized?(:nfd) # => true
"\u00E0".unicode_normalized? # => true
"\u00E0".unicode_normalized?(:nfd) # => false
Raises an exception if self
is not in a Unicode encoding:
s = "\xE0".force_encoding('ISO-8859-1')
s.unicode_normalized? # Raises Encoding::CompatibilityError.
Related: String#unicode_normalize, String#unicode_normalize!.
11468 11469 11470 11471 11472 |
# File 'string.c', line 11468
/*
 * Implements String#unicode_normalized?(form = :nfc).
 *
 * Forwards the raw argument vector and the receiver to
 * unicode_normalize_common() with id_normalized_p and returns its result.
 */
static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    VALUE verdict = unicode_normalize_common(argc, argv, str, id_normalized_p);

    return verdict;
}
|
#upcase(*options) ⇒ String
Returns a string containing the upcased characters in self
:
s = 'Hello World!' # => "Hello World!"
s.upcase # => "HELLO WORLD!"
The casing may be affected by the given options
; see Case Mapping.
Related: String#upcase!, String#downcase, String#downcase!.
7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 |
# File 'string.c', line 7568
/*
 * Implements String#upcase(*options): returns a new upcased string and
 * leaves +str+ unmodified.
 *
 * The options are validated and folded into the case-mapping flags by
 * check_case_options(). One of three strategies is then chosen, in order:
 *  1. case_option_single_p() holds: copy the bytes and encoding of +str+
 *     and upcase the copy with upcase_single();
 *  2. ONIGENC_CASE_ASCII_ONLY is set: allocate a result of the same byte
 *     length and fill it with rb_str_ascii_casemap();
 *  3. otherwise: return the result of rb_str_casemap().
 */
static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_UPCASE);
    rb_encoding *enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        VALUE copy = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));

        str_enc_copy_direct(copy, str);
        upcase_single(copy);
        return copy;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        /* Destination buffer only; rb_str_ascii_casemap writes into it. */
        VALUE mapped = rb_str_new(0, RSTRING_LEN(str));

        rb_str_ascii_casemap(str, mapped, &flags, enc);
        return mapped;
    }

    return rb_str_casemap(str, &flags, enc);
}
|
#upcase!(*options) ⇒ self?
Upcases the characters in self
; returns self
if any changes were made, nil
otherwise:
s = 'Hello World!' # => "Hello World!"
s.upcase! # => "HELLO WORLD!"
s # => "HELLO WORLD!"
s.upcase! # => nil
The casing may be affected by the given options
; see Case Mapping.
Related: String#upcase, String#downcase, String#downcase!.
7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 |
# File 'string.c', line 7529
/*
 * Implements String#upcase!(*options): upcases +str+ in place.
 *
 * Returns +str+ when ONIGENC_CASE_MODIFIED ends up set in the flags, and
 * Qnil otherwise.
 *
 * The options are checked first, then str_modify_keep_cr() is called on the
 * receiver before any mapping is attempted. The same three strategies as
 * in rb_str_upcase are tried in order:
 *  1. case_option_single_p() holds: upcase_single() works directly on
 *     +str+, and a truthy return sets the modified flag here;
 *  2. ONIGENC_CASE_ASCII_ONLY is set: rb_str_ascii_casemap() maps +str+
 *     onto itself;
 *  3. otherwise: rb_str_casemap() builds a new string whose contents replace
 *     those of +str+ via str_shared_replace().
 * In cases 2 and 3 the helpers receive &flags and are relied upon to set
 * ONIGENC_CASE_MODIFIED themselves.
 */
static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_UPCASE);
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        if (upcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
|
#upto(other_string, exclusive = false) {|string| ... } ⇒ self #upto(other_string, exclusive = false) ⇒ Object
With a block given, calls the block with each String value returned by successive calls to String#succ; the first value is self
, the next is self.succ
, and so on; the sequence terminates when value other_string
is reached; returns self
:
'a8'.upto('b6') {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5 b6
If argument exclusive
is given as a truthy object, the last value is omitted:
'a8'.upto('b6', true) {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5
If other_string
would not be reached, does not call the block:
'25'.upto('5') {|s| fail s }
'aa'.upto('a') {|s| fail s }
With no block given, returns a new Enumerator:
'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")>
5026 5027 5028 5029 5030 5031 5032 5033 5034 |
# File 'string.c', line 5026
/*
 * Implements String#upto(other_string, exclusive = false).
 *
 * Arguments are parsed with the "11" spec: one required (the end string)
 * and one optional (the exclusive flag). Parsing happens before the
 * enumerator check, so a wrong argument count is reported even when no
 * block is given.
 *
 * RETURN_ENUMERATOR handles the no-block case; with a block the iteration
 * is delegated to rb_str_upto_each() using str_upto_i as the per-element
 * callback, and that function's result is returned.
 */
static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE last;
    VALUE excl_arg;
    int omit_last;

    rb_scan_args(argc, argv, "11", &last, &excl_arg);
    RETURN_ENUMERATOR(beg, argc, argv);

    /* Any truthy Ruby object counts as "exclusive". */
    omit_last = RTEST(excl_arg);
    return rb_str_upto_each(beg, last, omit_last, str_upto_i, Qnil);
}
|
#valid_encoding? ⇒ Boolean
Returns true
if self
is encoded correctly, false
otherwise:
"\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
"\xc2".force_encoding("UTF-8").valid_encoding? # => false
"\x80".force_encoding("UTF-8").valid_encoding? # => false
10990 10991 10992 10993 10994 10995 10996 |
# File 'string.c', line 10990
/*
 * Implements String#valid_encoding?.
 *
 * Asks rb_enc_str_coderange() for the code range of +str+ and returns the
 * RBOOL of "code range is not ENC_CODERANGE_BROKEN".
 */
static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    const int coderange = rb_enc_str_coderange(str);
    const int is_valid = (coderange != ENC_CODERANGE_BROKEN);

    return RBOOL(is_valid);
}
|