diff --git a/string.c b/string.c
index 26463b95e27c17..56b55d6a0d5376 100644
--- a/string.c
+++ b/string.c
@@ -6072,45 +6072,8 @@ rb_str_split(VALUE str, const char *sep0)
}
-/*
- * call-seq:
- * str.each_line(separator=$/) {|substr| block } -> str
- * str.each_line(separator=$/) -> an_enumerator
- *
- * str.lines(separator=$/) {|substr| block } -> str
- * str.lines(separator=$/) -> an_enumerator
- *
- * Splits str using the supplied parameter as the record separator
- * ($/ by default), passing each substring in turn to the supplied
- * block. If a zero-length record separator is supplied, the string is split
- * into paragraphs delimited by multiple successive newlines.
- *
- * If no block is given, an enumerator is returned instead.
- *
- * print "Example one\n"
- * "hello\nworld".each_line {|s| p s}
- * print "Example two\n"
- * "hello\nworld".each_line('l') {|s| p s}
- * print "Example three\n"
- * "hello\n\n\nworld".each_line('') {|s| p s}
- *
- * produces:
- *
- * Example one
- * "hello\n"
- * "world"
- * Example two
- * "hel"
- * "l"
- * "o\nworl"
- * "d"
- * Example three
- * "hello\n\n\n"
- * "world"
- */
-
static VALUE
-rb_str_each_line(int argc, VALUE *argv, VALUE str)
+rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE return_enumerator_p)
{
rb_encoding *enc;
VALUE rs;
@@ -6120,6 +6083,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
VALUE line;
int n;
VALUE orig = str;
+ VALUE ary, yieldp;
if (argc == 0) {
rs = rb_rs;
@@ -6127,10 +6091,27 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
else {
rb_scan_args(argc, argv, "01", &rs);
}
- RETURN_ENUMERATOR(str, argc, argv);
+
+ if (rb_block_given_p()) {
+ yieldp = Qtrue;
+ }
+ else {
+ if (return_enumerator_p)
+ RETURN_ENUMERATOR(str, argc, argv);
+
+ yieldp = Qfalse;
+ ary = rb_ary_new();
+ }
+
if (NIL_P(rs)) {
- rb_yield(str);
- return orig;
+ if (yieldp) {
+ rb_yield(str);
+ return orig;
+ }
+ else {
+ rb_ary_push(ary, str);
+ return ary;
+ }
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
@@ -6153,7 +6134,12 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
- rb_yield(line);
+ if (yieldp) {
+ rb_yield(line);
+ }
+ else {
+ rb_ary_push(ary, line);
+ }
str_mod_check(str, ptr, len);
s = p;
}
@@ -6189,7 +6175,12 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
- rb_yield(line);
+ if (yieldp) {
+ rb_yield(line);
+ }
+ else {
+ rb_ary_push(ary, line);
+ }
str_mod_check(str, ptr, len);
s = p + (rslen ? rslen : n);
}
@@ -6201,71 +6192,156 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
- rb_yield(line);
+ if (yieldp) {
+ rb_yield(line);
+ }
+ else {
+ rb_ary_push(ary, line);
+ }
}
- return orig;
+ if (yieldp) {
+ return orig;
+ }
+ else {
+ return ary;
+ }
}
-
/*
* call-seq:
- * str.bytes {|fixnum| block } -> str
- * str.bytes -> an_enumerator
+ * str.lines(separator=$/) {|substr| block } -> str
+ * str.lines(separator=$/) -> an_array
*
- * str.each_byte {|fixnum| block } -> str
- * str.each_byte -> an_enumerator
+ * str.each_line(separator=$/) {|substr| block } -> str
+ * str.each_line(separator=$/) -> an_enumerator
*
- * Passes each byte in str to the given block, or returns
- * an enumerator if no block is given.
+ * Splits str using the supplied parameter as the record separator
+ * ($/ by default), passing each substring in turn to the supplied
+ * block. If a zero-length record separator is supplied, the string is split
+ * into paragraphs delimited by multiple successive newlines.
*
- * "hello".each_byte {|c| print c, ' ' }
+ * If no block is given, an array or enumerator is returned instead.
+ *
+ * print "Example one\n"
+ * "hello\nworld".each_line {|s| p s}
+ * print "Example two\n"
+ * "hello\nworld".each_line('l') {|s| p s}
+ * print "Example three\n"
+ * "hello\n\n\nworld".each_line('') {|s| p s}
*
* produces:
*
- * 104 101 108 108 111
+ * Example one
+ * "hello\n"
+ * "world"
+ * Example two
+ * "hel"
+ * "l"
+ * "o\nworl"
+ * "d"
+ * Example three
+ * "hello\n\n\n"
+ * "world"
*/
static VALUE
-rb_str_each_byte(VALUE str)
+rb_str_each_line(int argc, VALUE *argv, VALUE str)
+{
+ return rb_str_enumerate_lines(argc, argv, str, Qtrue);
+}
+
+static VALUE
+rb_str_lines(int argc, VALUE *argv, VALUE str)
+{
+ return rb_str_enumerate_lines(argc, argv, str, Qfalse);
+}
+
+
+static VALUE
+rb_str_enumerate_bytes(VALUE str, VALUE return_enumerator_p)
{
long i;
+ VALUE ary, yieldp;
+
+ if (rb_block_given_p()) {
+ yieldp = Qtrue;
+ }
+ else {
+ if (return_enumerator_p)
+ RETURN_ENUMERATOR(str, 0, 0);
+
+ yieldp = Qfalse;
+ ary = rb_ary_new2(RSTRING_LEN(str));
+ }
- RETURN_ENUMERATOR(str, 0, 0);
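for (i=0; i<RSTRING_LEN(str); i++) {
- rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
+ if (yieldp) {
+ rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
+ }
+ else {
+ rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
+ }
}
- return str;
+ if (yieldp) {
+ return str;
+ }
+ else {
+ return ary;
+ }
}
/*
* call-seq:
- * str.chars {|cstr| block } -> str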
- * str.chars -> an_enumerator
+ * str.bytes {|fixnum| block } -> str
+ * str.bytes -> an_array
*
- * str.each_char {|cstr| block } -> str
- * str.each_char -> an_enumerator
+ * str.each_byte {|fixnum| block } -> str
+ * str.each_byte -> an_enumerator
*
- * Passes each character in str to the given block, or returns
- * an enumerator if no block is given.
+ * Passes each byte in str to the given block, or returns
+ * an array or enumerator if no block is given.
*
- * "hello".each_char {|c| print c, ' ' }
+ * "hello".each_byte {|c| print c, ' ' }
*
* produces:
*
- * h e l l o
+ * 104 101 108 108 111
*/
static VALUE
-rb_str_each_char(VALUE str)
+rb_str_each_byte(VALUE str)
+{
+ return rb_str_enumerate_bytes(str, Qtrue);
+}
+
+static VALUE
+rb_str_bytes(VALUE str)
+{
+ return rb_str_enumerate_bytes(str, Qfalse);
+}
+
+
+static VALUE
+rb_str_enumerate_chars(VALUE str, VALUE return_enumerator_p)
{
VALUE orig = str;
long i, len, n;
const char *ptr;
rb_encoding *enc;
+ VALUE ary, yieldp;
+
+ if (rb_block_given_p()) {
+ yieldp = Qtrue;
+ }
+ else {
+ if (return_enumerator_p)
+ RETURN_ENUMERATOR(str, 0, 0);
+
+ yieldp = Qfalse;
+ ary = rb_ary_new();
+ }
- RETURN_ENUMERATOR(str, 0, 0);
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
@@ -6275,62 +6351,144 @@ rb_str_each_char(VALUE str)
case ENC_CODERANGE_7BIT:
for (i = 0; i < len; i += n) {
n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
- rb_yield(rb_str_subseq(str, i, n));
+ if (yieldp) {
+ rb_yield(rb_str_subseq(str, i, n));
+ }
+ else {
+ rb_ary_push(ary, rb_str_subseq(str, i, n));
+ }
}
break;
default:
for (i = 0; i < len; i += n) {
n = rb_enc_mbclen(ptr + i, ptr + len, enc);
- rb_yield(rb_str_subseq(str, i, n));
+ if (yieldp) {
+ rb_yield(rb_str_subseq(str, i, n));
+ }
+ else {
+ rb_ary_push(ary, rb_str_subseq(str, i, n));
+ }
}
}
- return orig;
+ if (yieldp) {
+ return orig;
+ }
+ else {
+ return ary;
+ }
}
/*
* call-seq:
- * str.codepoints {|integer| block } -> str
- * str.codepoints -> an_enumerator
- *
- * str.each_codepoint {|integer| block } -> str
- * str.each_codepoint -> an_enumerator
+ * str.chars {|cstr| block } -> str
+ * str.chars -> an_array
*
- * Passes the Integer ordinal of each character in str,
- * also known as a codepoint when applied to Unicode strings to the
- * given block.
+ * str.each_char {|cstr| block } -> str
+ * str.each_char -> an_enumerator
*
- * If no block is given, an enumerator is returned instead.
+ * Passes each character in str to the given block, or returns
+ * an array or enumerator if no block is given.
*
- * "hello\u0639".each_codepoint {|c| print c, ' ' }
+ * "hello".each_char {|c| print c, ' ' }
*
* produces:
*
- * 104 101 108 108 111 1593
+ * h e l l o
*/
static VALUE
-rb_str_each_codepoint(VALUE str)
+rb_str_each_char(VALUE str)
+{
+ return rb_str_enumerate_chars(str, Qtrue);
+}
+
+static VALUE
+rb_str_chars(VALUE str)
+{
+ return rb_str_enumerate_chars(str, Qfalse);
+}
+
+
+static VALUE
+rb_str_enumerate_codepoints(VALUE str, VALUE return_enumerator_p)
{
VALUE orig = str;
int n;
unsigned int c;
const char *ptr, *end;
rb_encoding *enc;
+ VALUE ary, yieldp;
+
+ if (single_byte_optimizable(str))
+ return rb_str_enumerate_bytes(str, return_enumerator_p);
+
+ if (rb_block_given_p()) {
+ yieldp = Qtrue;
+ }
+ else {
+ if (return_enumerator_p)
+ RETURN_ENUMERATOR(str, 0, 0);
+
+ yieldp = Qfalse;
+ ary = rb_ary_new();
+ }
- if (single_byte_optimizable(str)) return rb_str_each_byte(str);
- RETURN_ENUMERATOR(str, 0, 0);
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
enc = STR_ENC_GET(str);
while (ptr < end) {
c = rb_enc_codepoint_len(ptr, end, &n, enc);
- rb_yield(UINT2NUM(c));
+ if (yieldp) {
+ rb_yield(UINT2NUM(c));
+ }
+ else {
+ rb_ary_push(ary, UINT2NUM(c));
+ }
ptr += n;
}
- return orig;
+ if (yieldp) {
+ return orig;
+ }
+ else {
+ return ary;
+ }
}
+/*
+ * call-seq:
+ * str.codepoints {|integer| block } -> str
+ * str.codepoints -> an_array
+ *
+ * str.each_codepoint {|integer| block } -> str
+ * str.each_codepoint -> an_enumerator
+ *
+ * Passes the Integer ordinal of each character in str,
+ * also known as a codepoint when applied to Unicode strings to the
+ * given block.
+ *
+ * If no block is given, an array or enumerator is returned instead.
+ *
+ * "hello\u0639".each_codepoint {|c| print c, ' ' }
+ *
+ * produces:
+ *
+ * 104 101 108 108 111 1593
+ */
+
+static VALUE
+rb_str_each_codepoint(VALUE str)
+{
+ return rb_str_enumerate_codepoints(str, Qtrue);
+}
+
+static VALUE
+rb_str_codepoints(VALUE str)
+{
+ return rb_str_enumerate_codepoints(str, Qfalse);
+}
+
+
static long
chopped_length(VALUE str)
{
@@ -7915,10 +8073,10 @@ Init_String(void)
rb_define_method(rb_cString, "hex", rb_str_hex, 0);
rb_define_method(rb_cString, "oct", rb_str_oct, 0);
rb_define_method(rb_cString, "split", rb_str_split_m, -1);
- rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
- rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0);
- rb_define_method(rb_cString, "chars", rb_str_each_char, 0);
- rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
+ rb_define_method(rb_cString, "lines", rb_str_lines, -1);
+ rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
+ rb_define_method(rb_cString, "chars", rb_str_chars, 0);
+ rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
rb_define_method(rb_cString, "concat", rb_str_concat, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index bb0d9b04f9f489..fb59486b5d07b3 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -621,29 +621,93 @@ def test_each
def test_each_byte
res = []
- S("ABC").each_byte {|x| res << x }
+ s = S("ABC")
+ assert_equal s.object_id, s.each_byte {|x| res << x }.object_id
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
+
+ assert_equal 65, S("ABC").each_byte.next
+ end
+
+ def test_bytes
+ res = []
+ s = S("ABC")
+ assert_equal s.object_id, s.bytes {|x| res << x }.object_id
+ assert_equal(65, res[0])
+ assert_equal(66, res[1])
+ assert_equal(67, res[2])
+
+ assert_equal [65, 66, 67], S("ABC").bytes
+ end
+
+ def test_each_char
+ res = []
+ s = S("ABC")
+ assert_equal s.object_id, s.each_char {|x| res << x }.object_id
+ assert_equal("A", res[0])
+ assert_equal("B", res[1])
+ assert_equal("C", res[2])
+
+ assert_equal "A", S("ABC").each_char.next
+ end
+
+ def test_chars
+ res = []
+ s = S("ABC")
+ assert_equal s.object_id, s.chars {|x| res << x }.object_id
+ assert_equal("A", res[0])
+ assert_equal("B", res[1])
+ assert_equal("C", res[2])
+
+ assert_equal ["A", "B", "C"], S("ABC").chars
+ end
+
+ def test_each_codepoint
+ # Single byte optimization
+ assert_equal 65, S("ABC").each_codepoint.next
+
+ res = []
+ s = S("\u3042\u3044\u3046")
+ assert_equal s.object_id, s.each_codepoint {|x| res << x }.object_id
+ assert_equal(0x3042, res[0])
+ assert_equal(0x3044, res[1])
+ assert_equal(0x3046, res[2])
+
+ assert_equal 0x3042, S("\u3042\u3044\u3046").each_codepoint.next
+ end
+
+ def test_codepoints
+ # Single byte optimization
+ assert_equal [65, 66, 67], S("ABC").codepoints
+
+ res = []
+ s = S("\u3042\u3044\u3046")
+ assert_equal s.object_id, s.codepoints {|x| res << x }.object_id
+ assert_equal(0x3042, res[0])
+ assert_equal(0x3044, res[1])
+ assert_equal(0x3046, res[2])
+
+ assert_equal [0x3042, 0x3044, 0x3046], S("\u3042\u3044\u3046").codepoints
end
def test_each_line
save = $/
$/ = "\n"
res=[]
- S("hello\nworld").lines.each {|x| res << x}
+ S("hello\nworld").each_line {|x| res << x}
assert_equal(S("hello\n"), res[0])
assert_equal(S("world"), res[1])
res=[]
- S("hello\n\n\nworld").lines(S('')).each {|x| res << x}
+ S("hello\n\n\nworld").each_line(S('')) {|x| res << x}
assert_equal(S("hello\n\n\n"), res[0])
assert_equal(S("world"), res[1])
$/ = "!"
res=[]
- S("hello!world").lines.each {|x| res << x}
+ S("hello!world").each_line {|x| res << x}
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
@@ -659,6 +723,19 @@ def test_each_line
s = nil
"foo\nbar".each_line(nil) {|s2| s = s2 }
assert_equal("foo\nbar", s)
+
+ assert_equal "hello\n", S("hello\nworld").each_line.next
+ assert_equal "hello\nworld", S("hello\nworld").each_line(nil).next
+ end
+
+ def test_lines
+ res=[]
+ S("hello\nworld").lines {|x| res << x}
+ assert_equal(S("hello\n"), res[0])
+ assert_equal(S("world"), res[1])
+
+ assert_equal ["hello\n", "world"], S("hello\nworld").lines
+ assert_equal ["hello\nworld"], S("hello\nworld").lines(nil)
end
def test_empty?