diff --git a/string.c b/string.c index 26463b95e27c17..56b55d6a0d5376 100644 --- a/string.c +++ b/string.c @@ -6072,45 +6072,8 @@ rb_str_split(VALUE str, const char *sep0) } -/* - * call-seq: - * str.each_line(separator=$/) {|substr| block } -> str - * str.each_line(separator=$/) -> an_enumerator - * - * str.lines(separator=$/) {|substr| block } -> str - * str.lines(separator=$/) -> an_enumerator - * - * Splits str using the supplied parameter as the record separator - * ($/ by default), passing each substring in turn to the supplied - * block. If a zero-length record separator is supplied, the string is split - * into paragraphs delimited by multiple successive newlines. - * - * If no block is given, an enumerator is returned instead. - * - * print "Example one\n" - * "hello\nworld".each_line {|s| p s} - * print "Example two\n" - * "hello\nworld".each_line('l') {|s| p s} - * print "Example three\n" - * "hello\n\n\nworld".each_line('') {|s| p s} - * - * produces: - * - * Example one - * "hello\n" - * "world" - * Example two - * "hel" - * "l" - * "o\nworl" - * "d" - * Example three - * "hello\n\n\n" - * "world" - */ - static VALUE -rb_str_each_line(int argc, VALUE *argv, VALUE str) +rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE return_enumerator_p) { rb_encoding *enc; VALUE rs; @@ -6120,6 +6083,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) VALUE line; int n; VALUE orig = str; + VALUE ary, yieldp; if (argc == 0) { rs = rb_rs; @@ -6127,10 +6091,27 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) else { rb_scan_args(argc, argv, "01", &rs); } - RETURN_ENUMERATOR(str, argc, argv); + + if (rb_block_given_p()) { + yieldp = Qtrue; + } + else { + if (return_enumerator_p) + RETURN_ENUMERATOR(str, argc, argv); + + yieldp = Qfalse; + ary = rb_ary_new(); + } + if (NIL_P(rs)) { - rb_yield(str); - return orig; + if (yieldp) { + rb_yield(str); + return orig; + } + else { + rb_ary_push(ary, str); + return ary; + } } str = rb_str_new4(str); ptr = p = s = RSTRING_PTR(str); @@ -6153,7 +6134,12 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, p - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (yieldp) { + rb_yield(line); + } + else { + rb_ary_push(ary, line); + } str_mod_check(str, ptr, len); s = p; } @@ -6189,7 +6175,12 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, p - s + (rslen ? rslen : n)); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (yieldp) { + rb_yield(line); + } + else { + rb_ary_push(ary, line); + } str_mod_check(str, ptr, len); s = p + (rslen ? rslen : n); } @@ -6201,71 +6192,156 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, pend - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (yieldp) { + rb_yield(line); + } + else { + rb_ary_push(ary, line); + } } - return orig; + if (yieldp) { + return orig; + } + else { + return ary; + } } - /* * call-seq: - * str.bytes {|fixnum| block } -> str - * str.bytes -> an_enumerator + * str.lines(separator=$/) {|substr| block } -> str + * str.lines(separator=$/) -> an_array * - * str.each_byte {|fixnum| block } -> str - * str.each_byte -> an_enumerator + * str.each_line(separator=$/) {|substr| block } -> str + * str.each_line(separator=$/) -> an_enumerator * - * Passes each byte in str to the given block, or returns - * an enumerator if no block is given. + * Splits str using the supplied parameter as the record separator + * ($/ by default), passing each substring in turn to the supplied + * block. If a zero-length record separator is supplied, the string is split + * into paragraphs delimited by multiple successive newlines. * - * "hello".each_byte {|c| print c, ' ' } + * If no block is given, an array or enumerator is returned instead. + * + * print "Example one\n" + * "hello\nworld".each_line {|s| p s} + * print "Example two\n" + * "hello\nworld".each_line('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each_line('') {|s| p s} * * produces: * - * 104 101 108 108 111 + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" */ static VALUE -rb_str_each_byte(VALUE str) +rb_str_each_line(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, Qtrue); +} + +static VALUE +rb_str_lines(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, Qfalse); +} + + +static VALUE +rb_str_enumerate_bytes(VALUE str, VALUE return_enumerator_p) { long i; + VALUE ary, yieldp; + + if (rb_block_given_p()) { + yieldp = Qtrue; + } + else { + if (return_enumerator_p) + RETURN_ENUMERATOR(str, 0, 0); + + yieldp = Qfalse; + ary = rb_ary_new2(RSTRING_LEN(str)); + } - RETURN_ENUMERATOR(str, 0, 0); for (i=0; i str - * str.chars -> an_enumerator + * str.bytes {|fixnum| block } -> str + * str.bytes -> an_array * - * str.each_char {|cstr| block } -> str - * str.each_char -> an_enumerator + * str.each_byte {|fixnum| block } -> str + * str.each_byte -> an_enumerator * - * Passes each character in str to the given block, or returns - * an enumerator if no block is given. + * Passes each byte in str to the given block, or returns + * an array or enumerator if no block is given. * - * "hello".each_char {|c| print c, ' ' } + * "hello".each_byte {|c| print c, ' ' } * * produces: * - * h e l l o + * 104 101 108 108 111 */ static VALUE -rb_str_each_char(VALUE str) +rb_str_each_byte(VALUE str) +{ + return rb_str_enumerate_bytes(str, Qtrue); +} + +static VALUE +rb_str_bytes(VALUE str) +{ + return rb_str_enumerate_bytes(str, Qfalse); +} + + +static VALUE +rb_str_enumerate_chars(VALUE str, VALUE return_enumerator_p) { VALUE orig = str; long i, len, n; const char *ptr; rb_encoding *enc; + VALUE ary, yieldp; + + if (rb_block_given_p()) { + yieldp = Qtrue; + } + else { + if (return_enumerator_p) + RETURN_ENUMERATOR(str, 0, 0); + + yieldp = Qfalse; + ary = rb_ary_new(); + } - RETURN_ENUMERATOR(str, 0, 0); str = rb_str_new4(str); ptr = RSTRING_PTR(str); len = RSTRING_LEN(str); @@ -6275,62 +6351,144 @@ rb_str_each_char(VALUE str) case ENC_CODERANGE_7BIT: for (i = 0; i < len; i += n) { n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (yieldp) { + rb_yield(rb_str_subseq(str, i, n)); + } + else { + rb_ary_push(ary, rb_str_subseq(str, i, n)); + } } break; default: for (i = 0; i < len; i += n) { n = rb_enc_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (yieldp) { + rb_yield(rb_str_subseq(str, i, n)); + } + else { + rb_ary_push(ary, rb_str_subseq(str, i, n)); + } } } - return orig; + if (yieldp) { + return orig; + } + else { + return ary; + } } /* * call-seq: - * str.codepoints {|integer| block } -> str - * str.codepoints -> an_enumerator - * - * str.each_codepoint {|integer| block } -> str - * str.each_codepoint -> an_enumerator + * str.chars {|cstr| block } -> str + * str.chars -> an_array * - * Passes the Integer ordinal of each character in str, - * also known as a codepoint when applied to Unicode strings to the - * given block. + * str.each_char {|cstr| block } -> str + * str.each_char -> an_enumerator * - * If no block is given, an enumerator is returned instead. + * Passes each character in str to the given block, or returns + * an array or enumerator if no block is given. * - * "hello\u0639".each_codepoint {|c| print c, ' ' } + * "hello".each_char {|c| print c, ' ' } * * produces: * - * 104 101 108 108 111 1593 + * h e l l o */ static VALUE -rb_str_each_codepoint(VALUE str) +rb_str_each_char(VALUE str) +{ + return rb_str_enumerate_chars(str, Qtrue); +} + +static VALUE +rb_str_chars(VALUE str) +{ + return rb_str_enumerate_chars(str, Qfalse); +} + + +static VALUE +rb_str_enumerate_codepoints(VALUE str, VALUE return_enumerator_p) { VALUE orig = str; int n; unsigned int c; const char *ptr, *end; rb_encoding *enc; + VALUE ary, yieldp; + + if (single_byte_optimizable(str)) + return rb_str_enumerate_bytes(str, return_enumerator_p); + + if (rb_block_given_p()) { + yieldp = Qtrue; + } + else { + if (return_enumerator_p) + RETURN_ENUMERATOR(str, 0, 0); + + yieldp = Qfalse; + ary = rb_ary_new(); + } - if (single_byte_optimizable(str)) return rb_str_each_byte(str); - RETURN_ENUMERATOR(str, 0, 0); str = rb_str_new4(str); ptr = RSTRING_PTR(str); end = RSTRING_END(str); enc = STR_ENC_GET(str); while (ptr < end) { c = rb_enc_codepoint_len(ptr, end, &n, enc); - rb_yield(UINT2NUM(c)); + if (yieldp) { + rb_yield(UINT2NUM(c)); + } + else { + rb_ary_push(ary, UINT2NUM(c)); + } ptr += n; } - return orig; + if (yieldp) { + return orig; + } + else { + return ary; + } } +/* + * call-seq: + * str.codepoints {|integer| block } -> str + * str.codepoints -> an_array + * + * str.each_codepoint {|integer| block } -> str + * str.each_codepoint -> an_enumerator + * + * Passes the Integer ordinal of each character in str, + * also known as a codepoint when applied to Unicode strings to the + * given block. + * + * If no block is given, an array or enumerator is returned instead. + * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * produces: + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rb_str_each_codepoint(VALUE str) +{ + return rb_str_enumerate_codepoints(str, Qtrue); +} + +static VALUE +rb_str_codepoints(VALUE str) +{ + return rb_str_enumerate_codepoints(str, Qfalse); +} + + static long chopped_length(VALUE str) { @@ -7915,10 +8073,10 @@ Init_String(void) rb_define_method(rb_cString, "hex", rb_str_hex, 0); rb_define_method(rb_cString, "oct", rb_str_oct, 0); rb_define_method(rb_cString, "split", rb_str_split_m, -1); - rb_define_method(rb_cString, "lines", rb_str_each_line, -1); - rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0); - rb_define_method(rb_cString, "chars", rb_str_each_char, 0); - rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0); + rb_define_method(rb_cString, "lines", rb_str_lines, -1); + rb_define_method(rb_cString, "bytes", rb_str_bytes, 0); + rb_define_method(rb_cString, "chars", rb_str_chars, 0); + rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0); rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); rb_define_method(rb_cString, "concat", rb_str_concat, 1); diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index bb0d9b04f9f489..fb59486b5d07b3 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -621,29 +621,93 @@ def test_each def test_each_byte res = [] - S("ABC").each_byte {|x| res << x } + s = S("ABC") + assert_equal s.object_id, s.each_byte {|x| res << x }.object_id assert_equal(65, res[0]) assert_equal(66, res[1]) assert_equal(67, res[2]) + + assert_equal 65, S("ABC").each_byte.next + end + + def test_bytes + res = [] + s = S("ABC") + assert_equal s.object_id, s.bytes {|x| res << x }.object_id + assert_equal(65, res[0]) + assert_equal(66, res[1]) + assert_equal(67, res[2]) + + assert_equal [65, 66, 67], S("ABC").bytes + end + + def test_each_char + res = [] + s = S("ABC") + assert_equal s.object_id, s.each_char {|x| res << x }.object_id + assert_equal("A", res[0]) + assert_equal("B", res[1]) + assert_equal("C", res[2]) + + assert_equal "A", S("ABC").each_char.next + end + + def test_chars + res = [] + s = S("ABC") + assert_equal s.object_id, s.chars {|x| res << x }.object_id + assert_equal("A", res[0]) + assert_equal("B", res[1]) + assert_equal("C", res[2]) + + assert_equal ["A", "B", "C"], S("ABC").chars + end + + def test_each_codepoint + # Single byte optimization + assert_equal 65, S("ABC").each_codepoint.next + + res = [] + s = S("\u3042\u3044\u3046") + assert_equal s.object_id, s.each_codepoint {|x| res << x }.object_id + assert_equal(0x3042, res[0]) + assert_equal(0x3044, res[1]) + assert_equal(0x3046, res[2]) + + assert_equal 0x3042, S("\u3042\u3044\u3046").each_codepoint.next + end + + def test_codepoints + # Single byte optimization + assert_equal [65, 66, 67], S("ABC").codepoints + + res = [] + s = S("\u3042\u3044\u3046") + assert_equal s.object_id, s.codepoints {|x| res << x }.object_id + assert_equal(0x3042, res[0]) + assert_equal(0x3044, res[1]) + assert_equal(0x3046, res[2]) + + assert_equal [0x3042, 0x3044, 0x3046], S("\u3042\u3044\u3046").codepoints end def test_each_line save = $/ $/ = "\n" res=[] - S("hello\nworld").lines.each {|x| res << x} + S("hello\nworld").each_line {|x| res << x} assert_equal(S("hello\n"), res[0]) assert_equal(S("world"), res[1]) res=[] - S("hello\n\n\nworld").lines(S('')).each {|x| res << x} + S("hello\n\n\nworld").each_line(S('')) {|x| res << x} assert_equal(S("hello\n\n\n"), res[0]) assert_equal(S("world"), res[1]) $/ = "!" res=[] - S("hello!world").lines.each {|x| res << x} + S("hello!world").each_line {|x| res << x} assert_equal(S("hello!"), res[0]) assert_equal(S("world"), res[1]) @@ -659,6 +723,19 @@ def test_each_line s = nil "foo\nbar".each_line(nil) {|s2| s = s2 } assert_equal("foo\nbar", s) + + assert_equal "hello\n", S("hello\nworld").each_line.next + assert_equal "hello\nworld", S("hello\nworld").each_line(nil).next + end + + def test_lines + res=[] + S("hello\nworld").lines {|x| res << x} + assert_equal(S("hello\n"), res[0]) + assert_equal(S("world"), res[1]) + + assert_equal ["hello\n", "world"], S("hello\nworld").lines + assert_equal ["hello\nworld"], S("hello\nworld").lines(nil) end def test_empty?