-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Support Marshal.{dump,load} for core Set #13185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This was missed when adding core Set, because it's handled implicitly for T_OBJECT. I attempted to allow core Set to support Marshal.load with stdlib Set marshal output, but it doesn't look like Marshal supports this, raising `dump format error (ArgumentError)`. If the object was previously relying on the default Marshal.dump implementation for T_OBJECT, it doesn't look like you can restore it to a non-T_OBJECT, regardless of how that object implements _load/marshal_load. I think it would require changes to Marshal to allow that.
Can't we use It's generally understood that it's preferable not to exchange Marshal across Ruby versions, but this isn't always respected. |
Thanks for pointing this out, I was not aware of it. I'm guessing it will work, I'll try implementing support using it.
Agreed, that's why I had a goal to support it. |
If you consider the backward compatibility, use $ ./ruby -e 'Marshal.dump(Set[1,2,3], STDOUT)' | ruby3.4 -e 'Marshal.load(STDIN)'
<internal:marshal>:34:in 'Marshal.load': instance of Set needs to have method 'marshal_load' (TypeError)
from -e:1:in '<main>' This is my patch from 8a0b358. diff --git i/hash.c w/hash.c
index 98f39c86324..2a2d5c31a45 100644
--- i/hash.c
+++ w/hash.c
@@ -2253,7 +2253,7 @@ rb_hash_default(int argc, VALUE *argv, VALUE hash)
* See {Hash Default}[rdoc-ref:Hash@Hash+Default].
*/
-static VALUE
+VALUE
rb_hash_set_default(VALUE hash, VALUE ifnone)
{
rb_hash_modify_check(hash);
diff --git i/internal/hash.h w/internal/hash.h
index 676f1404960..03cd830506a 100644
--- i/internal/hash.h
+++ w/internal/hash.h
@@ -72,6 +72,7 @@ struct RHash {
/* hash.c */
void rb_hash_st_table_set(VALUE hash, st_table *st);
VALUE rb_hash_default_value(VALUE hash, VALUE key);
+VALUE rb_hash_set_default(VALUE hash, VALUE ifnone);
VALUE rb_hash_set_default_proc(VALUE hash, VALUE proc);
long rb_dbl_long_hash(double d);
st_table *rb_init_identtable(void);
diff --git i/set.c w/set.c
index 6f11f12e63e..d69b43078fd 100644
--- i/set.c
+++ w/set.c
@@ -99,6 +99,7 @@ VALUE rb_cSet;
static ID id_each_entry;
static ID id_any_p;
static ID id_new;
+static ID id_i_hash;
static ID id_set_iter_lev;
#define RSET_INITIALIZED FL_USER1
@@ -1851,59 +1852,64 @@ set_i_hash(VALUE set)
}
/* :nodoc: */
-static VALUE
-set_i_marshal_dump(VALUE set)
+static int
+set_to_hash_i(st_data_t key, st_data_t arg)
{
- VALUE array = rb_ary_new_from_args(2, set_i_to_a(set), set_i_compare_by_identity_p(set));
+ rb_hash_aset((VALUE)arg, (VALUE)key, Qtrue);
+ return ST_CONTINUE;
+}
- if (FL_TEST(set, FL_EXIVAR)) {
- rb_copy_generic_ivar(array, set);
- FL_SET(array, FL_EXIVAR);
+static VALUE
+set_i_to_h(VALUE set)
+{
+ st_index_t size = RSET_SIZE(set);
+ VALUE hash;
+ if (RSET_COMPARE_BY_IDENTITY(set)) {
+ hash = rb_ident_hash_new_with_size(size);
+ }
+ else {
+ hash = rb_hash_new_with_size(size);
}
+ rb_hash_set_default(hash, Qfalse);
- return array;
+ if (size == 0) return hash;
+
+ set_iter(set, set_to_hash_i, (st_data_t)hash);
+ return hash;
}
-/* :nodoc: */
static VALUE
-set_i_marshal_load(VALUE set, VALUE array)
+compat_dumper(VALUE set)
{
- rb_check_frozen(set);
- if (RBASIC(set)->flags & RSET_INITIALIZED) {
- rb_raise(rb_eArgError, "set is already initialized");
- }
- if (!RB_TYPE_P(array, T_ARRAY)) {
- rb_raise(rb_eArgError, "argument must be array");
- }
- if (RARRAY_LEN(array) != 2) {
- rb_raise(rb_eArgError, "argument must have 2 elements");
- }
- VALUE compare_by_identity = RARRAY_AREF(array, 1);
- array = RARRAY_AREF(array, 0);
- if (!RB_TYPE_P(array, T_ARRAY)) {
- rb_raise(rb_eArgError, "first element in argument must be array");
- }
- if (compare_by_identity != Qtrue && compare_by_identity != Qfalse) {
- rb_raise(rb_eArgError, "second element in argument must be true or false");
+ VALUE dumper = rb_class_new_instance(0, 0, rb_cObject);
+ rb_ivar_set(dumper, id_i_hash, set_i_to_h(set));
+ return dumper;
+}
+
+static int
+set_i_from_hash_i(st_data_t key, st_data_t val, st_data_t set)
+{
+ if ((VALUE)val != Qtrue) {
+ rb_raise(rb_eRuntimeError, "expect true as Set value: %"PRIsVALUE, rb_obj_class((VALUE)val));
}
- RBASIC(set)->flags |= RSET_INITIALIZED;
- if (RTEST(compare_by_identity)) set_i_compare_by_identity(set);
- set_merge_enum_into(set, array);
+ set_i_add((VALUE)set, (VALUE)key);
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_i_from_hash(VALUE set, VALUE hash)
+{
+ Check_Type(hash, T_HASH);
+ if (rb_hash_compare_by_id_p(hash)) set_i_compare_by_identity(set);
+ rb_hash_stlike_foreach(hash, set_i_from_hash_i, (st_data_t)set);
return set;
}
-/* :nodoc: */
-/*
- * This is to attempt to load marshalled values from stdlib set, but it
- * doesn't work (Marshal does not attempt to call it):
- *
- * 'Marshal.load': dump format error (ArgumentError)
- *
static VALUE
-set_s__load(VALUE klass, VALUE str)
+compat_loader(VALUE self, VALUE a)
{
+ return set_i_from_hash(self, rb_ivar_get(a, id_i_hash));
}
-*/
/*
* Document-class: Set
@@ -2123,11 +2129,11 @@ Init_Set(void)
id_each_entry = rb_intern_const("each_entry");
id_any_p = rb_intern_const("any?");
id_new = rb_intern_const("new");
+ id_i_hash = rb_intern_const("@hash");
id_set_iter_lev = rb_make_internal_id();
rb_define_alloc_func(rb_cSet, set_s_alloc);
rb_define_singleton_method(rb_cSet, "[]", set_s_create, -1);
- // rb_define_singleton_method(rb_cSet, "_load", set_s__load, 1);
rb_define_method(rb_cSet, "initialize", set_i_initialize, -1);
rb_define_method(rb_cSet, "initialize_copy", set_i_initialize_copy, 1);
@@ -2170,8 +2176,6 @@ Init_Set(void)
rb_define_method(rb_cSet, "intersect?", set_i_intersect, 1);
rb_define_method(rb_cSet, "join", set_i_join, -1);
rb_define_method(rb_cSet, "keep_if", set_i_keep_if, 0);
- rb_define_method(rb_cSet, "marshal_dump", set_i_marshal_dump, 0);
- rb_define_method(rb_cSet, "marshal_load", set_i_marshal_load, 1);
rb_define_method(rb_cSet, "merge", set_i_merge, -1);
rb_define_method(rb_cSet, "proper_subset?", set_i_proper_subset, 1);
rb_define_alias(rb_cSet, "<", "proper_subset?");
@@ -2190,7 +2194,13 @@ Init_Set(void)
rb_define_method(rb_cSet, "superset?", set_i_superset, 1);
rb_define_alias(rb_cSet, ">=", "superset?");
rb_define_method(rb_cSet, "to_a", set_i_to_a, 0);
+ rb_define_method(rb_cSet, "to_h", set_i_to_h, 0);
rb_define_method(rb_cSet, "to_set", set_i_to_set, -1);
+ // rb_define_singleton_method(rb_cSet, "from_hash", compat_loader, 1);
+
+ /* :nodoc: */
+ VALUE compat = rb_define_class_under(rb_cSet, "compatible", rb_cObject);
+ rb_marshal_define_compat(rb_cSet, compat, compat_dumper, compat_loader);
rb_provide("set.rb");
}
diff --git i/test/ruby/test_set.rb w/test/ruby/test_set.rb
index f2b5fa16226..8446b14a1af 100644
--- i/test/ruby/test_set.rb
+++ w/test/ruby/test_set.rb
@@ -23,10 +23,8 @@
assert_equal(set.compare_by_identity?, mset.compare_by_identity?)
assert_equal(1, mset.instance_variable_get(:@a))
- # Not sure if possible to support?
- # 'Marshal.load': dump format error (ArgumentError)
- # old_stdlib_set_data = "\x04\bo:\bSet\x06:\n@hash}\bi\x06Ti\aTi\bTF".b
- # assert_equal(Set[1, 2, 3], Marshal.load(old_stdlib_set_data))
+ old_stdlib_set_data = "\x04\bo:\bSet\x06:\n@hash}\bi\x06Ti\aTi\bTF".b
+ assert_equal(Set[1, 2, 3], Marshal.load(old_stdlib_set_data))
end
def test_aref |
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2e61054
to
b41f62f
Compare
@nobu Thank you. I don't have a problem with making it backwards compatible both ways. Doing so bloats the marshal string for larger sets, but I guess that isn't a major problem. Your approach also seems more elegant than mine. I've replaced my previous commit with yours, but kept the extra specs I added. |
Compatibility definitely trumps a slightly smaller payload size. |
This comment has been minimized.
This comment has been minimized.
set.c
Outdated
rb_define_method(rb_cSet, "to_set", set_i_to_set, -1); | ||
// rb_define_singleton_method(rb_cSet, "from_hash", compat_loader, 1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a purpose to this commented out method?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume that leaving it commented out makes it easier to test manually. However, it seems like it should be a regular method and not a singleton method, as compat_loader
calls set_i_to_hash
, which accepts a Set instance and not a class. @nobu is that correct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it was for just debugging but a mistake.
Please remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pushed a commit to remove it. I'll be squashing these commits when merging.
I just noticed thanks to one of my gems test suite that we have a similar problem with
|
This was missed when adding core Set, because it's handled implicitly for T_OBJECT.
I attempted to allow core Set to support Marshal.load with stdlib Set marshal output, but it doesn't look like Marshal supports this, raising
dump format error (ArgumentError)
. If the object was previously relying on the default Marshal.dump implementation for T_OBJECT, it doesn't look like you can restore it to a non-T_OBJECT, regardless of how that object implements _load/marshal_load. I think it would require changes to Marshal to allow that.