Skip to content

Commit a0db9f7

Browse files
committed
Fix unicode array pickling
1 parent 774b622 commit a0db9f7

File tree

2 files changed

+60
-43
lines changed

2 files changed

+60
-43
lines changed

extra_tests/snippets/stdlib_array.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from testutils import assert_raises
22
from array import array
3+
from pickle import dumps, loads
34

45
a1 = array("b", [0, 1, 2, 3])
56

@@ -96,4 +97,10 @@ def test_array_frombytes():
9697
with assert_raises(IndexError):
9798
a[0] = 42
9899
with assert_raises(IndexError):
99-
del a[42]
100+
del a[42]
101+
102+
# Pickle round-trip of a unicode ('u') array holding characters outside the BMP.
test_str = '🌉abc🌐def🌉🌐'
u = array('u', test_str)

# With protocol 1 the reduce arguments are (typecode, items); the items must
# come back as one entry per character of the source string.
reduce_args = u.__reduce_ex__(1)[1]
assert reduce_args[1] == list(test_str)

# Restoring from a protocol-1 pickle yields an array with the original text.
restored_proto1 = loads(dumps(u, 1))
assert str(restored_proto1) == f"array('u', '{test_str}')"

# Protocol 1 and protocol 3 pickles must restore to equal arrays.
from_proto1 = loads(dumps(u, 1))
from_proto3 = loads(dumps(u, 3))
assert from_proto1 == from_proto3

stdlib/src/array.rs

Lines changed: 52 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod array {
1313
use crate::vm::{
1414
builtins::{
1515
IntoPyFloat, PyByteArray, PyBytes, PyBytesRef, PyDictRef, PyFloat, PyInt, PyIntRef,
16-
PyList, PyListRef, PySliceRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef,
16+
PyList, PyListRef, PySliceRef, PyStr, PyStrRef, PyTypeRef,
1717
},
1818
class_or_notimplemented,
1919
function::{ArgBytesLike, ArgIterable, OptionalArg},
@@ -561,12 +561,8 @@ mod array {
561561
fn byteswap(self) -> Self {
562562
Self(self.0.swap_bytes())
563563
}
564-
fn to_object(self, vm: &VirtualMachine) -> PyObjectRef {
565-
vm.ctx.new_str(
566-
char::from_u32(self.0 as u32)
567-
.unwrap_or_default()
568-
.to_string(),
569-
)
564+
/// Per-element conversion to a Python object; never expected to be called
/// for this element type.
///
/// NOTE(review): presumably callers decode wide-character data from the whole
/// byte buffer (see `_wchar_bytes_to_string`) instead of converting one code
/// unit at a time — confirm that no remaining caller reaches this method.
///
/// # Panics
///
/// Always panics if invoked.
fn to_object(self, _vm: &VirtualMachine) -> PyObjectRef {
    // A message makes the failure diagnosable should this ever be reached.
    unreachable!("wide-char elements are not converted to objects one at a time")
}
571567
}
572568

@@ -757,6 +753,38 @@ mod array {
757753
}
758754
}
759755

756+
fn _wchar_bytes_to_string(
757+
bytes: &[u8],
758+
item_size: usize,
759+
vm: &VirtualMachine,
760+
) -> PyResult<String> {
761+
if item_size == 2 {
762+
// safe because every configuration of bytes for the types we support are valid
763+
let utf16 = unsafe {
764+
std::slice::from_raw_parts(
765+
bytes.as_ptr() as *const u16,
766+
bytes.len() / std::mem::size_of::<u16>(),
767+
)
768+
};
769+
Ok(String::from_utf16_lossy(utf16))
770+
} else {
771+
// safe because every configuration of bytes for the types we support are valid
772+
let chars = unsafe {
773+
std::slice::from_raw_parts(
774+
bytes.as_ptr() as *const u32,
775+
bytes.len() / std::mem::size_of::<u32>(),
776+
)
777+
};
778+
chars
779+
.iter()
780+
.map(|&ch| {
781+
// cpython issue 17223
782+
u32_to_char(ch).map_err(|msg| vm.new_value_error(msg))
783+
})
784+
.try_collect()
785+
}
786+
}
787+
760788
fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec<u8> {
761789
if item_size == 2 {
762790
utf8.encode_utf16()
@@ -797,31 +825,7 @@ mod array {
797825
));
798826
}
799827
let bytes = array.get_bytes();
800-
if self.itemsize() == 2 {
801-
// safe because every configuration of bytes for the types we support are valid
802-
let utf16 = unsafe {
803-
std::slice::from_raw_parts(
804-
bytes.as_ptr() as *const u16,
805-
bytes.len() / std::mem::size_of::<u16>(),
806-
)
807-
};
808-
Ok(String::from_utf16_lossy(utf16))
809-
} else {
810-
// safe because every configuration of bytes for the types we support are valid
811-
let chars = unsafe {
812-
std::slice::from_raw_parts(
813-
bytes.as_ptr() as *const u32,
814-
bytes.len() / std::mem::size_of::<u32>(),
815-
)
816-
};
817-
chars
818-
.iter()
819-
.map(|&ch| {
820-
// cpython issue 17223
821-
u32_to_char(ch).map_err(|msg| vm.new_value_error(msg))
822-
})
823-
.try_collect()
824-
}
828+
Self::_wchar_bytes_to_string(bytes, self.itemsize(), vm)
825829
}
826830

827831
fn _from_bytes(&self, b: &[u8], itemsize: usize, vm: &VirtualMachine) -> PyResult<()> {
@@ -1111,21 +1115,21 @@ mod array {
11111115
zelf: PyRef<Self>,
11121116
proto: usize,
11131117
vm: &VirtualMachine,
1114-
) -> PyResult<(PyObjectRef, PyTupleRef, Option<PyDictRef>)> {
1118+
) -> PyResult<(PyObjectRef, PyObjectRef, Option<PyDictRef>)> {
11151119
if proto < 3 {
1116-
return Ok(Self::reduce(zelf, vm));
1120+
return Self::reduce(zelf, vm);
11171121
}
11181122
let array = zelf.read();
11191123
let cls = zelf.as_object().clone_class().into_object();
1120-
let typecode = vm.ctx.new_str(array.typecode_str());
1124+
let typecode = vm.ctx.new_utf8_str(array.typecode_str());
11211125
let bytes = vm.ctx.new_bytes(array.get_bytes().to_vec());
11221126
let code = MachineFormatCode::from_typecode(array.typecode()).unwrap();
11231127
let code = PyInt::from(u8::from(code)).into_object(vm);
11241128
let module = vm.import("array", None, 0)?;
11251129
let func = vm.get_attribute(module, "_array_reconstructor")?;
11261130
Ok((
11271131
func,
1128-
PyTupleRef::with_elements(vec![cls, typecode, code, bytes], &vm.ctx),
1132+
vm.ctx.new_tuple(vec![cls, typecode, code, bytes]),
11291133
zelf.as_object().dict(),
11301134
))
11311135
}
@@ -1134,16 +1138,22 @@ mod array {
11341138
fn reduce(
11351139
zelf: PyRef<Self>,
11361140
vm: &VirtualMachine,
1137-
) -> (PyObjectRef, PyTupleRef, Option<PyDictRef>) {
1141+
) -> PyResult<(PyObjectRef, PyObjectRef, Option<PyDictRef>)> {
11381142
let array = zelf.read();
11391143
let cls = zelf.as_object().clone_class().into_object();
1140-
let typecode = vm.ctx.new_str(array.typecode_str());
1141-
let values = vm.ctx.new_list(array.get_objects(vm));
1142-
(
1144+
let typecode = vm.ctx.new_utf8_str(array.typecode_str());
1145+
let values = if array.typecode() == 'u' {
1146+
let s = Self::_wchar_bytes_to_string(array.get_bytes(), array.itemsize(), vm)?;
1147+
s.chars().map(|x| x.into_pyobject(vm)).collect()
1148+
} else {
1149+
array.get_objects(vm)
1150+
};
1151+
let values = vm.ctx.new_list(values);
1152+
Ok((
11431153
cls,
1144-
PyTupleRef::with_elements(vec![typecode, values], &vm.ctx),
1154+
vm.ctx.new_tuple(vec![typecode, values]),
11451155
zelf.as_object().dict(),
1146-
)
1156+
))
11471157
}
11481158
}
11491159

0 commit comments

Comments
 (0)