From 03b7299150fc00b841f721b2919ca0940094317f Mon Sep 17 00:00:00 2001 From: silmeth Date: Mon, 8 Jul 2019 19:52:00 +0200 Subject: [PATCH 1/3] make TextIOBase writable, handle malformed utf-8 in read() --- vm/src/builtins.rs | 4 +++ vm/src/exceptions.rs | 13 ++++++++ vm/src/stdlib/io.rs | 72 +++++++++++++++++++++++++++++++++++++++----- vm/src/vm.rs | 5 +++ 4 files changed, 86 insertions(+), 8 deletions(-) diff --git a/vm/src/builtins.rs b/vm/src/builtins.rs index b7bcce7232..4b7b249ead 100644 --- a/vm/src/builtins.rs +++ b/vm/src/builtins.rs @@ -889,6 +889,10 @@ pub fn make_module(vm: &VirtualMachine, module: PyObjectRef) { "FileNotFoundError" => ctx.exceptions.file_not_found_error.clone(), "FileExistsError" => ctx.exceptions.file_exists_error.clone(), "StopIteration" => ctx.exceptions.stop_iteration.clone(), + "UnicodeError" => ctx.exceptions.unicode_error.clone(), + "UnicodeDecodeError" => ctx.exceptions.unicode_decode_error.clone(), + "UnicodeEncodeError" => ctx.exceptions.unicode_encode_error.clone(), + "UnicodeTranslateError" => ctx.exceptions.unicode_translate_error.clone(), "ZeroDivisionError" => ctx.exceptions.zero_division_error.clone(), "KeyError" => ctx.exceptions.key_error.clone(), "OSError" => ctx.exceptions.os_error.clone(), diff --git a/vm/src/exceptions.rs b/vm/src/exceptions.rs index cfd611c3ff..eef1fae00b 100644 --- a/vm/src/exceptions.rs +++ b/vm/src/exceptions.rs @@ -215,6 +215,10 @@ pub struct ExceptionZoo { pub syntax_error: PyClassRef, pub type_error: PyClassRef, pub value_error: PyClassRef, + pub unicode_error: PyClassRef, + pub unicode_decode_error: PyClassRef, + pub unicode_encode_error: PyClassRef, + pub unicode_translate_error: PyClassRef, pub zero_division_error: PyClassRef, pub eof_error: PyClassRef, @@ -258,6 +262,11 @@ impl ExceptionZoo { let permission_error = create_type("PermissionError", &type_type, &os_error); let file_exists_error = create_type("FileExistsError", &type_type, &os_error); let eof_error = create_type("EOFError", 
&type_type, &exception_type); + let unicode_error = create_type("UnicodeError", &type_type, &value_error); + let unicode_decode_error = create_type("UnicodeDecodeError", &type_type, &unicode_error); + let unicode_encode_error = create_type("UnicodeEncodeError", &type_type, &unicode_error); + let unicode_translate_error = + create_type("UnicodeTranslateError", &type_type, &unicode_error); let warning = create_type("Warning", &type_type, &exception_type); let bytes_warning = create_type("BytesWarning", &type_type, &warning); @@ -294,6 +303,10 @@ impl ExceptionZoo { syntax_error, type_error, value_error, + unicode_error, + unicode_decode_error, + unicode_encode_error, + unicode_translate_error, zero_division_error, eof_error, warning, diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index c9148d82ca..010b2bb21c 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -17,7 +17,7 @@ use crate::obj::objbytes::PyBytes; use crate::obj::objint; use crate::obj::objstr; use crate::obj::objtype; -use crate::obj::objtype::PyClassRef; +use crate::obj::objtype::{PyClass, PyClassRef}; use crate::pyobject::TypeProtocol; use crate::pyobject::{BufferProtocol, PyObjectRef, PyRef, PyResult, PyValue}; use crate::vm::VirtualMachine; @@ -442,16 +442,71 @@ fn text_io_wrapper_init(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { fn text_io_base_read(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { arg_check!(vm, args, required = [(text_io_base, None)]); + let io_module = vm.import("_io", &vm.ctx.new_tuple(vec![]), 0)?; + let buffered_reader_class = vm + .get_attribute(io_module.clone(), "BufferedReader") + .unwrap() + .downcast::<PyClass>() + .unwrap(); let raw = vm.get_attribute(text_io_base.clone(), "buffer").unwrap(); - if let Ok(bytes) = vm.call_method(&raw, "read", PyFuncArgs::default()) { - let value = objbytes::get_value(&bytes).to_vec(); + if objtype::isinstance(&raw, &buffered_reader_class) { + if let Ok(bytes) = vm.call_method(&raw, "read", PyFuncArgs::default()) { + 
let value = objbytes::get_value(&bytes).to_vec(); + + //format bytes into string + let rust_string = String::from_utf8(value).map_err(|e| { + vm.new_unicode_decode_error(format!( + "cannot decode byte at index: {}", + e.utf8_error().valid_up_to() + )) + })?; + Ok(vm.ctx.new_str(rust_string)) + } else { + Err(vm.new_value_error("Error unpacking Bytes".to_string())) + } + } else { + // TODO: this should be io.UnsupportedOperation error which derives both from ValueError *and* OSError + Err(vm.new_value_error("not readable".to_string())) + } +} - //format bytes into string - let rust_string = String::from_utf8(value).unwrap(); - Ok(vm.ctx.new_str(rust_string)) +fn text_io_base_write(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!( + vm, + args, + required = [(text_io_base, None), (obj, Some(vm.ctx.str_type()))] + ); + + let io_module = vm.import("_io", &vm.ctx.new_tuple(vec![]), 0)?; + let buffered_writer_class = vm + .get_attribute(io_module.clone(), "BufferedWriter") + .unwrap() + .downcast::<PyClass>() + .unwrap(); + let raw = vm.get_attribute(text_io_base.clone(), "buffer").unwrap(); + if objtype::isinstance(&raw, &buffered_writer_class) { + let write = vm + .get_method(raw.clone(), "write") + .ok_or_else(|| vm.new_attribute_error("BufferedWriter has no write method".to_owned())) + .and_then(|it| it)?; + let bytes = objstr::get_value(obj).into_bytes(); + + let len = vm.invoke( + write, + PyFuncArgs::new(vec![vm.ctx.new_bytes(bytes.clone())], vec![]), + )?; + let len = objint::get_value(&len).to_usize().ok_or_else(|| { + vm.new_overflow_error("int to large to convert to Rust usize".to_string()) + })?; + + // returns the count of unicode code points written + Ok(vm + .ctx + .new_int(String::from_utf8_lossy(&bytes[0..len]).chars().count())) } else { - Err(vm.new_value_error("Error unpacking Bytes".to_string())) + // TODO: this should be io.UnsupportedOperation error which derives from ValueError and OSError + Err(vm.new_value_error("not 
writable".to_string())) } } @@ -594,7 +649,8 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { //TextIO Base has no public constructor let text_io_base = py_class!(ctx, "TextIOBase", io_base.clone(), { - "read" => ctx.new_rustfunc(text_io_base_read) + "read" => ctx.new_rustfunc(text_io_base_read), + "write" => ctx.new_rustfunc(text_io_base_write) }); // RawBaseIO Subclasses diff --git a/vm/src/vm.rs b/vm/src/vm.rs index f117b83f31..42043bfb3a 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -218,6 +218,11 @@ impl VirtualMachine { self.new_exception(os_error, msg) } + pub fn new_unicode_decode_error(&self, msg: String) -> PyObjectRef { + let unicode_decode_error = self.ctx.exceptions.unicode_decode_error.clone(); + self.new_exception(unicode_decode_error, msg) + } + /// Create a new python ValueError object. Useful for raising errors from /// python functions implemented in rust. pub fn new_value_error(&self, msg: String) -> PyObjectRef { From 5450f8e8a38da27bf23e46909954bdadded54310 Mon Sep 17 00:00:00 2001 From: silmeth Date: Mon, 8 Jul 2019 20:44:36 +0200 Subject: [PATCH 2/3] use vm.try_class, early return on unsupported op error --- vm/src/stdlib/io.rs | 91 ++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 010b2bb21c..a6a6f27493 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -17,7 +17,7 @@ use crate::obj::objbytes::PyBytes; use crate::obj::objint; use crate::obj::objstr; use crate::obj::objtype; -use crate::obj::objtype::{PyClass, PyClassRef}; +use crate::obj::objtype::PyClassRef; use crate::pyobject::TypeProtocol; use crate::pyobject::{BufferProtocol, PyObjectRef, PyRef, PyResult, PyValue}; use crate::vm::VirtualMachine; @@ -442,32 +442,27 @@ fn text_io_wrapper_init(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { fn text_io_base_read(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { arg_check!(vm, args, required = 
[(text_io_base, None)]); - let io_module = vm.import("_io", &vm.ctx.new_tuple(vec![]), 0)?; - let buffered_reader_class = vm - .get_attribute(io_module.clone(), "BufferedReader") - .unwrap() - .downcast::<PyClass>() - .unwrap(); + let buffered_reader_class = vm.try_class("_io", "BufferedReader")?; let raw = vm.get_attribute(text_io_base.clone(), "buffer").unwrap(); - if objtype::isinstance(&raw, &buffered_reader_class) { - if let Ok(bytes) = vm.call_method(&raw, "read", PyFuncArgs::default()) { - let value = objbytes::get_value(&bytes).to_vec(); - - //format bytes into string - let rust_string = String::from_utf8(value).map_err(|e| { - vm.new_unicode_decode_error(format!( - "cannot decode byte at index: {}", - e.utf8_error().valid_up_to() - )) - })?; - Ok(vm.ctx.new_str(rust_string)) - } else { - Err(vm.new_value_error("Error unpacking Bytes".to_string())) - } - } else { + if !objtype::isinstance(&raw, &buffered_reader_class) { // TODO: this should be io.UnsupportedOperation error which derives both from ValueError *and* OSError - Err(vm.new_value_error("not readable".to_string())) + return Err(vm.new_value_error("not readable".to_string())); + } + + if let Ok(bytes) = vm.call_method(&raw, "read", PyFuncArgs::default()) { + let value = objbytes::get_value(&bytes).to_vec(); + + //format bytes into string + let rust_string = String::from_utf8(value).map_err(|e| { + vm.new_unicode_decode_error(format!( + "cannot decode byte at index: {}", + e.utf8_error().valid_up_to() + )) + })?; + Ok(vm.ctx.new_str(rust_string)) + } else { + Err(vm.new_value_error("Error unpacking Bytes".to_string())) } } @@ -478,36 +473,32 @@ fn text_io_base_write(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { required = [(text_io_base, None), (obj, Some(vm.ctx.str_type()))] ); - let io_module = vm.import("_io", &vm.ctx.new_tuple(vec![]), 0)?; - let buffered_writer_class = vm - .get_attribute(io_module.clone(), "BufferedWriter") - .unwrap() - .downcast::<PyClass>() - .unwrap(); + let buffered_writer_class = 
vm.try_class("_io", "BufferedWriter")?; let raw = vm.get_attribute(text_io_base.clone(), "buffer").unwrap(); - if objtype::isinstance(&raw, &buffered_writer_class) { - let write = vm - .get_method(raw.clone(), "write") - .ok_or_else(|| vm.new_attribute_error("BufferedWriter has no write method".to_owned())) - .and_then(|it| it)?; - let bytes = objstr::get_value(obj).into_bytes(); - - let len = vm.invoke( - write, - PyFuncArgs::new(vec![vm.ctx.new_bytes(bytes.clone())], vec![]), - )?; - let len = objint::get_value(&len).to_usize().ok_or_else(|| { - vm.new_overflow_error("int to large to convert to Rust usize".to_string()) - })?; - // returns the count of unicode code points written - Ok(vm - .ctx - .new_int(String::from_utf8_lossy(&bytes[0..len]).chars().count())) - } else { + if !objtype::isinstance(&raw, &buffered_writer_class) { // TODO: this should be io.UnsupportedOperation error which derives from ValueError and OSError - Err(vm.new_value_error("not writable".to_string())) + return Err(vm.new_value_error("not writable".to_string())); } + + let write = vm + .get_method(raw.clone(), "write") + .ok_or_else(|| vm.new_attribute_error("BufferedWriter has no write method".to_owned())) + .and_then(|it| it)?; + let bytes = objstr::get_value(obj).into_bytes(); + + let len = vm.invoke( + write, + PyFuncArgs::new(vec![vm.ctx.new_bytes(bytes.clone())], vec![]), + )?; + let len = objint::get_value(&len).to_usize().ok_or_else(|| { + vm.new_overflow_error("int to large to convert to Rust usize".to_string()) + })?; + + // returns the count of unicode code points written + Ok(vm + .ctx + .new_int(String::from_utf8_lossy(&bytes[0..len]).chars().count())) } fn split_mode_string(mode_string: String) -> Result<(String, String), String> { From afec714a440d00a1d9a70d33ed8c18fcb32af51f Mon Sep 17 00:00:00 2001 From: silmeth Date: Tue, 9 Jul 2019 10:53:40 +0200 Subject: [PATCH 3/3] use vm.call_method() instead of get_method&invoke, remove String alloc --- vm/src/stdlib/io.rs | 19 
++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index a6a6f27493..88a8fae554 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -467,6 +467,8 @@ fn text_io_base_read(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { } fn text_io_base_write(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { + use std::str::from_utf8; + arg_check!( vm, args, @@ -481,24 +483,19 @@ fn text_io_base_write(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { return Err(vm.new_value_error("not writable".to_string())); } - let write = vm - .get_method(raw.clone(), "write") - .ok_or_else(|| vm.new_attribute_error("BufferedWriter has no write method".to_owned())) - .and_then(|it| it)?; let bytes = objstr::get_value(obj).into_bytes(); - let len = vm.invoke( - write, - PyFuncArgs::new(vec![vm.ctx.new_bytes(bytes.clone())], vec![]), - )?; + let len = vm.call_method(&raw, "write", vec![vm.ctx.new_bytes(bytes.clone())])?; let len = objint::get_value(&len).to_usize().ok_or_else(|| { vm.new_overflow_error("int to large to convert to Rust usize".to_string()) })?; // returns the count of unicode code points written - Ok(vm - .ctx - .new_int(String::from_utf8_lossy(&bytes[0..len]).chars().count())) + let len = from_utf8(&bytes[..len]) + .unwrap_or_else(|e| from_utf8(&bytes[..e.valid_up_to()]).unwrap()) + .chars() + .count(); + Ok(vm.ctx.new_int(len)) } fn split_mode_string(mode_string: String) -> Result<(String, String), String> {