-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Switch to libbz2-rs-sys
and finish bz2 impl
#5709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,28 +12,48 @@ mod _bz2 { | |
object::{PyPayload, PyResult}, | ||
types::Constructor, | ||
}; | ||
use crate::zlib::{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since there is at-least 1 other decompression module that uses the same format ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree - I was planning on doing that in a followup, to reduce the amount of code movement in this pr. |
||
DecompressArgs, DecompressError, DecompressState, DecompressStatus, Decompressor, | ||
}; | ||
use bzip2::{Decompress, Status, write::BzEncoder}; | ||
use rustpython_vm::convert::ToPyException; | ||
use std::{fmt, io::Write}; | ||
|
||
// const BUFSIZ: i32 = 8192; | ||
|
||
struct DecompressorState { | ||
decoder: Decompress, | ||
eof: bool, | ||
needs_input: bool, | ||
// input_buffer: Vec<u8>, | ||
// output_buffer: Vec<u8>, | ||
} | ||
const BUFSIZ: usize = 8192; | ||
|
||
#[pyattr] | ||
#[pyclass(name = "BZ2Decompressor")] | ||
#[derive(PyPayload)] | ||
struct BZ2Decompressor { | ||
state: PyMutex<DecompressorState>, | ||
state: PyMutex<DecompressState<Decompress>>, | ||
} | ||
|
||
impl Decompressor for Decompress { | ||
type Flush = (); | ||
type Status = Status; | ||
type Error = bzip2::Error; | ||
|
||
fn total_in(&self) -> u64 { | ||
self.total_in() | ||
} | ||
fn decompress_vec( | ||
&mut self, | ||
input: &[u8], | ||
output: &mut Vec<u8>, | ||
(): Self::Flush, | ||
) -> Result<Self::Status, Self::Error> { | ||
self.decompress_vec(input, output) | ||
} | ||
} | ||
|
||
impl DecompressStatus for Status { | ||
fn is_stream_end(&self) -> bool { | ||
*self == Status::StreamEnd | ||
} | ||
} | ||
|
||
impl fmt::Debug for BZ2Decompressor { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "_bz2.BZ2Decompressor") | ||
} | ||
} | ||
|
@@ -43,13 +63,7 @@ mod _bz2 { | |
|
||
fn py_new(cls: PyTypeRef, _: Self::Args, vm: &VirtualMachine) -> PyResult { | ||
Self { | ||
state: PyMutex::new(DecompressorState { | ||
decoder: Decompress::new(false), | ||
eof: false, | ||
needs_input: true, | ||
// input_buffer: Vec::new(), | ||
// output_buffer: Vec::new(), | ||
}), | ||
state: PyMutex::new(DecompressState::new(Decompress::new(false), vm)), | ||
} | ||
.into_ref_with_type(vm, cls) | ||
.map(Into::into) | ||
|
@@ -59,107 +73,34 @@ mod _bz2 { | |
#[pyclass(with(Constructor))] | ||
impl BZ2Decompressor { | ||
#[pymethod] | ||
fn decompress( | ||
&self, | ||
data: ArgBytesLike, | ||
// TODO: PyIntRef | ||
max_length: OptionalArg<i32>, | ||
vm: &VirtualMachine, | ||
) -> PyResult<PyBytesRef> { | ||
let max_length = max_length.unwrap_or(-1); | ||
if max_length >= 0 { | ||
return Err(vm.new_not_implemented_error( | ||
"the max_value argument is not implemented yet".to_owned(), | ||
)); | ||
} | ||
// let max_length = if max_length < 0 || max_length >= BUFSIZ { | ||
// BUFSIZ | ||
// } else { | ||
// max_length | ||
// }; | ||
fn decompress(&self, args: DecompressArgs, vm: &VirtualMachine) -> PyResult<Vec<u8>> { | ||
let max_length = args.max_length(); | ||
let data = &*args.data(); | ||
|
||
let mut state = self.state.lock(); | ||
let DecompressorState { | ||
decoder, | ||
eof, | ||
.. | ||
// needs_input, | ||
// input_buffer, | ||
// output_buffer, | ||
} = &mut *state; | ||
|
||
if *eof { | ||
return Err(vm.new_exception_msg( | ||
vm.ctx.exceptions.eof_error.to_owned(), | ||
"End of stream already reached".to_owned(), | ||
)); | ||
} | ||
|
||
// data.with_ref(|data| input_buffer.extend(data)); | ||
|
||
// If max_length is negative: | ||
// read the input X bytes at a time, compress it and append it to output. | ||
// Once you're out of input, setting needs_input to true and return the | ||
// output as bytes. | ||
// | ||
// TODO: | ||
// If max_length is non-negative: | ||
// Read the input X bytes at a time, compress it and append it to | ||
// the output. If output reaches `max_length` in size, return | ||
// it (up to max_length), and store the rest of the output | ||
// for later. | ||
|
||
// TODO: arbitrary choice, not the right way to do it. | ||
let mut buf = Vec::with_capacity(data.len() * 32); | ||
|
||
let before = decoder.total_in(); | ||
let res = data.with_ref(|data| decoder.decompress_vec(data, &mut buf)); | ||
let _written = (decoder.total_in() - before) as usize; | ||
|
||
let res = match res { | ||
Ok(x) => x, | ||
// TODO: error message | ||
_ => return Err(vm.new_os_error("Invalid data stream".to_owned())), | ||
}; | ||
|
||
if res == Status::StreamEnd { | ||
*eof = true; | ||
} | ||
Ok(vm.ctx.new_bytes(buf.to_vec())) | ||
state | ||
.decompress(data, max_length, BUFSIZ, vm) | ||
.map_err(|e| match e { | ||
DecompressError::Decompress(err) => vm.new_os_error(err.to_string()), | ||
DecompressError::Eof(err) => err.to_pyexception(vm), | ||
}) | ||
} | ||
|
||
#[pygetset] | ||
fn eof(&self) -> bool { | ||
let state = self.state.lock(); | ||
state.eof | ||
self.state.lock().eof() | ||
} | ||
|
||
#[pygetset] | ||
fn unused_data(&self, vm: &VirtualMachine) -> PyBytesRef { | ||
// Data found after the end of the compressed stream. | ||
// If this attribute is accessed before the end of the stream | ||
// has been reached, its value will be b''. | ||
vm.ctx.new_bytes(b"".to_vec()) | ||
// alternatively, be more honest: | ||
// Err(vm.new_not_implemented_error( | ||
// "unused_data isn't implemented yet".to_owned(), | ||
// )) | ||
// | ||
// TODO | ||
// let state = self.state.lock(); | ||
// if state.eof { | ||
// vm.ctx.new_bytes(state.input_buffer.to_vec()) | ||
// else { | ||
// vm.ctx.new_bytes(b"".to_vec()) | ||
// } | ||
fn unused_data(&self) -> PyBytesRef { | ||
self.state.lock().unused_data() | ||
} | ||
|
||
#[pygetset] | ||
fn needs_input(&self) -> bool { | ||
// False if the decompress() method can provide more | ||
// decompressed data before requiring new uncompressed input. | ||
let state = self.state.lock(); | ||
state.needs_input | ||
self.state.lock().needs_input() | ||
} | ||
|
||
// TODO: mro()? | ||
|
@@ -178,7 +119,7 @@ mod _bz2 { | |
} | ||
|
||
impl fmt::Debug for BZ2Compressor { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "_bz2.BZ2Compressor") | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@coolreader18 @arihant2math what cause this regression? could this be fixed in future?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A flag probably needs to be added to the decompressor state. I believe the issue is the
wt
andrt
formats. Although I'm not to sure, I suppose looking at the cpython source might help.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason this needed to be flagged now is because the whole test file wasn't running at all before - bz2 was not included in the
--features=stdlib,threading,...
flag in CI, so the module wasn't even getting compiled or tested at all.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, didn't noticed that. Thanks!