From 7dc82cf23ef0e9261fa7edeb1b96bfcac6b9ec93 Mon Sep 17 00:00:00 2001 From: Derek Anderson Date: Sat, 1 Apr 2023 20:05:45 -0700 Subject: [PATCH 1/5] added a reset function to allow buffer reuse. see: https://github.com/micropython/micropython/issues/11146 --- extmod/moduzlib.c | 26 ++++++++++++++++++++++++++ tests/extmod/uzlib_decompio.py | 8 ++++++++ 2 files changed, 34 insertions(+) diff --git a/extmod/moduzlib.c b/extmod/moduzlib.c index 14d15321a3c1e..b33c8ac438ed5 100644 --- a/extmod/moduzlib.c +++ b/extmod/moduzlib.c @@ -125,9 +125,35 @@ STATIC mp_uint_t decompio_read(mp_obj_t o_in, void *buf, mp_uint_t size, int *er return o->decomp.dest - (byte *)buf; } +STATIC mp_obj_t mod_uzlib_reset(size_t n_args, const mp_obj_t *args) { + mp_obj_decompio_t *o = args[0]; + TINF_DATA *decomp = &o->decomp; + unsigned char *dict_ring = decomp->dict_ring; + unsigned int dict_size = decomp->dict_size; + memset(&o->decomp, 0, sizeof(o->decomp)); + o->decomp.readSource = read_src_stream; + o->src_stream = args[1]; + o->eof = false; + decomp->dict_ring = dict_ring; + decomp->dict_size = dict_size; + uzlib_zlib_parse_header(decomp); + for (uint i=0; ieof = 0; + decomp->bitcount = 0; + decomp->bfinal = 0; + decomp->btype = -1; + decomp->dict_idx = 0; + decomp->curlen = 0; + return o; +} +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_uzlib_reset_obj, 1, 3, mod_uzlib_reset); + #if !MICROPY_ENABLE_DYNRUNTIME STATIC const mp_rom_map_elem_t decompio_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_read), MP_ROM_PTR(&mp_stream_read_obj) }, + { MP_ROM_QSTR(MP_QSTR_reset), MP_ROM_PTR(&mod_uzlib_reset_obj) }, { MP_ROM_QSTR(MP_QSTR_readinto), MP_ROM_PTR(&mp_stream_readinto_obj) }, { MP_ROM_QSTR(MP_QSTR_readline), MP_ROM_PTR(&mp_stream_unbuffered_readline_obj) }, }; diff --git a/tests/extmod/uzlib_decompio.py b/tests/extmod/uzlib_decompio.py index fae901aad0a48..6cb5a76e238c7 100644 --- a/tests/extmod/uzlib_decompio.py +++ b/tests/extmod/uzlib_decompio.py @@ -31,3 +31,11 @@ print(inp.read()) except OSError as e: print(repr(e)) + + +inp = zlib.DecompIO(io.BytesIO(b'x\x9c+.)JM\xcc5\x04\x00\x0b\xe0\x02\xbe')) +assert inp.read()==b'stream1' +inp.reset(io.BytesIO(b'x\x9c+.)JM\xcc5\x02\x00\x0b\xe1\x02\xbf')) +assert inp.read()==b'stream2' + + From 81d82fb202d9c5c676c7e74fe8c33d61c9260791 Mon Sep 17 00:00:00 2001 From: Derek Anderson Date: Sun, 2 Apr 2023 14:06:44 -0700 Subject: [PATCH 2/5] verify old and new buffers are the same size --- extmod/moduzlib.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/extmod/moduzlib.c b/extmod/moduzlib.c index b33c8ac438ed5..b794e1be62b13 100644 --- a/extmod/moduzlib.c +++ b/extmod/moduzlib.c @@ -66,21 +66,8 @@ STATIC int read_src_stream(TINF_DATA *data) { return c; } -STATIC mp_obj_t decompio_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) { - mp_arg_check_num(n_args, n_kw, 1, 2, false); - mp_get_stream_raise(args[0], MP_STREAM_OP_READ); - mp_obj_decompio_t *o = mp_obj_malloc(mp_obj_decompio_t, type); - memset(&o->decomp, 0, sizeof(o->decomp)); - o->decomp.readSource = read_src_stream; - o->src_stream = args[0]; - o->eof = false; - - mp_int_t dict_opt = 0; +STATIC uint calc_dict_sz(mp_int_t dict_opt, mp_obj_decompio_t *o) { uint dict_sz; - if (n_args > 1) { - dict_opt = mp_obj_get_int(args[1]); - } - if (dict_opt >= 16) { int st = uzlib_gzip_parse_header(&o->decomp); if (st != TINF_OK) { @@ -100,6 +87,23 @@ STATIC mp_obj_t decompio_make_new(const mp_obj_type_t *type, size_t n_args, size } else { dict_sz = 1 << -dict_opt; } + return dict_sz; +} + +STATIC mp_obj_t decompio_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) { + mp_arg_check_num(n_args, n_kw, 1, 2, false); + mp_get_stream_raise(args[0], MP_STREAM_OP_READ); + mp_obj_decompio_t *o = mp_obj_malloc(mp_obj_decompio_t, type); + memset(&o->decomp, 0, sizeof(o->decomp)); + o->decomp.readSource = read_src_stream; + o->src_stream = args[0]; + o->eof = false; + + mp_int_t dict_opt = 0; + if (n_args > 1) { + dict_opt = mp_obj_get_int(args[1]); + } + uint dict_sz = calc_dict_sz(dict_opt, o); uzlib_uncompress_init(&o->decomp, m_new(byte, dict_sz), dict_sz); return MP_OBJ_FROM_PTR(o); @@ -126,6 +130,7 @@ STATIC mp_uint_t decompio_read(mp_obj_t o_in, void *buf, mp_uint_t size, int *er } STATIC mp_obj_t mod_uzlib_reset(size_t n_args, const mp_obj_t *args) { + mp_get_stream_raise(args[1], MP_STREAM_OP_READ); mp_obj_decompio_t *o = args[0]; TINF_DATA *decomp = &o->decomp; unsigned char *dict_ring = decomp->dict_ring; @@ -136,7 +141,18 @@ STATIC mp_obj_t mod_uzlib_reset(size_t n_args, const mp_obj_t *args) { o->eof = false; decomp->dict_ring = dict_ring; decomp->dict_size = dict_size; - uzlib_zlib_parse_header(decomp); + + mp_int_t dict_opt = 0; + if (n_args > 2) { + dict_opt = mp_obj_get_int(args[2]); + } + uint dict_sz = calc_dict_sz(dict_opt, o); + + if (dict_sz != dict_size) { + mp_raise_ValueError(MP_ERROR_TEXT("compression header buffer sizes must match (to reuse buffer)")); + } + +// dict_opt = uzlib_zlib_parse_header(decomp); for (uint i=0; i Date: Sun, 2 Apr 2023 14:25:46 -0700 Subject: [PATCH 3/5] changed the new function name to set_stream() (reset is too generic) added documentation --- docs/library/zlib.rst | 12 ++++++++++++ extmod/moduzlib.c | 6 +++--- tests/extmod/uzlib_decompio.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/library/zlib.rst b/docs/library/zlib.rst index 96d6c245232ba..1374d95d4b40c 100644 --- a/docs/library/zlib.rst +++ b/docs/library/zlib.rst @@ -36,3 +36,15 @@ Functions This class is MicroPython extension. It's included on provisional basis and may be changed considerably or removed in later versions. + +.. function:: DecompIO.set_stream(obj, stream, wbits=0, /) + + Update a `DecompIO` to use a new stream object. This is exactly like + creating a new `DecompIO` object, except the buffer (usually 32k) is + reused and therefore doesn't have to be reallocated. This can + prevent random `MemoryError`s due to memory fragmentation when + processing large numbers of streams. + + The buffer sizes of the two streams must match, or this will throw + a `ValueError`. + diff --git a/extmod/moduzlib.c b/extmod/moduzlib.c index b794e1be62b13..6b2c507ab440a 100644 --- a/extmod/moduzlib.c +++ b/extmod/moduzlib.c @@ -129,7 +129,7 @@ STATIC mp_uint_t decompio_read(mp_obj_t o_in, void *buf, mp_uint_t size, int *er return o->decomp.dest - (byte *)buf; } -STATIC mp_obj_t mod_uzlib_reset(size_t n_args, const mp_obj_t *args) { +STATIC mp_obj_t mod_uzlib_set_stream(size_t n_args, const mp_obj_t *args) { mp_get_stream_raise(args[1], MP_STREAM_OP_READ); mp_obj_decompio_t *o = args[0]; TINF_DATA *decomp = &o->decomp; @@ -164,12 +164,12 @@ STATIC mp_obj_t mod_uzlib_reset(size_t n_args, const mp_obj_t *args) { decomp->curlen = 0; return o; } -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_uzlib_reset_obj, 1, 3, mod_uzlib_reset); +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_uzlib_set_stream_obj, 1, 3, mod_uzlib_set_stream); #if !MICROPY_ENABLE_DYNRUNTIME STATIC const mp_rom_map_elem_t decompio_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_read), MP_ROM_PTR(&mp_stream_read_obj) }, - { MP_ROM_QSTR(MP_QSTR_reset), MP_ROM_PTR(&mod_uzlib_reset_obj) }, + { MP_ROM_QSTR(MP_QSTR_set_stream), MP_ROM_PTR(&mod_uzlib_set_stream_obj) }, { MP_ROM_QSTR(MP_QSTR_readinto), MP_ROM_PTR(&mp_stream_readinto_obj) }, { MP_ROM_QSTR(MP_QSTR_readline), MP_ROM_PTR(&mp_stream_unbuffered_readline_obj) }, }; diff --git a/tests/extmod/uzlib_decompio.py b/tests/extmod/uzlib_decompio.py index 6cb5a76e238c7..b351da6b45694 100644 --- a/tests/extmod/uzlib_decompio.py +++ b/tests/extmod/uzlib_decompio.py @@ -35,7 +35,7 @@ inp = zlib.DecompIO(io.BytesIO(b'x\x9c+.)JM\xcc5\x04\x00\x0b\xe0\x02\xbe')) assert inp.read()==b'stream1' -inp.reset(io.BytesIO(b'x\x9c+.)JM\xcc5\x02\x00\x0b\xe1\x02\xbf')) +inp.set_stream(io.BytesIO(b'x\x9c+.)JM\xcc5\x02\x00\x0b\xe1\x02\xbf')) assert inp.read()==b'stream2' From 6c4b6303089ab7cf6658a21a25fb8d8efaf6157c Mon Sep 17 00:00:00 2001 From: Derek Anderson Date: Sun, 2 Apr 2023 14:38:22 -0700 Subject: [PATCH 4/5] fixed doc formatting error --- docs/library/zlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/library/zlib.rst b/docs/library/zlib.rst index 1374d95d4b40c..2cc4ab3d731b2 100644 --- a/docs/library/zlib.rst +++ b/docs/library/zlib.rst @@ -42,7 +42,7 @@ Functions Update a `DecompIO` to use a new stream object. This is exactly like creating a new `DecompIO` object, except the buffer (usually 32k) is reused and therefore doesn't have to be reallocated. This can - prevent random `MemoryError`s due to memory fragmentation when + prevent a random `MemoryError` due to memory fragmentation when processing large numbers of streams. The buffer sizes of the two streams must match, or this will throw From 36765736d4c7100f1035f47836a5c49bda0f4f6b Mon Sep 17 00:00:00 2001 From: Derek Anderson Date: Sun, 2 Apr 2023 14:43:37 -0700 Subject: [PATCH 5/5] code formatter --- extmod/moduzlib.c | 6 +++--- tests/extmod/uzlib_decompio.py | 11 ++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/extmod/moduzlib.c b/extmod/moduzlib.c index 6b2c507ab440a..96ac9121b4208 100644 --- a/extmod/moduzlib.c +++ b/extmod/moduzlib.c @@ -147,14 +147,14 @@ STATIC mp_obj_t mod_uzlib_set_stream(size_t n_args, const mp_obj_t *args) { dict_opt = mp_obj_get_int(args[2]); } uint dict_sz = calc_dict_sz(dict_opt, o); - + if (dict_sz != dict_size) { mp_raise_ValueError(MP_ERROR_TEXT("compression header buffer sizes must match (to reuse buffer)")); } // dict_opt = uzlib_zlib_parse_header(decomp); - for (uint i=0; ieof = 0; decomp->bitcount = 0; diff --git a/tests/extmod/uzlib_decompio.py b/tests/extmod/uzlib_decompio.py index b351da6b45694..9168b449c7f59 100644 --- a/tests/extmod/uzlib_decompio.py +++ b/tests/extmod/uzlib_decompio.py @@ -31,11 +31,8 @@ print(inp.read()) except OSError as e: print(repr(e)) - - -inp = zlib.DecompIO(io.BytesIO(b'x\x9c+.)JM\xcc5\x04\x00\x0b\xe0\x02\xbe')) -assert inp.read()==b'stream1' -inp.set_stream(io.BytesIO(b'x\x9c+.)JM\xcc5\x02\x00\x0b\xe1\x02\xbf')) -assert inp.read()==b'stream2' - +inp = zlib.DecompIO(io.BytesIO(b"x\x9c+.)JM\xcc5\x04\x00\x0b\xe0\x02\xbe")) +assert inp.read() == b"stream1" +inp.set_stream(io.BytesIO(b"x\x9c+.)JM\xcc5\x02\x00\x0b\xe1\x02\xbf")) +assert inp.read() == b"stream2"