From 94ab2483d17970dd8b74135762996bd03f71b4b1 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 11 May 2025 22:03:58 +0100 Subject: [PATCH] Convert dict_content to take Py_buffer --- Modules/_zstd/clinic/zstddict.c.h | 43 +++++++++++++++--- Modules/_zstd/compressor.c | 16 +++---- Modules/_zstd/decompressor.c | 16 +++---- Modules/_zstd/zstddict.c | 72 +++++++++++++++---------------- Modules/_zstd/zstddict.h | 6 ++- 5 files changed, 90 insertions(+), 63 deletions(-) diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 34e0e4b3ecfe72..87866d1c1e2b51 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -26,7 +26,7 @@ PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, "by multiple ZstdCompressor or ZstdDecompressor objects."); static PyObject * -_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, int is_raw); static PyObject * @@ -64,7 +64,7 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; - PyObject *dict_content; + Py_buffer dict_content = {NULL, NULL}; int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, @@ -72,7 +72,9 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (!fastargs) { goto exit; } - dict_content = fastargs[0]; + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } if (!noptargs) { goto skip_optional_kwonly; } @@ -81,12 +83,43 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto exit; } skip_optional_kwonly: - return_value = _zstd_ZstdDict_new_impl(type, dict_content, is_raw); + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + return return_value; } +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" @@ -202,4 +235,4 @@ _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) return return_value; } -/*[clinic end generated code: output=bfb31c1187477afd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0418adb04c9d85a7 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index f794acbc19cfc7..84a3a7fa5402ca 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -170,8 +170,8 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Create ZSTD_CDict instance */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); + char *dict_buffer = self->dict_buffer; + Py_ssize_t dict_len = self->dict_len; Py_BEGIN_ALLOW_THREADS cdict = ZSTD_createCDict(dict_buffer, dict_len, @@ -284,19 +284,15 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) /* Load a dictionary. It doesn't override compression context's parameters. */ Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); Py_END_CRITICAL_SECTION2(); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); Py_END_CRITICAL_SECTION2(); } else { diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 852b796a872eef..c3101d62039b19 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -64,8 +64,8 @@ _get_DDict(ZstdDict *self) Py_BEGIN_CRITICAL_SECTION(self); if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); + char *dict_buffer = self->dict_buffer; + Py_ssize_t dict_len = self->dict_len; Py_BEGIN_ALLOW_THREADS self->d_dict = ZSTD_createDDict(dict_buffer, dict_len); @@ -213,19 +213,15 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary */ Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); Py_END_CRITICAL_SECTION2(); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); Py_END_CRITICAL_SECTION2(); } else { diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 264946e8da2a99..f5a5b23e5f2fc4 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -25,7 +25,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" /*[clinic input] @classmethod _zstd.ZstdDict.__new__ as _zstd_ZstdDict_new - dict_content: object + dict_content: Py_buffer The content of a Zstandard dictionary as a bytes-like object. / * @@ -41,16 +41,15 @@ by multiple ZstdCompressor or ZstdDecompressor objects. [clinic start generated code]*/ static PyObject * -_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, int is_raw) -/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/ +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; self->d_dict = NULL; /* ZSTD_CDict dict */ @@ -60,24 +59,28 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, } /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { + if (dict_content == NULL) { PyErr_SetString(PyExc_TypeError, "dict_content argument should be bytes-like object."); goto error; } - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { + self->dict_buffer = PyMem_RawMalloc(dict_content->len); + if (!self->dict_buffer) { + return PyErr_NoMemory(); + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; + + /* Both ordinary and "raw content" dictionaries must be 8 bytes minimum */ + if (self->dict_len < 8) { PyErr_SetString(PyExc_ValueError, "Zstandard dictionary content should at least 8 bytes."); goto error; } /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); /* Check validity for ordinary dictionary */ if (!is_raw && self->dict_id == 0) { @@ -86,15 +89,12 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, goto error; } - // Can only track self once self->dict_content is included PyObject_GC_Track(self); return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + PyObject_GC_Del(self); return NULL; } @@ -108,12 +108,12 @@ ZstdDict_dealloc(PyObject *ob) /* Free ZSTD_DDict instance */ ZSTD_freeDDict(self->d_dict); - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + /* Release dict_buffer after Free ZSTD_CDict/ZSTD_DDict instances */ + PyMem_RawFree(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } @@ -124,23 +124,33 @@ PyDoc_STRVAR(ZstdDict_dictid_doc, "The special value '0' means a 'raw content' dictionary," "without any restrictions on format or content."); -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of a Zstandard dictionary, as a bytes object."); - static PyObject * ZstdDict_str(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("", - dict->dict_id, Py_SIZE(dict->dict_content)); + dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, {NULL} }; +/*[clinic input] +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + /*[clinic input] @critical_section @getter @@ -207,6 +217,7 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF @@ -217,8 +228,7 @@ static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -226,15 +236,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); - return 0; -} - -static int -ZstdDict_clear(PyObject *ob) -{ - ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); return 0; } @@ -247,7 +248,6 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, - {Py_tp_clear, ZstdDict_clear}, {0, 0} }; diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h index e8a55a3670b869..6911ebcd971ba0 100644 --- a/Modules/_zstd/zstddict.h +++ b/Modules/_zstd/zstddict.h @@ -15,8 +15,10 @@ typedef struct { ZSTD_DDict *d_dict; PyObject *c_dicts; - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + /* Dictionary id */ uint32_t dict_id; } ZstdDict;