Skip to content

gh-132983: Convert dict_content to take Py_buffer #133924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 38 additions & 5 deletions Modules/_zstd/clinic/zstddict.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 5 additions & 12 deletions Modules/_zstd/compressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,8 @@ _get_CDict(ZstdDict *self, int compressionLevel)
}
if (capsule == NULL) {
/* Create ZSTD_CDict instance */
char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
Py_ssize_t dict_len = Py_SIZE(self->dict_content);
Py_BEGIN_ALLOW_THREADS
cdict = ZSTD_createCDict(dict_buffer,
dict_len,
cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len,
compressionLevel);
Py_END_ALLOW_THREADS

Expand Down Expand Up @@ -236,17 +233,13 @@ _zstd_load_impl(ZstdCompressor *self, ZstdDict *zd,
else if (type == DICT_TYPE_UNDIGESTED) {
/* Load a dictionary.
It doesn't override compression context's parameters. */
zstd_ret = ZSTD_CCtx_loadDictionary(
self->cctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer,
zd->dict_len);
}
else if (type == DICT_TYPE_PREFIX) {
/* Load a prefix */
zstd_ret = ZSTD_CCtx_refPrefix(
self->cctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer,
zd->dict_len);
}
else {
Py_UNREACHABLE();
Expand Down
16 changes: 5 additions & 11 deletions Modules/_zstd/decompressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,8 @@ _get_DDict(ZstdDict *self)

if (self->d_dict == NULL) {
/* Create ZSTD_DDict instance from dictionary content */
char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
Py_ssize_t dict_len = Py_SIZE(self->dict_content);
Py_BEGIN_ALLOW_THREADS
ret = ZSTD_createDDict(dict_buffer, dict_len);
ret = ZSTD_createDDict(self->dict_buffer, self->dict_len);
Py_END_ALLOW_THREADS
self->d_dict = ret;

Expand Down Expand Up @@ -160,17 +158,13 @@ _zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd,
}
else if (type == DICT_TYPE_UNDIGESTED) {
/* Load a dictionary */
zstd_ret = ZSTD_DCtx_loadDictionary(
self->dctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer,
zd->dict_len);
}
else if (type == DICT_TYPE_PREFIX) {
/* Load a prefix */
zstd_ret = ZSTD_DCtx_refPrefix(
self->dctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer,
zd->dict_len);
}
else {
/* Impossible code path */
Expand Down
88 changes: 45 additions & 43 deletions Modules/_zstd/zstddict.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec"
/*[clinic input]
@classmethod
_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new
dict_content: object
dict_content: Py_buffer
The content of a Zstandard dictionary as a bytes-like object.
/
*
Expand All @@ -42,17 +42,25 @@ by multiple ZstdCompressor or ZstdDecompressor objects.
[clinic start generated code]*/

static PyObject *
_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content,
_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content,
int is_raw)
/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/
/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/
{
/* All dictionaries must be at least 8 bytes */
if (dict_content->len < 8) {
PyErr_SetString(PyExc_ValueError,
"Zstandard dictionary content too short "
"(must have at least eight bytes)");
return NULL;
}

ZstdDict* self = PyObject_GC_New(ZstdDict, type);
if (self == NULL) {
goto error;
return NULL;
}

self->dict_content = NULL;
self->d_dict = NULL;
self->dict_buffer = NULL;
self->dict_id = 0;
self->lock = (PyMutex){0};

Expand All @@ -62,39 +70,26 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content,
goto error;
}

/* Check dict_content's type */
self->dict_content = PyBytes_FromObject(dict_content);
if (self->dict_content == NULL) {
PyErr_SetString(PyExc_TypeError,
"dict_content argument should be bytes-like object.");
goto error;
}

/* Both ordinary dictionary and "raw content" dictionary should
at least 8 bytes */
if (Py_SIZE(self->dict_content) < 8) {
PyErr_SetString(PyExc_ValueError,
"Zstandard dictionary content should at least "
"8 bytes.");
self->dict_buffer = PyMem_Malloc(dict_content->len);
if (!self->dict_buffer) {
PyErr_NoMemory();
goto error;
}
memcpy(self->dict_buffer, dict_content->buf, dict_content->len);
self->dict_len = dict_content->len;

/* Get dict_id, 0 means "raw content" dictionary. */
self->dict_id = ZSTD_getDictID_fromDict(
PyBytes_AS_STRING(self->dict_content),
Py_SIZE(self->dict_content));
self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len);

/* Check validity for ordinary dictionary */
if (!is_raw && self->dict_id == 0) {
char *msg = "Invalid Zstandard dictionary and is_raw not set.\n";
PyErr_SetString(PyExc_ValueError, msg);
PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary");
goto error;
}

// Can only track self once all of its fields have been initialized
PyObject_GC_Track(self);

return (PyObject*)self;
return (PyObject *)self;

error:
Py_XDECREF(self);
Expand All @@ -115,12 +110,12 @@ ZstdDict_dealloc(PyObject *ob)

assert(!PyMutex_IsLocked(&self->lock));

/* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */
Py_CLEAR(self->dict_content);
/* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */
PyMem_Free(self->dict_buffer);
Py_CLEAR(self->c_dicts);

PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_Del(ob);
tp->tp_free(self);
Py_DECREF(tp);
}

Expand All @@ -131,25 +126,33 @@ PyDoc_STRVAR(ZstdDict_dictid_doc,
"The special value '0' means a 'raw content' dictionary,"
"without any restrictions on format or content.");

PyDoc_STRVAR(ZstdDict_dictcontent_doc,
"The content of a Zstandard dictionary, as a bytes object.");

static PyObject *
ZstdDict_str(PyObject *ob)
ZstdDict_repr(PyObject *ob)
{
ZstdDict *dict = ZstdDict_CAST(ob);
return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>",
dict->dict_id, Py_SIZE(dict->dict_content));
(unsigned int)dict->dict_id, dict->dict_len);
}

static PyMemberDef ZstdDict_members[] = {
{"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY,
ZstdDict_dictid_doc},
{"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content),
Py_READONLY, ZstdDict_dictcontent_doc},
{"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc},
{NULL}
};

/*[clinic input]
@getter
_zstd.ZstdDict.dict_content

The content of a Zstandard dictionary, as a bytes object.
[clinic start generated code]*/

static PyObject *
_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self)
/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/
{
    /* Getter for ZstdDict.dict_content.
     *
     * Builds a new bytes object from the dict_len bytes stored at
     * dict_buffer. Whatever PyBytes_FromStringAndSize() returns is passed
     * straight back to the caller, including NULL on failure.
     */
    const char *content = self->dict_buffer;
    Py_ssize_t content_len = self->dict_len;
    PyObject *as_bytes = PyBytes_FromStringAndSize(content, content_len);

    return as_bytes;
}

/*[clinic input]
@getter
_zstd.ZstdDict.as_digested_dict
Expand Down Expand Up @@ -219,6 +222,7 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self)
}

static PyGetSetDef ZstdDict_getset[] = {
_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF
_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
Expand All @@ -229,24 +233,22 @@ static Py_ssize_t
ZstdDict_length(PyObject *ob)
{
ZstdDict *self = ZstdDict_CAST(ob);
assert(PyBytes_Check(self->dict_content));
return Py_SIZE(self->dict_content);
return self->dict_len;
}

static int
ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg)
{
ZstdDict *self = ZstdDict_CAST(ob);
Py_VISIT(self->c_dicts);
Py_VISIT(self->dict_content);
return 0;
}

static int
ZstdDict_clear(PyObject *ob)
{
ZstdDict *self = ZstdDict_CAST(ob);
Py_CLEAR(self->dict_content);
Py_CLEAR(self->c_dicts);
return 0;
}

Expand All @@ -255,7 +257,7 @@ static PyType_Slot zstddict_slots[] = {
{Py_tp_getset, ZstdDict_getset},
{Py_tp_new, _zstd_ZstdDict_new},
{Py_tp_dealloc, ZstdDict_dealloc},
{Py_tp_str, ZstdDict_str},
{Py_tp_repr, ZstdDict_repr},
{Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__},
{Py_sq_length, ZstdDict_length},
{Py_tp_traverse, ZstdDict_traverse},
Expand Down
6 changes: 4 additions & 2 deletions Modules/_zstd/zstddict.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ typedef struct {
ZSTD_DDict *d_dict;
PyObject *c_dicts;

/* Content of the dictionary, bytes object. */
PyObject *dict_content;
/* Dictionary content. */
char *dict_buffer;
Py_ssize_t dict_len;

/* Dictionary id */
uint32_t dict_id;

Expand Down
Loading