Skip to content

gh-132983: Convert dict_content to take Py_buffer #133924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 38 additions & 5 deletions Modules/_zstd/clinic/zstddict.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 6 additions & 10 deletions Modules/_zstd/compressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ _get_CDict(ZstdDict *self, int compressionLevel)
}

/* Create ZSTD_CDict instance */
char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
Py_ssize_t dict_len = Py_SIZE(self->dict_content);
char *dict_buffer = self->dict_buffer;
Py_ssize_t dict_len = self->dict_len;
Py_BEGIN_ALLOW_THREADS
cdict = ZSTD_createCDict(dict_buffer,
dict_len,
Comment on lines +173 to 177
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
char *dict_buffer = self->dict_buffer;
Py_ssize_t dict_len = self->dict_len;
Py_BEGIN_ALLOW_THREADS
cdict = ZSTD_createCDict(dict_buffer,
dict_len,
Py_BEGIN_ALLOW_THREADS
cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len,

Expand Down Expand Up @@ -284,19 +284,15 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict)
/* Load a dictionary.
It doesn't override compression context's parameters. */
Py_BEGIN_CRITICAL_SECTION2(self, zd);
zstd_ret = ZSTD_CCtx_loadDictionary(
self->cctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer,
zd->dict_len);
Py_END_CRITICAL_SECTION2();
}
else if (type == DICT_TYPE_PREFIX) {
/* Load a prefix */
Py_BEGIN_CRITICAL_SECTION2(self, zd);
zstd_ret = ZSTD_CCtx_refPrefix(
self->cctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer,
zd->dict_len);
Py_END_CRITICAL_SECTION2();
}
else {
Expand Down
16 changes: 6 additions & 10 deletions Modules/_zstd/decompressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ _get_DDict(ZstdDict *self)
Py_BEGIN_CRITICAL_SECTION(self);
if (self->d_dict == NULL) {
/* Create ZSTD_DDict instance from dictionary content */
char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
Py_ssize_t dict_len = Py_SIZE(self->dict_content);
char *dict_buffer = self->dict_buffer;
Py_ssize_t dict_len = self->dict_len;
Py_BEGIN_ALLOW_THREADS
self->d_dict = ZSTD_createDDict(dict_buffer,
dict_len);
Comment on lines +67 to 71
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just inline them, as in the code below.

Suggested change
char *dict_buffer = self->dict_buffer;
Py_ssize_t dict_len = self->dict_len;
Py_BEGIN_ALLOW_THREADS
self->d_dict = ZSTD_createDDict(dict_buffer,
dict_len);
Py_BEGIN_ALLOW_THREADS
self->d_dict = ZSTD_createDDict(self->dict_buffer, self->dict_len);

Expand Down Expand Up @@ -213,19 +213,15 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
else if (type == DICT_TYPE_UNDIGESTED) {
/* Load a dictionary */
Py_BEGIN_CRITICAL_SECTION2(self, zd);
zstd_ret = ZSTD_DCtx_loadDictionary(
self->dctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer,
zd->dict_len);
Py_END_CRITICAL_SECTION2();
}
else if (type == DICT_TYPE_PREFIX) {
/* Load a prefix */
Py_BEGIN_CRITICAL_SECTION2(self, zd);
zstd_ret = ZSTD_DCtx_refPrefix(
self->dctx,
PyBytes_AS_STRING(zd->dict_content),
Py_SIZE(zd->dict_content));
zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer,
zd->dict_len);
Py_END_CRITICAL_SECTION2();
}
else {
Expand Down
72 changes: 36 additions & 36 deletions Modules/_zstd/zstddict.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec"
/*[clinic input]
@classmethod
_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new
dict_content: object
dict_content: Py_buffer
The content of a Zstandard dictionary as a bytes-like object.
/
*
Expand All @@ -41,16 +41,15 @@ by multiple ZstdCompressor or ZstdDecompressor objects.
[clinic start generated code]*/

static PyObject *
_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content,
_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content,
int is_raw)
/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/
/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/
{
ZstdDict* self = PyObject_GC_New(ZstdDict, type);
if (self == NULL) {
goto error;
return NULL;
}

self->dict_content = NULL;
self->d_dict = NULL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self->d_dict = NULL;
self->d_dict = NULL;
self->dict_buffer = NULL;

Otherwise self->dict_buffer is not initialized in the destructor.


/* ZSTD_CDict dict */
Expand All @@ -60,24 +59,28 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content,
}

/* Check dict_content's type */
self->dict_content = PyBytes_FromObject(dict_content);
if (self->dict_content == NULL) {
if (dict_content == NULL) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is never NULL.

BTW, overriding arbitrary exception with a TypeError in the current code is wrong.

PyErr_SetString(PyExc_TypeError,
"dict_content argument should be bytes-like object.");
goto error;
}

/* Both ordinary dictionary and "raw content" dictionary should
at least 8 bytes */
if (Py_SIZE(self->dict_content) < 8) {
self->dict_buffer = PyMem_RawMalloc(dict_content->len);
if (!self->dict_buffer) {
return PyErr_NoMemory();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self is leaked here.

}
memcpy(self->dict_buffer, dict_content->buf, dict_content->len);
self->dict_len = dict_content->len;
Comment on lines +68 to +73
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @vstinner, I think RawMalloc is correct here but would be grateful for a review / confirmation please

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like you told the GIL in this code, so you can use PyMem_Malloc() instead. The Raw variant is when you don't hold the GIL.


/* Both ordinary and "raw content" dictionaries must be 8 bytes minimum */
if (self->dict_len < 8) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be checked at the beginning, before any allocations.

PyErr_SetString(PyExc_ValueError,
"Zstandard dictionary content should at least 8 bytes.");
goto error;
}

/* Get dict_id, 0 means "raw content" dictionary. */
self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content),
Py_SIZE(self->dict_content));
self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len);

/* Check validity for ordinary dictionary */
if (!is_raw && self->dict_id == 0) {
Expand All @@ -86,15 +89,12 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content,
goto error;
}

// Can only track self once self->dict_content is included
PyObject_GC_Track(self);

return (PyObject*)self;

error:
if (self != NULL) {
PyObject_GC_Del(self);
}
PyObject_GC_Del(self);
return NULL;
}

Expand All @@ -108,12 +108,12 @@ ZstdDict_dealloc(PyObject *ob)
/* Free ZSTD_DDict instance */
ZSTD_freeDDict(self->d_dict);

/* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */
Py_CLEAR(self->dict_content);
/* Release dict_buffer after Free ZSTD_CDict/ZSTD_DDict instances */
PyMem_RawFree(self->dict_buffer);
Py_CLEAR(self->c_dicts);

PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_Del(ob);
tp->tp_free(self);
Py_DECREF(tp);
}

Expand All @@ -124,23 +124,33 @@ PyDoc_STRVAR(ZstdDict_dictid_doc,
"The special value '0' means a 'raw content' dictionary,"
"without any restrictions on format or content.");

PyDoc_STRVAR(ZstdDict_dictcontent_doc,
"The content of a Zstandard dictionary, as a bytes object.");

static PyObject *
ZstdDict_str(PyObject *ob)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is wrong to implement __str__. __repr__ should be implemented.

{
ZstdDict *dict = ZstdDict_CAST(ob);
return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>",
dict->dict_id, Py_SIZE(dict->dict_content));
dict->dict_id, dict->dict_len);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dict->dict_id, dict->dict_len);
(unsigned int)dict->dict_id, dict->dict_len);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dict_id is already a uint32_t, is there a reason for adding this cast?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On every supported platform int is 32-bit, but it is not harm to be pedantic. This just goes to show that we didn't miss it.

}

static PyMemberDef ZstdDict_members[] = {
{"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc},
{"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc},
{NULL}
};

/*[clinic input]
@getter
_zstd.ZstdDict.dict_content

The content of a Zstandard dictionary, as a bytes object.
[clinic start generated code]*/

static PyObject *
_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self)
/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/
{
return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len);
}

/*[clinic input]
@critical_section
@getter
Expand Down Expand Up @@ -207,6 +217,7 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self)
}

static PyGetSetDef ZstdDict_getset[] = {
_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF
_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
Expand All @@ -217,24 +228,14 @@ static Py_ssize_t
ZstdDict_length(PyObject *ob)
{
ZstdDict *self = ZstdDict_CAST(ob);
assert(PyBytes_Check(self->dict_content));
return Py_SIZE(self->dict_content);
return self->dict_len;
}

static int
ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg)
{
ZstdDict *self = ZstdDict_CAST(ob);
Py_VISIT(self->c_dicts);
Py_VISIT(self->dict_content);
return 0;
}

static int
ZstdDict_clear(PyObject *ob)
{
ZstdDict *self = ZstdDict_CAST(ob);
Py_CLEAR(self->dict_content);
return 0;
}

Expand All @@ -247,7 +248,6 @@ static PyType_Slot zstddict_slots[] = {
{Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__},
{Py_sq_length, ZstdDict_length},
{Py_tp_traverse, ZstdDict_traverse},
{Py_tp_clear, ZstdDict_clear},
{0, 0}
};

Expand Down
6 changes: 4 additions & 2 deletions Modules/_zstd/zstddict.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ typedef struct {
ZSTD_DDict *d_dict;
PyObject *c_dicts;

/* Content of the dictionary, bytes object. */
PyObject *dict_content;
/* Dictionary content. */
char *dict_buffer;
Py_ssize_t dict_len;

/* Dictionary id */
uint32_t dict_id;
} ZstdDict;
Expand Down
Loading