Skip to content

gh-132983: Clean-ups for _zstd #133670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 40 additions & 34 deletions Lib/compression/zstd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@

import _zstd
import enum
from _zstd import *
from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
get_frame_size, zstd_version)
from compression.zstd._zstdfile import ZstdFile, open, _nbytes

COMPRESSION_LEVEL_DEFAULT = _zstd._compressionLevel_values[0]
# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE)
zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100),
_zstd.zstd_version_number % 100)
"""Version number of the runtime zstd library as a tuple of integers."""

COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
"""The default compression level for Zstandard, currently '3'."""


Expand Down Expand Up @@ -167,28 +173,28 @@ def decompress(data, zstd_dict=None, options=None):
class CompressionParameter(enum.IntEnum):
"""Compression parameters."""

compression_level = _zstd._ZSTD_c_compressionLevel
window_log = _zstd._ZSTD_c_windowLog
hash_log = _zstd._ZSTD_c_hashLog
chain_log = _zstd._ZSTD_c_chainLog
search_log = _zstd._ZSTD_c_searchLog
min_match = _zstd._ZSTD_c_minMatch
target_length = _zstd._ZSTD_c_targetLength
strategy = _zstd._ZSTD_c_strategy

enable_long_distance_matching = _zstd._ZSTD_c_enableLongDistanceMatching
ldm_hash_log = _zstd._ZSTD_c_ldmHashLog
ldm_min_match = _zstd._ZSTD_c_ldmMinMatch
ldm_bucket_size_log = _zstd._ZSTD_c_ldmBucketSizeLog
ldm_hash_rate_log = _zstd._ZSTD_c_ldmHashRateLog

content_size_flag = _zstd._ZSTD_c_contentSizeFlag
checksum_flag = _zstd._ZSTD_c_checksumFlag
dict_id_flag = _zstd._ZSTD_c_dictIDFlag

nb_workers = _zstd._ZSTD_c_nbWorkers
job_size = _zstd._ZSTD_c_jobSize
overlap_log = _zstd._ZSTD_c_overlapLog
compression_level = _zstd.ZSTD_c_compressionLevel
window_log = _zstd.ZSTD_c_windowLog
hash_log = _zstd.ZSTD_c_hashLog
chain_log = _zstd.ZSTD_c_chainLog
search_log = _zstd.ZSTD_c_searchLog
min_match = _zstd.ZSTD_c_minMatch
target_length = _zstd.ZSTD_c_targetLength
strategy = _zstd.ZSTD_c_strategy

enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

content_size_flag = _zstd.ZSTD_c_contentSizeFlag
checksum_flag = _zstd.ZSTD_c_checksumFlag
dict_id_flag = _zstd.ZSTD_c_dictIDFlag

nb_workers = _zstd.ZSTD_c_nbWorkers
job_size = _zstd.ZSTD_c_jobSize
overlap_log = _zstd.ZSTD_c_overlapLog

def bounds(self):
"""Return the (lower, upper) int bounds of a compression parameter.
Expand All @@ -201,7 +207,7 @@ def bounds(self):
class DecompressionParameter(enum.IntEnum):
"""Decompression parameters."""

window_log_max = _zstd._ZSTD_d_windowLogMax
window_log_max = _zstd.ZSTD_d_windowLogMax

def bounds(self):
"""Return the (lower, upper) int bounds of a decompression parameter.
Expand All @@ -219,15 +225,15 @@ class Strategy(enum.IntEnum):
the numeric value might change.
"""

fast = _zstd._ZSTD_fast
dfast = _zstd._ZSTD_dfast
greedy = _zstd._ZSTD_greedy
lazy = _zstd._ZSTD_lazy
lazy2 = _zstd._ZSTD_lazy2
btlazy2 = _zstd._ZSTD_btlazy2
btopt = _zstd._ZSTD_btopt
btultra = _zstd._ZSTD_btultra
btultra2 = _zstd._ZSTD_btultra2
fast = _zstd.ZSTD_fast
dfast = _zstd.ZSTD_dfast
greedy = _zstd.ZSTD_greedy
lazy = _zstd.ZSTD_lazy
lazy2 = _zstd.ZSTD_lazy2
btlazy2 = _zstd.ZSTD_btlazy2
btopt = _zstd.ZSTD_btopt
btultra = _zstd.ZSTD_btultra
btultra2 = _zstd.ZSTD_btultra2


# Check validity of the CompressionParameter & DecompressionParameter types
Expand Down
8 changes: 3 additions & 5 deletions Lib/compression/zstd/_zstdfile.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import io
from os import PathLike
from _zstd import (ZstdCompressor, ZstdDecompressor, _ZSTD_DStreamSizes,
ZstdError)
from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdError,
ZSTD_DStreamOutSize)
from compression._common import _streams

__all__ = ("ZstdFile", "open")

_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1]

_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_WRITE = 2
Expand Down Expand Up @@ -188,7 +186,7 @@ def read1(self, size=-1):
# Note this should *not* be io.DEFAULT_BUFFER_SIZE.
# ZSTD_DStreamOutSize is the minimum amount to read guaranteeing
# a full block is read.
size = _ZSTD_DStreamOutSize
size = ZSTD_DStreamOutSize
return self._buffer.read1(size)

def readinto(self, b):
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_zstd.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_compress_parameters(self):
with self.assertRaisesRegex(ZstdError,
(r'Error when setting zstd compression parameter "window_log", '
r'it should \d+ <= value <= \d+, provided value is 100\. '
r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')):
r'\((?:32|64)-bit build\)')):
compress(b'', options=option)

def test_unknown_compression_parameter(self):
Expand Down Expand Up @@ -413,7 +413,7 @@ def test_decompress_parameters(self):
with self.assertRaisesRegex(ZstdError,
(r'Error when setting zstd decompression parameter "window_log_max", '
r'it should \d+ <= value <= \d+, provided value is 100\. '
r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')):
r'\((?:32|64)-bit build\)')):
decompress(b'', options=options)

def test_unknown_decompression_parameter(self):
Expand Down
194 changes: 61 additions & 133 deletions Modules/_zstd/_zstdmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ set_zstd_error(const _zstd_state* const state,
case ERR_COMPRESS:
msg = "Unable to compress zstd data: %s";
break;
case ERR_SET_PLEDGED_INPUT_SIZE:
msg = "Unable to set pledged uncompressed content size: %s";
break;

case ERR_LOAD_D_DICT:
msg = "Unable to load zstd dictionary or prefix for decompression: %s";
Expand Down Expand Up @@ -151,19 +148,19 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
}
if (ZSTD_isError(bounds.error)) {
PyErr_Format(state->ZstdError,
"Zstd %s parameter \"%s\" is invalid. (zstd v%s)",
type, name, ZSTD_versionString());
"Zstd %s parameter \"%s\" is invalid.",
type, name);
return;
}

/* Error message */
PyErr_Format(state->ZstdError,
"Error when setting zstd %s parameter \"%s\", it "
"should %d <= value <= %d, provided value is %d. "
"(zstd v%s, %d-bit build)",
"(%d-bit build)",
type, name,
bounds.lowerBound, bounds.upperBound, value_v,
ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t));
8*(int)sizeof(Py_ssize_t));
}

static inline _zstd_state*
Expand Down Expand Up @@ -558,150 +555,81 @@ static PyMethodDef _zstd_methods[] = {
};


#define ADD_INT_PREFIX_MACRO(module, macro) \
do { \
if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \
return -1; \
} \
} while(0)

static int
add_parameters(PyObject *module)
{
/* If add new parameters, please also add to cp_list/dp_list above. */

/* Compression parameters */
ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy);

ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog);

ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag);

ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize);
ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog);

/* Decompression parameters */
ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax);

/* ZSTD_strategy enum */
ADD_INT_PREFIX_MACRO(module, ZSTD_fast);
ADD_INT_PREFIX_MACRO(module, ZSTD_dfast);
ADD_INT_PREFIX_MACRO(module, ZSTD_greedy);
ADD_INT_PREFIX_MACRO(module, ZSTD_lazy);
ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2);
ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2);
ADD_INT_PREFIX_MACRO(module, ZSTD_btopt);
ADD_INT_PREFIX_MACRO(module, ZSTD_btultra);
ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2);

return 0;
}

static inline PyObject *
get_zstd_version_info(void)
{
uint32_t ver = ZSTD_versionNumber();
uint32_t major, minor, release;

major = ver / 10000;
minor = (ver / 100) % 100;
release = ver % 100;

return Py_BuildValue("III", major, minor, release);
}

static inline int
add_vars_to_module(PyObject *module)
add_vars_to_module(PyObject *m)
{
PyObject *obj;
#define ADD_INT_MACRO(MACRO) \
if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \
return -1; \
}

/* zstd_version, a str. */
if (PyModule_AddStringConstant(module, "zstd_version",
ZSTD_versionString()) < 0) {
/* zstd_version_number, int */
if (PyModule_AddIntConstant(m, "zstd_version_number",
ZSTD_versionNumber()) < 0) {
return -1;
}

/* zstd_version_info, a tuple. */
obj = get_zstd_version_info();
if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) {
Py_XDECREF(obj);
/* zstd_version, str */
if (PyModule_AddStringConstant(m, "zstd_version",
ZSTD_versionString()) < 0) {
return -1;
}
Py_DECREF(obj);

/* Add zstd parameters */
if (add_parameters(module) < 0) {
/* ZSTD_CLEVEL_DEFAULT, int */
#if ZSTD_VERSION_NUMBER >= 10500
if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT",
ZSTD_defaultCLevel()) < 0) {
return -1;
}

/* _compressionLevel_values: (default, min, max)
ZSTD_defaultCLevel() was added in zstd v1.5.0 */
obj = Py_BuildValue("iii",
#if ZSTD_VERSION_NUMBER < 10500
ZSTD_CLEVEL_DEFAULT,
#else
ZSTD_defaultCLevel(),
ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT);
#endif
ZSTD_minCLevel(),
ZSTD_maxCLevel());
if (PyModule_AddObjectRef(module,
"_compressionLevel_values",
obj) < 0) {
Py_XDECREF(obj);
return -1;
}
Py_DECREF(obj);

/* _ZSTD_CStreamSizes */
obj = Py_BuildValue("II",
(uint32_t)ZSTD_CStreamInSize(),
(uint32_t)ZSTD_CStreamOutSize());
if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) {
Py_XDECREF(obj);
/* ZSTD_DStreamOutSize, int */
if (PyModule_Add(m, "ZSTD_DStreamOutSize",
PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) {
return -1;
}
Py_DECREF(obj);

/* _ZSTD_DStreamSizes */
obj = Py_BuildValue("II",
(uint32_t)ZSTD_DStreamInSize(),
(uint32_t)ZSTD_DStreamOutSize());
if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) {
Py_XDECREF(obj);
return -1;
}
Py_DECREF(obj);
/* Add zstd compression parameters. All should also be in cp_list. */
ADD_INT_MACRO(ZSTD_c_compressionLevel);
ADD_INT_MACRO(ZSTD_c_windowLog);
ADD_INT_MACRO(ZSTD_c_hashLog);
ADD_INT_MACRO(ZSTD_c_chainLog);
ADD_INT_MACRO(ZSTD_c_searchLog);
ADD_INT_MACRO(ZSTD_c_minMatch);
ADD_INT_MACRO(ZSTD_c_targetLength);
ADD_INT_MACRO(ZSTD_c_strategy);

/* _ZSTD_CONFIG */
obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c",
Py_False,
Py_True,
/* User mremap output buffer */
#if defined(HAVE_MREMAP)
Py_True
#else
Py_False
#endif
);
if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) {
Py_XDECREF(obj);
return -1;
}
Py_DECREF(obj);
ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching);
ADD_INT_MACRO(ZSTD_c_ldmHashLog);
ADD_INT_MACRO(ZSTD_c_ldmMinMatch);
ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog);
ADD_INT_MACRO(ZSTD_c_ldmHashRateLog);

ADD_INT_MACRO(ZSTD_c_contentSizeFlag);
ADD_INT_MACRO(ZSTD_c_checksumFlag);
ADD_INT_MACRO(ZSTD_c_dictIDFlag);

ADD_INT_MACRO(ZSTD_c_nbWorkers);
ADD_INT_MACRO(ZSTD_c_jobSize);
ADD_INT_MACRO(ZSTD_c_overlapLog);

/* Add zstd decompression parameters. All should also be in dp_list. */
ADD_INT_MACRO(ZSTD_d_windowLogMax);

/* ZSTD_strategy enum */
ADD_INT_MACRO(ZSTD_fast);
ADD_INT_MACRO(ZSTD_dfast);
ADD_INT_MACRO(ZSTD_greedy);
ADD_INT_MACRO(ZSTD_lazy);
ADD_INT_MACRO(ZSTD_lazy2);
ADD_INT_MACRO(ZSTD_btlazy2);
ADD_INT_MACRO(ZSTD_btopt);
ADD_INT_MACRO(ZSTD_btultra);
ADD_INT_MACRO(ZSTD_btultra2);

#undef ADD_INT_MACRO

return 0;
}
Expand Down
Loading
Loading