diff --git a/.flake8 b/.flake8 index e6b17013..27acc6a7 100644 --- a/.flake8 +++ b/.flake8 @@ -12,3 +12,4 @@ exclude = fixture, notebooks, numcodecs.egg-info, + numcodecs/version.py, diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index bccd9160..9c117cc3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,4 +1,4 @@ Contributing ============ -Please see the [project documentation](http://numcodecs.readthedocs.io/en/stable/contributing.html) for information about contributing to NumCodecs. +Please see the [project documentation](https://numcodecs.readthedocs.io/en/stable/contributing.html) for information about contributing to NumCodecs. diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index bd617ba6..b5d1571a 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -2,17 +2,21 @@ name: Linux CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -46,7 +50,7 @@ jobs: run: | conda activate env export DISABLE_NUMCODECS_AVX2="" - python -m pip install -v -e .[test,msgpack,zfpy] + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index d5fd3c78..db09a12e 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -2,17 +2,21 @@ name: OSX CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: macos-latest strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", 
"3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -45,8 +49,8 @@ jobs: shell: "bash -l {0}" run: | conda activate env - export CC=clang - python -m pip install -v -e .[test,msgpack,zfpy] + export DISABLE_NUMCODECS_AVX2="" + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 64b70938..201a2ac4 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -2,17 +2,21 @@ name: Windows CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: windows-latest strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -38,7 +42,7 @@ jobs: shell: "bash -l {0}" run: | conda activate env - python -m pip install -v -e .[test,msgpack,zfpy] + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2d5a1fdf..7ae21d34 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -32,7 +32,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 39ed5353..69dd297b 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -2,6 +2,10 @@ name: Wheels on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build_wheels: name: Build wheel on ${{ matrix.os }} @@ -11,18 +15,18 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] env: - CIBW_TEST_COMMAND: pytest --pyargs numcodecs - CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: python -c "import numcodecs" CIBW_SKIP: "pp* cp36-* *-musllinux_* *win32 *_i686 *_s390x" - CIBW_ENVIRONMENT: "DISABLE_NUMCODECS_AVX2=1" - CIBW_ENVIRONMENT_MACOS: 'MACOSX_DEPLOYMENT_TARGET=10.9 DISABLE_NUMCODECS_AVX2=1 CFLAGS="$CFLAGS -Wno-implicit-function-declaration"' - + CIBW_ARCHS_MACOS: 'x86_64 arm64' + CIBW_TEST_SKIP: '*-macosx_arm64' + # note: CIBW_ENVIRONMENT is now set in pyproject.toml + steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - - uses: pypa/cibuildwheel@v2.11.3 + - uses: pypa/cibuildwheel@v2.16.2 - uses: actions/upload-artifact@v3 with: @@ -33,7 +37,7 @@ jobs: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 984ffc46..1405493c 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,3 +17,5 @@ python: path: . 
extra_requirements: - docs + - msgpack + - zfpy diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 93175dd6..b7395b1c 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -40,7 +40,7 @@ Project maintainers who do not follow or enforce the Code of Conduct in good fai ## Attribution -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct/][version] -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +[homepage]: https://www.contributor-covenant.org/ +[version]: https://www.contributor-covenant.org/version/1/4/code-of-conduct/ diff --git a/README.rst b/README.rst index 9850c2e7..c9e58f52 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ Numcodecs is a Python package providing buffer compression and transformation codecs for use in data storage and communication applications. .. image:: https://readthedocs.org/projects/numcodecs/badge/?version=latest - :target: http://numcodecs.readthedocs.io/en/latest/?badge=latest + :target: https://numcodecs.readthedocs.io/en/latest/?badge=latest .. 
image:: https://github.com/zarr-developers/numcodecs/workflows/Linux%20CI/badge.svg?branch=main :target: https://github.com/zarr-developers/numcodecs/actions?query=workflow%3A%22Linux+CI%22 diff --git a/adhoc/blosc_memleak_check.py b/adhoc/blosc_memleak_check.py index 6f38967f..3a875449 100644 --- a/adhoc/blosc_memleak_check.py +++ b/adhoc/blosc_memleak_check.py @@ -1,13 +1,11 @@ import sys - -import numcodecs as codecs -from numcodecs import blosc +import numcodecs import numpy as np from numpy.testing import assert_array_equal -codec = codecs.Blosc() +codec = numcodecs.Blosc() data = np.arange(int(sys.argv[1])) for i in range(int(sys.argv[2])): enc = codec.encode(data) diff --git a/docs/Makefile b/docs/Makefile index fe6a0bc4..19bcf031 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,10 +7,12 @@ SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) -endif +.PHONY: sphinx +sphinx: + # User-friendly check for sphinx-build + ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) + endif # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 @@ -54,44 +56,44 @@ clean: rm -rf $(BUILDDIR)/* .PHONY: html -html: +html: sphinx $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: dirhtml -dirhtml: +dirhtml: sphinx $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml -singlehtml: +singlehtml: sphinx $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: pickle -pickle: +pickle: sphinx $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json -json: +json: sphinx $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp -htmlhelp: +htmlhelp: sphinx $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." .PHONY: qthelp -qthelp: +qthelp: sphinx $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -101,7 +103,7 @@ qthelp: @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" .PHONY: applehelp -applehelp: +applehelp: sphinx $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @@ -110,7 +112,7 @@ applehelp: "bundle." .PHONY: devhelp -devhelp: +devhelp: sphinx $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @@ -120,19 +122,19 @@ devhelp: @echo "# devhelp" .PHONY: epub -epub: +epub: sphinx $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 -epub3: +epub3: sphinx $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex -latex: +latex: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @@ -140,33 +142,33 @@ latex: "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf -latexpdf: +latexpdf: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja -latexpdfja: +latexpdfja: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text -text: +text: sphinx $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." .PHONY: man -man: +man: sphinx $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo -texinfo: +texinfo: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @@ -174,57 +176,58 @@ texinfo: "(use \`make info' here to do that automatically)." .PHONY: info -info: +info: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext -gettext: +gettext: sphinx $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." .PHONY: changes -changes: +changes: sphinx $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck -linkcheck: +linkcheck: sphinx $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest -doctest: +doctest: sphinx $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage -coverage: +coverage: sphinx $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml -xml: +xml: sphinx $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml -pseudoxml: +pseudoxml: sphinx $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy -dummy: +dummy: sphinx $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." + diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 1d5522e2..5b2013f8 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -22,3 +22,24 @@ Adler32 .. automethod:: decode .. automethod:: get_config .. automethod:: from_config + + +Fletcher32 +---------- + +.. autoclass:: numcodecs.fletcher32.Fletcher32 + + .. autoattribute:: codec_id + .. automethod:: encode + .. automethod:: decode + +JenkinsLookup3 +-------------- + +.. autoclass:: JenkinsLookup3 + + .. autoattribute:: codec_id + .. autoattribute:: initval + .. autoattribute:: prefix + .. automethod:: encode + .. 
automethod:: decode diff --git a/docs/conf.py b/docs/conf.py index e2922fef..fba84bc7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # numcodecs documentation build configuration file, created by # sphinx-quickstart on Mon May 2 21:40:09 2016. diff --git a/docs/index.rst b/docs/index.rst index db9f935d..86a185f1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,9 +11,8 @@ Installation ------------ Numcodecs depends on NumPy. It is generally best to `install NumPy -`_ first using -whatever method is most appropriate for you operating system and -Python distribution. +`_ first using whatever method is most +appropriate for your operating system and Python distribution. Install from PyPI:: @@ -29,28 +28,35 @@ library. Wheels are available for most platforms. Installing a wheel or via conda will install a pre-compiled binary distribution. However, if you have a newer CPU that supports the AVX2 instruction set (e.g., Intel Haswell, Broadwell or Skylake) then installing via pip is preferable, -because this will compile the Blosc library from source with optimisations -for AVX2. +because you can compile the Blosc library from source with optimisations +for AVX2:: + + $ pip install -v --no-cache-dir --no-binary numcodecs numcodecs Note that if you compile the C extensions on a machine with AVX2 support you probably then cannot use the same binaries on a machine without AVX2. 
-To disable compilation with AVX2 support regardless of the machine -architecture:: - $ export DISABLE_NUMCODECS_AVX2= - $ pip install -v --no-cache-dir --no-binary numcodecs numcodecs +If you specifically want to disable AVX2 or SSE2 when compiling, you can use +the following environment variables:: -To work with Numcodecs source code in development, install from GitHub:: + $ export DISABLE_NUMCODECS_AVX2=1 + $ export DISABLE_NUMCODECS_SSE2=1 + + +To work with Numcodecs source code in development, clone the repository from GitHub +and then install in editable mode using `pip`:: $ git clone --recursive https://github.com/zarr-developers/numcodecs.git $ cd numcodecs - $ python setup.py install + $ pip install -e .[test,msgpack,zfpy] + +Note: if you prefer to use the GitHub CLI ``gh`` you will need to append ``-- --recurse-submodules`` +to the clone command so everything works properly. To verify that Numcodecs has been fully installed (including the Blosc extension) run the test suite:: - $ pip install nose - $ python -m nose -v numcodecs + $ pytest -v Contents -------- @@ -105,7 +111,7 @@ documentation, code reviews, comments and/or ideas: Numcodecs bundles the `c-blosc `_ library. Development of this package is supported by the -`MRC Centre for Genomics and Global Health `_. +`MRC Centre for Genomics and Global Health `_. Indices and tables ------------------ @@ -114,4 +120,4 @@ Indices and tables * :ref:`modindex` * :ref:`search` -.. _Blosc: http://www.blosc.org/ +.. _Blosc: https://www.blosc.org/ diff --git a/docs/release.rst b/docs/release.rst index 60159c7c..2ec8e1b9 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,49 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. +.. _release_0.12.1: + +0.12.1 +------ + +Fix +~~~ + +* ``Codec`` is now derived from ``abc.ABC`` + By :user:`Mads R. B. Kristensen `, :issue:`472`. 
+* Fix handling of entry points on older Python versions where ``importlib_metadata`` compatibility is concerned + By :user:`Vyas Ramasubramani `, :issue:`478`. +* Make shuffle pyx functions ``noexcept`` + By :user:`Martin Durant `, :issue:`477`. + +.. _release_0.12.0: + +0.12.0 +------ + +Enhancements +~~~~~~~~~~~~ + +* Add ``fletcher32`` checksum codec + By :user:`Martin Durant `, :issue:`410`. +* Add ``jenkins_lookup3`` checksum codec + By :user:`Mark Kittisopikul `, :issue:`445`. +* Support Python 3.12. + By :user:`John Kirkham `, :issue:`471`. + +Fix +~~~ + +* Fixed docs/Makefile error message when sphinx is not present + By :user:`Mark Kittisopikul `, :issue:`451`. +* Add missing RTD requirements + By :user:`John Kirkham `, :issue:`455`. + +Maintenance +~~~~~~~~~~~ + +* Cleanup ``import``\ s in ``adhoc/blosc_memleak_check.py`` + By :user:`John Kirkham `, :issue:`408`. .. _release_0.11.0: @@ -669,7 +712,7 @@ Fixed project description in setup.py. ----- First release. This version is a port of the ``codecs`` module from `Zarr -`_ 2.1.0. The following changes have been made from +`_ 2.1.0. 
The following changes have been made from the original Zarr module: * Codec classes have been re-organized into separate modules, mostly one per diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 53f3e795..3d7befe2 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -98,9 +98,10 @@ from numcodecs.msgpacks import MsgPack register_codec(MsgPack) -from numcodecs.checksum32 import CRC32, Adler32 +from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3 register_codec(CRC32) register_codec(Adler32) +register_codec(JenkinsLookup3) from numcodecs.json import JSON register_codec(JSON) @@ -111,3 +112,6 @@ register_codec(VLenUTF8) register_codec(VLenBytes) register_codec(VLenArray) + +from numcodecs.fletcher32 import Fletcher32 +register_codec(Fletcher32) diff --git a/numcodecs/_shuffle.pyx b/numcodecs/_shuffle.pyx index 308ea491..0f0dafeb 100644 --- a/numcodecs/_shuffle.pyx +++ b/numcodecs/_shuffle.pyx @@ -8,7 +8,7 @@ cimport cython @cython.boundscheck(False) @cython.wraparound(False) -cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) nogil: +cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) noexcept nogil: cdef Py_ssize_t count, i, j, offset, byte_index count = len(src) // element_size for i in range(count): @@ -20,7 +20,7 @@ cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_s @cython.boundscheck(False) @cython.wraparound(False) -cpdef void _doUnshuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) nogil: +cpdef void _doUnshuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) noexcept nogil: cdef Py_ssize_t count, i, j, offset, byte_index count = len(src) // element_size for i in range(element_size): diff --git a/numcodecs/abc.py b/numcodecs/abc.py index 4aa9c1a4..703ba037 100644 --- a/numcodecs/abc.py +++ b/numcodecs/abc.py @@ -29,10 +29,10 @@ 
""" -from abc import abstractmethod +from abc import ABC, abstractmethod -class Codec: +class Codec(ABC): """Codec abstract base class.""" # override in sub-class diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index 06dfbdb4..35a5ab99 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -2,10 +2,12 @@ import numpy as np +import struct from .abc import Codec from .compat import ensure_contiguous_ndarray, ndarray_copy +from .jenkins import jenkins_lookup3 class Checksum32(Codec): @@ -40,3 +42,58 @@ class Adler32(Checksum32): codec_id = 'adler32' checksum = zlib.adler32 + + +class JenkinsLookup3(Checksum32): + """Bob Jenkin's lookup3 checksum with 32-bit output + + This is the HDF5 implementation. + https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. 
+ + Attributes: + initval: initial seed passed to the hash algorithm, default: 0 + prefix: bytes prepended to the buffer before evaluating the hash, default: None + """ + + checksum = jenkins_lookup3 + codec_id = "jenkins_lookup3" + + def __init__(self, initval: int = 0, prefix=None): + self.initval = initval + if prefix is None: + self.prefix = None + else: + self.prefix = np.frombuffer(prefix, dtype='uint8') + + def encode(self, buf): + """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + if self.prefix is None: + val = jenkins_lookup3(buf, self.initval) + else: + val = jenkins_lookup3(np.hstack((self.prefix, buf)), self.initval) + return buf.tobytes() + struct.pack(" 360 else len + len -= tlen + while True: + sum1 += ((data[0]) << 8) | (data[1]) + data += 2 + sum2 += sum1 + tlen -= 1 + if tlen < 1: + break + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + if _len % 2: + sum1 += (((data[0])) << 8) + sum2 += sum1 + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + return (sum2 << 16) | sum1 + + +class Fletcher32(Codec): + """The fletcher checksum with 16-bit words and 32-bit output + + This is the netCDF4/HED5 implementation, which is not equivalent + to the one in wikipedia + https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. 
+ """ + + codec_id = "fletcher32" + + def encode(self, buf): + """Return buffer plus 4-byte fletcher checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + cdef const uint8_t[::1] b_ptr = buf + val = _fletcher32(b_ptr) + return buf.tobytes() + struct.pack("0xdeadbeef) + (length) + initval + + # Return immediately for empty bytes + if length == 0: + return c + + cdef: + const uint8_t *k = &_data[0] + + # We are adding uint32_t values (words) byte by byte so we do not assume endianness or alignment + # lookup3.c hashlittle checks for alignment + + # all but the last block: affect some 32 bits of (a,b,c) + while length > 12: + a += k[0] + a += (k[1]) << 8 + a += (k[2]) << 16 + a += (k[3]) << 24 + b += k[4] + b += (k[5]) << 8 + b += (k[6]) << 16 + b += (k[7]) << 24 + c += k[8] + c += (k[9]) << 8 + c += (k[10]) << 16 + c += (k[11]) << 24 + a, b, c = _jenkins_lookup3_mix(a, b, c) + length -= 12 + k += 12 + + # -------------------------------- last block: affect all 32 bits of (c) + if length == 12: + c += (k[11]) << 24 + length -= 1 + + if length == 11: + c += (k[10]) << 16 + length -= 1 + + if length == 10: + c += (k[9]) << 8 + length -= 1 + + if length == 9: + c += k[8] + length -= 1 + + if length == 8: + b += (k[7]) << 24 + length -= 1 + + if length == 7: + b += (k[6]) << 16 + length -= 1 + + if length == 6: + b += (k[5]) << 8 + length -= 1 + + if length == 5: + b += k[4] + length -= 1 + + if length == 4: + a += (k[3]) << 24 + length -= 1 + + if length == 3: + a += (k[2]) << 16 + length -= 1 + + if length == 2: + a += (k[1]) << 8 + length -= 1 + + if length == 1: + a += k[0] + length -= 1 + + if length == 0: + pass + + return _jenkins_lookup3_final(a, b, c) + +cdef inline uint32_t _jenkins_lookup3_final(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c + + Pairs of (a,b,c) values differing in only a few bits will usually + produce values of c that look totally different. 
This was tested for + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. + * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 + and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 + """ + c ^= b + c -= _jenkins_lookup3_rot(b,14) + a ^= c + a -= _jenkins_lookup3_rot(c,11) + b ^= a + b -= _jenkins_lookup3_rot(a,25) + c ^= b + c -= _jenkins_lookup3_rot(b,16) + a ^= c + a -= _jenkins_lookup3_rot(c,4) + b ^= a + b -= _jenkins_lookup3_rot(a,14) + c ^= b + c -= _jenkins_lookup3_rot(b,24) + return c + +cdef inline uint32_t _jenkins_lookup3_rot(uint32_t x, uint8_t k): + return (((x) << (k)) ^ ((x) >> (32 - (k)))) + +cdef inline (uint32_t, uint32_t, uint32_t) _jenkins_lookup3_mix(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_mix -- mix 3 32-bit values reversibly. + + This is reversible, so any information in (a,b,c) before mix() is + still in (a,b,c) after mix(). + + If four pairs of (a,b,c) inputs are run through mix(), or through + mix() in reverse, there are at least 32 bits of the output that + are sometimes the same for one pair and different for another pair. + This was tested for: + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. 
+ * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 + Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing + for "differ" defined as + with a one-bit base and a two-bit delta. I + used http://burtleburtle.net/bob/hash/avalanche.html to choose + the operations, constants, and arrangements of the variables. + + This does not achieve avalanche. There are input bits of (a,b,c) + that fail to affect some output bits of (a,b,c), especially of a. The + most thoroughly mixed value is c, but it doesn't really even achieve + avalanche in c. + + This allows some parallelism. Read-after-writes are good at doubling + the number of bits affected, so the goal of mixing pulls in the opposite + direction as the goal of parallelism. I did what I could. Rotates + seem to cost as much as shifts on every machine I could lay my hands + on, and rotates are much kinder to the top and bottom bits, so I used + rotates. 
+ """ + a -= c + a ^= _jenkins_lookup3_rot(c, 4) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 6) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 8) + b += a + a -= c + a ^= _jenkins_lookup3_rot(c, 16) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 19) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 4) + b += a + return a, b, c + + diff --git a/numcodecs/json.py b/numcodecs/json.py index 670f2235..b803a77b 100644 --- a/numcodecs/json.py +++ b/numcodecs/json.py @@ -54,7 +54,10 @@ def __init__(self, encoding='utf-8', skipkeys=False, ensure_ascii=True, self._decoder = _json.JSONDecoder(**self._decoder_config) def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return self._encoder.encode(items).encode(self._text_encoding) diff --git a/numcodecs/msgpacks.py b/numcodecs/msgpacks.py index 026f583a..65564984 100644 --- a/numcodecs/msgpacks.py +++ b/numcodecs/msgpacks.py @@ -52,7 +52,10 @@ def __init__(self, use_single_float=False, use_bin_type=True, raw=False): self.raw = raw def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return msgpack.packb(items, use_bin_type=self.use_bin_type, diff --git a/numcodecs/registry.py b/numcodecs/registry.py index 532e9967..d0cd0748 100644 --- a/numcodecs/registry.py +++ b/numcodecs/registry.py @@ -1,7 +1,7 @@ """The registry module provides some simple convenience functions to enable applications to dynamically register and look-up codec classes.""" +from importlib.metadata import entry_points import logging -from contextlib import suppress logger = logging.getLogger("numcodecs") codec_registry = {} @@ -9,13 +9,17 @@ def run_entrypoints(): - import entrypoints entries.clear() - entries.update(entrypoints.get_group_named("numcodecs.codecs")) 
+ eps = entry_points() + if hasattr(eps, 'select'): + # If entry_points() has a select method, use that. Python 3.10+ + entries.update({e.name: e for e in eps.select(group="numcodecs.codecs")}) + else: + # Otherwise, fallback to using get + entries.update({e.name: e for e in eps.get("numcodecs.codecs", [])}) -with suppress(ImportError): - run_entrypoints() +run_entrypoints() def get_codec(config): diff --git a/numcodecs/tests/test_entrypoints.py b/numcodecs/tests/test_entrypoints.py index 81af635d..2923ac22 100644 --- a/numcodecs/tests/test_entrypoints.py +++ b/numcodecs/tests/test_entrypoints.py @@ -7,7 +7,6 @@ here = os.path.abspath(os.path.dirname(__file__)) -pytest.importorskip("entrypoints") @pytest.fixture() @@ -20,7 +19,7 @@ def set_path(): numcodecs.registry.codec_registry.pop("test") -@pytest.mark.xfail(reason="FIXME: not working in wheels build") -def test_entrypoint_codec(set_path): +@pytest.mark.usefixtures("set_path") +def test_entrypoint_codec(): cls = numcodecs.registry.get_codec({"id": "test"}) assert cls.codec_id == "test" diff --git a/numcodecs/tests/test_entrypoints_backport.py b/numcodecs/tests/test_entrypoints_backport.py new file mode 100644 index 00000000..4e0459e5 --- /dev/null +++ b/numcodecs/tests/test_entrypoints_backport.py @@ -0,0 +1,32 @@ +import os.path +import pkgutil +import sys + +import pytest + +from multiprocessing import Process + +import numcodecs.registry + +if not pkgutil.find_loader("importlib_metadata"): # pragma: no cover + pytest.skip("This test module requires importlib_metadata to be installed") + +here = os.path.abspath(os.path.dirname(__file__)) + + +def get_entrypoints_with_importlib_metadata_loaded(): + # importlib_metadata patches importlib.metadata, which can lead to breaking changes + # to the APIs of EntryPoint objects used when registering entrypoints. Attempt to + # isolate those changes to just this test. 
+ import importlib_metadata # noqa: F401 + sys.path.append(here) + numcodecs.registry.run_entrypoints() + cls = numcodecs.registry.get_codec({"id": "test"}) + assert cls.codec_id == "test" + + +def test_entrypoint_codec_with_importlib_metadata(): + p = Process(target=get_entrypoints_with_importlib_metadata_loaded) + p.start() + p.join() + assert p.exitcode == 0 diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py new file mode 100644 index 00000000..aa4ca1ab --- /dev/null +++ b/numcodecs/tests/test_fletcher32.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest + +from numcodecs.fletcher32 import Fletcher32 + + +@pytest.mark.parametrize( + "dtype", + ["uint8", "int32", "float32"] +) +def test_with_data(dtype): + data = np.arange(100, dtype=dtype) + f = Fletcher32() + arr = np.frombuffer(f.decode(f.encode(data)), dtype=dtype) + assert (arr == data).all() + + +def test_error(): + data = np.arange(100) + f = Fletcher32() + enc = f.encode(data) + enc2 = bytearray(enc) + enc2[0] += 1 + with pytest.raises(RuntimeError) as e: + f.decode(enc2) + assert "fletcher32 checksum" in str(e.value) + + +def test_known(): + data = ( + b'w\x07\x00\x00\x00\x00\x00\x00\x85\xf6\xff\xff\xff\xff\xff\xff' + b'i\x07\x00\x00\x00\x00\x00\x00\x94\xf6\xff\xff\xff\xff\xff\xff' + b'\x88\t\x00\x00\x00\x00\x00\x00i\x03\x00\x00\x00\x00\x00\x00' + b'\x93\xfd\xff\xff\xff\xff\xff\xff\xc3\xfc\xff\xff\xff\xff\xff\xff' + b"'\x02\x00\x00\x00\x00\x00\x00\xba\xf7\xff\xff\xff\xff\xff\xff" + b'\xfd%\x86d') + data3 = Fletcher32().decode(data) + outarr = np.frombuffer(data3, dtype="=1.7", ] requires-python = ">=3.8" @@ -47,7 +45,7 @@ Homepage = "https://github.com/zarr-developers/numcodecs" [project.optional-dependencies] docs = [ - "sphinx", + "sphinx<7.0.0", "sphinx-issues", "numpydoc", "mock", @@ -58,6 +56,9 @@ test = [ "pytest", "pytest-cov", ] +test_extras = [ + "importlib_metadata", +] msgpack = [ "msgpack", ] @@ -71,6 +72,12 @@ package-dir = {"" = "."} packages = 
["numcodecs", "numcodecs.tests"] zip-safe = false +[tool.setuptools.package-data] +numcodecs = [ + "tests/package_with_entrypoint/__init__.py", + "tests/package_with_entrypoint-0.1.dist-info/entry_points.txt" +] + [tool.setuptools_scm] version_scheme = "guess-next-dev" local_scheme = "dirty-tag" @@ -108,3 +115,10 @@ norecursedirs = [ "notebooks", "numcodecs.egg-info", ] +[tool.cibuildwheel] +environment = { DISABLE_NUMCODECS_AVX2=1 } +[tool.cibuildwheel.macos] +environment = { MACOSX_DEPLOYMENT_TARGET=10.9, DISABLE_NUMCODECS_AVX2=1, CFLAGS="$CFLAGS -Wno-implicit-function-declaration" } +[[tool.cibuildwheel.overrides]] +select = "*-macosx_arm64" +environment = { DISABLE_NUMCODECS_AVX2=1, DISABLE_NUMCODECS_SSE2=1 } diff --git a/setup.py b/setup.py index cf950f40..f07cf8d4 100644 --- a/setup.py +++ b/setup.py @@ -198,6 +198,56 @@ def vlen_extension(): return extensions +def fletcher_extension(): + info('setting up fletcher32 extension') + + extra_compile_args = base_compile_args.copy() + define_macros = [] + + # setup sources + include_dirs = ['numcodecs'] + # define_macros += [('CYTHON_TRACE', '1')] + + sources = ['numcodecs/fletcher32.pyx'] + + # define extension module + extensions = [ + Extension('numcodecs.fletcher32', + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ), + ] + + return extensions + + +def jenkins_extension(): + info('setting up jenkins extension') + + extra_compile_args = base_compile_args.copy() + define_macros = [] + + # setup sources + include_dirs = ['numcodecs'] + define_macros += [('CYTHON_TRACE', '1')] + + sources = ['numcodecs/jenkins.pyx'] + + # define extension module + extensions = [ + Extension('numcodecs.jenkins', + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ), + ] + + return extensions + + def compat_extension(): info('setting up compat extension') @@ -265,7 +315,8 @@ def 
run_setup(with_extensions): if with_extensions: ext_modules = (blosc_extension() + zstd_extension() + lz4_extension() + - compat_extension() + shuffle_extension() + vlen_extension()) + compat_extension() + shuffle_extension() + vlen_extension() + + fletcher_extension() + jenkins_extension()) cmdclass = dict(build_ext=ve_build_ext) else: