diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..b696b926 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,33 @@ +name: docs + +on: ["push", "pull_request"] + +jobs: + docs: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: "pip" + cache-dependency-path: | + requirements.txt + docs/requirements.txt + + - name: Build + run: | + pip install -r requirements.txt + make cython + + - name: Sphinx Documentation Generator + run: | + pip install -r docs/requirements.txt + make docs diff --git a/.github/workflows/black.yaml b/.github/workflows/lint.yaml similarity index 51% rename from .github/workflows/black.yaml rename to .github/workflows/lint.yaml index 0a0a737d..198cf7b5 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/lint.yaml @@ -1,25 +1,22 @@ -name: Black +name: lint on: ["push", "pull_request"] jobs: - black: + lint: # We want to run on external PRs, but not on our own internal PRs as they'll be run # by the push to the branch. if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - architecture: 'x64' - - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + + - name: ruff check + run: | + pipx run ruff check --diff msgpack/ test/ setup.py - - name: Black Code Formatter + - name: ruff format run: | - pip install black==22.3.0 - black -S --diff --check msgpack/ test/ setup.py + pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d01d74cb..60894797 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,26 +9,32 @@ jobs: test: strategy: matrix: - os: [ubuntu-20.04, windows-2022, macos-10.15] - py: ["3.11-dev", "3.10", "3.9", "3.8", "3.7", "3.6"] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] + py: ["3.14-dev", "3.13", "3.12", "3.11", "3.10", "3.9"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.py }} + allow-prereleases: true cache: "pip" - - name: Build + - name: Prepare shell: bash run: | + pip install -U pip pip install -r requirements.txt pytest + + - name: Build + shell: bash + run: | make cython pip install . @@ -42,8 +48,14 @@ jobs: run: | MSGPACK_PUREPYTHON=1 pytest -v test - - name: Publish Wheels to TestPyPI - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 + - name: build packages + shell: bash + run: | + pip install build + python -m build + + - name: upload packages + uses: actions/upload-artifact@v4 with: - password: ${{ secrets.PYPI_API_TOKEN }} + name: dist-${{ matrix.os }}-${{ matrix.py }} + path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d73898c2..12600363 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -1,57 +1,88 @@ -name: Build Wheels +name: Build sdist and Wheels on: push: branches: [main] - create: + release: + types: + - published + workflow_dispatch: jobs: build_wheels: strategy: matrix: - os: [ubuntu-20.04, windows-2022, macos-10.15] + # macos-13 is for intel + os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "macos-13", "macos-latest"] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Set up QEMU - if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v1 - with: - platforms: arm64 - - - name: Set up Python 3.9 - uses: actions/setup-python@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: "3.x" cache: "pip" - - - name: Prepare + - name: Cythonize shell: bash run: | pip install -r requirements.txt make cython - name: Build - uses: pypa/cibuildwheel@v2.6.0 + uses: pypa/cibuildwheel@v3.1.1 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_ARCHS_LINUX: auto aarch64 - CIBW_ARCHS_MACOS: x86_64 universal2 arm64 - CIBW_SKIP: pp* + CIBW_SKIP: "pp* cp38-*" + + - name: Build sdist + if: runner.os == 'Linux' && runner.arch == 'X64' + run: | + pip install build + python -m build -s -o wheelhouse - name: Upload Wheels to artifact - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: - name: Wheels + name: wheels-${{ matrix.os }} path: wheelhouse - - name: Publish Wheels to TestPyPI - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 with: - packages_dir: wheelhouse - password: ${{ secrets.PYPI_API_TOKEN }} + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + #with: + # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 800f1c22..341be631 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,13 @@ MANIFEST build/* dist/* .tox +.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py +msgpack/*.c msgpack/*.cpp *.egg-info /venv diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..88d87182 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file for Sphinx projects. +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + apt_packages: + - build-essential + jobs: + pre_install: + - pip install -r requirements.txt + - make cython + +python: + install: + - method: pip + path: . + - requirements: docs/requirements.txt + +sphinx: + configuration: docs/conf.py diff --git a/ChangeLog.rst b/ChangeLog.rst index a11c8144..418c444c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,92 @@ +1.1.1 +===== + +Release Date: 2025-06-13 + +* No change from 1.1.1rc1. + +1.1.1rc1 +======== + +Release Date: 2025-06-06 + +* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. + +1.1.0 +===== + +Release Date: 2024-09-10 + +* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with + future Cython. (#620) + +1.1.0rc2 +======== + +Release Date: 2024-08-19 + +* Update Cython to 3.0.11 for better Python 3.13 support. +* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + +1.1.0rc1 +======== + +Release Date: 2024-05-07 + +* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. +* Stop using C++ mode in Cython to reduce compile error on some compilers. +* ``Packer()`` has ``buf_size`` option to specify initial size of + internal buffer to reduce reallocation. +* The default internal buffer size of ``Packer()`` is reduced from + 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` + if you are packing large data. +* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become + more accurate by avoiding floating point calculations. (#591) +* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. +* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only + arguments to improve compatibility with the Cython implementation. + +1.0.8 +===== + +Release Date: 2024-03-01 + +* Update Cython to 3.0.8. This fixes memory leak when iterating + ``Unpacker`` object on Python 3.12. +* Do not include C/Cython files in binary wheels. + + +1.0.7 +===== + +Release Date: 2023-09-28 + +* Fix build error of extension module on Windows. (#567) +* ``setup.py`` doesn't skip build error of extension module. (#568) + + +1.0.6 +===== + +Release Date: 2023-09-21 + +.. note:: + v1.0.6 Wheels for Windows don't contain extension module. + Please upgrade to v1.0.7 or newer. + +* Add Python 3.12 wheels (#517) +* Remove Python 2.7, 3.6, and 3.7 support + + +1.0.5 +===== + +Release Date: 2023-03-08 + +* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) +* Add Python 3.11 wheels (#517) +* fallback: Fix packing multidimensional memoryview (#527) + 1.0.4 ===== @@ -89,7 +178,7 @@ Important changes * unpacker: Default value of input limits are smaller than before to avoid DoS attack. If you need to handle large data, you need to specify limits manually. (#319) -* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into +* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into ``UnpackValueError``. If you want to catch all exception during unpack, you need to use ``try ... except Exception`` with minimum try code block. (#323, #233) diff --git a/DEVELOP.md b/DEVELOP.md index 9c823c34..27adf8c0 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -1,13 +1,5 @@ # Developer's note -## Wheels - -Wheels for macOS and Linux are built on Travis and AppVeyr, in -[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. - -Wheels for Windows are built on Github Actions in this repository. - - ### Build ``` diff --git a/Makefile b/Makefile index 415dcfdd..51f3e0ef 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,28 @@ +PYTHON_SOURCES = msgpack test setup.py + .PHONY: all all: cython python setup.py build_ext -i -f -.PHONY: black -black: - black -S msgpack/ test/ setup.py +.PHONY: format +format: + ruff format $(PYTHON_SOURCES) + +.PHONY: lint +lint: + ruff check $(PYTHON_SOURCES) + +.PHONY: doc +doc: + cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html + +.PHONY: pyupgrade +pyupgrade: + @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; .PHONY: cython cython: - cython --cplus msgpack/_cmsgpack.pyx + cython msgpack/_cmsgpack.pyx .PHONY: test test: cython diff --git a/README.md b/README.md index cb816485..61a03c60 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # MessagePack for Python -[![Build Status](https://travis-ci.org/msgpack/msgpack-python.svg?branch=master)](https://travis-ci.org/msgpack/msgpack-python) +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) ## What's this @@ -10,53 +10,6 @@ It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. - -## Very important notes for existing users - -### PyPI package name - -Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. - -When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before -`pip install -U msgpack`. - - -### Compatibility with the old format - -You can use `use_bin_type=False` option to pack `bytes` -object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. - -You can unpack old msgpack format using `raw=True` option. -It unpacks str (raw) type in msgpack into Python bytes. - -See note below for detail. - - -### Major breaking changes in msgpack 1.0 - -* Python 2 - - * The extension module does not support Python 2 anymore. - The pure Python implementation (`msgpack.fallback`) is used for Python 2. - -* Packer - - * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. - **If you are still using Python 2, you must use unicode for all string types.** - You can use `use_bin_type=False` to encode into old msgpack format. - * `encoding` option is removed. UTF-8 is used always. - -* Unpacker - - * `raw=False` by default. It assumes str types are valid UTF-8 string - and decode them to Python str (unicode) object. - * `encoding` option is removed. You can use `raw=True` to support old format. - * Default value of `max_buffer_size` is changed from 0 to 100 MiB. - * Default value of `strict_map_key` is changed to True to avoid hashdos. - You need to pass `strict_map_key=False` if you have data which contain map keys - which type is not bytes or str. - - ## Install ``` @@ -65,12 +18,9 @@ $ pip install msgpack ### Pure Python implementation -The extension module in msgpack (`msgpack._cmsgpack`) does not support -Python 2 and PyPy. - -But msgpack provides a pure Python implementation (`msgpack.fallback`) -for PyPy and Python 2. +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. ### Windows @@ -82,10 +32,6 @@ Without extension, using pure Python implementation on CPython runs slowly. ## How to use -NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users -using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. - - ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. @@ -97,23 +43,13 @@ msgpack provides `dumps` and `loads` as an alias for compatibility with ```pycon >>> import msgpack ->>> msgpack.packb([1, 2, 3], use_bin_type=True) +>>> msgpack.packb([1, 2, 3]) '\x93\x01\x02\x03' ->>> msgpack.unpackb(_, raw=False) +>>> msgpack.unpackb(_) [1, 2, 3] ``` -`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: - -```pycon ->>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) -(1, 2, 3) -``` - -You should always specify the `use_list` keyword argument for backward compatibility. -See performance issues relating to `use_list option`_ below. - -Read the docstring for other options. +Read the docstring for options. ### Streaming unpacking @@ -127,11 +63,11 @@ from io import BytesIO buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(i, use_bin_type=True)) + buf.write(msgpack.packb(i)) buf.seek(0) -unpacker = msgpack.Unpacker(buf, raw=False) +unpacker = msgpack.Unpacker(buf) for unpacked in unpacker: print(unpacked) ``` @@ -162,14 +98,17 @@ def encode_datetime(obj): return obj -packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) -this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) ``` `Unpacker`'s `object_hook` callback receives a dict; the `object_pairs_hook` callback may instead be used to receive a list of key-value pairs. +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + ### Extended types @@ -191,8 +130,8 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... return ExtType(code, data) ... >>> data = array.array('d', [1.2, 3.4]) ->>> packed = msgpack.packb(data, default=default, use_bin_type=True) ->>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) >>> data == unpacked True ``` @@ -210,7 +149,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### string and binary type +### string and binary type in old msgpack spec Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -220,9 +159,9 @@ and `raw=True` options. ```pycon >>> import msgpack ->>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] ->>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] ``` @@ -263,3 +202,41 @@ You can use `gc.disable()` when unpacking large message. List is the default sequence type of Python. But tuple is lighter than list. You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### msgpack 1.0 + +* Python 2 support + + * The extension module does not support Python 2 anymore. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action (setup-python) no longer support Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in bin type in msgpack. + * The `encoding` option is removed. UTF-8 is used always. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * Default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attack. + You need to pass `max_buffer_size=0` if you have large but safe data. + * Default value of `strict_map_key` is changed to True to avoid hashdos. + You need to pass `strict_map_key=False` if you have data which contain map keys + which type is not bytes or str. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..75f0c541 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +## Security contact information + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/docs/_static/README.txt b/docs/_static/README.txt new file mode 100644 index 00000000..1c70594f --- /dev/null +++ b/docs/_static/README.txt @@ -0,0 +1 @@ +Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/api.rst b/docs/api.rst index 93827e19..f5dfbbd2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -:func:`dump` is alias for :func:`pack` +``dump()`` is an alias for :func:`pack` .. autofunction:: packb -:func:`dumps` is alias for :func:`packb` +``dumps()`` is an alias for :func:`packb` .. autofunction:: unpack -:func:`load` is alias for :func:`unpack` +``load()`` is an alias for :func:`unpack` .. autofunction:: unpackb -:func:`loads` is alias for :func:`unpackb` +``loads()`` is an alias for :func:`unpackb` .. autoclass:: Packer :members: diff --git a/docs/conf.py b/docs/conf.py index 6b432be0..28116cd6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -11,12 +9,12 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- @@ -40,8 +38,8 @@ master_doc = "index" # General information about the project. -project = u"msgpack" -copyright = u"Inada Naoki" +project = "msgpack" +copyright = "Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -91,7 +89,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinxdoc" +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -181,7 +179,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ("index", "msgpack.tex", u"msgpack Documentation", u"Author", "manual"), + ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -209,7 +207,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "msgpack", u"msgpack Documentation", [u"Author"], 1)] +man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -224,8 +222,8 @@ ( "index", "msgpack", - u"msgpack Documentation", - u"Author", + "msgpack Documentation", + "Author", "msgpack", "One line description of project.", "Miscellaneous", @@ -245,10 +243,10 @@ # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u"msgpack" -epub_author = u"Author" -epub_publisher = u"Author" -epub_copyright = u"2013, Author" +epub_title = "msgpack" +epub_author = "Author" +epub_publisher = "Author" +epub_copyright = "2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..26002de4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx~=7.3.7 +sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 50710218..ad68271d 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,22 +1,20 @@ -# coding: utf-8 -from .exceptions import * -from .ext import ExtType, Timestamp - +# ruff: noqa: F401 import os -import sys +from .exceptions import * # noqa: F403 +from .ext import ExtType, Timestamp -version = (1, 0, 4) -__version__ = "1.0.4" +version = (1, 1, 1) +__version__ = "1.1.1" -if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: - from .fallback import Packer, unpackb, Unpacker +if os.environ.get("MSGPACK_PUREPYTHON"): + from .fallback import Packer, Unpacker, unpackb else: try: - from ._cmsgpack import Packer, unpackb, Unpacker + from ._cmsgpack import Packer, Unpacker, unpackb except ImportError: - from .fallback import Packer, unpackb, Unpacker + from .fallback import Packer, Unpacker, unpackb def pack(o, stream, **kwargs): diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 396da0c2..402b6946 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -16,8 +16,6 @@ from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) - char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL - cdef extern from "pack.h": struct msgpack_packer: @@ -26,26 +24,21 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_bin(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); - int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) - -cdef extern from "buff_converter.h": - object buff_to_buff(char *, Py_ssize_t) + int msgpack_pack_nil(msgpack_packer* pk) except -1 + int msgpack_pack_true(msgpack_packer* pk) except -1 + int msgpack_pack_false(msgpack_packer* pk) except -1 + int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 + int msgpack_pack_float(msgpack_packer* pk, float d) except -1 + int msgpack_pack_double(msgpack_packer* pk, double d) except -1 + int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -59,7 +52,7 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer(object): +cdef class Packer: """ MessagePack Packer @@ -71,7 +64,8 @@ cdef class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -98,32 +92,47 @@ cdef class Packer(object): If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + The size of the internal buffer. (default: 256*1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. """ cdef msgpack_packer pk cdef object _default cdef object _berrors cdef const char *unicode_errors + cdef size_t exports # number of exported buffers cdef bint strict_types cdef bint use_float cdef bint autoreset cdef bint datetime - def __cinit__(self): - cdef int buf_size = 1024*1024 + def __cinit__(self, buf_size=256*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 + self.exports = 0 + + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + assert self.exports == 0 + + cdef _check_exports(self): + if self.exports > 0: + raise BufferError("Existing exports of data: Packer cannot be changed") def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None): + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -140,159 +149,129 @@ cdef class Packer(object): else: self.unicode_errors = self._berrors - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + # returns -2 when default should(o) be called + cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: cdef long long llval cdef unsigned long long ullval cdef unsigned long ulval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d + cdef const char* rawval cdef Py_ssize_t L - cdef int default_used = 0 - cdef bint strict_types = self.strict_types cdef Py_buffer view - - if nest_limit < 0: - raise ValueError("recursion limit exceeded.") - - while True: - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif o is True: - ret = msgpack_pack_true(&self.pk) - elif o is False: - ret = msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): - # PyInt_Check(long) is True for Python 3. - # So we should test long before int. - try: - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if not default_used and self._default is not None: - o = self._default(o) - default_used = True - continue - else: - raise OverflowError("Integer value out of range") - elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) + cdef bint strict = self.strict_types + + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict else PyLong_Check(o): + try: + if o > 0: + ullval = o + msgpack_pack_unsigned_long_long(&self.pk, ullval) else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.unicode_errors == NULL: - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise ValueError("unicode string is too large") + llval = o + msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if will_default: + return -2 else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - rawval = o - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o): - d = o - L = len(d) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif not strict_types and PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif type(o) is ExtType if strict_types else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - longval = o.code - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - ret = msgpack_pack_ext(&self.pk, longval, L) - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + raise OverflowError("Integer value out of range") + elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): + if self.use_float: + msgpack_pack_float(&self.pk, o) + else: + msgpack_pack_double(&self.pk, o) + elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): + if self.unicode_errors == NULL: + rawval = PyUnicode_AsUTF8AndSize(o, &L) + if L >ITEM_LIMIT: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) L = Py_SIZE(o) if L > ITEM_LIMIT: - raise ValueError("list is too large") - ret = msgpack_pack_array(&self.pk, L) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyMemoryView_Check(o): - if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise ValueError("could not get buffer for memoryview") - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + raise ValueError("unicode string is too large") + rawval = o + msgpack_pack_raw(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o) if strict else PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + msgpack_pack_map(&self.pk, L) + for k, v in o.items(): + self._pack(k, nest_limit) + self._pack(v, nest_limit) + elif type(o) is ExtType if strict else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + msgpack_pack_ext(&self.pk, o.code, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + msgpack_pack_array(&self.pk, L) + for v in o: + self._pack(v, nest_limit) + elif PyMemoryView_Check(o): + PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) + L = view.len + if L > ITEM_LIMIT: PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif not default_used and self._default: + raise ValueError("memoryview is too large") + try: + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, view.buf, L) + finally: + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif will_default: + return -2 + elif self.datetime and PyDateTime_CheckExact(o): + # this should be later than will_default + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef int ret + if nest_limit < 0: + raise ValueError("recursion limit exceeded.") + nest_limit -= 1 + if self._default is not None: + ret = self._pack_inner(o, 1, nest_limit) + if ret == -2: o = self._default(o) - default_used = 1 - continue - elif self.datetime and PyDateTime_CheckExact(o): - PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) - return ret + return ret + return self._pack_inner(o, 0, nest_limit) - cpdef pack(self, object obj): + def pack(self, object obj): cdef int ret + self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -306,30 +285,27 @@ cdef class Packer(object): return buf def pack_ext_type(self, typecode, data): + self._check_exports() + if len(data) > ITEM_LIMIT: + raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) def pack_array_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("array too large") + msgpack_pack_array(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf def pack_map_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("map too learge") + msgpack_pack_map(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 @@ -342,17 +318,14 @@ cdef class Packer(object): *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + self._check_exports() + size = len(pairs) + if size > ITEM_LIMIT: + raise ValueError("map too large") + msgpack_pack_map(&self.pk, size) + for k, v in pairs: + self._pack(k) + self._pack(v) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 @@ -363,6 +336,7 @@ cdef class Packer(object): This method is useful only when autoreset=False. """ + self._check_exports() self.pk.length = 0 def bytes(self): @@ -370,5 +344,15 @@ cdef class Packer(object): return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) def getbuffer(self): - """Return view of internal buffer.""" - return buff_to_buff(self.pk.buf, self.pk.length) + """Return memoryview of internal buffer. + + Note: Packer now supports buffer protocol. You can use memoryview(packer). + """ + return memoryview(self) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + self.exports += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 8b06661e..34ff3304 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -35,7 +35,7 @@ cdef extern from "unpack.h": PyObject* timestamp_t PyObject *giga; PyObject *utc; - char *unicode_errors + const char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -210,14 +210,14 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker(object): +cdef class Unpacker: """Streaming unpacker. Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -236,17 +236,17 @@ cdef class Unpacker(object): 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). Python 2 is not supported. + 3 - datetime.datetime (UTC). :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h deleted file mode 100644 index 86b4196d..00000000 --- a/msgpack/buff_converter.h +++ /dev/null @@ -1,8 +0,0 @@ -#include "Python.h" - -/* cython does not support this preprocessor check => write it in raw C */ -static PyObject * -buff_to_buff(char *buff, Py_ssize_t size) -{ - return PyMemoryView_FromMemory(buff, size, PyBUF_READ); -} diff --git a/msgpack/ext.py b/msgpack/ext.py index 25544c55..9694819a 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,21 +1,6 @@ -# coding: utf-8 -from collections import namedtuple import datetime -import sys import struct - - -PY2 = sys.version_info[0] == 2 - -if PY2: - int_types = (int, long) - _utc = None -else: - int_types = int - try: - _utc = datetime.timezone.utc - except AttributeError: - _utc = datetime.timezone(datetime.timedelta(0)) +from collections import namedtuple class ExtType(namedtuple("ExtType", "code data")): @@ -28,14 +13,15 @@ def __new__(cls, code, data): raise TypeError("data must be bytes") if not 0 <= code <= 127: raise ValueError("code must be 0~127") - return super(ExtType, cls).__new__(cls, code, data) + return super().__new__(cls, code, data) -class Timestamp(object): +class Timestamp: """Timestamp represents the Timestamp extension type in msgpack. - When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python - msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. This class is immutable: Do not override seconds and nanoseconds. """ @@ -53,31 +39,25 @@ def __init__(self, seconds, nanoseconds=0): Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999. Default is 0. - Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. """ - if not isinstance(seconds, int_types): - raise TypeError("seconds must be an interger") - if not isinstance(nanoseconds, int_types): + if not isinstance(seconds, int): + raise TypeError("seconds must be an integer") + if not isinstance(nanoseconds, int): raise TypeError("nanoseconds must be an integer") if not (0 <= nanoseconds < 10**9): - raise ValueError( - "nanoseconds must be a non-negative integer less than 999999999." - ) + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") self.seconds = seconds self.nanoseconds = nanoseconds def __repr__(self): """String representation of Timestamp.""" - return "Timestamp(seconds={0}, nanoseconds={1})".format( - self.seconds, self.nanoseconds - ) + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" def __eq__(self, other): """Check for equality with another Timestamp object""" if type(other) is self.__class__: - return ( - self.seconds == other.seconds and self.nanoseconds == other.nanoseconds - ) + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds return False def __ne__(self, other): @@ -140,7 +120,7 @@ def from_unix(unix_sec): """Create a Timestamp from posix timestamp in seconds. :param unix_float: Posix timestamp in seconds. - :type unix_float: int or float. + :type unix_float: int or float """ seconds = int(unix_sec // 1) nanoseconds = int((unix_sec % 1) * 10**9) @@ -174,20 +154,17 @@ def to_unix_nano(self): def to_datetime(self): """Get the timestamp as a UTC datetime. - Python 2 is not supported. - - :rtype: datetime. + :rtype: `datetime.datetime` """ - return datetime.datetime.fromtimestamp(0, _utc) + datetime.timedelta( - seconds=self.to_unix() + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 ) @staticmethod def from_datetime(dt): """Create a Timestamp from datetime with tzinfo. - Python 2 is not supported. - :rtype: Timestamp """ - return Timestamp.from_unix(dt.timestamp()) + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f560c7b5..b02e47cf 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,60 +1,22 @@ """Fallback pure Python implementation of msgpack""" -from datetime import datetime as _DateTime -import sys -import struct - - -PY2 = sys.version_info[0] == 2 -if PY2: - int_types = (int, long) - - def dict_iteritems(d): - return d.iteritems() - -else: - int_types = int - unicode = str - xrange = range - - def dict_iteritems(d): - return d.items() - - -if sys.version_info < (3, 5): - # Ugly hack... - RecursionError = RuntimeError - - def _is_recursionerror(e): - return ( - len(e.args) == 1 - and isinstance(e.args[0], str) - and e.args[0].startswith("maximum recursion depth exceeded") - ) - -else: - - def _is_recursionerror(e): - return True +import struct +import sys +from datetime import datetime as _DateTime if hasattr(sys, "pypy_version_info"): - # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own - # StringBuilder is fastest. from __pypy__ import newlist_hint + from __pypy__.builders import BytesBuilder - try: - from __pypy__.builders import BytesBuilder as StringBuilder - except ImportError: - from __pypy__.builders import StringBuilder - USING_STRINGBUILDER = True + _USING_STRINGBUILDER = True - class StringIO(object): + class BytesIO: def __init__(self, s=b""): if s: - self.builder = StringBuilder(len(s)) + self.builder = BytesBuilder(len(s)) self.builder.append(s) else: - self.builder = StringBuilder() + self.builder = BytesBuilder() def write(self, s): if isinstance(s, memoryview): @@ -67,17 +29,17 @@ def getvalue(self): return self.builder.build() else: - USING_STRINGBUILDER = False - from io import BytesIO as StringIO + from io import BytesIO - newlist_hint = lambda size: [] + _USING_STRINGBUILDER = False + def newlist_hint(size): + return [] -from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError from .ext import ExtType, Timestamp - EX_SKIP = 0 EX_CONSTRUCT = 1 EX_READ_ARRAY_HEADER = 2 @@ -125,24 +87,13 @@ def unpackb(packed, **kwargs): ret = unpacker._unpack() except OutOfData: raise ValueError("Unpack failed: incomplete input") - except RecursionError as e: - if _is_recursionerror(e): - raise StackError - raise + except RecursionError: + raise StackError if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret -if sys.version_info < (2, 7, 6): - - def _unpack_from(f, b, o=0): - """Explicit type cast for legacy struct.unpack_from""" - return struct.unpack_from(f, bytes(b), o) - -else: - _unpack_from = struct.unpack_from - _NO_FORMAT_USED = "" _MSGPACK_HEADERS = { 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), @@ -176,14 +127,14 @@ def _unpack_from(f, b, o=0): } -class Unpacker(object): +class Unpacker: """Streaming unpacker. Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -202,17 +153,17 @@ class Unpacker(object): 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). Python 2 is not supported. + 3 - datetime.datetime (UTC). :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) @@ -275,6 +226,7 @@ class Unpacker(object): def __init__( self, file_like=None, + *, read_size=0, use_list=True, raw=False, @@ -359,9 +311,7 @@ def __init__( if object_pairs_hook is not None and not callable(object_pairs_hook): raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError( - "object_pairs_hook and object_hook are mutually " "exclusive" - ) + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") @@ -379,6 +329,7 @@ def feed(self, next_bytes): # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython self._buffer.extend(view) + view.release() def _consume(self): """Gets rid of the used parts of the buffer.""" @@ -453,20 +404,18 @@ def _read_header(self): n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError( - "%s exceeds max_array_len(%s)" % (n, self._max_array_len) - ) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") elif b == 0xC0: obj = None elif b == 0xC2: @@ -477,65 +426,61 @@ def _read_header(self): size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - n = _unpack_from(fmt, self._buffer, self._buff_i)[0] + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] else: n = self._buffer[self._buff_i] self._buff_i += size if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") obj = self._read(n) elif 0xC7 <= b <= 0xC9: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - L, n = _unpack_from(fmt, self._buffer, self._buff_i) + L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") obj = self._read(L) elif 0xCA <= b <= 0xD3: size, fmt = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - obj = _unpack_from(fmt, self._buffer, self._buff_i)[0] + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] else: obj = self._buffer[self._buff_i] self._buff_i += size elif 0xD4 <= b <= 0xD8: size, fmt, typ = _MSGPACK_HEADERS[b] if self._max_ext_len < size: - raise ValueError( - "%s exceeds max_ext_len(%s)" % (size, self._max_ext_len) - ) + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") self._reserve(size + 1) - n, obj = _unpack_from(fmt, self._buffer, self._buff_i) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size + 1 elif 0xD9 <= b <= 0xDB: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) else: n = self._buffer[self._buff_i] self._buff_i += size if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif 0xDC <= b <= 0xDD: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_array_len: - raise ValueError( - "%s exceeds max_array_len(%s)" % (n, self._max_array_len) - ) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif 0xDE <= b <= 0xDF: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj @@ -554,12 +499,12 @@ def _unpack(self, execute=EX_CONSTRUCT): # TODO should we eliminate the recursion? if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) - for i in xrange(n): + for i in range(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) @@ -567,25 +512,22 @@ def _unpack(self, execute=EX_CONSTRUCT): return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) - for _ in xrange(n) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) ) else: ret = {} - for _ in xrange(n): + for _ in range(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (unicode, bytes): - raise ValueError( - "%s is not allowed for map key" % str(type(key)) - ) - if not PY2 and type(key) is str: + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + if isinstance(key, str): key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: @@ -659,7 +601,7 @@ def tell(self): return self._stream_offset -class Packer(object): +class Packer: """ MessagePack Packer @@ -671,7 +613,8 @@ class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -698,38 +641,18 @@ class Packer(object): If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - Example of streaming deserialize from file-like object:: - - unpacker = Unpacker(file_like) - for o in unpacker: - process(o) - - Example of streaming deserialize from socket:: - - unpacker = Unpacker() - while True: - buf = sock.recv(1024**2) - if not buf: - break - unpacker.feed(buf) - for o in unpacker: - process(o) - - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``OutOfData`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. + :param int buf_size: + Internal buffer size. This option is used only for C implementation. """ def __init__( self, + *, default=None, use_single_float=False, autoreset=True, @@ -737,19 +660,17 @@ def __init__( strict_types=False, datetime=False, unicode_errors=None, + buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = StringIO() - if PY2 and datetime: - raise ValueError("datetime is not supported in Python 2") + self._buffer = BytesIO() self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" - if default is not None: - if not callable(default): - raise TypeError("default must be callable") + if default is not None and not callable(default): + raise TypeError("default must be callable") self._default = default def _pack( @@ -774,7 +695,7 @@ def _pack( if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int_types): + if check(obj, int): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -806,7 +727,7 @@ def _pack( raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, unicode): + if check(obj, str): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) if n >= 2**32: @@ -814,7 +735,7 @@ def _pack( self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): - n = len(obj) * obj.itemsize + n = obj.nbytes if n >= 2**32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) @@ -855,13 +776,11 @@ def _pack( if check(obj, list_types): n = len(obj) self._pack_array_header(n) - for i in xrange(n): + for i in range(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs( - len(obj), dict_iteritems(obj), nest_limit - 1 - ) + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: obj = Timestamp.from_datetime(obj) @@ -874,26 +793,26 @@ def _pack( continue if self._datetime and check(obj, _DateTime): - raise ValueError("Cannot serialize %r where tzinfo=None" % (obj,)) + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") - raise TypeError("Cannot serialize %r" % (obj,)) + raise TypeError(f"Cannot serialize {obj!r}") def pack(self, obj): try: self._pack(obj) except: - self._buffer = StringIO() # force reset + self._buffer = BytesIO() # force reset raise if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_array_header(self, n): @@ -902,7 +821,7 @@ def pack_array_header(self, n): self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_header(self, n): @@ -911,7 +830,7 @@ def pack_map_header(self, n): self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_ext_type(self, typecode, data): @@ -963,7 +882,7 @@ def _pack_map_header(self, n): def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for (k, v) in pairs: + for k, v in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) @@ -1000,11 +919,11 @@ def reset(self): This method is useful only when autoreset=False. """ - self._buffer = StringIO() + self._buffer = BytesIO() def getbuffer(self): """Return view of internal buffer.""" - if USING_STRINGBUILDER or PY2: + if _USING_STRINGBUILDER: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index 4f3ce1d9..edf3a3fe 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,15 +21,12 @@ #include "sysdep.h" #include #include +#include #ifdef __cplusplus extern "C" { #endif -#ifdef _MSC_VER -#define inline __inline -#endif - typedef struct msgpack_packer { char *buf; size_t length; @@ -67,53 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" -// return -2 when o is too long -static inline int -msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) -{ -#if PY_MAJOR_VERSION >= 3 - assert(PyUnicode_Check(o)); - - Py_ssize_t len; - const char* buf = PyUnicode_AsUTF8AndSize(o, &len); - if (buf == NULL) - return -1; - - if (len > limit) { - return -2; - } - - int ret = msgpack_pack_raw(pk, len); - if (ret) return ret; - - return msgpack_pack_raw_body(pk, buf, len); -#else - PyObject *bytes; - Py_ssize_t len; - int ret; - - // py2 - bytes = PyUnicode_AsUTF8String(o); - if (bytes == NULL) - return -1; - - len = PyString_GET_SIZE(bytes); - if (len > limit) { - Py_DECREF(bytes); - return -2; - } - - ret = msgpack_pack_raw(pk, len); - if (ret) { - Py_DECREF(bytes); - return -1; - } - ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); - Py_DECREF(bytes); - return ret; -#endif -} - #ifdef __cplusplus } #endif diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 7d479b6d..b8959f02 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,18 +37,6 @@ * Integer */ -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -123,18 +111,6 @@ do { \ } \ } while(0) -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ -} while(0) - #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -264,49 +240,6 @@ do { \ } while(0) -static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) -{ - msgpack_pack_real_uint64(x, d); -} - -static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} - -static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -//#ifdef msgpack_pack_inline_func_cint - static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -372,192 +305,37 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 +#if SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + if (sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); + } else { + msgpack_pack_real_int64(x, d); + } #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LLONG_MAX) -#if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); -} else { msgpack_pack_real_int64(x, d); } -#endif -} - -static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) -{ -#if defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(USHRT_MAX) -#if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(UINT_MAX) -#if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) -{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULONG_MAX) -#if ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULLONG_MAX) -#if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { msgpack_pack_real_uint64(x, d); } -#endif -} - -//#undef msgpack_pack_inline_func_cint -//#endif - /* @@ -810,11 +588,9 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 -#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index ed9c1bc0..70673004 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else -#include /* __BYTE_ORDER */ +#else /* _WIN32 */ +#include /* ntohs, ntohl */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ @@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# elif defined(_byteswap_ulong) || defined(_MSC_VER) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +#if defined(_byteswap_uint64) || defined(_MSC_VER) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 23aa6220..58a2f4f5 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -47,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong((long)d); + PyObject *p = PyLong_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromSize_t((size_t)d); + PyObject *p = PyLong_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } if (!p) return -1; @@ -84,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong(d); + PyObject *p = PyLong_FromLong(d); if (!p) return -1; *o = p; @@ -107,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } *o = p; return 0; diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h new file mode 100644 index 00000000..c14a3c2b --- /dev/null +++ b/msgpack/unpack_container_header.h @@ -0,0 +1,51 @@ +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; +} + diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 8b9fcc19..cce29e7a 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -75,8 +75,7 @@ static inline void unpack_clear(unpack_context *ctx) Py_CLEAR(ctx->stack[0].obj); } -template -static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -386,6 +385,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef construct_cb } +#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -397,68 +397,27 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef again_fixed_trail_if_zero #undef start_container -template -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; +static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(1, ctx, data, len, off); +} +static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(0, ctx, data, len, off); } -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END - -static const execute_fn unpack_construct = &unpack_execute; -static const execute_fn unpack_skip = &unpack_execute; -static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; - -#undef NEXT_CS +#define unpack_container_header read_array_header +#define fixed_offset 0x90 +#define var_offset 0xdc +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +#define unpack_container_header read_map_header +#define fixed_offset 0x80 +#define var_offset 0xde +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml index 195795f0..b1628322 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,55 @@ [build-system] -requires = [ - # Also declared in requirements.txt, if updating here please also update - # there - "Cython~=0.29.30", - "setuptools >= 35.0.2", -] +# 75.3.0 is the latest version supporting Python 3.8 +requires = ["setuptools >= 75.3.0"] build-backend = "setuptools.build_meta" + +[project] +name = "msgpack" +dynamic = ["version"] +# `license = "Apache-2.0"` is preferred. But keep old syntax for Python 3.8 compatibility. +# https://github.com/msgpack/msgpack-python/pull/637 +license = {text="Apache 2.0"} +authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] +description = "MessagePack serializer" +readme = "README.md" +keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Topic :: File Formats", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + +[project.urls] +Homepage = "https://msgpack.org/" +Documentation = "https://msgpack-python.readthedocs.io/" +Repository = "https://github.com/msgpack/msgpack-python/" +Tracker = "https://github.com/msgpack/msgpack-python/issues" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" + +[tool.setuptools] +# Do not install C/C++/Cython source files +include-package-data = false + +[tool.setuptools.dynamic] +version = {attr = "msgpack.__version__"} + +[tool.ruff] +line-length = 100 +target-version = "py38" +lint.select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + #"UP", pyupgrade +] diff --git a/requirements.txt b/requirements.txt index 9f3c1a0d..5d2e20b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1 @@ -# Also declared in pyproject.toml, if updating here please also update there -Cython~=0.29.30 - -# dev only tools. no need to add pyproject -black==22.3.0 +Cython~=3.1.2 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 1cb6ce36..00000000 --- a/setup.cfg +++ /dev/null @@ -1,32 +0,0 @@ -[metadata] -name = msgpack -#version = attr: msgpack.__version__ -version = attr: msgpack.version -license = Apache 2.0 -author = Inada Naoki -author_email = songofacandy@gmail.com -description = MessagePack serializer -long_description = file: README.md -long_description_content_type = text/markdown -url = https://msgpack.org/ - -project_urls = - Documentation = https://msgpack-python.readthedocs.io/ - Source = https://github.com/msgpack/msgpack-python - Tracker = https://github.com/msgpack/msgpack-python/issues - -classifiers = - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Intended Audience :: Developers - License :: OSI Approved :: Apache Software License - -[flake8] -max_line_length = 100 - diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index 9630cda0..4029e9ed --- a/setup.py +++ b/setup.py @@ -1,89 +1,24 @@ #!/usr/bin/env python -# coding: utf-8 -import io import os import sys -from glob import glob -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext -from setuptools.command.sdist import sdist +from setuptools import Extension, setup PYPY = hasattr(sys, "pypy_version_info") -PY2 = sys.version_info[0] == 2 - - -class NoCython(Exception): - pass - - -try: - import Cython.Compiler.Main as cython_compiler - - have_cython = True -except ImportError: - have_cython = False - - -def cythonize(src): - sys.stderr.write("cythonize: %r\n" % (src,)) - cython_compiler.compile([src], cplus=True) - - -def ensure_source(src): - pyx = os.path.splitext(src)[0] + ".pyx" - - if not os.path.exists(src): - if not have_cython: - raise NoCython - cythonize(pyx) - elif ( - os.path.exists(pyx) - and os.stat(src).st_mtime < os.stat(pyx).st_mtime - and have_cython - ): - cythonize(pyx) - return src - - -class BuildExt(build_ext): - def build_extension(self, ext): - try: - ext.sources = list(map(ensure_source, ext.sources)) - except NoCython: - print("WARNING") - print("Cython is required for building extension from checkout.") - print("Install Cython >= 0.16 or install msgpack from PyPI.") - print("Falling back to pure Python implementation.") - return - try: - return build_ext.build_extension(self, ext) - except Exception as e: - print("WARNING: Failed to compile extension modules.") - print("msgpack uses fallback pure python implementation.") - print(e) - - -# Cython is required for sdist -class Sdist(sdist): - def __init__(self, *args, **kwargs): - cythonize("msgpack/_cmsgpack.pyx") - sdist.__init__(self, *args, **kwargs) - libraries = [] macros = [] +ext_modules = [] if sys.platform == "win32": libraries.append("ws2_32") macros = [("__LITTLE_ENDIAN__", "1")] -ext_modules = [] -if not PYPY and not PY2 and not os.environ.get("MSGPACK_PUREPYTHON"): +if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): ext_modules.append( Extension( "msgpack._cmsgpack", - sources=["msgpack/_cmsgpack.cpp"], + sources=["msgpack/_cmsgpack.c"], libraries=libraries, include_dirs=["."], define_macros=macros, @@ -91,9 +26,7 @@ def __init__(self, *args, **kwargs): ) del libraries, macros - setup( - cmdclass={"build_ext": BuildExt, "sdist": Sdist}, ext_modules=ext_modules, packages=["msgpack"], ) diff --git a/test/test_buffer.py b/test/test_buffer.py index 62507cf4..2c5a14c5 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,12 +1,8 @@ -#!/usr/bin/env python -# coding: utf-8 +from pytest import raises -import sys -import pytest -from msgpack import packb, unpackb +from msgpack import Packer, packb, unpackb -@pytest.mark.skipif(sys.version_info[0] == 2, reason="Python 2 is not supported") def test_unpack_buffer(): from array import array @@ -31,3 +27,23 @@ def test_unpack_memoryview(): assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_packer_getbuffer(): + packer = Packer(autoreset=False) + packer.pack_array_header(2) + packer.pack(42) + packer.pack("hello") + buffer = packer.getbuffer() + assert isinstance(buffer, memoryview) + assert bytes(buffer) == b"\x92*\xa5hello" + + if Packer.__module__ == "msgpack._cmsgpack": # only for Cython + # cython Packer supports buffer protocol directly + assert bytes(packer) == b"\x92*\xa5hello" + + with raises(BufferError): + packer.pack(42) + buffer.release() + packer.pack(42) + assert bytes(packer) == b"\x92*\xa5hello*" diff --git a/test/test_case.py b/test/test_case.py index a0a3c5ad..c4c615e3 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import packb, unpackb def check(length, obj, use_bin_type=True): v = packb(obj, use_bin_type=use_bin_type) - assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) + assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" assert unpackb(v, use_list=0, raw=not use_bin_type) == obj @@ -120,11 +119,11 @@ def test_match(): ), ({}, b"\x80"), ( - dict([(x, x) for x in range(15)]), + {x: x for x in range(15)}, b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", ), ( - dict([(x, x) for x in range(16)]), + {x: x for x in range(16)}, b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", ), ] @@ -134,4 +133,4 @@ def test_match(): def test_unicode(): - assert unpackb(packb(u"foobar"), use_list=1) == u"foobar" + assert unpackb(packb("foobar"), use_list=1) == "foobar" diff --git a/test/test_except.py b/test/test_except.py index 5544f2bc..b77ac800 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 + +import datetime from pytest import raises -from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData -import datetime +from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb class DummyException(Exception): @@ -53,7 +53,7 @@ def test_invalidvalue(): def test_strict_map_key(): - valid = {u"unicode": 1, b"bytes": 2} + valid = {"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) assert valid == unpackb(packed, raw=False, strict_map_key=True) diff --git a/test/test_extension.py b/test/test_extension.py index 6b365751..aaf0fd92 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,5 +1,5 @@ -from __future__ import print_function import array + import msgpack from msgpack import ExtType @@ -17,9 +17,7 @@ def p(s): assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 - assert ( - p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 - ) # ext 32 + assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 def test_unpack_ext_type(): @@ -49,16 +47,13 @@ def default(obj): except AttributeError: data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknown type object %r" % (obj,)) + raise TypeError(f"Unknown type object {obj!r}") def ext_hook(code, data): print("ext_hook called", code, data) assert code == 123 obj = array.array("d") - try: - obj.frombytes(data) - except AttributeError: # PY2 - obj.fromstring(data) + obj.frombytes(data) return obj obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] @@ -67,20 +62,14 @@ def ext_hook(code, data): assert obj == obj2 -import sys - -if sys.version > "3": - long = int - - def test_overriding_hooks(): def default(obj): - if isinstance(obj, long): + if isinstance(obj, int): return {"__type__": "long", "__data__": str(obj)} else: return obj - obj = {"testval": long(1823746192837461928374619)} + obj = {"testval": 1823746192837461928374619} refobj = {"testval": default(obj["testval"])} refout = msgpack.packb(refobj) assert isinstance(refout, (str, bytes)) diff --git a/test/test_format.py b/test/test_format.py index fbbc3f98..c06c87dc 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import unpackb @@ -25,9 +24,7 @@ def testFixRaw(): def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}} - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) def testUnsignedInt(): diff --git a/test/test_limits.py b/test/test_limits.py index 4314c2c0..9b92b4d9 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,17 +1,15 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals import pytest from msgpack import ( - packb, - unpackb, - Packer, - Unpacker, ExtType, + Packer, PackOverflowError, PackValueError, + Unpacker, UnpackValueError, + packb, + unpackb, ) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 84941db8..0a2a6f53 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,15 +1,8 @@ #!/usr/bin/env python -# coding: utf-8 -import pytest from array import array -from msgpack import packb, unpackb -import sys - -pytestmark = pytest.mark.skipif( - sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol" -) +from msgpack import packb, unpackb def make_array(f, data): @@ -96,3 +89,11 @@ def test_bin32_from_byte(): def test_bin32_from_float(): _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_multidim_memoryview(): + # See https://github.com/msgpack/msgpack-python/issues/526 + view = memoryview(b"\00" * 6) + data = view.cast(view.format, (3, 2)) + packed = packb(data) + assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_newspec.py b/test/test_newspec.py index b7da486e..9e2f9be5 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,6 +1,4 @@ -# coding: utf-8 - -from msgpack import packb, unpackb, ExtType +from msgpack import ExtType, packb, unpackb def test_str8(): diff --git a/test/test_obj.py b/test/test_obj.py index 86c557cd..23be06d5 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 from pytest import raises + from msgpack import packb, unpackb @@ -34,7 +34,7 @@ def test_decode_pairs_hook(): prod_sum = 1 * 2 + 3 * 4 unpacked = unpackb( packed, - object_pairs_hook=lambda l: sum(k * v for k, v in l), + object_pairs_hook=lambda lst: sum(k * v for k, v in lst), use_list=1, strict_map_key=False, ) @@ -49,7 +49,7 @@ def test_only_one_obj_hook(): def test_bad_hook(): with raises(TypeError): packed = packb([3, 1 + 2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) + unpackb(packed, use_list=1) def _arr_to_str(arr): diff --git a/test/test_pack.py b/test/test_pack.py index a51d84c9..374d1549 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,16 +1,12 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals +import struct from collections import OrderedDict from io import BytesIO -import struct -import sys import pytest -from pytest import raises, xfail -from msgpack import packb, unpackb, Unpacker, Packer, pack +from msgpack import Packer, Unpacker, packb, unpackb def check(data, use_list=False): @@ -80,13 +76,8 @@ def testPackByteArrays(): check(td) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreUnicodeErrors(): - re = unpackb( - packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" - ) + re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") assert re == "abcdef" @@ -96,12 +87,9 @@ def testStrictUnicodeUnpack(): unpackb(packed, raw=False, use_list=1) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreErrorsPack(): re = unpackb( - packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), + packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), raw=False, use_list=1, ) @@ -114,8 +102,8 @@ def testDecodeBinary(): def testPackFloat(): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(str(">f"), 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(str(">d"), 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) def testArraySize(sizes=[0, 5, 50, 1000]): @@ -160,7 +148,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): bio.seek(0) unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: - assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + assert unpacker.unpack() == {i: i * 2 for i in range(size)} def test_odict(): diff --git a/test/test_read_size.py b/test/test_read_size.py index 33a7e7dd..0f6c1b50 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,5 +1,6 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from msgpack import packb, Unpacker, OutOfData + +from msgpack import OutOfData, Unpacker, packb UnexpectedTypeException = ValueError diff --git a/test/test_seq.py b/test/test_seq.py index 0d5d8065..8dee4620 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,9 +1,8 @@ -#!/usr/bin/env python -# coding: utf-8 - +# ruff: noqa: E501 +# ignore line length limit for long comments import io -import msgpack +import msgpack binarydata = bytes(bytearray(range(256))) @@ -35,7 +34,7 @@ def test_exceeding_unpacker_read_size(): read_count = 0 for idx, o in enumerate(unpacker): - assert type(o) == bytes + assert isinstance(o, bytes) assert o == gen_binary_data(idx) read_count += 1 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index c091076b..0f895d7d 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,11 +1,11 @@ #!/usr/bin/env python -# coding: utf-8 import io -from msgpack import Unpacker, BufferFull -from msgpack import pack, packb -from msgpack.exceptions import OutOfData + from pytest import raises +from msgpack import BufferFull, Unpacker, pack, packb +from msgpack.exceptions import OutOfData + def test_partialdata(): unpacker = Unpacker() diff --git a/test/test_stricttype.py b/test/test_stricttype.py index fe9ec6cd..72776a2c 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,7 +1,6 @@ -# coding: utf-8 - from collections import namedtuple -from msgpack import packb, unpackb, ExtType + +from msgpack import ExtType, packb, unpackb def test_namedtuple(): @@ -10,7 +9,7 @@ def test_namedtuple(): def default(o): if isinstance(o, T): return dict(o._asdict()) - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, raw=False) @@ -23,7 +22,7 @@ def test_tuple(): def default(o): if isinstance(o, tuple): return {"__type__": "tuple", "value": list(o)} - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") def convert(o): if o.get("__type__") == "tuple": @@ -44,9 +43,7 @@ def test_tuple_ext(): def default(o): if isinstance(o, tuple): # Convert to list and pack - payload = packb( - list(o), strict_types=True, use_bin_type=True, default=default - ) + payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) raise TypeError(repr(o)) @@ -54,7 +51,7 @@ def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError("Unknown Ext code {}".format(code)) + raise ValueError(f"Unknown Ext code {code}") data = packb(t, strict_types=True, use_bin_type=True, default=default) expected = unpackb(data, raw=False, ext_hook=convert) diff --git a/test/test_subtype.py b/test/test_subtype.py index d91d4553..a911578c 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,9 +1,9 @@ #!/usr/bin/env python -# coding: utf-8 -from msgpack import packb, unpackb from collections import namedtuple +from msgpack import packb + class MyList(list): pass diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 253228e7..831141a1 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -1,12 +1,10 @@ -import pytest -import sys import datetime + +import pytest + import msgpack from msgpack.ext import Timestamp -if sys.version_info[0] > 2: - from msgpack.ext import _utc - def test_timestamp(): # timestamp32 @@ -85,33 +83,48 @@ def test_timestamp_to(): assert t.to_unix_nano() == 42000014000 -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_timestamp_datetime(): t = Timestamp(42, 14) - assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + utc = datetime.timezone.utc + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) + ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) + + assert ( + Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 + ) + + ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) + ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) + assert ( + Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 + ) + + assert Timestamp.from_datetime(ts).to_datetime() == ts -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_unpack_datetime(): t = Timestamp(42, 14) + utc = datetime.timezone.utc packed = msgpack.packb(t) unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_unpack_before_epoch(): - t_in = datetime.datetime(1960, 1, 1, tzinfo=_utc) + utc = datetime.timezone.utc + t_in = datetime.datetime(1960, 1, 1, tzinfo=utc) packed = msgpack.packb(t_in, datetime=True) unpacked = msgpack.unpackb(packed, timestamp=3) assert unpacked == t_in -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime(): t = Timestamp(42, 14000) dt = t.to_datetime() - assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + utc = datetime.timezone.utc + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) packed2 = msgpack.packb(t) @@ -131,10 +144,10 @@ def test_pack_datetime(): assert msgpack.unpackb(packed) is None -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_issue451(): # https://github.com/msgpack/msgpack-python/issues/451 - dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=_utc) + utc = datetime.timezone.utc + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) assert packed == b"\xd6\xff\xf4\x86eL" @@ -142,7 +155,6 @@ def test_issue451(): assert dt == unpacked -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime_without_tzinfo(): dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) with pytest.raises(ValueError, match="where tzinfo=None"): @@ -152,7 +164,8 @@ def test_pack_datetime_without_tzinfo(): packed = msgpack.packb(dt, datetime=True, default=lambda x: None) assert packed == msgpack.packb(None) - dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + utc = datetime.timezone.utc + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) unpacked = msgpack.unpackb(packed, timestamp=3) assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py index aa4c01f8..b17c3c53 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,12 +1,9 @@ -from io import BytesIO import sys -from msgpack import Unpacker, packb, OutOfData, ExtType -from pytest import raises, mark +from io import BytesIO + +from pytest import mark, raises -try: - from itertools import izip as zip -except ImportError: - pass +from msgpack import ExtType, OutOfData, Unpacker, packb def test_unpack_array_header_from_file(): @@ -52,7 +49,7 @@ def hook(x): def test_unpacker_ext_hook(): class MyUnpacker(Unpacker): def __init__(self): - super(MyUnpacker, self).__init__(ext_hook=self._hook, raw=False) + super().__init__(ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: @@ -70,7 +67,7 @@ def _hook(self, code, data): def test_unpacker_tell(): - objects = 1, 2, u"abc", u"def", u"ghi" + objects = 1, 2, "abc", "def", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" positions = 1, 2, 6, 10, 14 unpacker = Unpacker(BytesIO(packed)) @@ -80,7 +77,7 @@ def test_unpacker_tell(): def test_unpacker_tell_read_bytes(): - objects = 1, u"abc", u"ghi" + objects = 1, "abc", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" raw_data = b"\x02", b"\xa3def", b"" lenghts = 1, 4, 999 diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 29c256d1..00000000 --- a/tox.ini +++ /dev/null @@ -1,42 +0,0 @@ -[tox] -envlist = - py27-pure, - {py35,py36,py37,py38}-{c,pure}, - {pypy,pypy3}-pure, - py27-x86, - py34-x86, -isolated_build = true - -[testenv] -deps= - pytest - -changedir=test -commands= - c,x86: python -c 'from msgpack import _cmsgpack' - c,x86: py.test - pure: py.test -setenv= - pure: MSGPACK_PUREPYTHON=x - -[testenv:py27-x86] -basepython=python2.7-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test - -[testenv:py34-x86] -basepython=python3.4-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test