diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..b696b926 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,33 @@ +name: docs + +on: ["push", "pull_request"] + +jobs: + docs: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: "pip" + cache-dependency-path: | + requirements.txt + docs/requirements.txt + + - name: Build + run: | + pip install -r requirements.txt + make cython + + - name: Sphinx Documentation Generator + run: | + pip install -r docs/requirements.txt + make docs diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..198cf7b5 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +name: lint + +on: ["push", "pull_request"] + +jobs: + lint: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: ruff check + run: | + pipx run ruff check --diff msgpack/ test/ setup.py + + - name: ruff format + run: | + pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..60894797 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Run tests +on: + push: + branches: [main] + pull_request: + create: + +jobs: + test: + strategy: + matrix: + os: ["ubuntu-latest", "windows-latest", "macos-latest"] + py: ["3.14-dev", "3.13", "3.12", "3.11", "3.10", "3.9"] + + runs-on: ${{ matrix.os }} + name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.py }} + allow-prereleases: true + cache: "pip" + + - name: Prepare + shell: bash + run: | + pip install -U pip + pip install -r requirements.txt pytest + + - name: Build + shell: bash + run: | + make cython + pip install . + + - name: Test (C extension) + shell: bash + run: | + pytest -v test + + - name: Test (pure Python fallback) + shell: bash + run: | + MSGPACK_PUREPYTHON=1 pytest -v test + + - name: build packages + shell: bash + run: | + pip install build + python -m build + + - name: upload packages + uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }}-${{ matrix.py }} + path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 00000000..12600363 --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,88 @@ +name: Build sdist and Wheels +on: + push: + branches: [main] + release: + types: + - published + workflow_dispatch: + +jobs: + build_wheels: + strategy: + matrix: + # macos-13 is for intel + os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "macos-13", "macos-latest"] + runs-on: ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + cache: "pip" + - name: Cythonize + shell: bash + run: | + pip install -r requirements.txt + make cython + + - name: Build + uses: pypa/cibuildwheel@v3.1.1 + env: + CIBW_TEST_REQUIRES: "pytest" + CIBW_TEST_COMMAND: "pytest {package}/test" + CIBW_SKIP: "pp* cp38-*" + + - name: Build sdist + if: runner.os == 'Linux' && runner.arch == 'X64' + run: | + pip install build + python -m build -s -o wheelhouse + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }} + path: wheelhouse + + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + #with: + # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 800f1c22..341be631 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,13 @@ MANIFEST build/* dist/* .tox +.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py +msgpack/*.c msgpack/*.cpp *.egg-info /venv diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..88d87182 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file for Sphinx projects. +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + apt_packages: + - build-essential + jobs: + pre_install: + - pip install -r requirements.txt + - make cython + +python: + install: + - method: pip + path: . + - requirements: docs/requirements.txt + +sphinx: + configuration: docs/conf.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7aac6648..00000000 --- a/.travis.yml +++ /dev/null @@ -1,45 +0,0 @@ -sudo: false -language: python -cache: pip - -python: - - "2.7" - - "3.5" - - "3.6" - - "3.7-dev" - -matrix: - include: - - sudo: required - language: python - services: - - docker - env: - - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 - install: - - pip install -U pip - - pip install cython - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx - - docker pull $DOCKER_IMAGE - script: - - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - python: "pypy" - install: - - pip install -e . - script: - - py.test -v test - - -install: - - pip install -U pip - - pip install cython - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx - - pip install -e . - -script: - - python -c 'import sys; print(hex(sys.maxsize))' - - python -c 'from msgpack import _packer, _unpacker' - - py.test -v test - - MSGPACK_PUREPYTHON=x py.test -v test - -# vim: sw=2 ts=2 diff --git a/ChangeLog.rst b/ChangeLog.rst index b6158c33..418c444c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,210 @@ +1.1.1 +===== + +Release Date: 2025-06-13 + +* No change from 1.1.1rc1. + +1.1.1rc1 +======== + +Release Date: 2025-06-06 + +* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. + +1.1.0 +===== + +Release Date: 2024-09-10 + +* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with + future Cython. (#620) + +1.1.0rc2 +======== + +Release Date: 2024-08-19 + +* Update Cython to 3.0.11 for better Python 3.13 support. +* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + +1.1.0rc1 +======== + +Release Date: 2024-05-07 + +* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. +* Stop using C++ mode in Cython to reduce compile error on some compilers. +* ``Packer()`` has ``buf_size`` option to specify initial size of + internal buffer to reduce reallocation. +* The default internal buffer size of ``Packer()`` is reduced from + 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` + if you are packing large data. +* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become + more accurate by avoiding floating point calculations. (#591) +* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. +* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only + arguments to improve compatibility with the Cython implementation. + +1.0.8 +===== + +Release Date: 2024-03-01 + +* Update Cython to 3.0.8. This fixes memory leak when iterating + ``Unpacker`` object on Python 3.12. +* Do not include C/Cython files in binary wheels. + + +1.0.7 +===== + +Release Date: 2023-09-28 + +* Fix build error of extension module on Windows. (#567) +* ``setup.py`` doesn't skip build error of extension module. (#568) + + +1.0.6 +===== + +Release Date: 2023-09-21 + +.. note:: + v1.0.6 Wheels for Windows don't contain extension module. + Please upgrade to v1.0.7 or newer. + +* Add Python 3.12 wheels (#517) +* Remove Python 2.7, 3.6, and 3.7 support + + +1.0.5 +===== + +Release Date: 2023-03-08 + +* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) +* Add Python 3.11 wheels (#517) +* fallback: Fix packing multidimensional memoryview (#527) + +1.0.4 +===== + +Release Date: 2022-06-03 + +* Support Python 3.11 (beta). +* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 +* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 +* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 + +1.0.3 +===== + +Release Date: 2021-11-24 JST + +* Fix Docstring (#459) +* Fix error formatting (#463) +* Improve error message about strict_map_key (#485) + +1.0.2 +===== + +* Fix year 2038 problem regression in 1.0.1. (#451) + +1.0.1 +===== + +* Add Python 3.9 and linux/arm64 wheels. (#439) +* Fixed Unpacker.tell() after read_bytes() (#426) +* Fixed unpacking datetime before epoch on Windows (#433) +* Fixed fallback Packer didn't check DateTime.tzinfo (#434) + +1.0.0 +===== + +Release Date: 2020-02-17 + +* Remove Python 2 support from the ``msgpack/_cmsgpack``. + ``msgpack/fallback`` still supports Python 2. +* Remove ``encoding`` option from the Packer and Unpacker. +* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB. +* Unpacker: ``strict_map_key`` is True by default now. +* Unpacker: String map keys are interned. +* Drop old buffer protocol support. +* Support Timestamp type. +* Support serializing and decerializing ``datetime`` object + with tzinfo. +* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. (#352) + + +0.6.2 +===== + +Release Date: 2019-09-20 + +* Support Python 3.8. +* Update Cython to 0.29.13 for support Python 3.8. +* Some small optimizations. + + +0.6.1 +====== + +Release Date: 2019-01-25 + +This release is for mitigating pain caused by v0.6.0 reduced max input limits +for security reason. + +* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, + instead of static default sizes. + +* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``, + instead of static default sizes. + +* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated. + Since this is minor release, it's document only deprecation. + + +0.6.0 +====== + +Release Date: 2018-11-30 + +This release contains some backward incompatible changes for security reason (DoS). + +Important changes +----------------- + +* unpacker: Default value of input limits are smaller than before to avoid DoS attack. + If you need to handle large data, you need to specify limits manually. (#319) + +* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into + ``UnpackValueError``. If you want to catch all exception during unpack, you need + to use ``try ... except Exception`` with minimum try code block. (#323, #233) + +* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch + normal ``ValueError`` and ``OverflowError``. (#323, #233) + +* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str + (unicode in Python 2) are allowed for map keys. It is recommended to avoid + hashdos. Default value of this option is False for backward compatibility reason. + But it will be changed True in 1.0. (#296, #334) + +Other changes +------------- + +* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of + ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328) + +* Add ``Unpacker.getbuffer()`` method. (#320) + +* unpacker: ``msgpack.StackError`` is raised when input data contains too + nested data. (#331) + +* unpacker: ``msgpack.FormatError`` is raised when input data is not valid + msgpack format. (#331) + + 0.5.6 ====== diff --git a/DEVELOP.md b/DEVELOP.md new file mode 100644 index 00000000..27adf8c0 --- /dev/null +++ b/DEVELOP.md @@ -0,0 +1,17 @@ +# Developer's note + +### Build + +``` +$ make cython +``` + + +### Test + +MessagePack uses `pytest` for testing. +Run test with following command: + +``` +$ make test +``` diff --git a/MANIFEST.in b/MANIFEST.in index e1912cac..57d84a4c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING -include README.rst +include README.md recursive-include msgpack *.h *.c *.pyx *.cpp recursive-include test *.py diff --git a/Makefile b/Makefile index 124f2437..51f3e0ef 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,32 @@ +PYTHON_SOURCES = msgpack test setup.py + .PHONY: all all: cython python setup.py build_ext -i -f +.PHONY: format +format: + ruff format $(PYTHON_SOURCES) + +.PHONY: lint +lint: + ruff check $(PYTHON_SOURCES) + +.PHONY: doc +doc: + cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html + +.PHONY: pyupgrade +pyupgrade: + @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; + .PHONY: cython cython: - cython --cplus msgpack/*.pyx + cython msgpack/_cmsgpack.pyx .PHONY: test -test: +test: cython + pip install -e . pytest -v test MSGPACK_PUREPYTHON=1 pytest -v test @@ -18,17 +37,23 @@ serve-doc: all .PHONY: clean clean: rm -rf build - rm -f msgpack/_packer.cpp - rm -f msgpack/_unpacker.cpp + rm -f msgpack/_cmsgpack.cpp + rm -f msgpack/_cmsgpack.*.so + rm -f msgpack/_cmsgpack.*.pyd rm -rf msgpack/__pycache__ rm -rf test/__pycache__ .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux1_i686 - docker pull quay.io/pypa/manylinux1_x86_64 + docker pull quay.io/pypa/manylinux2014_i686 + docker pull quay.io/pypa/manylinux2014_x86_64 + docker pull quay.io/pypa/manylinux2014_aarch64 .PHONY: linux-wheel linux-wheel: - docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh - docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh + +.PHONY: linux-arm64-wheel +linux-arm64-wheel: + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh diff --git a/README.md b/README.md new file mode 100644 index 00000000..61a03c60 --- /dev/null +++ b/README.md @@ -0,0 +1,242 @@ +# MessagePack for Python + +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) +[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) + +## What's this + +[MessagePack](https://msgpack.org/) is an efficient binary serialization format. +It lets you exchange data among multiple languages like JSON. +But it's faster and smaller. +This package provides CPython bindings for reading and writing MessagePack data. + +## Install + +``` +$ pip install msgpack +``` + +### Pure Python implementation + +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. + +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. + + +### Windows + +When you can't use a binary distribution, you need to install Visual Studio +or Windows SDK on Windows. +Without extension, using pure Python implementation on CPython runs slowly. + + +## How to use + +### One-shot pack & unpack + +Use `packb` for packing and `unpackb` for unpacking. +msgpack provides `dumps` and `loads` as an alias for compatibility with +`json` and `pickle`. + +`pack` and `dump` packs to a file-like object. +`unpack` and `load` unpacks from a file-like object. + +```pycon +>>> import msgpack +>>> msgpack.packb([1, 2, 3]) +'\x93\x01\x02\x03' +>>> msgpack.unpackb(_) +[1, 2, 3] +``` + +Read the docstring for options. + + +### Streaming unpacking + +`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one +stream (or from bytes provided through its `feed` method). + +```py +import msgpack +from io import BytesIO + +buf = BytesIO() +for i in range(100): + buf.write(msgpack.packb(i)) + +buf.seek(0) + +unpacker = msgpack.Unpacker(buf) +for unpacked in unpacker: + print(unpacked) +``` + + +### Packing/unpacking of custom data type + +It is also possible to pack/unpack custom data types. Here is an example for +`datetime.datetime`. + +```py +import datetime +import msgpack + +useful_dict = { + "id": 1, + "created": datetime.datetime.now(), +} + +def decode_datetime(obj): + if '__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj + +def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj + + +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) +``` + +`Unpacker`'s `object_hook` callback receives a dict; the +`object_pairs_hook` callback may instead be used to receive a list of +key-value pairs. + +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + + +### Extended types + +It is also possible to pack/unpack custom data types using the **ext** type. + +```pycon +>>> import msgpack +>>> import array +>>> def default(obj): +... if isinstance(obj, array.array) and obj.typecode == 'd': +... return msgpack.ExtType(42, obj.tostring()) +... raise TypeError("Unknown type: %r" % (obj,)) +... +>>> def ext_hook(code, data): +... if code == 42: +... a = array.array('d') +... a.fromstring(data) +... return a +... return ExtType(code, data) +... +>>> data = array.array('d', [1.2, 3.4]) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> data == unpacked +True +``` + + +### Advanced unpacking control + +As an alternative to iteration, `Unpacker` objects provide `unpack`, +`skip`, `read_array_header` and `read_map_header` methods. The former two +read an entire message from the stream, respectively de-serialising and returning +the result, or ignoring it. The latter two methods return the number of elements +in the upcoming container, so that each element in an array, or key-value pair +in a map, can be unpacked or skipped individually. + + +## Notes + +### string and binary type in old msgpack spec + +Early versions of msgpack didn't distinguish string and binary types. +The type for representing both string and binary types was named **raw**. + +You can pack into and unpack from this old spec using `use_bin_type=False` +and `raw=True` options. + +```pycon +>>> import msgpack +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) +[b'spam', b'eggs'] +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) +[b'spam', 'eggs'] +``` + +### ext type + +To use the **ext** type, pass `msgpack.ExtType` object to packer. + +```pycon +>>> import msgpack +>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) +>>> msgpack.unpackb(packed) +ExtType(code=42, data='xyzzy') +``` + +You can use it with `default` and `ext_hook`. See below. + + +### Security + +To unpacking data received from unreliable source, msgpack provides +two security options. + +`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. +It is used to limit the preallocated list size too. + +`strict_map_key` (default: `True`) limits the type of map keys to bytes and str. +While msgpack spec doesn't limit the types of the map keys, +there is a risk of the hashdos. +If you need to support other types for map keys, use `strict_map_key=False`. + + +### Performance tips + +CPython's GC starts when growing allocated object. +This means unpacking may cause useless GC. +You can use `gc.disable()` when unpacking large message. + +List is the default sequence type of Python. +But tuple is lighter than list. +You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### msgpack 1.0 + +* Python 2 support + + * The extension module does not support Python 2 anymore. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action (setup-python) no longer support Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in bin type in msgpack. + * The `encoding` option is removed. UTF-8 is used always. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * Default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attack. + You need to pass `max_buffer_size=0` if you have large but safe data. + * Default value of `strict_map_key` is changed to True to avoid hashdos. + You need to pass `strict_map_key=False` if you have data which contain map keys + which type is not bytes or str. diff --git a/README.rst b/README.rst deleted file mode 100644 index 8925a65c..00000000 --- a/README.rst +++ /dev/null @@ -1,336 +0,0 @@ -====================== -MessagePack for Python -====================== - -.. image:: https://travis-ci.org/msgpack/msgpack-python.svg?branch=master - :target: https://travis-ci.org/msgpack/msgpack-python - :alt: Build Status - -.. image:: https://readthedocs.org/projects/msgpack-python/badge/?version=latest - :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - - -What's this ------------ - -`MessagePack `_ is an efficient binary serialization format. -It lets you exchange data among multiple languages like JSON. -But it's faster and smaller. -This package provides CPython bindings for reading and writing MessagePack data. - - -Very important notes for existing users ---------------------------------------- - -PyPI package name -^^^^^^^^^^^^^^^^^ - -TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. -Do `pip uninstall msgpack-python; pip install msgpack` instead. - -Package name on PyPI was changed to msgpack from 0.5. -I upload transitional package (msgpack-python 0.5 which depending on msgpack) -for smooth transition from msgpack-python to msgpack. - -Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-python`, -msgpack is removed and `import msgpack` fail. - - -Deprecating encoding option -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -encoding and unicode_errors options are deprecated. - -In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended. - -For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes`` -object into msgpack raw type. - -In case of unpacker, there is new ``raw`` option. It is ``True`` by default -for backward compatibility, but it is changed to ``False`` in near future. -You can use ``raw=False`` instead of ``encoding='utf-8'``. - -Planned backward incompatible changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When msgpack 1.0, I planning these breaking changes: - -* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option. -* packer: Change default of ``use_bin_type`` option from False to True. -* unpacker: Change default of ``raw`` option from True to False. -* unpacker: Reduce all ``max_xxx_len`` options for typical usage. -* unpacker: Remove ``write_bytes`` option from all methods. - -To avoid these breaking changes breaks your application, please: - -* Don't use deprecated options. -* Pass ``use_bin_type`` and ``raw`` options explicitly. -* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too. - - -Install -------- - -:: - - $ pip install msgpack - -PyPy -^^^^ - -msgpack provides a pure Python implementation. PyPy can use this. - -Windows -^^^^^^^ - -When you can't use a binary distribution, you need to install Visual Studio -or Windows SDK on Windows. -Without extension, using pure Python implementation on CPython runs slowly. - -For Python 2.7, `Microsoft Visual C++ Compiler for Python 2.7 `_ -is recommended solution. - -For Python 3.5, `Microsoft Visual Studio 2015 `_ -Community Edition or Express Edition can be used to build extension module. - - -How to use ----------- - -One-shot pack & unpack -^^^^^^^^^^^^^^^^^^^^^^ - -Use ``packb`` for packing and ``unpackb`` for unpacking. -msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with -``json`` and ``pickle``. - -``pack`` and ``dump`` packs to a file-like object. -``unpack`` and ``load`` unpacks from a file-like object. - -.. code-block:: pycon - - >>> import msgpack - >>> msgpack.packb([1, 2, 3], use_bin_type=True) - '\x93\x01\x02\x03' - >>> msgpack.unpackb(_, raw=False) - [1, 2, 3] - -``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: - -.. code-block:: pycon - - >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) - (1, 2, 3) - -You should always specify the ``use_list`` keyword argument for backward compatibility. -See performance issues relating to `use_list option`_ below. - -Read the docstring for other options. - - -Streaming unpacking -^^^^^^^^^^^^^^^^^^^ - -``Unpacker`` is a "streaming unpacker". It unpacks multiple objects from one -stream (or from bytes provided through its ``feed`` method). - -.. code-block:: python - - import msgpack - from io import BytesIO - - buf = BytesIO() - for i in range(100): - buf.write(msgpack.packb(range(i), use_bin_type=True)) - - buf.seek(0) - - unpacker = msgpack.Unpacker(buf, raw=False) - for unpacked in unpacker: - print(unpacked) - - -Packing/unpacking of custom data type -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It is also possible to pack/unpack custom data types. Here is an example for -``datetime.datetime``. - -.. code-block:: python - - import datetime - import msgpack - - useful_dict = { - "id": 1, - "created": datetime.datetime.now(), - } - - def decode_datetime(obj): - if b'__datetime__' in obj: - obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") - return obj - - def encode_datetime(obj): - if isinstance(obj, datetime.datetime): - return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} - return obj - - - packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) - this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) - -``Unpacker``'s ``object_hook`` callback receives a dict; the -``object_pairs_hook`` callback may instead be used to receive a list of -key-value pairs. - - -Extended types -^^^^^^^^^^^^^^ - -It is also possible to pack/unpack custom data types using the **ext** type. - -.. code-block:: pycon - - >>> import msgpack - >>> import array - >>> def default(obj): - ... if isinstance(obj, array.array) and obj.typecode == 'd': - ... return msgpack.ExtType(42, obj.tostring()) - ... raise TypeError("Unknown type: %r" % (obj,)) - ... - >>> def ext_hook(code, data): - ... if code == 42: - ... a = array.array('d') - ... a.fromstring(data) - ... return a - ... return ExtType(code, data) - ... - >>> data = array.array('d', [1.2, 3.4]) - >>> packed = msgpack.packb(data, default=default, use_bin_type=True) - >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) - >>> data == unpacked - True - - -Advanced unpacking control -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As an alternative to iteration, ``Unpacker`` objects provide ``unpack``, -``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two -read an entire message from the stream, respectively de-serialising and returning -the result, or ignoring it. The latter two methods return the number of elements -in the upcoming container, so that each element in an array, or key-value pair -in a map, can be unpacked or skipped individually. - -Each of these methods may optionally write the packed data it reads to a -callback function: - -.. code-block:: python - - from io import BytesIO - - def distribute(unpacker, get_worker): - nelems = unpacker.read_map_header() - for i in range(nelems): - # Select a worker for the given key - key = unpacker.unpack() - worker = get_worker(key) - - # Send the value as a packed message to worker - bytestream = BytesIO() - unpacker.skip(bytestream.write) - worker.send(bytestream.getvalue()) - - -Notes ------ - -string and binary type -^^^^^^^^^^^^^^^^^^^^^^ - -Early versions of msgpack didn't distinguish string and binary types (like Python 1). -The type for representing both string and binary types was named **raw**. - -For backward compatibility reasons, msgpack-python will still default all -strings to byte strings, unless you specify the ``use_bin_type=True`` option in -the packer. If you do so, it will use a non-standard type called **bin** to -serialize byte arrays, and **raw** becomes to mean **str**. If you want to -distinguish **bin** and **raw** in the unpacker, specify ``raw=False``. - -Note that Python 2 defaults to byte-arrays over Unicode strings: - -.. code-block:: pycon - - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) - ['spam', 'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) - ['spam', u'eggs'] - -This is the same code in Python 3 (same behaviour, but Python 3 has a -different default): - -.. code-block:: pycon - - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) - [b'spam', b'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) - [b'spam', 'eggs'] - - -ext type -^^^^^^^^ - -To use the **ext** type, pass ``msgpack.ExtType`` object to packer. - -.. code-block:: pycon - - >>> import msgpack - >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) - >>> msgpack.unpackb(packed) - ExtType(code=42, data='xyzzy') - -You can use it with ``default`` and ``ext_hook``. See below. - - -Note about performance ----------------------- - -GC -^^ - -CPython's GC starts when growing allocated object. -This means unpacking may cause useless GC. -You can use ``gc.disable()`` when unpacking large message. - -use_list option -^^^^^^^^^^^^^^^ - -List is the default sequence type of Python. -But tuple is lighter than list. -You can use ``use_list=False`` while unpacking when performance is important. - -Python's dict can't use list as key and MessagePack allows array for key of mapping. -``use_list=False`` allows unpacking such message. -Another way to unpacking such object is using ``object_pairs_hook``. - - -Development ------------ - -Test -^^^^ - -MessagePack uses `pytest` for testing. -Run test with following command: - - $ make test - - -.. - vim: filetype=rst diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..75f0c541 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +## Security contact information + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 72b334a0..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,45 +0,0 @@ -environment: - matrix: - # For Python versions available on Appveyor, see - # http://www.appveyor.com/docs/installed-software#python - - PYTHON: "C:\\Python36" - -install: - # We need wheel installed to build wheels - - "%PYTHON%\\python.exe -m pip install -U cython" - - "%PYTHON%\\Scripts\\cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx" - -build: off - -test_script: - # Put your test command here. - # Note that you must use the environment variable %PYTHON% to refer to - # the interpreter you're using - Appveyor does not do anything special - # to put the Python version you want to use on PATH. - - set PYTHON="C:\\Python27" - - ci\\runtests.bat - - set PYTHON="C:\\Python27-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python35" - - ci\\runtests.bat - - set PYTHON="C:\\Python35-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python36" - - ci\\runtests.bat - - set PYTHON="C:\\Python36-x64" - - ci\\runtests.bat - -after_test: - # This step builds your wheels. - # Again, you need to use %PYTHON% to get the correct interpreter - -artifacts: - # bdist_wheel puts your built wheel in the dist directory - - path: dist\*.whl - -#on_success: -# You can use this step to upload your artifacts to a public website. -# See Appveyor's documentation for more details. Or you can simply -# access your wheels from the Appveyor "artifacts" tab for your build. - -# vim: set shiftwidth=2 diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 80819c62..2e778dd2 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,6 +1,8 @@ from msgpack import fallback + try: - from msgpack import _unpacker, _packer + from msgpack import _cmsgpack + has_ext = True except ImportError: has_ext = False @@ -9,26 +11,28 @@ def profile(name, func): times = timeit.repeat(func, number=1000, repeat=4) - times = ', '.join(["%8f" % t for t in times]) + times = ", ".join(["%8f" % t for t in times]) print("%-30s %40s" % (name, times)) def simple(name, data): if has_ext: - packer = _packer.Packer() + packer = _cmsgpack.Packer() profile("packing %s (ext)" % name, lambda: packer.pack(data)) packer = fallback.Packer() - profile('packing %s (fallback)' % name, lambda: packer.pack(data)) + profile("packing %s (fallback)" % name, lambda: packer.pack(data)) data = packer.pack(data) if has_ext: - profile('unpacking %s (ext)' % name, lambda: _unpacker.unpackb(data)) - profile('unpacking %s (fallback)' % name, lambda: fallback.unpackb(data)) + profile("unpacking %s (ext)" % name, lambda: _cmsgpack.unpackb(data)) + profile("unpacking %s (fallback)" % name, lambda: fallback.unpackb(data)) + def main(): - simple("integers", [7]*10000) - simple("bytes", [b'x'*n for n in range(100)]*10) - simple("lists", [[]]*10000) - simple("dicts", [{}]*10000) + simple("integers", [7] * 10000) + simple("bytes", [b"x" * n for n in range(100)] * 10) + simple("lists", [[]] * 10000) + simple("dicts", [{}] * 10000) + main() diff --git a/ci/runtests.bat b/ci/runtests.bat deleted file mode 100644 index 02404679..00000000 --- a/ci/runtests.bat +++ /dev/null @@ -1,9 +0,0 @@ -%PYTHON%\python.exe -m pip install -U pip wheel pytest -%PYTHON%\python.exe setup.py build_ext -i -%PYTHON%\python.exe setup.py install -%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" -%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker" -%PYTHON%\python.exe setup.py bdist_wheel -%PYTHON%\python.exe -m pytest -v test -SET EL=%ERRORLEVEL% -exit /b %EL% diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index f586a8dd..ff34139d 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -1,11 +1,22 @@ #!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + set -e -x ARCH=`uname -p` echo "arch=$ARCH" -for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do +ls /opt/python + +for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} + $PYBIN/python -m build -w +done + +cd dist +for whl in *.whl; do + auditwheel repair "$whl" + rm "$whl" done diff --git a/docker/runtests.sh b/docker/runtests.sh index 113b6307..fa7e979b 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -1,14 +1,17 @@ #!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + set -e -x -for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do +for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin $PYBIN/python setup.py install rm -rf build/ # Avoid lib build by narrow Python is used by wide python $PYBIN/pip install pytest pushd test # prevent importing msgpack package in current directory. $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' - $PYBIN/python -c 'from msgpack import _packer, _unpacker' + $PYBIN/python -c 'from msgpack import _cmsgpack' # Ensure extension is available $PYBIN/pytest -v . popd done diff --git a/docker/shared.env b/docker/shared.env new file mode 100644 index 00000000..80274ac6 --- /dev/null +++ b/docker/shared.env @@ -0,0 +1,7 @@ +PYTHON_VERSIONS=( + cp310-cp310 + cp39-cp39 + cp38-cp38 + cp37-cp37m + cp36-cp36m +) diff --git a/docs/Makefile b/docs/Makefile index b09d8844..831a6a7f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -153,7 +153,7 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." serve: html - cd _build/html && python3 -m http.server + python3 -m http.server -d _build/html zip: html cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/_static/README.txt b/docs/_static/README.txt new file mode 100644 index 00000000..1c70594f --- /dev/null +++ b/docs/_static/README.txt @@ -0,0 +1 @@ +Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/advanced.rst b/docs/advanced.rst new file mode 100644 index 00000000..38370088 --- /dev/null +++ b/docs/advanced.rst @@ -0,0 +1,32 @@ +Advanced usage +=============== + +Packer +------ + +autoreset +~~~~~~~~~ + +When you used ``autoreset=False`` option of :class:`~msgpack.Packer`, +``pack()`` method doesn't return packed ``bytes``. + +You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to +get packed data. + +``bytes()`` returns ``bytes`` object. ``getbuffer()`` returns some bytes-like +object. It's concrete type is implement detail and it will be changed in future +versions. + +You can reduce temporary bytes object by using ``Unpacker.getbuffer()``. + +.. code-block:: python + + packer = Packer(use_bin_type=True, autoreset=False) + + packer.pack([1, 2]) + packer.pack([3, 4]) + + with open('data.bin', 'wb') as f: + f.write(packer.getbuffer()) + + packer.reset() # reset internal buffer diff --git a/docs/api.rst b/docs/api.rst index 6336793e..f5dfbbd2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -:func:`dump` is alias for :func:`pack` +``dump()`` is an alias for :func:`pack` .. autofunction:: packb -:func:`dumps` is alias for :func:`packb` +``dumps()`` is an alias for :func:`packb` .. autofunction:: unpack -:func:`load` is alias for :func:`unpack` +``load()`` is an alias for :func:`unpack` .. autofunction:: unpackb -:func:`loads` is alias for :func:`unpackb` +``loads()`` is an alias for :func:`unpackb` .. autoclass:: Packer :members: @@ -27,6 +27,10 @@ API reference .. autoclass:: ExtType +.. autoclass:: Timestamp + :members: + :special-members: __init__ + exceptions ---------- diff --git a/docs/conf.py b/docs/conf.py index 47d745a8..28116cd6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -11,37 +9,37 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'msgpack' -copyright = u'2013, INADA Naoki' +project = "msgpack" +copyright = "Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -49,176 +47,170 @@ # # The short X.Y version. # The full version, including alpha/beta/rc tags. -version = release = '0.5' +version = release = "1.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinxdoc' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'msgpackdoc' +htmlhelp_basename = "msgpackdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'msgpack.tex', u'msgpack Documentation', - u'Author', 'manual'), + ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'msgpack', u'msgpack Documentation', - [u'Author'], 1) -] +man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -227,59 +219,65 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'msgpack', u'msgpack Documentation', - u'Author', 'msgpack', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "msgpack", + "msgpack Documentation", + "Author", + "msgpack", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u'msgpack' -epub_author = u'Author' -epub_publisher = u'Author' -epub_copyright = u'2013, Author' +epub_title = "msgpack" +epub_author = "Author" +epub_publisher = "Author" +epub_copyright = "2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. -#epub_language = '' +# epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. -#epub_scheme = '' +# epub_scheme = '' # The unique identifier of the text. This can be a ISBN number # or the project homepage. -#epub_identifier = '' +# epub_identifier = '' # A unique identification for the text. -#epub_uid = '' +# epub_uid = '' # A tuple containing the cover image and cover page html template filenames. -#epub_cover = () +# epub_cover = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_pre_files = [] +# epub_pre_files = [] # HTML files shat should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_post_files = [] +# epub_post_files = [] # A list of files that should not be packed into the epub file. -#epub_exclude_files = [] +# epub_exclude_files = [] # The depth of the table of contents in toc.ncx. -#epub_tocdepth = 3 +# epub_tocdepth = 3 # Allow duplicate toc entries. -#epub_tocdup = True +# epub_tocdup = True diff --git a/docs/index.rst b/docs/index.rst index dcdab4f5..e9c2ce83 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,3 +8,4 @@ language data exchange. :maxdepth: 1 api + advanced diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..26002de4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx~=7.3.7 +sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 3955a414..ad68271d 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,31 +1,20 @@ -# coding: utf-8 -from msgpack._version import version -from msgpack.exceptions import * - -from collections import namedtuple +# ruff: noqa: F401 +import os +from .exceptions import * # noqa: F403 +from .ext import ExtType, Timestamp -class ExtType(namedtuple('ExtType', 'code data')): - """ExtType represents ext type in msgpack.""" - def __new__(cls, code, data): - if not isinstance(code, int): - raise TypeError("code must be int") - if not isinstance(data, bytes): - raise TypeError("data must be bytes") - if not 0 <= code <= 127: - raise ValueError("code must be 0~127") - return super(ExtType, cls).__new__(cls, code, data) +version = (1, 1, 1) +__version__ = "1.1.1" -import os -if os.environ.get('MSGPACK_PUREPYTHON'): - from msgpack.fallback import Packer, unpackb, Unpacker +if os.environ.get("MSGPACK_PUREPYTHON"): + from .fallback import Packer, Unpacker, unpackb else: try: - from msgpack._packer import Packer - from msgpack._unpacker import unpackb, Unpacker + from ._cmsgpack import Packer, Unpacker, unpackb except ImportError: - from msgpack.fallback import Packer, unpackb, Unpacker + from .fallback import Packer, Unpacker, unpackb def pack(o, stream, **kwargs): diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx new file mode 100644 index 00000000..1faaac3a --- /dev/null +++ b/msgpack/_cmsgpack.pyx @@ -0,0 +1,11 @@ +# coding: utf-8 +#cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +from cpython.datetime cimport import_datetime, datetime_new +import_datetime() + +import datetime +cdef object utc = datetime.timezone.utc +cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc) + +include "_packer.pyx" +include "_unpacker.pyx" diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 225f24ae..402b6946 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,21 +1,21 @@ # coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii from cpython cimport * -from cpython.version cimport PY_MAJOR_VERSION -from cpython.exc cimport PyErr_WarnEx +from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact +from cpython.datetime cimport ( + PyDateTime_CheckExact, PyDelta_CheckExact, + datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds, +) -from msgpack.exceptions import PackValueError, PackOverflowError -from msgpack import ExtType +cdef ExtType +cdef Timestamp + +from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) - int PyByteArray_Check(object obj) - int PyByteArray_CheckExact(object obj) - char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL - cdef extern from "pack.h": struct msgpack_packer: @@ -24,22 +24,21 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_bin(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) - int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) + int msgpack_pack_nil(msgpack_packer* pk) except -1 + int msgpack_pack_true(msgpack_packer* pk) except -1 + int msgpack_pack_false(msgpack_packer* pk) except -1 + int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 + int msgpack_pack_float(msgpack_packer* pk, float d) except -1 + int msgpack_pack_double(msgpack_packer* pk, double d) except -1 + int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -53,11 +52,11 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer(object): +cdef class Packer: """ MessagePack Packer - usage:: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -65,7 +64,8 @@ cdef class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -78,9 +78,7 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. - Current default value is false, but it will be changed to true - in future version. You should specify it explicitly. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. Derived classes @@ -90,199 +88,190 @@ cdef class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. - :param str encoding: - (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') + :param int buf_size: + The size of the internal buffer. (default: 256*1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. """ cdef msgpack_packer pk cdef object _default - cdef object _bencoding cdef object _berrors - cdef const char *encoding cdef const char *unicode_errors + cdef size_t exports # number of exported buffers cdef bint strict_types - cdef bool use_float + cdef bint use_float cdef bint autoreset + cdef bint datetime - def __cinit__(self): - cdef int buf_size = 1024*1024 + def __cinit__(self, buf_size=256*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 + self.exports = 0 - def __init__(self, default=None, encoding=None, unicode_errors=None, - bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, - bint strict_types=False): - if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + assert self.exports == 0 + + cdef _check_exports(self): + if self.exports > 0: + raise BufferError("Existing exports of data: Packer cannot be changed") + + def __init__(self, *, default=None, + bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset + self.datetime = datetime self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") self._default = default - self._bencoding = encoding - if encoding is None: - if PY_MAJOR_VERSION < 3: - self.encoding = 'utf-8' - else: - self.encoding = NULL - else: - self.encoding = self._bencoding - self._berrors = unicode_errors if unicode_errors is None: self.unicode_errors = NULL else: self.unicode_errors = self._berrors - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + # returns -2 when default should(o) be called + cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: cdef long long llval cdef unsigned long long ullval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d + cdef unsigned long ulval + cdef const char* rawval cdef Py_ssize_t L - cdef int default_used = 0 - cdef bint strict_types = self.strict_types cdef Py_buffer view - - if nest_limit < 0: - raise PackValueError("recursion limit exceeded.") - - while True: - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif PyBool_Check(o) if strict_types else isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) + cdef bint strict = self.strict_types + + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict else PyLong_Check(o): + try: + if o > 0: + ullval = o + msgpack_pack_unsigned_long_long(&self.pk, ullval) else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): - # PyInt_Check(long) is True for Python 3. - # So we should test long before int. - try: - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if not default_used and self._default is not None: - o = self._default(o) - default_used = True - continue - else: - raise PackOverflowError("Integer value out of range") - elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) + llval = o + msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if will_default: + return -2 else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): - L = len(o) + raise OverflowError("Integer value out of range") + elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): + if self.use_float: + msgpack_pack_float(&self.pk, o) + else: + msgpack_pack_double(&self.pk, o) + elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): + if self.unicode_errors == NULL: + rawval = PyUnicode_AsUTF8AndSize(o, &L) + if L >ITEM_LIMIT: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) + L = Py_SIZE(o) if L > ITEM_LIMIT: - raise PackValueError("%s is too large" % type(o).__name__) + raise ValueError("unicode string is too large") rawval = o - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.encoding == NULL and self.unicode_errors == NULL: - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise PackValueError("unicode string is too large") - else: - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - L = len(o) - if L > ITEM_LIMIT: - raise PackValueError("unicode string is too large") - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - rawval = o - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o): - d = o - L = len(d) - if L > ITEM_LIMIT: - raise PackValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif not strict_types and PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise PackValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif type(o) is ExtType if strict_types else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - longval = o.code - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise PackValueError("EXT data is too large") - ret = msgpack_pack_ext(&self.pk, longval, L) - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): - L = len(o) - if L > ITEM_LIMIT: - raise PackValueError("list is too large") - ret = msgpack_pack_array(&self.pk, L) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyMemoryView_Check(o): - if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise PackValueError("could not get buffer for memoryview") - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise PackValueError("memoryview is too large") - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + msgpack_pack_raw(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o) if strict else PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + msgpack_pack_map(&self.pk, L) + for k, v in o.items(): + self._pack(k, nest_limit) + self._pack(v, nest_limit) + elif type(o) is ExtType if strict else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + msgpack_pack_ext(&self.pk, o.code, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + msgpack_pack_array(&self.pk, L) + for v in o: + self._pack(v, nest_limit) + elif PyMemoryView_Check(o): + PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) + L = view.len + if L > ITEM_LIMIT: + PyBuffer_Release(&view); + raise ValueError("memoryview is too large") + try: + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, view.buf, L) + finally: PyBuffer_Release(&view); - elif not default_used and self._default: + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif will_default: + return -2 + elif self.datetime and PyDateTime_CheckExact(o): + # this should be later than will_default + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef int ret + if nest_limit < 0: + raise ValueError("recursion limit exceeded.") + nest_limit -= 1 + if self._default is not None: + ret = self._pack_inner(o, 1, nest_limit) + if ret == -2: o = self._default(o) - default_used = 1 - continue else: - raise TypeError("can't serialize %r" % (o,)) - return ret + return ret + return self._pack_inner(o, 0, nest_limit) - cpdef pack(self, object obj): + def pack(self, object obj): cdef int ret + self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -296,30 +285,27 @@ cdef class Packer(object): return buf def pack_ext_type(self, typecode, data): + self._check_exports() + if len(data) > ITEM_LIMIT: + raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) def pack_array_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise PackValueError - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("array too large") + msgpack_pack_array(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf def pack_map_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise PackValueError - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("map too learge") + msgpack_pack_map(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 @@ -332,26 +318,41 @@ cdef class Packer(object): *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + self._check_exports() + size = len(pairs) + if size > ITEM_LIMIT: + raise ValueError("map too large") + msgpack_pack_map(&self.pk, size) + for k, v in pairs: + self._pack(k) + self._pack(v) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf def reset(self): - """Clear internal buffer.""" + """Reset internal buffer. + + This method is useful only when autoreset=False. + """ + self._check_exports() self.pk.length = 0 def bytes(self): - """Return buffer content.""" + """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + + def getbuffer(self): + """Return memoryview of internal buffer. + + Note: Packer now supports buffer protocol. You can use memoryview(packer). + """ + return memoryview(self) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + self.exports += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d7fa5bc6..34ff3304 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,44 +1,25 @@ # coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii - -from cpython.version cimport PY_MAJOR_VERSION -from cpython.bytes cimport ( - PyBytes_AsString, - PyBytes_FromStringAndSize, - PyBytes_Size, -) -from cpython.buffer cimport ( - Py_buffer, - PyObject_CheckBuffer, - PyObject_GetBuffer, - PyBuffer_Release, - PyBuffer_IsContiguous, - PyBUF_READ, - PyBUF_SIMPLE, - PyBUF_FULL_RO, -) -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython.object cimport PyCallable_Check -from cpython.ref cimport Py_DECREF -from cpython.exc cimport PyErr_WarnEx +from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject - cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -ctypedef unsigned long long uint64_t +from libc.stdint cimport uint64_t -from msgpack.exceptions import ( +from .exceptions import ( BufferFull, OutOfData, - UnpackValueError, ExtraData, + FormatError, + StackError, ) -from msgpack import ExtType +from .ext import ExtType, Timestamp + +cdef object giga = 1_000_000_000 cdef extern from "unpack.h": @@ -46,11 +27,15 @@ cdef extern from "unpack.h": bint use_list bint raw bint has_pairs_hook # call object_hook with k-v pairs + bint strict_map_key + int timestamp PyObject* object_hook PyObject* list_hook PyObject* ext_hook - char *encoding - char *unicode_errors + PyObject* timestamp_t + PyObject *giga; + PyObject *utc; + const char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -75,14 +60,16 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw, - const char* encoding, const char* unicode_errors, + bint use_list, bint raw, int timestamp, + bint strict_map_key, + const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list ctx.user.raw = raw + ctx.user.strict_map_key = strict_map_key ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -116,7 +103,14 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = ext_hook - ctx.user.encoding = encoding + if timestamp < 0 or 3 < timestamp: + raise ValueError("timestamp must be 0..3") + + # Add Timestamp type to the user object so it may be used in unpack.h + ctx.user.timestamp = timestamp + ctx.user.timestamp_t = Timestamp + ctx.user.giga = giga + ctx.user.utc = utc ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): @@ -125,54 +119,48 @@ def default_read_extended_type(typecode, data): cdef inline int get_data_from_buffer(object obj, Py_buffer *view, char **buf, - Py_ssize_t *buffer_len, - int *new_protocol) except 0: + Py_ssize_t *buffer_len) except 0: cdef object contiguous cdef Py_buffer tmp - if PyObject_CheckBuffer(obj): - new_protocol[0] = 1 - if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: - raise - if view.itemsize != 1: - PyBuffer_Release(view) - raise BufferError("cannot unpack from multi-byte object") - if PyBuffer_IsContiguous(view, 'A') == 0: - PyBuffer_Release(view) - # create a contiguous copy and get buffer - contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C') - PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) - # view must hold the only reference to contiguous, - # so memory is freed when view is released - Py_DECREF(contiguous) - buffer_len[0] = view.len - buf[0] = view.buf - return 1 - else: - new_protocol[0] = 0 - if PyObject_AsReadBuffer(obj, buf, buffer_len) == -1: - raise BufferError("could not get memoryview") - PyErr_WarnEx(RuntimeWarning, - "using old buffer interface to unpack %s; " - "this leads to unpacking errors if slicing is used and " - "will be removed in a future version" % type(obj), - 1) - return 1 - -def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=True, - encoding=None, unicode_errors=None, + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, b'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = view.buf + return 1 + + +def unpackb(object packed, *, object object_hook=None, object list_hook=None, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, + unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): """ Unpack packed_bytes to object. Returns an unpacked object. - Raises `ValueError` when `packed` contains extra bytes. + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. See :class:`Unpacker` for options. + + *max_xxx_len* options are configured automatically from ``len(packed)``. """ cdef unpack_context ctx cdef Py_ssize_t off = 0 @@ -181,26 +169,31 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_buffer view cdef char* buf = NULL cdef Py_ssize_t buf_len - cdef const char* cenc = NULL cdef const char* cerr = NULL - cdef int new_protocol = 0 - - if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - cenc = encoding if unicode_errors is not None: cerr = unicode_errors - get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + get_data_from_buffer(packed, &view, &buf, &buf_len) + + if max_str_len == -1: + max_str_len = buf_len + if max_bin_len == -1: + max_bin_len = buf_len + if max_array_len == -1: + max_array_len = buf_len + if max_map_len == -1: + max_map_len = buf_len//2 + if max_ext_len == -1: + max_ext_len = buf_len + try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, cenc, cerr, + use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: - if new_protocol: - PyBuffer_Release(&view); + PyBuffer_Release(&view); if ret == 1: obj = unpack_data(&ctx) @@ -208,88 +201,96 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj unpack_clear(&ctx) - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + if ret == 0: + raise ValueError("Unpack failed: incomplete input") + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError + raise ValueError("Unpack failed: error = %d" % (ret,)) -def unpack(object stream, **kwargs): - PyErr_WarnEx( - PendingDeprecationWarning, - "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", 1) - data = stream.read() - return unpackb(data, **kwargs) - - -cdef class Unpacker(object): +cdef class Unpacker: """Streaming unpacker. - arguments: + Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). - Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + + :param int timestamp: + Control how timestamp type is unpacked: - *encoding* option which is deprecated overrides this option. + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). - :param callable object_hook: + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. + + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) + :param str unicode_errors: + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX (default). + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). Raises `BufferFull` exception when it is insufficient. You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 2**31-1) + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) :param int max_bin_len: - Limits max length of bin. (default: 2**31-1) + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) :param int max_array_len: - Limits max length of array. (default: 2**31-1) + Limits max length of array. + (default: max_buffer_size) :param int max_map_len: - Limits max length of map. (default: 2**31-1) - - :param str encoding: - Deprecated, use raw instead. - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - - :param str unicode_errors: - Error handler used for decoding str type. (default: `'strict'`) + Limits max length of map. + (default: max_buffer_size//2) + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size) Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False) + unpacker = Unpacker(file_like) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False) + unpacker = Unpacker() while True: buf = sock.recv(1024**2) if not buf: @@ -297,6 +298,12 @@ cdef class Unpacker(object): unpacker.feed(buf) for o in unpacker: process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. """ cdef unpack_context ctx cdef char* buf @@ -306,7 +313,7 @@ cdef class Unpacker(object): cdef Py_ssize_t read_size # To maintain refcnt. cdef object object_hook, object_pairs_hook, list_hook, ext_hook - cdef object encoding, unicode_errors + cdef object unicode_errors cdef Py_ssize_t max_buffer_size cdef uint64_t stream_offset @@ -317,17 +324,16 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL - def __init__(self, file_like=None, Py_ssize_t read_size=0, - bint use_list=True, bint raw=True, + def __init__(self, file_like=None, *, Py_ssize_t read_size=0, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - encoding=None, unicode_errors=None, int max_buffer_size=0, + unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): - cdef const char *cenc=NULL, + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): cdef const char *cerr=NULL self.object_hook = object_hook @@ -340,12 +346,25 @@ cdef class Unpacker(object): self.file_like_read = file_like.read if not PyCallable_Check(self.file_like_read): raise TypeError("`file_like.read` must be a callable.") + if not max_buffer_size: max_buffer_size = INT_MAX + if max_str_len == -1: + max_str_len = max_buffer_size + if max_bin_len == -1: + max_bin_len = max_buffer_size + if max_array_len == -1: + max_array_len = max_buffer_size + if max_map_len == -1: + max_map_len = max_buffer_size//2 + if max_ext_len == -1: + max_ext_len = max_buffer_size + if read_size > max_buffer_size: raise ValueError("read_size should be less or equal to max_buffer_size") if not read_size: read_size = min(max_buffer_size, 1024**2) + self.max_buffer_size = max_buffer_size self.read_size = read_size self.buf = PyMem_Malloc(read_size) @@ -356,24 +375,18 @@ cdef class Unpacker(object): self.buf_tail = 0 self.stream_offset = 0 - if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - self.encoding = encoding - cenc = encoding - if unicode_errors is not None: self.unicode_errors = unicode_errors cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, cenc, cerr, + ext_hook, use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff - cdef int new_protocol = 0 cdef char* buf cdef Py_ssize_t buf_len @@ -381,12 +394,11 @@ cdef class Unpacker(object): raise AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol) + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len) try: self.append_buffer(buf, buf_len) finally: - if new_protocol: - PyBuffer_Release(&pybuff) + PyBuffer_Release(&pybuff) cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef: @@ -428,57 +440,49 @@ cdef class Unpacker(object): self.buf_size = buf_size self.buf_tail = tail + _buf_len - cdef read_from_file(self): - next_bytes = self.file_like_read( - min(self.read_size, - self.max_buffer_size - (self.buf_tail - self.buf_head) - )) + cdef int read_from_file(self) except -1: + cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) + if remains <= 0: + raise BufferFull + + next_bytes = self.file_like_read(min(self.read_size, remains)) if next_bytes: self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) else: self.file_like = None + return 0 - cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): + cdef object _unpack(self, execute_fn execute, bint iter=0): cdef int ret cdef object obj cdef Py_ssize_t prev_head - if write_bytes is not None: - PyErr_WarnEx(DeprecationWarning, "`write_bytes` option is deprecated. Use `.tell()` instead.", 1) - - if self.buf_head >= self.buf_tail and self.file_like is not None: - self.read_from_file() - while 1: prev_head = self.buf_head - if prev_head >= self.buf_tail: + if prev_head < self.buf_tail: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head + else: + ret = 0 + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue if iter: raise StopIteration("No more data to unpack.") else: raise OutOfData("No more data to unpack.") - - try: - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = unpack_data(&self.ctx) - unpack_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - else: - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) - except ValueError as e: - raise UnpackValueError(e) + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError + else: + raise ValueError("Unpack failed: error = %d" % (ret,)) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" @@ -486,54 +490,54 @@ cdef class Unpacker(object): nread = min(self.buf_tail - self.buf_head, nbytes) ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) self.buf_head += nread - if len(ret) < nbytes and self.file_like is not None: - ret += self.file_like.read(nbytes - len(ret)) + if nread < nbytes and self.file_like is not None: + ret += self.file_like.read(nbytes - nread) + nread = len(ret) + self.stream_offset += nread return ret - def unpack(self, object write_bytes=None): + def unpack(self): """Unpack one object - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_construct, write_bytes) + return self._unpack(unpack_construct) - def skip(self, object write_bytes=None): + def skip(self): """Read and ignore one object, returning None - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_skip, write_bytes) + return self._unpack(unpack_skip) - def read_array_header(self, object write_bytes=None): + def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(read_array_header, write_bytes) + return self._unpack(read_array_header) - def read_map_header(self, object write_bytes=None): + def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(read_map_header, write_bytes) + return self._unpack(read_map_header) def tell(self): + """Returns the current position of the Unpacker in bytes, i.e., the + number of bytes that were read from the input, also the starting + position of the next object. + """ return self.stream_offset def __iter__(self): return self def __next__(self): - return self._unpack(unpack_construct, None, 1) + return self._unpack(unpack_construct, 1) # for debug. #def _buf(self): diff --git a/msgpack/_version.py b/msgpack/_version.py deleted file mode 100644 index d28f0deb..00000000 --- a/msgpack/_version.py +++ /dev/null @@ -1 +0,0 @@ -version = (0, 5, 6) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index 97668814..d6d2615c 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,5 +1,10 @@ class UnpackException(Exception): - """Deprecated. Use Exception instead to catch all exception during unpacking.""" + """Base class for some exceptions raised while unpacking. + + NOTE: unpack may raise exception other than subclass of + UnpackException. If you want to catch all error, catch + Exception instead. + """ class BufferFull(UnpackException): @@ -10,32 +15,34 @@ class OutOfData(UnpackException): pass -class UnpackValueError(UnpackException, ValueError): - """Deprecated. Use ValueError instead.""" +class FormatError(ValueError, UnpackException): + """Invalid msgpack format""" -class ExtraData(UnpackValueError): - def __init__(self, unpacked, extra): - self.unpacked = unpacked - self.extra = extra - - def __str__(self): - return "unpack(b) received extra data." +class StackError(ValueError, UnpackException): + """Too nested""" -class PackException(Exception): - """Deprecated. Use Exception instead to catch all exception during packing.""" +# Deprecated. Use ValueError instead +UnpackValueError = ValueError -class PackValueError(PackException, ValueError): - """PackValueError is raised when type of input data is supported but it's value is unsupported. +class ExtraData(UnpackValueError): + """ExtraData is raised when there is trailing data. - Deprecated. Use ValueError instead. + This exception is raised while only one-shot (not streaming) + unpack. """ + def __init__(self, unpacked, extra): + self.unpacked = unpacked + self.extra = extra + + def __str__(self): + return "unpack(b) received extra data." -class PackOverflowError(PackValueError, OverflowError): - """PackOverflowError is raised when integer value is out of range of msgpack support [-2**31, 2**32). - Deprecated. Use ValueError instead. - """ +# Deprecated. Use Exception instead to catch all exception during packing. +PackException = Exception +PackValueError = ValueError +PackOverflowError = OverflowError diff --git a/msgpack/ext.py b/msgpack/ext.py new file mode 100644 index 00000000..9694819a --- /dev/null +++ b/msgpack/ext.py @@ -0,0 +1,170 @@ +import datetime +import struct +from collections import namedtuple + + +class ExtType(namedtuple("ExtType", "code data")): + """ExtType represents ext type in msgpack.""" + + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super().__new__(cls, code, data) + + +class Timestamp: + """Timestamp represents the Timestamp extension type in msgpack. + + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. + + This class is immutable: Do not override seconds and nanoseconds. + """ + + __slots__ = ["seconds", "nanoseconds"] + + def __init__(self, seconds, nanoseconds=0): + """Initialize a Timestamp object. + + :param int seconds: + Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). + May be negative. + + :param int nanoseconds: + Number of nanoseconds to add to `seconds` to get fractional time. + Maximum is 999_999_999. Default is 0. + + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. + """ + if not isinstance(seconds, int): + raise TypeError("seconds must be an integer") + if not isinstance(nanoseconds, int): + raise TypeError("nanoseconds must be an integer") + if not (0 <= nanoseconds < 10**9): + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + self.seconds = seconds + self.nanoseconds = nanoseconds + + def __repr__(self): + """String representation of Timestamp.""" + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" + + def __eq__(self, other): + """Check for equality with another Timestamp object""" + if type(other) is self.__class__: + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return False + + def __ne__(self, other): + """not-equals method (see :func:`__eq__()`)""" + return not self.__eq__(other) + + def __hash__(self): + return hash((self.seconds, self.nanoseconds)) + + @staticmethod + def from_bytes(b): + """Unpack bytes into a `Timestamp` object. + + Used for pure-Python msgpack unpacking. + + :param b: Payload from msgpack ext message with code -1 + :type b: bytes + + :returns: Timestamp object unpacked from msgpack ext payload + :rtype: Timestamp + """ + if len(b) == 4: + seconds = struct.unpack("!L", b)[0] + nanoseconds = 0 + elif len(b) == 8: + data64 = struct.unpack("!Q", b)[0] + seconds = data64 & 0x00000003FFFFFFFF + nanoseconds = data64 >> 34 + elif len(b) == 12: + nanoseconds, seconds = struct.unpack("!Iq", b) + else: + raise ValueError( + "Timestamp type can only be created from 32, 64, or 96-bit byte objects" + ) + return Timestamp(seconds, nanoseconds) + + def to_bytes(self): + """Pack this Timestamp object into bytes. + + Used for pure-Python msgpack packing. + + :returns data: Payload for EXT message with code -1 (timestamp type) + :rtype: bytes + """ + if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits + data64 = self.nanoseconds << 34 | self.seconds + if data64 & 0xFFFFFFFF00000000 == 0: + # nanoseconds is zero and seconds < 2**32, so timestamp 32 + data = struct.pack("!L", data64) + else: + # timestamp 64 + data = struct.pack("!Q", data64) + else: + # timestamp 96 + data = struct.pack("!Iq", self.nanoseconds, self.seconds) + return data + + @staticmethod + def from_unix(unix_sec): + """Create a Timestamp from posix timestamp in seconds. + + :param unix_float: Posix timestamp in seconds. + :type unix_float: int or float + """ + seconds = int(unix_sec // 1) + nanoseconds = int((unix_sec % 1) * 10**9) + return Timestamp(seconds, nanoseconds) + + def to_unix(self): + """Get the timestamp as a floating-point value. + + :returns: posix timestamp + :rtype: float + """ + return self.seconds + self.nanoseconds / 1e9 + + @staticmethod + def from_unix_nano(unix_ns): + """Create a Timestamp from posix timestamp in nanoseconds. + + :param int unix_ns: Posix timestamp in nanoseconds. + :rtype: Timestamp + """ + return Timestamp(*divmod(unix_ns, 10**9)) + + def to_unix_nano(self): + """Get the timestamp as a unixtime in nanoseconds. + + :returns: posix timestamp in nanoseconds + :rtype: int + """ + return self.seconds * 10**9 + self.nanoseconds + + def to_datetime(self): + """Get the timestamp as a UTC datetime. + + :rtype: `datetime.datetime` + """ + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 + ) + + @staticmethod + def from_datetime(dt): + """Create a Timestamp from datetime with tzinfo. + + :rtype: Timestamp + """ + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index c8c8c78a..b02e47cf 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,76 +1,56 @@ """Fallback pure Python implementation of msgpack""" -import sys import struct -import warnings - -if sys.version_info[0] == 3: - PY3 = True - int_types = int - Unicode = str - xrange = range - def dict_iteritems(d): - return d.items() -else: - PY3 = False - int_types = (int, long) - Unicode = unicode - def dict_iteritems(d): - return d.iteritems() - +import sys +from datetime import datetime as _DateTime -if hasattr(sys, 'pypy_version_info'): - # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own - # StringBuilder is fastest. +if hasattr(sys, "pypy_version_info"): from __pypy__ import newlist_hint - try: - from __pypy__.builders import BytesBuilder as StringBuilder - except ImportError: - from __pypy__.builders import StringBuilder - USING_STRINGBUILDER = True - class StringIO(object): - def __init__(self, s=b''): + from __pypy__.builders import BytesBuilder + + _USING_STRINGBUILDER = True + + class BytesIO: + def __init__(self, s=b""): if s: - self.builder = StringBuilder(len(s)) + self.builder = BytesBuilder(len(s)) self.builder.append(s) else: - self.builder = StringBuilder() + self.builder = BytesBuilder() + def write(self, s): if isinstance(s, memoryview): s = s.tobytes() elif isinstance(s, bytearray): s = bytes(s) self.builder.append(s) + def getvalue(self): return self.builder.build() + else: - USING_STRINGBUILDER = False - from io import BytesIO as StringIO - newlist_hint = lambda size: [] + from io import BytesIO + _USING_STRINGBUILDER = False -from msgpack.exceptions import ( - BufferFull, - OutOfData, - UnpackValueError, - PackValueError, - PackOverflowError, - ExtraData) + def newlist_hint(size): + return [] -from msgpack import ExtType +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError +from .ext import ExtType, Timestamp -EX_SKIP = 0 -EX_CONSTRUCT = 1 -EX_READ_ARRAY_HEADER = 2 -EX_READ_MAP_HEADER = 3 +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 -TYPE_IMMEDIATE = 0 -TYPE_ARRAY = 1 -TYPE_MAP = 2 -TYPE_RAW = 3 -TYPE_BIN = 4 -TYPE_EXT = 5 +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 +TYPE_BIN = 4 +TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 @@ -83,57 +63,78 @@ def _check_type_strict(obj, t, type=type, tuple=tuple): def _get_data_from_buffer(obj): - try: - view = memoryview(obj) - except TypeError: - # try to use legacy buffer protocol if 2.7, otherwise re-raise - if not PY3: - view = memoryview(buffer(obj)) - warnings.warn("using old buffer interface to unpack %s; " - "this leads to unpacking errors if slicing is used and " - "will be removed in a future version" % type(obj), - RuntimeWarning) - else: - raise + view = memoryview(obj) if view.itemsize != 1: raise ValueError("cannot unpack from multi-byte object") return view -def unpack(stream, **kwargs): - warnings.warn( - "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", - PendingDeprecationWarning) - data = stream.read() - return unpackb(data, **kwargs) - - def unpackb(packed, **kwargs): """ Unpack an object from `packed`. - Raises `ExtraData` when `packed` contains extra bytes. + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + See :class:`Unpacker` for options. """ - unpacker = Unpacker(None, **kwargs) + unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) unpacker.feed(packed) try: ret = unpacker._unpack() except OutOfData: - raise UnpackValueError("Data is not enough.") + raise ValueError("Unpack failed: incomplete input") + except RecursionError: + raise StackError if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret -class Unpacker(object): +_NO_FORMAT_USED = "" +_MSGPACK_HEADERS = { + 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), + 0xC5: (2, ">H", TYPE_BIN), + 0xC6: (4, ">I", TYPE_BIN), + 0xC7: (2, "Bb", TYPE_EXT), + 0xC8: (3, ">Hb", TYPE_EXT), + 0xC9: (5, ">Ib", TYPE_EXT), + 0xCA: (4, ">f"), + 0xCB: (8, ">d"), + 0xCC: (1, _NO_FORMAT_USED), + 0xCD: (2, ">H"), + 0xCE: (4, ">I"), + 0xCF: (8, ">Q"), + 0xD0: (1, "b"), + 0xD1: (2, ">h"), + 0xD2: (4, ">i"), + 0xD3: (8, ">q"), + 0xD4: (1, "b1s", TYPE_EXT), + 0xD5: (2, "b2s", TYPE_EXT), + 0xD6: (4, "b4s", TYPE_EXT), + 0xD7: (8, "b8s", TYPE_EXT), + 0xD8: (16, "b16s", TYPE_EXT), + 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), + 0xDA: (2, ">H", TYPE_RAW), + 0xDB: (4, ">I", TYPE_RAW), + 0xDC: (2, ">H", TYPE_ARRAY), + 0xDD: (4, ">I", TYPE_ARRAY), + 0xDE: (2, ">H", TYPE_MAP), + 0xDF: (4, ">I", TYPE_MAP), +} + + +class Unpacker: """Streaming unpacker. - arguments: + Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -143,60 +144,70 @@ class Unpacker(object): Otherwise, unpack to Python tuple. (default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). - Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). - *encoding* option which is deprecated overrides this option. + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) - :param str encoding: - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - :param str unicode_errors: - (deprecated) Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX (default). + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). Raises `BufferFull` exception when it is insufficient. You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 2**31-1) + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) :param int max_bin_len: - Limits max length of bin. (default: 2**31-1) + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) :param int max_array_len: - Limits max length of array. (default: 2**31-1) + Limits max length of array. + (default: max_buffer_size) :param int max_map_len: - Limits max length of map. (default: 2**31-1) + Limits max length of map. + (default: max_buffer_size//2) + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size) - example of streaming deserialize from file-like object:: + Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False) + unpacker = Unpacker(file_like) for o in unpacker: process(o) - example of streaming deserialize from socket:: + Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False) + unpacker = Unpacker() while True: buf = sock.recv(1024**2) if not buf: @@ -204,25 +215,37 @@ class Unpacker(object): unpacker.feed(buf) for o in unpacker: process(o) - """ - def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, - object_hook=None, object_pairs_hook=None, list_hook=None, - encoding=None, unicode_errors=None, max_buffer_size=0, - ext_hook=ExtType, - max_str_len=2147483647, # 2**32-1 - max_bin_len=2147483647, - max_array_len=2147483647, - max_map_len=2147483647, - max_ext_len=2147483647): - - if encoding is not None: - warnings.warn( - "encoding is deprecated, Use raw=False instead.", - PendingDeprecationWarning) + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + """ + def __init__( + self, + file_like=None, + *, + read_size=0, + use_list=True, + raw=False, + timestamp=0, + strict_map_key=True, + object_hook=None, + object_pairs_hook=None, + list_hook=None, + unicode_errors=None, + max_buffer_size=100 * 1024 * 1024, + ext_hook=ExtType, + max_str_len=-1, + max_bin_len=-1, + max_array_len=-1, + max_map_len=-1, + max_ext_len=-1, + ): if unicode_errors is None: - unicode_errors = 'strict' + unicode_errors = "strict" if file_like is None: self._feeding = True @@ -234,12 +257,6 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, #: array of bytes fed. self._buffer = bytearray() - # Some very old pythons don't support `struct.unpack_from()` with a - # `bytearray`. So we wrap it in a `buffer()` there. - if sys.version_info < (2, 7, 6): - self._buffer_view = buffer(self._buffer) - else: - self._buffer_view = self._buffer #: Which position we currently reads self._buff_i = 0 @@ -252,14 +269,30 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, # state, which _buf_checkpoint records. self._buf_checkpoint = 0 - self._max_buffer_size = max_buffer_size or 2**31-1 + if not max_buffer_size: + max_buffer_size = 2**31 - 1 + if max_str_len == -1: + max_str_len = max_buffer_size + if max_bin_len == -1: + max_bin_len = max_buffer_size + if max_array_len == -1: + max_array_len = max_buffer_size + if max_map_len == -1: + max_map_len = max_buffer_size // 2 + if max_ext_len == -1: + max_ext_len = max_buffer_size + + self._max_buffer_size = max_buffer_size if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self._read_size = read_size or min(self._max_buffer_size, 16*1024) + self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) self._raw = bool(raw) - self._encoding = encoding + self._strict_map_key = bool(strict_map_key) self._unicode_errors = unicode_errors self._use_list = use_list + if not (0 <= timestamp <= 3): + raise ValueError("timestamp must be 0..3") + self._timestamp = timestamp self._list_hook = list_hook self._object_hook = object_hook self._object_pairs_hook = object_pairs_hook @@ -272,33 +305,34 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, self._stream_offset = 0 if list_hook is not None and not callable(list_hook): - raise TypeError('`list_hook` is not callable') + raise TypeError("`list_hook` is not callable") if object_hook is not None and not callable(object_hook): - raise TypeError('`object_hook` is not callable') + raise TypeError("`object_hook` is not callable") if object_pairs_hook is not None and not callable(object_pairs_hook): - raise TypeError('`object_pairs_hook` is not callable') + raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError("object_pairs_hook and object_hook are mutually " - "exclusive") + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): assert self._feeding view = _get_data_from_buffer(next_bytes) - if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): + if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: raise BufferFull # Strip buffer before checkpoint before reading file. if self._buf_checkpoint > 0: - del self._buffer[:self._buf_checkpoint] + del self._buffer[: self._buf_checkpoint] self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 - self._buffer += view + # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython + self._buffer.extend(view) + view.release() def _consume(self): - """ Gets rid of the used parts of the buffer. """ + """Gets rid of the used parts of the buffer.""" self._stream_offset += self._buff_i - self._buf_checkpoint self._buf_checkpoint = self._buff_i @@ -306,19 +340,22 @@ def _got_extradata(self): return self._buff_i < len(self._buffer) def _get_extradata(self): - return self._buffer[self._buff_i:] + return self._buffer[self._buff_i :] def read_bytes(self, n): - return self._read(n) + ret = self._read(n, raise_outofdata=False) + self._consume() + return ret - def _read(self, n): + def _read(self, n, raise_outofdata=True): # (int) -> bytearray - self._reserve(n) + self._reserve(n, raise_outofdata=raise_outofdata) i = self._buff_i - self._buff_i = i+n - return self._buffer[i:i+n] + ret = self._buffer[i : i + n] + self._buff_i = i + len(ret) + return ret - def _reserve(self, n): + def _reserve(self, n, raise_outofdata=True): remain_bytes = len(self._buffer) - self._buff_i - n # Fast path: buffer has n bytes already @@ -331,12 +368,14 @@ def _reserve(self, n): # Strip buffer before checkpoint before reading file. if self._buf_checkpoint > 0: - del self._buffer[:self._buf_checkpoint] + del self._buffer[: self._buf_checkpoint] self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 # Read from file remain_bytes = -remain_bytes + if remain_bytes + len(self._buffer) > self._max_buffer_size: + raise BufferFull while remain_bytes > 0: to_read_bytes = max(self._read_size, remain_bytes) read_data = self.file_like.read(to_read_bytes) @@ -346,11 +385,11 @@ def _reserve(self, n): self._buffer += read_data remain_bytes -= len(read_data) - if len(self._buffer) < n + self._buff_i: + if len(self._buffer) < n + self._buff_i and raise_outofdata: self._buff_i = 0 # rollback raise OutOfData - def _read_header(self, execute=EX_CONSTRUCT): + def _read_header(self): typ = TYPE_IMMEDIATE n = 0 obj = None @@ -360,228 +399,112 @@ def _read_header(self, execute=EX_CONSTRUCT): if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = -1 - (b ^ 0xff) + obj = -1 - (b ^ 0xFF) elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - elif b == 0xc0: + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + elif b == 0xC0: obj = None - elif b == 0xc2: + elif b == 0xC2: obj = False - elif b == 0xc3: + elif b == 0xC3: obj = True - elif b == 0xc4: - typ = TYPE_BIN - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_bin_len: - raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xc5: - typ = TYPE_BIN - self._reserve(2) - n = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] - self._buff_i += 2 - if n > self._max_bin_len: - raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xc6: - typ = TYPE_BIN - self._reserve(4) - n = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] - self._buff_i += 4 + elif 0xC4 <= b <= 0xC6: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_bin_len: - raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") obj = self._read(n) - elif b == 0xc7: # ext 8 - typ = TYPE_EXT - self._reserve(2) - L, n = struct.unpack_from('Bb', self._buffer_view, self._buff_i) - self._buff_i += 2 - if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xc8: # ext 16 - typ = TYPE_EXT - self._reserve(3) - L, n = struct.unpack_from('>Hb', self._buffer_view, self._buff_i) - self._buff_i += 3 - if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xc9: # ext 32 - typ = TYPE_EXT - self._reserve(5) - L, n = struct.unpack_from('>Ib', self._buffer_view, self._buff_i) - self._buff_i += 5 + elif 0xC7 <= b <= 0xC9: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") obj = self._read(L) - elif b == 0xca: - self._reserve(4) - obj = struct.unpack_from(">f", self._buffer_view, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xcb: - self._reserve(8) - obj = struct.unpack_from(">d", self._buffer_view, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xcc: - self._reserve(1) - obj = self._buffer[self._buff_i] - self._buff_i += 1 - elif b == 0xcd: - self._reserve(2) - obj = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xce: - self._reserve(4) - obj = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xcf: - self._reserve(8) - obj = struct.unpack_from(">Q", self._buffer_view, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xd0: - self._reserve(1) - obj = struct.unpack_from("b", self._buffer_view, self._buff_i)[0] - self._buff_i += 1 - elif b == 0xd1: - self._reserve(2) - obj = struct.unpack_from(">h", self._buffer_view, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xd2: - self._reserve(4) - obj = struct.unpack_from(">i", self._buffer_view, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xd3: - self._reserve(8) - obj = struct.unpack_from(">q", self._buffer_view, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xd4: # fixext 1 - typ = TYPE_EXT - if self._max_ext_len < 1: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - self._reserve(2) - n, obj = struct.unpack_from("b1s", self._buffer_view, self._buff_i) - self._buff_i += 2 - elif b == 0xd5: # fixext 2 - typ = TYPE_EXT - if self._max_ext_len < 2: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - self._reserve(3) - n, obj = struct.unpack_from("b2s", self._buffer_view, self._buff_i) - self._buff_i += 3 - elif b == 0xd6: # fixext 4 - typ = TYPE_EXT - if self._max_ext_len < 4: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - self._reserve(5) - n, obj = struct.unpack_from("b4s", self._buffer_view, self._buff_i) - self._buff_i += 5 - elif b == 0xd7: # fixext 8 - typ = TYPE_EXT - if self._max_ext_len < 8: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - self._reserve(9) - n, obj = struct.unpack_from("b8s", self._buffer_view, self._buff_i) - self._buff_i += 9 - elif b == 0xd8: # fixext 16 - typ = TYPE_EXT - if self._max_ext_len < 16: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - self._reserve(17) - n, obj = struct.unpack_from("b16s", self._buffer_view, self._buff_i) - self._buff_i += 17 - elif b == 0xd9: - typ = TYPE_RAW - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xda: - typ = TYPE_RAW - self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) - self._buff_i += 2 - if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xdb: - typ = TYPE_RAW - self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) - self._buff_i += 4 + elif 0xCA <= b <= 0xD3: + size, fmt = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + obj = self._buffer[self._buff_i] + self._buff_i += size + elif 0xD4 <= b <= 0xD8: + size, fmt, typ = _MSGPACK_HEADERS[b] + if self._max_ext_len < size: + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") + self._reserve(size + 1) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + 1 + elif 0xD9 <= b <= 0xDB: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) - elif b == 0xdc: - typ = TYPE_ARRAY - self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) - self._buff_i += 2 + elif 0xDC <= b <= 0xDD: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xdd: - typ = TYPE_ARRAY - self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) - self._buff_i += 4 - if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xde: - self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) - self._buff_i += 2 + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif 0xDE <= b <= 0xDF: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP - elif b == 0xdf: - self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) - self._buff_i += 4 - if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") else: - raise UnpackValueError("Unknown header: 0x%x" % b) + raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): - typ, n, obj = self._read_header(execute) + typ, n, obj = self._read_header() if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: - raise UnpackValueError("Expected array") + raise ValueError("Expected array") return n if execute == EX_READ_MAP_HEADER: if typ != TYPE_MAP: - raise UnpackValueError("Expected map") + raise ValueError("Expected map") return n # TODO should we eliminate the recursion? if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) - for i in xrange(n): + for i in range(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) @@ -589,20 +512,23 @@ def _unpack(self, execute=EX_CONSTRUCT): return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), - self._unpack(EX_CONSTRUCT)) - for _ in xrange(n)) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) + ) else: ret = {} - for _ in xrange(n): + for _ in range(n): key = self._unpack(EX_CONSTRUCT) + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + if isinstance(key, str): + key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) @@ -610,17 +536,26 @@ def _unpack(self, execute=EX_CONSTRUCT): if execute == EX_SKIP: return if typ == TYPE_RAW: - if self._encoding is not None: - obj = obj.decode(self._encoding, self._unicode_errors) - elif self._raw: + if self._raw: obj = bytes(obj) else: - obj = obj.decode('utf_8') + obj = obj.decode("utf_8", self._unicode_errors) return obj - if typ == TYPE_EXT: - return self._ext_hook(n, bytes(obj)) if typ == TYPE_BIN: return bytes(obj) + if typ == TYPE_EXT: + if n == -1: # timestamp + ts = Timestamp.from_bytes(bytes(obj)) + if self._timestamp == 1: + return ts.to_unix() + elif self._timestamp == 2: + return ts.to_unix_nano() + elif self._timestamp == 3: + return ts.to_datetime() + else: + return ts + else: + return self._ext_hook(n, bytes(obj)) assert typ == TYPE_IMMEDIATE return obj @@ -635,37 +570,30 @@ def __next__(self): except OutOfData: self._consume() raise StopIteration + except RecursionError: + raise StackError next = __next__ - def skip(self, write_bytes=None): + def skip(self): self._unpack(EX_SKIP) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() - def unpack(self, write_bytes=None): - ret = self._unpack(EX_CONSTRUCT) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) + def unpack(self): + try: + ret = self._unpack(EX_CONSTRUCT) + except RecursionError: + raise StackError self._consume() return ret - def read_array_header(self, write_bytes=None): + def read_array_header(self): ret = self._unpack(EX_READ_ARRAY_HEADER) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret - def read_map_header(self, write_bytes=None): + def read_map_header(self): ret = self._unpack(EX_READ_MAP_HEADER) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret @@ -673,11 +601,11 @@ def tell(self): return self._stream_offset -class Packer(object): +class Packer: """ MessagePack Packer - usage: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -685,7 +613,8 @@ class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -698,49 +627,59 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. Derived classes - from serializeable types will not be serialized and will be + from serializable types will not be serialized and will be treated as unsupported type and forwarded to default. Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. - :param str encoding: - (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') - """ - def __init__(self, default=None, encoding=None, unicode_errors=None, - use_single_float=False, autoreset=True, use_bin_type=False, - strict_types=False): - if encoding is None: - encoding = 'utf_8' - else: - warnings.warn( - "encoding is deprecated, Use raw=False instead.", - PendingDeprecationWarning) + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. - if unicode_errors is None: - unicode_errors = 'strict' + :param int buf_size: + Internal buffer size. This option is used only for C implementation. + """ + def __init__( + self, + *, + default=None, + use_single_float=False, + autoreset=True, + use_bin_type=True, + strict_types=False, + datetime=False, + unicode_errors=None, + buf_size=None, + ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._encoding = encoding - self._unicode_errors = unicode_errors - self._buffer = StringIO() - if default is not None: - if not callable(default): - raise TypeError("default must be callable") + self._buffer = BytesIO() + self._datetime = bool(datetime) + self._unicode_errors = unicode_errors or "strict" + if default is not None and not callable(default): + raise TypeError("default must be callable") self._default = default - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, - check=isinstance, check_type_strict=_check_type_strict): + def _pack( + self, + obj, + nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, + check_type_strict=_check_type_strict, + ): default_used = False if self._strict_types: check = check_type_strict @@ -749,149 +688,150 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, list_types = (list, tuple) while True: if nest_limit < 0: - raise PackValueError("recursion limit exceeded") + raise ValueError("recursion limit exceeded") if obj is None: return self._buffer.write(b"\xc0") if check(obj, bool): if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int_types): + if check(obj, int): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: return self._buffer.write(struct.pack("b", obj)) - if 0x80 <= obj <= 0xff: - return self._buffer.write(struct.pack("BB", 0xcc, obj)) + if 0x80 <= obj <= 0xFF: + return self._buffer.write(struct.pack("BB", 0xCC, obj)) if -0x80 <= obj < 0: - return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) - if 0xff < obj <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xcd, obj)) + return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) + if 0xFF < obj <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xCD, obj)) if -0x8000 <= obj < -0x80: - return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) - if 0xffff < obj <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xce, obj)) + return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) + if 0xFFFF < obj <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xCE, obj)) if -0x80000000 <= obj < -0x8000: - return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) - if 0xffffffff < obj <= 0xffffffffffffffff: - return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) + return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) + if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: + return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) if -0x8000000000000000 <= obj < -0x80000000: - return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + return self._buffer.write(struct.pack(">Bq", 0xD3, obj)) if not default_used and self._default is not None: obj = self._default(obj) default_used = True continue - raise PackOverflowError("Integer value out of range") + raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) if n >= 2**32: - raise PackValueError("%s is too large" % type(obj).__name__) + raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, Unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) + if check(obj, str): + obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) if n >= 2**32: - raise PackValueError("String is too large") + raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): - n = len(obj) * obj.itemsize + n = obj.nbytes if n >= 2**32: - raise PackValueError("Memoryview is too large") + raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, float): if self._use_float: - return self._buffer.write(struct.pack(">Bf", 0xca, obj)) - return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if check(obj, ExtType): - code = obj.code - data = obj.data + return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) + return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) + if check(obj, (ExtType, Timestamp)): + if check(obj, Timestamp): + code = -1 + data = obj.to_bytes() + else: + code = obj.code + data = obj.data assert isinstance(code, int) assert isinstance(data, bytes) L = len(data) if L == 1: - self._buffer.write(b'\xd4') + self._buffer.write(b"\xd4") elif L == 2: - self._buffer.write(b'\xd5') + self._buffer.write(b"\xd5") elif L == 4: - self._buffer.write(b'\xd6') + self._buffer.write(b"\xd6") elif L == 8: - self._buffer.write(b'\xd7') + self._buffer.write(b"\xd7") elif L == 16: - self._buffer.write(b'\xd8') - elif L <= 0xff: - self._buffer.write(struct.pack(">BB", 0xc7, L)) - elif L <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc8, L)) + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xC7, L)) + elif L <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xC8, L)) else: - self._buffer.write(struct.pack(">BI", 0xc9, L)) + self._buffer.write(struct.pack(">BI", 0xC9, L)) self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return if check(obj, list_types): n = len(obj) self._pack_array_header(n) - for i in xrange(n): + for i in range(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs(len(obj), dict_iteritems(obj), - nest_limit - 1) + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) + + if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: + obj = Timestamp.from_datetime(obj) + default_used = 1 + continue + if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 continue - raise TypeError("Cannot serialize %r" % (obj, )) + + if self._datetime and check(obj, _DateTime): + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") + + raise TypeError(f"Cannot serialize {obj!r}") def pack(self, obj): try: self._pack(obj) except: - self._buffer = StringIO() # force reset + self._buffer = BytesIO() # force reset raise - ret = self._buffer.getvalue() if self._autoreset: - self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) - ret = self._buffer.getvalue() if self._autoreset: - self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret def pack_array_header(self, n): if n >= 2**32: - raise PackValueError + raise ValueError self._pack_array_header(n) - ret = self._buffer.getvalue() if self._autoreset: - self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret def pack_map_header(self, n): if n >= 2**32: - raise PackValueError + raise ValueError self._pack_map_header(n) - ret = self._buffer.getvalue() if self._autoreset: - self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret def pack_ext_type(self, typecode, data): if not isinstance(typecode, int): @@ -901,77 +841,89 @@ def pack_ext_type(self, typecode, data): if not isinstance(data, bytes): raise TypeError("data must have bytes type") L = len(data) - if L > 0xffffffff: - raise PackValueError("Too large data") + if L > 0xFFFFFFFF: + raise ValueError("Too large data") if L == 1: - self._buffer.write(b'\xd4') + self._buffer.write(b"\xd4") elif L == 2: - self._buffer.write(b'\xd5') + self._buffer.write(b"\xd5") elif L == 4: - self._buffer.write(b'\xd6') + self._buffer.write(b"\xd6") elif L == 8: - self._buffer.write(b'\xd7') + self._buffer.write(b"\xd7") elif L == 16: - self._buffer.write(b'\xd8') - elif L <= 0xff: - self._buffer.write(b'\xc7' + struct.pack('B', L)) - elif L <= 0xffff: - self._buffer.write(b'\xc8' + struct.pack('>H', L)) + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(b"\xc7" + struct.pack("B", L)) + elif L <= 0xFFFF: + self._buffer.write(b"\xc8" + struct.pack(">H", L)) else: - self._buffer.write(b'\xc9' + struct.pack('>I', L)) - self._buffer.write(struct.pack('B', typecode)) + self._buffer.write(b"\xc9" + struct.pack(">I", L)) + self._buffer.write(struct.pack("B", typecode)) self._buffer.write(data) def _pack_array_header(self, n): - if n <= 0x0f: - return self._buffer.write(struct.pack('B', 0x90 + n)) - if n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xdc, n)) - if n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xdd, n)) - raise PackValueError("Array is too large") + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x90 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDC, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDD, n)) + raise ValueError("Array is too large") def _pack_map_header(self, n): - if n <= 0x0f: - return self._buffer.write(struct.pack('B', 0x80 + n)) - if n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xde, n)) - if n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xdf, n)) - raise PackValueError("Dict is too large") + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x80 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDE, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDF, n)) + raise ValueError("Dict is too large") def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for (k, v) in pairs: + for k, v in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) def _pack_raw_header(self, n): - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) + if n <= 0x1F: + self._buffer.write(struct.pack("B", 0xA0 + n)) + elif self._use_bin_type and n <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xD9, n)) + elif n <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xDA, n)) + elif n <= 0xFFFFFFFF: + self._buffer.write(struct.pack(">BI", 0xDB, n)) else: - raise PackValueError('Raw is too large') + raise ValueError("Raw is too large") def _pack_bin_header(self, n): if not self._use_bin_type: return self._pack_raw_header(n) - elif n <= 0xff: - return self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xc6, n)) + elif n <= 0xFF: + return self._buffer.write(struct.pack(">BB", 0xC4, n)) + elif n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xC5, n)) + elif n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xC6, n)) else: - raise PackValueError('Bin is too large') + raise ValueError("Bin is too large") def bytes(self): + """Return internal buffer contents as bytes object""" return self._buffer.getvalue() def reset(self): - self._buffer = StringIO() + """Reset internal buffer. + + This method is useful only when autoreset=False. + """ + self._buffer = BytesIO() + + def getbuffer(self): + """Return view of internal buffer.""" + if _USING_STRINGBUILDER: + return memoryview(self.bytes()) + else: + return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index 4f3ce1d9..edf3a3fe 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,15 +21,12 @@ #include "sysdep.h" #include #include +#include #ifdef __cplusplus extern "C" { #endif -#ifdef _MSC_VER -#define inline __inline -#endif - typedef struct msgpack_packer { char *buf; size_t length; @@ -67,53 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" -// return -2 when o is too long -static inline int -msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) -{ -#if PY_MAJOR_VERSION >= 3 - assert(PyUnicode_Check(o)); - - Py_ssize_t len; - const char* buf = PyUnicode_AsUTF8AndSize(o, &len); - if (buf == NULL) - return -1; - - if (len > limit) { - return -2; - } - - int ret = msgpack_pack_raw(pk, len); - if (ret) return ret; - - return msgpack_pack_raw_body(pk, buf, len); -#else - PyObject *bytes; - Py_ssize_t len; - int ret; - - // py2 - bytes = PyUnicode_AsUTF8String(o); - if (bytes == NULL) - return -1; - - len = PyString_GET_SIZE(bytes); - if (len > limit) { - Py_DECREF(bytes); - return -2; - } - - ret = msgpack_pack_raw(pk, len); - if (ret) { - Py_DECREF(bytes); - return -1; - } - ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); - Py_DECREF(bytes); - return ret; -#endif -} - #ifdef __cplusplus } #endif diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 5d1088f4..b8959f02 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,18 +37,6 @@ * Integer */ -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -123,18 +111,6 @@ do { \ } \ } while(0) -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ -} while(0) - #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -264,49 +240,6 @@ do { \ } while(0) -static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) -{ - msgpack_pack_real_uint64(x, d); -} - -static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} - -static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -//#ifdef msgpack_pack_inline_func_cint - static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -372,192 +305,37 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 +#if SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + if (sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); + } else { + msgpack_pack_real_int64(x, d); + } #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LLONG_MAX) -#if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) -{ -#if defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(USHRT_MAX) -#if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(UINT_MAX) -#if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) -{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULONG_MAX) -#if ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif } static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULLONG_MAX) -#if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { msgpack_pack_real_uint64(x, d); } -#endif -} - -//#undef msgpack_pack_inline_func_cint -//#endif - /* @@ -566,24 +344,26 @@ if(sizeof(unsigned long long) == 2) { static inline int msgpack_pack_float(msgpack_packer* x, float d) { - union { float f; uint32_t i; } mem; - mem.f = d; unsigned char buf[5]; - buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); + buf[0] = 0xca; + +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack4(d, (char *)&buf[1], 0); +#else + _PyFloat_Pack4(d, &buf[1], 0); +#endif msgpack_pack_append_buffer(x, buf, 5); } static inline int msgpack_pack_double(msgpack_packer* x, double d) { - union { double f; uint64_t i; } mem; - mem.f = d; unsigned char buf[9]; buf[0] = 0xcb; -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack8(d, (char *)&buf[1], 0); +#else + _PyFloat_Pack8(d, &buf[1], 0); #endif - _msgpack_store64(&buf[1], mem.i); msgpack_pack_append_buffer(x, buf, 9); } @@ -766,6 +546,39 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) } +/* + * Pack Timestamp extension type. Follows msgpack-c pack_template.h. + */ +static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds) +{ + if ((seconds >> 34) == 0) { + /* seconds is unsigned and fits in 34 bits */ + uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds; + if ((data64 & 0xffffffff00000000L) == 0) { + /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */ + unsigned char buf[4]; + uint32_t data32 = (uint32_t)data64; + msgpack_pack_ext(x, -1, 4); + _msgpack_store32(buf, data32); + msgpack_pack_raw_body(x, buf, 4); + } else { + /* timestamp64 */ + unsigned char buf[8]; + msgpack_pack_ext(x, -1, 8); + _msgpack_store64(buf, data64); + msgpack_pack_raw_body(x, buf, 8); + + } + } else { + /* seconds is signed or >34bits */ + unsigned char buf[12]; + _msgpack_store32(&buf[0], nanoseconds); + _msgpack_store64(&buf[4], seconds); + msgpack_pack_ext(x, -1, 12); + msgpack_pack_raw_body(x, buf, 12); + } + return 0; +} #undef msgpack_pack_append_buffer @@ -775,11 +588,9 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) #undef TAKE8_32 #undef TAKE8_64 -#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index ed9c1bc0..70673004 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else -#include /* __BYTE_ORDER */ +#else /* _WIN32 */ +#include /* ntohs, ntohl */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ @@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# elif defined(_byteswap_ulong) || defined(_MSC_VER) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +#if defined(_byteswap_uint64) || defined(_MSC_VER) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 63e5543b..58a2f4f5 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -23,10 +23,14 @@ typedef struct unpack_user { bool use_list; bool raw; bool has_pairs_hook; + bool strict_map_key; + int timestamp; PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; - const char *encoding; + PyObject *timestamp_t; + PyObject *giga; + PyObject *utc; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -43,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong((long)d); + PyObject *p = PyLong_FromLong((long)d); if (!p) return -1; *o = p; @@ -57,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromSize_t((size_t)d); + PyObject *p = PyLong_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -70,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } if (!p) return -1; @@ -80,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong(d); + PyObject *p = PyLong_FromLong(d); if (!p) return -1; *o = p; @@ -103,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } *o = p; return 0; @@ -188,6 +192,13 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { + if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { + PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); + return -1; + } + if (PyUnicode_CheckExact(k)) { + PyUnicode_InternInPlace(&k); + } if (u->has_pairs_hook) { msgpack_unpack_object item = PyTuple_Pack(2, k, v); if (!item) @@ -227,9 +238,7 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* PyObject *py; - if (u->encoding) { - py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else if (u->raw) { + if (u->raw) { py = PyBytes_FromStringAndSize(p, l); } else { py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors); @@ -254,10 +263,43 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* return 0; } -static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, - unsigned int length, msgpack_unpack_object* o) +typedef struct msgpack_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; +} msgpack_timestamp; + +/* + * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. + */ +static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { + switch (buflen) { + case 4: + ts->tv_nsec = 0; + { + uint32_t v = _msgpack_load32(uint32_t, buf); + ts->tv_sec = (int64_t)v; + } + return 0; + case 8: { + uint64_t value =_msgpack_load64(uint64_t, buf); + ts->tv_nsec = (uint32_t)(value >> 34); + ts->tv_sec = value & 0x00000003ffffffffLL; + return 0; + } + case 12: + ts->tv_nsec = _msgpack_load32(uint32_t, buf); + ts->tv_sec = _msgpack_load64(int64_t, buf + 4); + return 0; + default: + return -1; + } +} + +#include "datetime.h" + +static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) { - PyObject *py; int8_t typecode = (int8_t)*pos++; if (!u->ext_hook) { PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); @@ -267,12 +309,79 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); return -1; } + + PyObject *py = NULL; // length also includes the typecode, so the actual data is length-1 -#if PY_MAJOR_VERSION == 2 - py = PyObject_CallFunction(u->ext_hook, "(is#)", (int)typecode, pos, (Py_ssize_t)length-1); -#else - py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); -#endif + if (typecode == -1) { + msgpack_timestamp ts; + if (unpack_timestamp(pos, length-1, &ts) < 0) { + return -1; + } + + if (u->timestamp == 2) { // int + PyObject *a = PyLong_FromLongLong(ts.tv_sec); + if (a == NULL) return -1; + + PyObject *c = PyNumber_Multiply(a, u->giga); + Py_DECREF(a); + if (c == NULL) { + return -1; + } + + PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec); + if (b == NULL) { + Py_DECREF(c); + return -1; + } + + py = PyNumber_Add(c, b); + Py_DECREF(c); + Py_DECREF(b); + } + else if (u->timestamp == 0) { // Timestamp + py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); + } + else if (u->timestamp == 3) { // datetime + // Calculate datetime using epoch + delta + // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps + PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); + if (epoch == NULL) { + return -1; + } + + PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); + if (d == NULL) { + Py_DECREF(epoch); + return -1; + } + + py = PyNumber_Add(epoch, d); + + Py_DECREF(epoch); + Py_DECREF(d); + } + else { // float + PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); + if (a == NULL) return -1; + + PyObject *b = PyNumber_TrueDivide(a, u->giga); + Py_DECREF(a); + if (b == NULL) return -1; + + PyObject *c = PyLong_FromLongLong(ts.tv_sec); + if (c == NULL) { + Py_DECREF(b); + return -1; + } + + a = PyNumber_Add(b, c); + Py_DECREF(b); + Py_DECREF(c); + py = a; + } + } else { + py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); + } if (!py) return -1; *o = py; diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h new file mode 100644 index 00000000..c14a3c2b --- /dev/null +++ b/msgpack/unpack_container_header.h @@ -0,0 +1,51 @@ +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; +} + diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 525dea24..cce29e7a 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -75,8 +75,7 @@ static inline void unpack_clear(unpack_context *ctx) Py_CLEAR(ctx->stack[0].obj); } -template -static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -123,7 +122,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize goto _fixed_trail_again #define start_container(func, count_, ct_) \ - if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ + if(top >= MSGPACK_EMBED_STACK_SIZE) { ret = -3; goto _end; } \ if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ if((count_) == 0) { obj = stack[top].obj; \ if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ @@ -132,27 +131,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize stack[top].size = count_; \ stack[top].count = 0; \ ++top; \ - /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ - /*printf("stack push %d\n", top);*/ \ - /* FIXME \ - if(top >= stack_size) { \ - if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ - size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ - size_t nsize = csize * 2; \ - unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ - if(tmp == NULL) { goto _failed; } \ - memcpy(tmp, ctx->stack, csize); \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ - } else { \ - size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ - unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \ - if(tmp == NULL) { goto _failed; } \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = stack_size * 2; \ - } \ - } \ - */ \ goto _header_again #define NEXT_CS(p) ((unsigned int)*p & 0x1f) @@ -229,7 +207,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize case 0xdf: // map 32 again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); default: - goto _failed; + ret = -2; + goto _end; } SWITCH_RANGE(0xa0, 0xbf) // FixRaw again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); @@ -239,7 +218,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); SWITCH_RANGE_DEFAULT - goto _failed; + ret = -2; + goto _end; SWITCH_RANGE_END // end CS_HEADER @@ -262,17 +242,21 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - union { uint32_t i; float f; } mem; - mem.i = _msgpack_load32(uint32_t,n); - push_fixed_value(_float, mem.f); } + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack4((const char*)n, 0); +#else + f = _PyFloat_Unpack4((unsigned char*)n, 0); +#endif + push_fixed_value(_float, f); } case CS_DOUBLE: { - union { uint64_t i; double f; } mem; - mem.i = _msgpack_load64(uint64_t,n); -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack8((const char*)n, 0); +#else + f = _PyFloat_Unpack8((unsigned char*)n, 0); #endif - push_fixed_value(_double, mem.f); } + push_fixed_value(_double, f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); case CS_UINT_16: @@ -401,6 +385,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef construct_cb } +#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -412,68 +397,27 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef again_fixed_trail_if_zero #undef start_container -template -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; +static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(1, ctx, data, len, off); +} +static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(0, ctx, data, len, off); } -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END - -static const execute_fn unpack_construct = &unpack_execute; -static const execute_fn unpack_skip = &unpack_execute; -static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; - -#undef NEXT_CS +#define unpack_container_header read_array_header +#define fixed_offset 0x90 +#define var_offset 0xdc +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +#define unpack_container_header read_map_header +#define fixed_offset 0x80 +#define var_offset 0xde +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..b1628322 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +# 75.3.0 is the latest version supporting Python 3.8 +requires = ["setuptools >= 75.3.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "msgpack" +dynamic = ["version"] +# `license = "Apache-2.0"` is preferred. But keep old syntax for Python 3.8 compatibility. +# https://github.com/msgpack/msgpack-python/pull/637 +license = {text="Apache 2.0"} +authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] +description = "MessagePack serializer" +readme = "README.md" +keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Topic :: File Formats", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + +[project.urls] +Homepage = "https://msgpack.org/" +Documentation = "https://msgpack-python.readthedocs.io/" +Repository = "https://github.com/msgpack/msgpack-python/" +Tracker = "https://github.com/msgpack/msgpack-python/issues" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" + +[tool.setuptools] +# Do not install C/C++/Cython source files +include-package-data = false + +[tool.setuptools.dynamic] +version = {attr = "msgpack.__version__"} + +[tool.ruff] +line-length = 100 +target-version = "py38" +lint.select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + #"UP", pyupgrade +] diff --git a/requirements.txt b/requirements.txt index cd54e6df..5d2e20b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -Cython==0.27.3 +Cython~=3.1.2 diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index c252d816..4029e9ed --- a/setup.py +++ b/setup.py @@ -1,143 +1,32 @@ #!/usr/bin/env python -# coding: utf-8 -import io import os import sys -from glob import glob -from distutils.command.sdist import sdist -from setuptools import setup, Extension -from distutils.command.build_ext import build_ext +from setuptools import Extension, setup -# for building transitional package. -TRANSITIONAL = False - -class NoCython(Exception): - pass - -try: - import Cython.Compiler.Main as cython_compiler - have_cython = True -except ImportError: - have_cython = False - - -def cythonize(src): - sys.stderr.write("cythonize: %r\n" % (src,)) - cython_compiler.compile([src], cplus=True) - -def ensure_source(src): - pyx = os.path.splitext(src)[0] + '.pyx' - - if not os.path.exists(src): - if not have_cython: - raise NoCython - cythonize(pyx) - elif (os.path.exists(pyx) and - os.stat(src).st_mtime < os.stat(pyx).st_mtime and - have_cython): - cythonize(pyx) - return src - - -class BuildExt(build_ext): - def build_extension(self, ext): - try: - ext.sources = list(map(ensure_source, ext.sources)) - except NoCython: - print("WARNING") - print("Cython is required for building extension from checkout.") - print("Install Cython >= 0.16 or install msgpack from PyPI.") - print("Falling back to pure Python implementation.") - return - try: - return build_ext.build_extension(self, ext) - except Exception as e: - print("WARNING: Failed to compile extension modules.") - print("msgpack uses fallback pure python implementation.") - print(e) - - -exec(open('msgpack/_version.py').read()) - -version_str = '.'.join(str(x) for x in version[:3]) -if len(version) > 3 and version[3] != 'final': - version_str += version[3] - -# take care of extension modules. -if have_cython: - class Sdist(sdist): - def __init__(self, *args, **kwargs): - for src in glob('msgpack/*.pyx'): - cythonize(src) - sdist.__init__(self, *args, **kwargs) -else: - Sdist = sdist +PYPY = hasattr(sys, "pypy_version_info") libraries = [] -if sys.platform == 'win32': - libraries.append('ws2_32') - -if sys.byteorder == 'big': - macros = [('__BIG_ENDIAN__', '1')] -else: - macros = [('__LITTLE_ENDIAN__', '1')] - +macros = [] ext_modules = [] -if not hasattr(sys, 'pypy_version_info'): - ext_modules.append(Extension('msgpack._packer', - sources=['msgpack/_packer.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) - ext_modules.append(Extension('msgpack._unpacker', - sources=['msgpack/_unpacker.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) -del libraries, macros +if sys.platform == "win32": + libraries.append("ws2_32") + macros = [("__LITTLE_ENDIAN__", "1")] + +if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): + ext_modules.append( + Extension( + "msgpack._cmsgpack", + sources=["msgpack/_cmsgpack.c"], + libraries=libraries, + include_dirs=["."], + define_macros=macros, + ) + ) +del libraries, macros -desc = 'MessagePack (de)serializer.' -with io.open('README.rst', encoding='utf-8') as f: - long_desc = f.read() -del f - -name = 'msgpack' - -if TRANSITIONAL: - name = 'msgpack-python' - long_desc = "This package is deprecated. Install msgpack instead." - -setup(name=name, - author='INADA Naoki', - author_email='songofacandy@gmail.com', - version=version_str, - cmdclass={'build_ext': BuildExt, 'sdist': Sdist}, - ext_modules=ext_modules, - packages=['msgpack'], - description=desc, - long_description=long_desc, - long_description_content_type="text/x-rst", - url='https://msgpack.org/', - project_urls = { - 'Documentation': 'https://msgpack-python.readthedocs.io/', - 'Source': 'https://github.com/msgpack/msgpack-python', - 'Tracker': 'https://github.com/msgpack/msgpack-python/issues', - }, - license='Apache 2.0', - classifiers=[ - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - ], +setup( + ext_modules=ext_modules, + packages=["msgpack"], ) diff --git a/test/test_buffer.py b/test/test_buffer.py index 87f359f9..2c5a14c5 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,29 +1,49 @@ -#!/usr/bin/env python -# coding: utf-8 +from pytest import raises -from msgpack import packb, unpackb +from msgpack import Packer, packb, unpackb def test_unpack_buffer(): from array import array - buf = array('b') - buf.fromstring(packb((b'foo', b'bar'))) + + buf = array("b") + buf.frombytes(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) - assert [b'foo', b'bar'] == obj + assert [b"foo", b"bar"] == obj def test_unpack_bytearray(): - buf = bytearray(packb(('foo', 'bar'))) + buf = bytearray(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) - assert [b'foo', b'bar'] == obj + assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) def test_unpack_memoryview(): - buf = bytearray(packb(('foo', 'bar'))) + buf = bytearray(packb((b"foo", b"bar"))) view = memoryview(buf) obj = unpackb(view, use_list=1) - assert [b'foo', b'bar'] == obj + assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_packer_getbuffer(): + packer = Packer(autoreset=False) + packer.pack_array_header(2) + packer.pack(42) + packer.pack("hello") + buffer = packer.getbuffer() + assert isinstance(buffer, memoryview) + assert bytes(buffer) == b"\x92*\xa5hello" + + if Packer.__module__ == "msgpack._cmsgpack": # only for Cython + # cython Packer supports buffer protocol directly + assert bytes(packer) == b"\x92*\xa5hello" + + with raises(BufferError): + packer.pack(42) + buffer.release() + packer.pack(42) + assert bytes(packer) == b"\x92*\xa5hello*" diff --git a/test/test_case.py b/test/test_case.py index 5a4bb6c4..c4c615e3 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,102 +1,136 @@ #!/usr/bin/env python -# coding: utf-8 - from msgpack import packb, unpackb -def check(length, obj): - v = packb(obj) - assert len(v) == length, \ - "%r length should be %r but get %r" % (obj, length, len(v)) - assert unpackb(v, use_list=0) == obj +def check(length, obj, use_bin_type=True): + v = packb(obj, use_bin_type=use_bin_type) + assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" + assert unpackb(v, use_list=0, raw=not use_bin_type) == obj + def test_1(): - for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, - -((1<<5)-1), -(1<<5)]: + for o in [ + None, + True, + False, + 0, + 1, + (1 << 6), + (1 << 7) - 1, + -1, + -((1 << 5) - 1), + -(1 << 5), + ]: check(1, o) + def test_2(): - for o in [1 << 7, (1 << 8) - 1, - -((1<<5)+1), -(1<<7) - ]: + for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: check(2, o) + def test_3(): - for o in [1 << 8, (1 << 16) - 1, - -((1<<7)+1), -(1<<15)]: + for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: check(3, o) + def test_5(): - for o in [1 << 16, (1 << 32) - 1, - -((1<<15)+1), -(1<<31)]: + for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: check(5, o) + def test_9(): - for o in [1 << 32, (1 << 64) - 1, - -((1<<31)+1), -(1<<63), - 1.0, 0.1, -0.1, -1.0]: + for o in [ + 1 << 32, + (1 << 64) - 1, + -((1 << 31) + 1), + -(1 << 63), + 1.0, + 0.1, + -0.1, + -1.0, + ]: check(9, o) def check_raw(overhead, num): - check(num + overhead, b" " * num) + check(num + overhead, b" " * num, use_bin_type=False) + def test_fixraw(): check_raw(1, 0) - check_raw(1, (1<<5) - 1) + check_raw(1, (1 << 5) - 1) + def test_raw16(): - check_raw(3, 1<<5) - check_raw(3, (1<<16) - 1) + check_raw(3, 1 << 5) + check_raw(3, (1 << 16) - 1) + def test_raw32(): - check_raw(5, 1<<16) + check_raw(5, 1 << 16) def check_array(overhead, num): check(num + overhead, (None,) * num) + def test_fixarray(): check_array(1, 0) check_array(1, (1 << 4) - 1) + def test_array16(): check_array(3, 1 << 4) - check_array(3, (1<<16)-1) + check_array(3, (1 << 16) - 1) + def test_array32(): - check_array(5, (1<<16)) + check_array(5, (1 << 16)) def match(obj, buf): assert packb(obj) == buf - assert unpackb(buf, use_list=0) == obj + assert unpackb(buf, use_list=0, strict_map_key=False) == obj + def test_match(): cases = [ - (None, b'\xc0'), - (False, b'\xc2'), - (True, b'\xc3'), - (0, b'\x00'), - (127, b'\x7f'), - (128, b'\xcc\x80'), - (256, b'\xcd\x01\x00'), - (-1, b'\xff'), - (-33, b'\xd0\xdf'), - (-129, b'\xd1\xff\x7f'), - ({1:1}, b'\x81\x01\x01'), + (None, b"\xc0"), + (False, b"\xc2"), + (True, b"\xc3"), + (0, b"\x00"), + (127, b"\x7f"), + (128, b"\xcc\x80"), + (256, b"\xcd\x01\x00"), + (-1, b"\xff"), + (-33, b"\xd0\xdf"), + (-129, b"\xd1\xff\x7f"), + ({1: 1}, b"\x81\x01\x01"), (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), b'\x90'), - (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), - (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - ({}, b'\x80'), - (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), - (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), - ] + ((), b"\x90"), + ( + tuple(range(15)), + b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e", + ), + ( + tuple(range(16)), + b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + ), + ({}, b"\x80"), + ( + {x: x for x in range(15)}, + b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", + ), + ( + {x: x for x in range(16)}, + b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", + ), + ] for v, p in cases: match(v, p) -def test_unicode(): - assert unpackb(packb('foobar'), use_list=1) == b'foobar' +def test_unicode(): + assert unpackb(packb("foobar"), use_list=1) == "foobar" diff --git a/test/test_except.py b/test/test_except.py index 361d4ea3..b77ac800 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 + +import datetime from pytest import raises -from msgpack import packb, unpackb -import datetime +from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb class DummyException(Exception): @@ -19,13 +19,45 @@ def test_raise_on_find_unsupported_value(): def test_raise_from_object_hook(): def hook(obj): raise DummyException + raises(DummyException, unpackb, packb({}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook) - raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({"fizz": {"buzz": "spam"}}), object_hook=hook) + raises( + DummyException, + unpackb, + packb({"fizz": {"buzz": "spam"}}), + object_pairs_hook=hook, + ) def test_invalidvalue(): + incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97 + with raises(ValueError): + unpackb(incomplete) + + with raises(OutOfData): + unpacker = Unpacker() + unpacker.feed(incomplete) + unpacker.unpack() + + with raises(FormatError): + unpackb(b"\xc1") # (undefined tag) + + with raises(FormatError): + unpackb(b"\x91\xc1") # fixarray(len=1) [ (undefined tag) ] + + with raises(StackError): + unpackb(b"\x91" * 3000) # nested fixarray(len=1) + + +def test_strict_map_key(): + valid = {"unicode": 1, b"bytes": 2} + packed = packb(valid, use_bin_type=True) + assert valid == unpackb(packed, raw=False, strict_map_key=True) + + invalid = {42: 1} + packed = packb(invalid, use_bin_type=True) with raises(ValueError): - unpackb(b'\xd9\x97#DL_') + unpackb(packed, raw=False, strict_map_key=True) diff --git a/test/test_extension.py b/test/test_extension.py index d05d7ab9..aaf0fd92 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,5 +1,5 @@ -from __future__ import print_function import array + import msgpack from msgpack import ExtType @@ -9,65 +9,67 @@ def p(s): packer = msgpack.Packer() packer.pack_ext_type(0x42, s) return packer.bytes() - assert p(b'A') == b'\xd4\x42A' # fixext 1 - assert p(b'AB') == b'\xd5\x42AB' # fixext 2 - assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 - assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 - assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 - assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 - assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 - assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + + assert p(b"A") == b"\xd4\x42A" # fixext 1 + assert p(b"AB") == b"\xd5\x42AB" # fixext 2 + assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4 + assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8 + assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 + assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 + assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 + assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 def test_unpack_ext_type(): def check(b, expected): assert msgpack.unpackb(b) == expected - check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 - check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 - check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 - check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 - check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 - check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 - check(b'\xc8\x01\x23\x42' + b'A'*0x0123, - ExtType(0x42, b'A'*0x0123)) # ext 16 - check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, - ExtType(0x42, b'A'*0x00012345)) # ext 32 + check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1 + check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2 + check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4 + check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8 + check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16 + check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8 + check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16 + check( + b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345, + ExtType(0x42, b"A" * 0x00012345), + ) # ext 32 def test_extension_type(): def default(obj): - print('default called', obj) + print("default called", obj) if isinstance(obj, array.array): - typecode = 123 # application specific typecode - data = obj.tostring() + typecode = 123 # application specific typecode + try: + data = obj.tobytes() + except AttributeError: + data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknown type object %r" % (obj,)) + raise TypeError(f"Unknown type object {obj!r}") def ext_hook(code, data): - print('ext_hook called', code, data) + print("ext_hook called", code, data) assert code == 123 - obj = array.array('d') - obj.fromstring(data) + obj = array.array("d") + obj.frombytes(data) return obj - obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] + obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] s = msgpack.packb(obj, default=default) obj2 = msgpack.unpackb(s, ext_hook=ext_hook) assert obj == obj2 -import sys -if sys.version > '3': - long = int def test_overriding_hooks(): def default(obj): - if isinstance(obj, long): + if isinstance(obj, int): return {"__type__": "long", "__data__": str(obj)} else: return obj - obj = {"testval": long(1823746192837461928374619)} + obj = {"testval": 1823746192837461928374619} refobj = {"testval": default(obj["testval"])} refout = msgpack.packb(refobj) assert isinstance(refout, (str, bytes)) diff --git a/test/test_format.py b/test/test_format.py index 5fec0c3a..c06c87dc 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,70 +1,88 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import unpackb -def check(src, should, use_list=0): - assert unpackb(src, use_list=use_list) == should + +def check(src, should, use_list=0, raw=True): + assert unpackb(src, use_list=use_list, raw=raw, strict_map_key=False) == should + def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", - (None, False, True,)) + check(b"\x93\xc0\xc2\xc3", (None, False, True)) + def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", - ((0,64,127,), (-32,-16,-1,),) - ) + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1))) + def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", - ((),((None,),),), - ) + check(b"\x92\x90\x91\x91\xc0", ((), ((None,),))) + def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", - (b"", b"a", b"bc", b"def",), - ) + check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def")) + def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, True:{None:{}}}, - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) + def testUnsignedInt(): check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), - ) + b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" + b"\xce\xff\xff\xff\xff", + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295), + ) + def testSignedInt(): - check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) + check( + b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" + b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" + b"\xd2\xff\xff\xff\xff", + (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1), + ) + def testRaw(): - check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + (b"", b"a", b"ab", b"", b"a", b"ab"), + ) + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab")) + ("", "a", "ab", "", "a", "ab"), + raw=False, + ) + def testArray(): - check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" + check( + b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" b"\xc2\xc3", - ((), (None,), (False,True), (), (None,), (False,True)) - ) + ((), (None,), (False, True), (), (None,), (False, True)), + ) + def testMap(): check( b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ({}, {None: False}, {True: False, None: False}, {}, - {None: False}, {True: False, None: False})) + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", + ( + {}, + {None: False}, + {True: False, None: False}, + {}, + {None: False}, + {True: False, None: False}, + ), + ) diff --git a/test/test_limits.py b/test/test_limits.py index 74e48c19..9b92b4d9 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,42 +1,46 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals import pytest from msgpack import ( - packb, unpackb, Packer, Unpacker, ExtType, - PackOverflowError, PackValueError, UnpackValueError, + ExtType, + Packer, + PackOverflowError, + PackValueError, + Unpacker, + UnpackValueError, + packb, + unpackb, ) def test_integer(): - x = -(2 ** 63) + x = -(2**63) assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): - packb(x-1) + packb(x - 1) - x = 2 ** 64 - 1 + x = 2**64 - 1 assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): - packb(x+1) + packb(x + 1) def test_array_header(): packer = Packer() - packer.pack_array_header(2**32-1) + packer.pack_array_header(2**32 - 1) with pytest.raises(PackValueError): packer.pack_array_header(2**32) def test_map_header(): packer = Packer() - packer.pack_map_header(2**32-1) + packer.pack_map_header(2**32 - 1) with pytest.raises(PackValueError): packer.pack_array_header(2**32) def test_max_str_len(): - d = 'x' * 3 + d = "x" * 3 packed = packb(d) unpacker = Unpacker(max_str_len=3, raw=False) @@ -50,7 +54,7 @@ def test_max_str_len(): def test_max_bin_len(): - d = b'x' * 3 + d = b"x" * 3 packed = packb(d, use_bin_type=True) unpacker = Unpacker(max_bin_len=3) @@ -64,7 +68,7 @@ def test_max_bin_len(): def test_max_array_len(): - d = [1,2,3] + d = [1, 2, 3] packed = packb(d) unpacker = Unpacker(max_array_len=3) @@ -81,11 +85,11 @@ def test_max_map_len(): d = {1: 2, 3: 4, 5: 6} packed = packb(d) - unpacker = Unpacker(max_map_len=3) + unpacker = Unpacker(max_map_len=3, strict_map_key=False) unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_map_len=2) + unpacker = Unpacker(max_map_len=2, strict_map_key=False) with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() @@ -105,11 +109,10 @@ def test_max_ext_len(): unpacker.unpack() - # PyPy fails following tests because of constant folding? # https://bugs.pypy.org/issue1721 -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_binary(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_binary(): # x = b'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # del x @@ -118,8 +121,8 @@ def test_max_ext_len(): # packb(x) # # -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_string(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_string(): # x = 'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # x += 'y' @@ -127,10 +130,36 @@ def test_max_ext_len(): # packb(x) # # -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_array(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_array(): # x = [0] * (2**32 - 1) # assert unpackb(packb(x)) == x # x.append(0) # with pytest.raises(ValueError): # packb(x) + + +# auto max len + + +def test_auto_max_array_len(): + packed = b"\xde\x00\x06zz" + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=5, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() + + +def test_auto_max_map_len(): + # len(packed) == 6 -> max_map_len == 3 + packed = b"\xde\x00\x04zzz" + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=6, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() diff --git a/test/test_memoryview.py b/test/test_memoryview.py index f6d74edf..0a2a6f53 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,47 +1,26 @@ #!/usr/bin/env python -# coding: utf-8 from array import array -from msgpack import packb, unpackb -import sys - - -# For Python < 3: -# - array type only supports old buffer interface -# - array.frombytes is not available, must use deprecated array.fromstring -if sys.version_info[0] < 3: - def make_memoryview(obj): - return memoryview(buffer(obj)) - - def make_array(f, data): - a = array(f) - a.fromstring(data) - return a - def get_data(a): - return a.tostring() -else: - make_memoryview = memoryview +from msgpack import packb, unpackb - def make_array(f, data): - a = array(f) - a.frombytes(data) - return a - def get_data(a): - return a.tobytes() +def make_array(f, data): + a = array(f) + a.frombytes(data) + return a def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = get_data(original_array) - view = make_memoryview(original_array) + original_data = original_array.tobytes() + view = memoryview(original_array) # pack, unpack, and reconstruct array packed = packb(view, use_bin_type=use_bin_type) - unpacked = unpackb(packed) + unpacked = unpackb(packed, raw=(not use_bin_type)) reconstructed_array = make_array(format, unpacked) # check that we got the right amount of data @@ -49,64 +28,72 @@ def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # check packed header assert packed[:1] == expected_header # check packed length prefix, if any - assert packed[1:1+len(expected_prefix)] == expected_prefix + assert packed[1 : 1 + len(expected_prefix)] == expected_prefix # check packed data - assert packed[1+len(expected_prefix):] == original_data + assert packed[1 + len(expected_prefix) :] == original_data # check array unpacked correctly assert original_array == reconstructed_array def test_fixstr_from_byte(): - _runtest('B', 1, b'\xa1', b'', False) - _runtest('B', 31, b'\xbf', b'', False) + _runtest("B", 1, b"\xa1", b"", False) + _runtest("B", 31, b"\xbf", b"", False) def test_fixstr_from_float(): - _runtest('f', 4, b'\xa4', b'', False) - _runtest('f', 28, b'\xbc', b'', False) + _runtest("f", 4, b"\xa4", b"", False) + _runtest("f", 28, b"\xbc", b"", False) def test_str16_from_byte(): - _runtest('B', 2**8, b'\xda', b'\x01\x00', False) - _runtest('B', 2**16-1, b'\xda', b'\xff\xff', False) + _runtest("B", 2**8, b"\xda", b"\x01\x00", False) + _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False) def test_str16_from_float(): - _runtest('f', 2**8, b'\xda', b'\x01\x00', False) - _runtest('f', 2**16-4, b'\xda', b'\xff\xfc', False) + _runtest("f", 2**8, b"\xda", b"\x01\x00", False) + _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False) def test_str32_from_byte(): - _runtest('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_str32_from_float(): - _runtest('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_bin8_from_byte(): - _runtest('B', 1, b'\xc4', b'\x01', True) - _runtest('B', 2**8-1, b'\xc4', b'\xff', True) + _runtest("B", 1, b"\xc4", b"\x01", True) + _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True) def test_bin8_from_float(): - _runtest('f', 4, b'\xc4', b'\x04', True) - _runtest('f', 2**8-4, b'\xc4', b'\xfc', True) + _runtest("f", 4, b"\xc4", b"\x04", True) + _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True) def test_bin16_from_byte(): - _runtest('B', 2**8, b'\xc5', b'\x01\x00', True) - _runtest('B', 2**16-1, b'\xc5', b'\xff\xff', True) + _runtest("B", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True) def test_bin16_from_float(): - _runtest('f', 2**8, b'\xc5', b'\x01\x00', True) - _runtest('f', 2**16-4, b'\xc5', b'\xff\xfc', True) + _runtest("f", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True) def test_bin32_from_byte(): - _runtest('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) def test_bin32_from_float(): - _runtest('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_multidim_memoryview(): + # See https://github.com/msgpack/msgpack-python/issues/526 + view = memoryview(b"\00" * 6) + data = view.cast(view.format, (3, 2)) + packed = packb(data) + assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_newspec.py b/test/test_newspec.py index ab05029d..9e2f9be5 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,88 +1,90 @@ -# coding: utf-8 - -from msgpack import packb, unpackb, ExtType +from msgpack import ExtType, packb, unpackb def test_str8(): - header = b'\xd9' - data = b'x' * 32 + header = b"\xd9" + data = b"x" * 32 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\x20' + assert b[0:2] == header + b"\x20" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() - data = b'x' * 255 + data = b"x" * 255 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\xff' + assert b[0:2] == header + b"\xff" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() def test_bin8(): - header = b'\xc4' - data = b'' + header = b"\xc4" + data = b"" b = packb(data, use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\x00' + assert b[0:2] == header + b"\x00" assert b[2:] == data assert unpackb(b) == data - data = b'x' * 255 + data = b"x" * 255 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\xff' + assert b[0:2] == header + b"\xff" assert b[2:] == data assert unpackb(b) == data def test_bin16(): - header = b'\xc5' - data = b'x' * 256 + header = b"\xc5" + data = b"x" * 256 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b'\x01\x00' + assert b[1:3] == b"\x01\x00" assert b[3:] == data assert unpackb(b) == data - data = b'x' * 65535 + data = b"x" * 65535 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b'\xff\xff' + assert b[1:3] == b"\xff\xff" assert b[3:] == data assert unpackb(b) == data def test_bin32(): - header = b'\xc6' - data = b'x' * 65536 + header = b"\xc6" + data = b"x" * 65536 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 5 assert b[0:1] == header - assert b[1:5] == b'\x00\x01\x00\x00' + assert b[1:5] == b"\x00\x01\x00\x00" assert b[5:] == data assert unpackb(b) == data + def test_ext(): def check(ext, packed): assert packb(ext) == packed assert unpackb(packed) == ext - check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 - check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 - check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 - check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 - check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + + check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1 + check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2 + check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4 + check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8 + check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16 # ext 8 - check(ExtType(0x42, b''), b'\xc7\x00\x42') - check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + check(ExtType(0x42, b""), b"\xc7\x00\x42") + check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255) # ext 16 - check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) - check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256) + check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF) # ext 32 - check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000) # needs large memory - #check(ExtType(0x42, b'Z'*0xffffffff), + # check(ExtType(0x42, b'Z'*0xffffffff), # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/test/test_obj.py b/test/test_obj.py index 390c1b62..23be06d5 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,67 +1,82 @@ #!/usr/bin/env python -# coding: utf-8 from pytest import raises + from msgpack import packb, unpackb + def _decode_complex(obj): - if b'__complex__' in obj: - return complex(obj[b'real'], obj[b'imag']) + if b"__complex__" in obj: + return complex(obj[b"real"], obj[b"imag"]) return obj + def _encode_complex(obj): if isinstance(obj, complex): - return {b'__complex__': True, b'real': 1, b'imag': 2} + return {b"__complex__": True, b"real": 1, b"imag": 2} return obj + def test_encode_hook(): - packed = packb([3, 1+2j], default=_encode_complex) + packed = packb([3, 1 + 2j], default=_encode_complex) unpacked = unpackb(packed, use_list=1) - assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2} + assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2} + def test_decode_hook(): - packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) + packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}]) unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) - assert unpacked[1] == 1+2j + assert unpacked[1] == 1 + 2j + def test_decode_pairs_hook(): packed = packb([3, {1: 2, 3: 4}]) prod_sum = 1 * 2 + 3 * 4 - unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1) + unpacked = unpackb( + packed, + object_pairs_hook=lambda lst: sum(k * v for k, v in lst), + use_list=1, + strict_map_key=False, + ) assert unpacked[1] == prod_sum + def test_only_one_obj_hook(): with raises(TypeError): - unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x) + def test_bad_hook(): with raises(TypeError): - packed = packb([3, 1+2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) + packed = packb([3, 1 + 2j], default=lambda o: o) + unpackb(packed, use_list=1) + def _arr_to_str(arr): - return ''.join(str(c) for c in arr) + return "".join(str(c) for c in arr) + def test_array_hook(): - packed = packb([1,2,3]) + packed = packb([1, 2, 3]) unpacked = unpackb(packed, list_hook=_arr_to_str, use_list=1) - assert unpacked == '123' + assert unpacked == "123" class DecodeError(Exception): pass + def bad_complex_decoder(o): raise DecodeError("Ooops!") def test_an_exception_in_objecthook1(): with raises(DecodeError): - packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) - unpackb(packed, object_hook=bad_complex_decoder) + packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}}) + unpackb(packed, object_hook=bad_complex_decoder, strict_map_key=False) def test_an_exception_in_objecthook2(): with raises(DecodeError): - packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) - unpackb(packed, list_hook=bad_complex_decoder, use_list=1) + packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) + unpackb(packed, list_hook=bad_complex_decoder, use_list=1, strict_map_key=False) diff --git a/test/test_pack.py b/test/test_pack.py index b447f9c3..374d1549 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,33 +1,58 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals import struct -from pytest import raises, xfail - -from msgpack import packb, unpackb, Unpacker, Packer - from collections import OrderedDict from io import BytesIO +import pytest + +from msgpack import Packer, Unpacker, packb, unpackb + + def check(data, use_list=False): - re = unpackb(packb(data), use_list=use_list) + re = unpackb(packb(data), use_list=use_list, strict_map_key=False) assert re == data + def testPack(): test_data = [ - 0, 1, 127, 128, 255, 256, 65535, 65536, 4294967295, 4294967296, - -1, -32, -33, -128, -129, -32768, -32769, -4294967296, -4294967297, - 1.0, - b"", b"a", b"a"*31, b"a"*32, - None, True, False, - (), ((),), ((), None,), + 0, + 1, + 127, + 128, + 255, + 256, + 65535, + 65536, + 4294967295, + 4294967296, + -1, + -32, + -33, + -128, + -129, + -32768, + -32769, + -4294967296, + -4294967297, + 1.0, + b"", + b"a", + b"a" * 31, + b"a" * 32, + None, + True, + False, + (), + ((),), + ((), None), {None: 0}, - (1<<23), - ] + (1 << 23), + ] for td in test_data: check(td) + def testPackUnicode(): test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: @@ -38,57 +63,48 @@ def testPackUnicode(): re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() assert re == td -def testPackUTF32(): # deprecated - try: - test_data = [ - "", - "abcd", - ["defgh"], - "Русский текст", - ] - for td in test_data: - re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') - assert re == td - except LookupError as e: - xfail(e) def testPackBytes(): - test_data = [ - b"", b"abcd", (b"defgh",), - ] + test_data = [b"", b"abcd", (b"defgh",)] for td in test_data: check(td) + def testPackByteArrays(): - test_data = [ - bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),), - ] + test_data = [bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),)] for td in test_data: check(td) -def testIgnoreUnicodeErrors(): # deprecated - re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) + +def testIgnoreUnicodeErrors(): + re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") assert re == "abcdef" + def testStrictUnicodeUnpack(): - with raises(UnicodeDecodeError): - unpackb(packb(b'abc\xeddef'), raw=False, use_list=1) + packed = packb(b"abc\xeddef", use_bin_type=False) + with pytest.raises(UnicodeDecodeError): + unpackb(packed, raw=False, use_list=1) -def testStrictUnicodePack(): # deprecated - with raises(UnicodeEncodeError): - packb("abc\xeddef", encoding='ascii', unicode_errors='strict') -def testIgnoreErrorsPack(): # deprecated - re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1) +def testIgnoreErrorsPack(): + re = unpackb( + packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), + raw=False, + use_list=1, + ) assert re == "abcdef" + def testDecodeBinary(): - re = unpackb(packb(b"abc"), encoding=None, use_list=1) + re = unpackb(packb(b"abc"), use_list=1) assert re == b"abc" + def testPackFloat(): - assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack(str('>f'), 1.0) - assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack(str('>d'), 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) + def testArraySize(sizes=[0, 5, 50, 1000]): bio = BytesIO() @@ -103,6 +119,7 @@ def testArraySize(sizes=[0, 5, 50, 1000]): for size in sizes: assert unpacker.unpack() == list(range(size)) + def test_manualreset(sizes=[0, 5, 50, 1000]): packer = Packer(autoreset=False) for size in sizes: @@ -116,7 +133,8 @@ def test_manualreset(sizes=[0, 5, 50, 1000]): assert unpacker.unpack() == list(range(size)) packer.reset() - assert packer.bytes() == b'' + assert packer.bytes() == b"" + def testMapSize(sizes=[0, 5, 50, 1000]): bio = BytesIO() @@ -124,27 +142,40 @@ def testMapSize(sizes=[0, 5, 50, 1000]): for size in sizes: bio.write(packer.pack_map_header(size)) for i in range(size): - bio.write(packer.pack(i)) # key - bio.write(packer.pack(i * 2)) # value + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value bio.seek(0) - unpacker = Unpacker(bio) + unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: - assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + assert unpacker.unpack() == {i: i * 2 for i in range(size)} def test_odict(): - seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] + seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)] od = OrderedDict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) + def pair_hook(seq): return list(seq) + assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq def test_pairlist(): - pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')] + pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] packer = Packer() packed = packer.pack_map_pairs(pairlist) - unpacked = unpackb(packed, object_pairs_hook=list) + unpacked = unpackb(packed, object_pairs_hook=list, strict_map_key=False) assert pairlist == unpacked + + +def test_get_buffer(): + packer = Packer(autoreset=0, use_bin_type=True) + packer.pack([1, 2]) + strm = BytesIO() + strm.write(packer.getbuffer()) + written = strm.getvalue() + + expected = packb([1, 2], use_bin_type=True) + assert written == expected diff --git a/test/test_read_size.py b/test/test_read_size.py index 4e6c2b93..0f6c1b50 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,66 +1,72 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from msgpack import packb, Unpacker, OutOfData + +from msgpack import OutOfData, Unpacker, packb + UnexpectedTypeException = ValueError + def test_read_array_header(): unpacker = Unpacker() - unpacker.feed(packb(['a', 'b', 'c'])) + unpacker.feed(packb(["a", "b", "c"])) assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == b'a' - assert unpacker.unpack() == b'b' - assert unpacker.unpack() == b'c' + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "b" + assert unpacker.unpack() == "c" try: unpacker.unpack() - assert 0, 'should raise exception' + assert 0, "should raise exception" except OutOfData: - assert 1, 'okay' + assert 1, "okay" def test_read_map_header(): unpacker = Unpacker() - unpacker.feed(packb({'a': 'A'})) + unpacker.feed(packb({"a": "A"})) assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == B'a' - assert unpacker.unpack() == B'A' + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "A" try: unpacker.unpack() - assert 0, 'should raise exception' + assert 0, "should raise exception" except OutOfData: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_array(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_array_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_map(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_map_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_correct_type_nested_array(): unpacker = Unpacker() - unpacker.feed(packb({'a': ['b', 'c', 'd']})) + unpacker.feed(packb({"a": ["b", "c", "d"]})) try: unpacker.read_array_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_nested_map(): unpacker = Unpacker() - unpacker.feed(packb([{'a': 'b'}])) + unpacker.feed(packb([{"a": "b"}])) try: unpacker.read_map_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' - + assert 1, "okay" diff --git a/test/test_seq.py b/test/test_seq.py index fed9ff42..8dee4620 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,14 +1,14 @@ -#!/usr/bin/env python -# coding: utf-8 - +# ruff: noqa: E501 +# ignore line length limit for long comments import io -import msgpack +import msgpack binarydata = bytes(bytearray(range(256))) + def gen_binary_data(idx): - return binarydata[:idx % 300] + return binarydata[: idx % 300] def test_exceeding_unpacker_read_size(): @@ -18,10 +18,10 @@ def test_exceeding_unpacker_read_size(): NUMBER_OF_STRINGS = 6 read_size = 16 - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): for idx in range(NUMBER_OF_STRINGS): data = gen_binary_data(idx) @@ -34,7 +34,7 @@ def test_exceeding_unpacker_read_size(): read_count = 0 for idx, o in enumerate(unpacker): - assert type(o) == bytes + assert isinstance(o, bytes) assert o == gen_binary_data(idx) read_count += 1 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 59718f56..0f895d7d 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,117 +1,147 @@ #!/usr/bin/env python -# coding: utf-8 - import io -from msgpack import Unpacker, BufferFull -from msgpack import pack -from msgpack.exceptions import OutOfData + from pytest import raises +from msgpack import BufferFull, Unpacker, pack, packb +from msgpack.exceptions import OutOfData + def test_partialdata(): unpacker = Unpacker() - unpacker.feed(b'\xa5') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'h') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'a') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'l') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'l') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'o') - assert next(iter(unpacker)) == b'hallo' + unpacker.feed(b"\xa5") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"h") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"a") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"o") + assert next(iter(unpacker)) == "hallo" + def test_foobar(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'b') - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"b") + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") with raises(OutOfData): unpacker.unpack() - unpacker.feed(b'foo') - unpacker.feed(b'bar') + unpacker.feed(b"foo") + unpacker.feed(b"bar") k = 0 - for o, e in zip(unpacker, 'foobarbaz'): + for o, e in zip(unpacker, "foobarbaz"): assert o == ord(e) k += 1 - assert k == len(b'foobar') + assert k == len(b"foobar") + def test_foobar_skip(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") unpacker.skip() - assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b"o") unpacker.skip() - assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b"a") unpacker.skip() with raises(OutOfData): unpacker.unpack() + def test_maxbuffersize(): with raises(ValueError): Unpacker(read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b'fo') + unpacker.feed(b"fo") + with raises(BufferFull): + unpacker.feed(b"ob") + unpacker.feed(b"o") + assert ord("f") == next(unpacker) + unpacker.feed(b"b") + assert ord("o") == next(unpacker) + assert ord("o") == next(unpacker) + assert ord("b") == next(unpacker) + + +def test_maxbuffersize_file(): + buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2)) + unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20) + assert unpacker.unpack() == b"a" * 10 + # assert unpacker.unpack() == [b"a" * 20]*2 with raises(BufferFull): - unpacker.feed(b'ob') - unpacker.feed(b'o') - assert ord('f') == next(unpacker) - unpacker.feed(b'b') - assert ord('o') == next(unpacker) - assert ord('o') == next(unpacker) - assert ord('b') == next(unpacker) + print(unpacker.unpack()) def test_readbytes(): unpacker = Unpacker(read_size=3) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') - assert unpacker.read_bytes(3) == b'oob' - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") # Test buffer refill - unpacker = Unpacker(io.BytesIO(b'foobar'), read_size=3) - assert unpacker.unpack() == ord(b'f') - assert unpacker.read_bytes(3) == b'oob' - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker = Unpacker(io.BytesIO(b"foobar"), read_size=3) + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") + + # Issue 352 + u = Unpacker() + u.feed(b"x") + assert bytes(u.read_bytes(1)) == b"x" + with raises(StopIteration): + next(u) + u.feed(b"\1") + assert next(u) == 1 + def test_issue124(): unpacker = Unpacker() - unpacker.feed(b'\xa1?\xa1!') - assert tuple(unpacker) == (b'?', b'!') + unpacker.feed(b"\xa1?\xa1!") + assert tuple(unpacker) == ("?", "!") assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b'?',) + assert tuple(unpacker) == ("?",) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == (b'!',) + assert tuple(unpacker) == ("!",) assert tuple(unpacker) == () def test_unpack_tell(): stream = io.BytesIO() - messages = [2**i-1 for i in range(65)] + messages = [2**i - 1 for i in range(65)] messages += [-(2**i) for i in range(1, 64)] - messages += [b'hello', b'hello'*1000, list(range(20)), - {i: bytes(i)*i for i in range(10)}, - {i: bytes(i)*i for i in range(32)}] + messages += [ + b"hello", + b"hello" * 1000, + list(range(20)), + {i: bytes(i) * i for i in range(10)}, + {i: bytes(i) * i for i in range(32)}, + ] offsets = [] for m in messages: pack(m, stream) offsets.append(stream.tell()) stream.seek(0) - unpacker = Unpacker(stream) + unpacker = Unpacker(stream, strict_map_key=False) for m, o in zip(messages, offsets): m2 = next(unpacker) assert m == m2 diff --git a/test/test_stricttype.py b/test/test_stricttype.py index 87e7c1ce..72776a2c 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,34 +1,32 @@ -# coding: utf-8 - from collections import namedtuple -from msgpack import packb, unpackb, ExtType + +from msgpack import ExtType, packb, unpackb def test_namedtuple(): - T = namedtuple('T', "foo bar") + T = namedtuple("T", "foo bar") + def default(o): if isinstance(o, T): return dict(o._asdict()) - raise TypeError('Unsupported type %s' % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") + packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, raw=False) - assert unpacked == {'foo': 1, 'bar': 42} + assert unpacked == {"foo": 1, "bar": 42} def test_tuple(): - t = ('one', 2, b'three', (4, )) + t = ("one", 2, b"three", (4,)) def default(o): if isinstance(o, tuple): - return { - '__type__': 'tuple', - 'value': list(o), - } - raise TypeError('Unsupported type %s' % (type(o),)) + return {"__type__": "tuple", "value": list(o)} + raise TypeError(f"Unsupported type {type(o)}") def convert(o): - if o.get('__type__') == 'tuple': - return tuple(o['value']) + if o.get("__type__") == "tuple": + return tuple(o["value"]) return o data = packb(t, strict_types=True, use_bin_type=True, default=default) @@ -38,15 +36,14 @@ def convert(o): def test_tuple_ext(): - t = ('one', 2, b'three', (4, )) + t = ("one", 2, b"three", (4,)) MSGPACK_EXT_TYPE_TUPLE = 0 def default(o): if isinstance(o, tuple): # Convert to list and pack - payload = packb( - list(o), strict_types=True, use_bin_type=True, default=default) + payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) raise TypeError(repr(o)) @@ -54,7 +51,7 @@ def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError('Unknown Ext code {}'.format(code)) + raise ValueError(f"Unknown Ext code {code}") data = packb(t, strict_types=True, use_bin_type=True, default=default) expected = unpackb(data, raw=False, ext_hook=convert) diff --git a/test/test_subtype.py b/test/test_subtype.py index 6807508e..a911578c 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,19 +1,24 @@ #!/usr/bin/env python -# coding: utf-8 -from msgpack import packb, unpackb from collections import namedtuple +from msgpack import packb + + class MyList(list): pass + class MyDict(dict): pass + class MyTuple(tuple): pass -MyNamedTuple = namedtuple('MyNamedTuple', 'x y') + +MyNamedTuple = namedtuple("MyNamedTuple", "x y") + def test_types(): assert packb(MyDict()) == packb(dict()) diff --git a/test/test_timestamp.py b/test/test_timestamp.py new file mode 100644 index 00000000..831141a1 --- /dev/null +++ b/test/test_timestamp.py @@ -0,0 +1,171 @@ +import datetime + +import pytest + +import msgpack +from msgpack.ext import Timestamp + + +def test_timestamp(): + # timestamp32 + ts = Timestamp(2**32 - 1) + assert ts.to_bytes() == b"\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd6\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 + + # timestamp64 + ts = Timestamp(2**34 - 1, 999999999) + assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd7\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 + + # timestamp96 + ts = Timestamp(2**63 - 1, 999999999) + assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 + + # negative fractional + ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 + assert ts.seconds == -3 and ts.nanoseconds == 700000000 + assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + + +def test_unpack_timestamp(): + # timestamp 32 + assert msgpack.unpackb(b"\xd6\xff\x00\x00\x00\x00") == Timestamp(0) + + # timestamp 64 + assert msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd7\xff" + b"\xff" * 8) + + # timestamp 96 + assert msgpack.unpackb(b"\xc7\x0c\xff" + b"\x00" * 12) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x0c\xff" + b"\xff" * 12) == Timestamp(0) + + # Undefined + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd4\xff\x00") # fixext 1 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd5\xff\x00\x00") # fixext 2 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x00\xff") # ext8 (len=0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x03\xff\0\0\0") # ext8 (len=3) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x05\xff\0\0\0\0\0") # ext8 (len=5) + + +def test_timestamp_from(): + t = Timestamp(42, 14000) + assert Timestamp.from_unix(42.000014) == t + assert Timestamp.from_unix_nano(42000014000) == t + + +def test_timestamp_to(): + t = Timestamp(42, 14000) + assert t.to_unix() == 42.000014 + assert t.to_unix_nano() == 42000014000 + + +def test_timestamp_datetime(): + t = Timestamp(42, 14) + utc = datetime.timezone.utc + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) + ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) + + assert ( + Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 + ) + + ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) + ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) + assert ( + Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 + ) + + assert Timestamp.from_datetime(ts).to_datetime() == ts + + +def test_unpack_datetime(): + t = Timestamp(42, 14) + utc = datetime.timezone.utc + packed = msgpack.packb(t) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + +def test_pack_unpack_before_epoch(): + utc = datetime.timezone.utc + t_in = datetime.datetime(1960, 1, 1, tzinfo=utc) + packed = msgpack.packb(t_in, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == t_in + + +def test_pack_datetime(): + t = Timestamp(42, 14000) + dt = t.to_datetime() + utc = datetime.timezone.utc + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) + + packed = msgpack.packb(dt, datetime=True) + packed2 = msgpack.packb(t) + assert packed == packed2 + + unpacked = msgpack.unpackb(packed) + print(packed, unpacked) + assert unpacked == t + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt + + x = [] + packed = msgpack.packb(dt, datetime=False, default=x.append) + assert x + assert x[0] == dt + assert msgpack.unpackb(packed) is None + + +def test_issue451(): + # https://github.com/msgpack/msgpack-python/issues/451 + utc = datetime.timezone.utc + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + assert packed == b"\xd6\xff\xf4\x86eL" + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert dt == unpacked + + +def test_pack_datetime_without_tzinfo(): + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + with pytest.raises(ValueError, match="where tzinfo=None"): + packed = msgpack.packb(dt, datetime=True) + + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + packed = msgpack.packb(dt, datetime=True, default=lambda x: None) + assert packed == msgpack.packb(None) + + utc = datetime.timezone.utc + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py index 00a10612..b17c3c53 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,11 +1,13 @@ -from io import BytesIO import sys -from msgpack import Unpacker, packb, OutOfData, ExtType -from pytest import raises, mark +from io import BytesIO + +from pytest import mark, raises + +from msgpack import ExtType, OutOfData, Unpacker, packb def test_unpack_array_header_from_file(): - f = BytesIO(packb([1,2,3,4])) + f = BytesIO(packb([1, 2, 3, 4])) unpacker = Unpacker(f) assert unpacker.read_array_header() == 4 assert unpacker.unpack() == 1 @@ -16,8 +18,10 @@ def test_unpack_array_header_from_file(): unpacker.unpack() -@mark.skipif("not hasattr(sys, 'getrefcount') == True", - reason='sys.getrefcount() is needed to pass this test') +@mark.skipif( + "not hasattr(sys, 'getrefcount') == True", + reason="sys.getrefcount() is needed to pass this test", +) def test_unpacker_hook_refcnt(): result = [] @@ -43,12 +47,9 @@ def hook(x): def test_unpacker_ext_hook(): - class MyUnpacker(Unpacker): - def __init__(self): - super(MyUnpacker, self).__init__( - ext_hook=self._hook, raw=False) + super().__init__(ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: @@ -57,15 +58,32 @@ def _hook(self, code, data): return ExtType(code, data) unpacker = MyUnpacker() - unpacker.feed(packb({'a': 1})) - assert unpacker.unpack() == {'a': 1} - unpacker.feed(packb({'a': ExtType(1, b'123')})) - assert unpacker.unpack() == {'a': 123} - unpacker.feed(packb({'a': ExtType(2, b'321')})) - assert unpacker.unpack() == {'a': ExtType(2, b'321')} - - -if __name__ == '__main__': - test_unpack_array_header_from_file() - test_unpacker_hook_refcnt() - test_unpacker_ext_hook() + unpacker.feed(packb({"a": 1})) + assert unpacker.unpack() == {"a": 1} + unpacker.feed(packb({"a": ExtType(1, b"123")})) + assert unpacker.unpack() == {"a": 123} + unpacker.feed(packb({"a": ExtType(2, b"321")})) + assert unpacker.unpack() == {"a": ExtType(2, b"321")} + + +def test_unpacker_tell(): + objects = 1, 2, "abc", "def", "ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + positions = 1, 2, 6, 10, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos in zip(objects, unpacker, positions): + assert obj == unp + assert pos == unpacker.tell() + + +def test_unpacker_tell_read_bytes(): + objects = 1, "abc", "ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + raw_data = b"\x02", b"\xa3def", b"" + lenghts = 1, 4, 999 + positions = 1, 6, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos, n, raw in zip(objects, unpacker, positions, lenghts, raw_data): + assert obj == unp + assert pos == unpacker.tell() + assert unpacker.read_bytes(n) == raw diff --git a/test/test_unpack_raw.py b/test/test_unpack_raw.py deleted file mode 100644 index 70026012..00000000 --- a/test/test_unpack_raw.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Tests for cases where the user seeks to obtain packed msgpack objects""" - -import io -from msgpack import Unpacker, packb - - -def test_write_bytes(): - unpacker = Unpacker() - unpacker.feed(b'abc') - f = io.BytesIO() - assert unpacker.unpack(f.write) == ord('a') - assert f.getvalue() == b'a' - f = io.BytesIO() - assert unpacker.skip(f.write) is None - assert f.getvalue() == b'b' - f = io.BytesIO() - assert unpacker.skip() is None - assert f.getvalue() == b'' - - -def test_write_bytes_multi_buffer(): - long_val = (5) * 100 - expected = packb(long_val) - unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3) - - f = io.BytesIO() - unpacked = unpacker.unpack(f.write) - assert unpacked == long_val - assert f.getvalue() == expected diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 68a2f53a..00000000 --- a/tox.ini +++ /dev/null @@ -1,38 +0,0 @@ -[tox] -envlist = {py27,py35,py36}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 - -[variants:pure] -setenv= - MSGPACK_PUREPYTHON=x - -[testenv] -deps= - pytest - -changedir=test -commands= - c,x86: python -c 'from msgpack import _packer, _unpacker' - c,x86: py.test - pure: py.test - -[testenv:py27-x86] -basepython=python2.7-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _packer, _unpacker' - py.test - -[testenv:py34-x86] -basepython=python3.4-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _packer, _unpacker' - py.test