diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 060c910..0000000 --- a/.appveyor.yml +++ /dev/null @@ -1,22 +0,0 @@ -image: -- Visual Studio 2017 - -stack: python 3 - -environment: - PY_DIR: C:\Python37-x64 - -clone_depth: 3 - -build: off - -init: -- cmd: set PATH=%PY_DIR%;%PY_DIR%\Scripts;%PATH% - -install: -- pip install -e .[tests] -- pip install mypy - -test_script: -- pytest --cov -# - mypy . --ignore-missing-imports diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 9cc2bb4..5a64758 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,9 +1,7 @@ --- name: Bug report -about: Create a report to help us improve -title: "[BUG] INSERT TITLE HERE" +about: Something is broken or doesn't work as expected labels: bug -assignees: '' --- @@ -20,5 +18,5 @@ Steps to reproduce the behavior: **Expected behavior** A clear and concise description of what you expected to happen. -**Sample File** +**Sample file** Please provide a sample file that shows the behavior that you have described in this report. You can use any upload site (such as wetransfer) and provide the link here. 
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md.license b/.github/ISSUE_TEMPLATE/bug_report.md.license new file mode 100644 index 0000000..96be622 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2021-2024 tinytag Contributors +SPDX-License-Identifier: MIT diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..294d5f1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2025 tinytag Contributors +# SPDX-License-Identifier: MIT + +blank_issues_enabled: false +contact_links: + - name: Other + url: https://github.com/tinytag/tinytag/discussions + about: Question, discussion or anything else diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index bbcbbe7..9390310 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,9 +1,7 @@ --- name: Feature request about: Suggest an idea for this project -title: '' -labels: '' -assignees: '' +labels: enhancement --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md.license b/.github/ISSUE_TEMPLATE/feature_request.md.license new file mode 100644 index 0000000..96be622 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2021-2024 tinytag Contributors +SPDX-License-Identifier: MIT diff --git a/.github/workflows/reuse.yml b/.github/workflows/reuse.yml new file mode 100644 index 0000000..ec2ee03 --- /dev/null +++ b/.github/workflows/reuse.yml @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: 2024-2025 tinytag Contributors +# SPDX-License-Identifier: MIT + +name: REUSE Compliance +on: [push, pull_request] +permissions: {} + +jobs: + check: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: REUSE compliance + uses: fsfe/reuse-action@v5 diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..3b4b043 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: 2022-2025 tinytag Contributors +# SPDX-License-Identifier: MIT + +name: Tests +on: [push, pull_request] +permissions: {} + +jobs: + tests: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python: [ + '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14-dev', + 'pypy-3.8', 'pypy-3.9', 'pypy-3.10', 'pypy-3.11' + ] + include: + - os: ubuntu-22.04 + python: 3.7 + - os: macos-13 + python: 3.7 + - os: windows-latest + python: 3.7 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: 'pip' + + - name: Install dependencies + run: python -m pip install build flit .[tests] + + - name: PEP 8 style checks + run: python -m pycodestyle . + + - name: Linting + run: python -m pylint --recursive=y . 
+ + - name: Typing (mypy) + run: python -m mypy -p tinytag + + - name: Typing (pyright) + run: python -m pyright + + - name: Unit tests + run: python -m coverage run -m unittest + env: + TINYTAG_DEBUG: true + + - name: Build package + run: python -m build + + - name: Build package without isolation + run: python -m build --no-isolation + + - name: Coveralls + uses: coverallsapp/github-action@v2 + with: + flag-name: run-${{ join(matrix.*, '-') }} + file: .coverage + parallel: true + + finish: + needs: tests + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Coveralls finished + uses: coverallsapp/github-action@v2 + with: + parallel-finished: true diff --git a/.gitignore b/.gitignore index 0524d01..e2ddd48 100644 --- a/.gitignore +++ b/.gitignore @@ -1,36 +1,18 @@ -*.py[cod] +# SPDX-FileCopyrightText: 2020-2024 tinytag Contributors +# SPDX-License-Identifier: MIT -# C extensions -*.so +*.py[cod] # Packages -*.egg *.egg-info dist build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 __pycache__ -# Installer logs -pip-log.txt - # Unit test / coverage reports .coverage -.tox -nosetests.xml -test-results/ .mypy_cache - -# Translations -*.mo +.pytest_cache # Mr Developer .mr.developer.cfg @@ -42,5 +24,5 @@ test-results/ .venv venv -# custom test samples -tinytag/tests/custom_samples +# Visual Studio Code +.vscode diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c6cab09..0000000 --- a/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Continuous Integration config -# travis-ci.org -# -# see http://about.travis-ci.org/docs/user/build-configuration/ -# - -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "pypy" - -# command to install dependencies -install: - - if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then pip install coveralls; fi - - "pip install pytest" - - "pip install pytest-cov" - -# workaround for pypy not working anymore in travis -env: - - DEBUG=1 
CRYPTOGRAPHY_ALLOW_OPENSSL_102=1 - -# command to run tests -script: - pytest --cov=tinytag --cov-branch --cov-report xml:test-results/coverage.xml --junitxml test-results/junit.xml - -after_success: - - if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then coveralls; fi diff --git a/LICENSE b/LICENSE index c0162e4..9781aad 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2014-2017 Tom Wallroth +Copyright (c) 2014-2025 Tom Wallroth, Mat (mathiascode), et al. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/LICENSES/CC0-1.0.txt b/LICENSES/CC0-1.0.txt new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSES/CC0-1.0.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/LICENSES/MIT.txt @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index ddf3a98..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,8 +0,0 @@ -include *.md -include *.rst -include *.txt -include LICENSE - -global-exclude *.pyc -global-exclude .gitignore -global-exclude .DS_Store diff --git a/README.md b/README.md index 202105f..b07fd32 100644 --- a/README.md +++ b/README.md @@ -1,121 +1,574 @@ -tinytag -======= + -tinytag is a library for reading music meta data of MP3, OGG, OPUS, MP4, M4A, FLAC, WMA and Wave files with python +# tinytag -[![Build Status](https://travis-ci.org/devsnd/tinytag.png?branch=master)](https://travis-ci.org/devsnd/tinytag) -[![Build status](https://ci.appveyor.com/api/projects/status/w9y2kg97869g1edj?svg=true)](https://ci.appveyor.com/project/devsnd/tinytag) -[![Coverage Status](https://coveralls.io/repos/devsnd/tinytag/badge.png)](https://coveralls.io/r/devsnd/tinytag) +tinytag is a Python library for reading audio file metadata -Install -------- +[![Build Status](https://img.shields.io/github/actions/workflow/status/tinytag/tinytag/tests.yml +)](https://github.com/tinytag/tinytag/actions?query=workflow:%22Tests%22) +[![Coverage Status](https://img.shields.io/coverallsCoverage/github/tinytag/tinytag +)](https://coveralls.io/r/tinytag/tinytag) 
+[![PyPI Version](https://img.shields.io/pypi/v/tinytag +)](https://pypi.org/project/tinytag/) +[![PyPI Downloads](https://img.shields.io/pypi/dm/tinytag +)](https://pypistats.org/packages/tinytag) -```pip install tinytag``` +## Install -Features: ---------- +``` +python3 -m pip install tinytag +``` - * Read tags, length and cover images of audio files - * supported formats - * MP3 (ID3 v1, v1.1, v2.2, v2.3+) - * Wave/RIFF - * OGG - * OPUS + +## Features + + * Read tags, images and properties of audio files + * Supported formats: + * MP3 / MP2 / MP1 (ID3 v1, v1.1, v2.2, v2.3+) + * M4A (AAC / ALAC) + * WAVE / WAV + * OGG (FLAC / Opus / Speex / Vorbis) * FLAC * WMA - * MP4/M4A/M4B - * pure python, no dependencies - * supports python 2.7 and 3.4 or higher - * high test coverage - * Just a few hundred lines of code (just include it in your project!) + * AIFF / AIFF-C + * Same API for all formats + * Small, portable library + * High code coverage + * Pure Python, no dependencies + * Supports Python 3.7 or higher -tinytag only provides the minimum needed for _reading_ MP3, OGG, OPUS, MP4, M4A, FLAC, WMA and Wave meta-data. -It can determine track number, total tracks, title, artist, album, year, duration and more. +> [!IMPORTANT] +> Support for changing/writing metadata will not be added. Use another library +> such as [Mutagen](https://mutagen.readthedocs.io/) for this. + + +## Usage + +tinytag only provides the minimum needed for _reading_ metadata, and presents +it in a simple format. It can determine track number, total tracks, title, +artist, album, year, duration and more. + +```python +from tinytag import TinyTag +tag: TinyTag = TinyTag.get('/some/music.mp3') + +print(f'This track is by {tag.artist}.') +print(f'It is {tag.duration:.2f} seconds long.') +``` + +> [!WARNING] +> The `ignore_errors` parameter of `TinyTag.get()` is obsolete as of tinytag +> 2.0.0, and will be removed in the future. 
- from tinytag import TinyTag - tag = TinyTag.get('/some/music.mp3') - print('This track is by %s.' % tag.artist) - print('It is %f seconds long.' % tag.duration) - Alternatively you can use tinytag directly on the command line: - $ python -m tinytag --format csv /some/music.mp3 - > {"filename": "/some/music.mp3", "filesize": 30212227, "album": "Album", "albumartist": "Artist", "artist": "Artist", "audio_offset": null, "bitrate": 256, "channels": 2, "comment": null, "composer": null, "disc": "1", "disc_total": null, "duration": 10, "genre": null, "samplerate": 44100, "title": "Title", "track": "5", "track_total": null, "year": "2012"} + $ python3 -m tinytag /some/music.mp3 + { + "filename": "/some/music.mp3", + "filesize": 3243226, + "duration": 173.52, + "channels": 2, + "bitrate": 128, + "samplerate": 44100, + "artist": [ + "artist name" + ], + "album": [ + "album name" + ], + "title": [ + "track name" + ], + "track": 4, + "genre": [ + "Jazz" + ], + "year": [ + "2010" + ], + "comment": [ + "Some comment here" + ] + } + +Check `python3 -m tinytag --help` for all CLI options, for example other +output formats. + +### Supported Files + +To receive a tuple of file extensions tinytag supports, use the +`SUPPORTED_FILE_EXTENSIONS` constant: + +```python +TinyTag.SUPPORTED_FILE_EXTENSIONS +``` + +Alternatively, check if a file is supported by providing its path: + +```python +is_supported = TinyTag.is_supported('/some/music.mp3') +``` + +### Common Metadata + +tinytag provides some common attributes, which always contain a single value. +These are helpful when you need quick access to common metadata. 
-Check `python -m tinytag --help` for all CLI options, for example other output formats` +#### File/Audio Properties -List of possible attributes you can get with TinyTag: + tag.bitdepth # bit depth as integer (for lossless audio) + tag.bitrate # bitrate in kBits/s as float + tag.duration # audio duration in seconds as float + tag.filename # filename as string + tag.filesize # file size in bytes as integer + tag.samplerate # samples per second as integer + +> [!WARNING] +> The `tag.audio_offset` attribute is obsolete as of tinytag 2.0.0, and will +> be removed in the future. + +#### Metadata Fields tag.album # album as string tag.albumartist # album artist as string tag.artist # artist name as string - tag.audio_offset # number of bytes before audio data begins - tag.bitrate # bitrate in kBits/s tag.comment # file comment as string - tag.composer # composer as string - tag.disc # disc number - tag.disc_total # the total number of discs - tag.duration # duration of the song in seconds - tag.filesize # file size in bytes + tag.composer # composer as string + tag.disc # disc number as integer + tag.disc_total # total number of discs as integer tag.genre # genre as string - tag.samplerate # samples per second - tag.title # title of the song - tag.track # track number as string - tag.track_total # total number of tracks as string - tag.year # year or data as string - -Additionally you can also get cover images from ID3 tags: - - tag = TinyTag.get('/some/music.mp3', image=True) - image_data = tag.get_image() - -Changelog: - * 1.5.0 (2020-11-05): - - fixed data type to always return str for disc, disc_total, track, track_total #97 (thanks to kostalski) - - fixed package install being reported as UNKNOWN for some python/pip variations #90 (thanks to russpoutine) - - Added automatic detection for certain MP4 file headers - * 1.4.0 (2020-04-23): - - detecting file types based on their magic header bytes, #85 - - fixed opus duration being wrong for files with lower sample 
rate #81 - - implemented support for binary paths #72 - - always cast mp3 bitrates to int, so that CBR and VBR output behaves the sam - - made __str__ deterministic and use json as output format - * 1.3.0 (2020-03-09): - - added option to ignore encoding errors `ignore_errors` #73 - - Improved text decoding for many malformed files - * 1.2.2 (2019-04-13): - - Improved stability when reading corrupted mp3 files - * 1.2.1 (2019-04-13): - - fixed wav files not correctly reporting the number of channels #61 - * 1.2.0 (2019-04-13): - - using setup.cfg instead of setup.py (thanks to scivision) - - added support for calling TinyTag.get with pathlib.Path (thanks to scivision) - - added appveyor windows test CI (thanks to scivision) - - using pytest instead of nosetest (thanks to scivision) - * 1.1.0 (2019-04-13): - - added new field "composer" (Thanks to Phil Borman) - * 1.0.1 (2019-04-13): - - fixed ID3 loading for files with corrupt header (thanks to Ian Homer) - - fixed parsing of duration in wav file (thanks to Ian Homer) - * 1.0.0 (2018-12-12): - - added comment field - - added wav-riff format support - - use MP4 parser for m4b files - - added simple cli tool - - fix parsing of FLAC files with ID3 header (thanks to minus7) - - added method `TinyTag.is_supported(filename)` - * 0.19.0 (2018-02-11): - - fixed corrupted images for some mp3s (#45) - * 0.18.0 (2017-04-29): - - fixed wrong bitrate and crash when parsing xing header - * 0.17.0 (2016-10-02): - - supporting ID3v2.2 images - * 0.16.0 (2016-08-06): - - MP4 cover image support - * 0.15.2 (2016-08-06): - - fixed crash for malformed MP4 files (#34) - * 0.15.0 (2016-08-06): - - fixed decoding of UTF-16LE ID3v2 Tags, improved overall stability - * 0.14.0 (2016-06-05): - - MP4/M4A and Opus support + tag.title # title of the song as string + tag.track # track number as integer + tag.track_total # total number of tracks as integer + tag.year # year or date as string + +### Additional Metadata + +For additional values of 
the same field type, non-common metadata fields, or +metadata specific to certain file formats, use `other`: + + tag.other # a dictionary of additional fields + +> [!WARNING] +> The `other` dictionary has replaced the `extra` dictionary in tinytag 2.0.0. +> The latter will be removed in a future release. + +The following `other` field names are standardized in tinytag, and optionally +present when files provide such metadata: + + barcode + bpm + catalog_number + conductor + copyright + director + encoded_by + encoder_settings + initial_key + isrc + language + license + lyricist + lyrics + media + publisher + set_subtitle + url + +Additional `other` field names not documented above may be present, but are +format-specific and may change or disappear in future tinytag releases. If +tinytag does not expose metadata you need, or you wish to standardize more +field names, open a feature request on GitHub for discussion. + +`other` values are always provided as strings, and are not guaranteed to be +valid. Should e.g. the `bpm` value in the file contain non-numeric characters, +tinytag will provide the string as-is. It is your responsibility to handle +possible exceptions, e.g. when converting the value to an integer. + +Multiple values of the same field type are provided if a file contains them. +Values are always provided as a list, even when only a single value exists. + +Example: + +```python +from tinytag import OtherFields, TinyTag + +tag: TinyTag = TinyTag.get('/some/music.mp3') +other_fields: OtherFields = tag.other +catalog_numbers: list[str] | None = other_fields.get('catalog_number') + +if catalog_numbers: + catalog_number: str = catalog_numbers[0] + print(catalog_number) + +print(catalog_numbers) +``` + +Output: + + > 10 + > ['10'] + +When a file contains multiple values for a [common metadata field](#common-metadata) +(e.g. 
`artist`), the primary value is accessed through the common attribute +(`tag.artist`), and any additional values through the `other` dictionary +(`tag.other['artist']`). + +Example: + +```python +from tinytag import TinyTag + +tag: TinyTag = TinyTag.get('/some/music.mp3') +artist: str | None = tag.artist +additional_artists: list[str] | None = tag.other.get('artist') + +print(artist) +print(additional_artists) +``` + +Output: + + > main artist + > ['another artist', 'yet another artist'] + +### All Metadata + +If you need to receive all available metadata as key-value pairs in a flat +dictionary, use the `as_dict()` method. This combines the common attributes +and `other` dictionary, which can be more convenient in some cases. + + from tinytag import TinyTag + + tag: TinyTag = TinyTag.get('/some/music.mp3') + metadata: dict = tag.as_dict() + +### Images + +Additionally, you can also read embedded images by passing an `image=True` +keyword argument to `TinyTag.get()`. + +If you need to receive an image of a specific kind, including its description, +use `images`: + + tag.images # available embedded images + +The following common image attributes are available, providing the first +located image of each kind: + + tag.images.front_cover # front cover as 'Image' object + tag.images.back_cover # back cover as 'Image' object + tag.images.media # media (e.g. CD label) as 'Image' object + +When present, any additional images are available in an `images.other` +dictionary, using the following standardized key names: + + generic + icon + alt_icon + front_cover + back_cover + media + leaflet + lead_artist + artist + conductor + band + composer + lyricist + recording_location + during_recording + during_performance + screen_capture + bright_colored_fish + illustration + band_logo + publisher_logo + unknown + +Provided values are always lists containing at least one `Image` object. 
+ +The `Image` object provides the following attributes: + + data # image data as bytes + name # image name/kind as string + mime_type # image MIME type as string + description # image description as string + +To receive any available image, prioritizing the front cover, use `images.any`: + +```python +from tinytag import Image, TinyTag + +tag: TinyTag = TinyTag.get('/some/music.ogg', image=True) +image: Image | None = tag.images.any + +if image is not None: + data: bytes = image.data + name: str = image.name + mime_type: str = image.mime_type + description: str = image.description + + print(len(data)) + print(name) + print(mime_type) + print(description) +``` + +Output: + + > 74452 + > front_cover + > image/jpeg + > some image description + +> [!WARNING] +> `tag.images.any` has replaced `tag.get_image()` in tinytag 2.0.0. +> `tag.get_image()` will be removed in the future. + +To receive a common image, e.g. `front_cover`: + +```python +from tinytag import Image, Images, TinyTag + +tag: TinyTag = TinyTag.get('/some/music.ogg', image=True) +images: Images = tag.images +cover_image: Image | None = images.front_cover + +if cover_image is not None: + data: bytes = cover_image.data + description: str = cover_image.description +``` + +To receive an additional image, e.g. `bright_colored_fish`: + +```python +from tinytag import Image, OtherImages, TinyTag + +tag: TinyTag = TinyTag.get('/some/music.ogg', image=True) +other_images: OtherImages = tag.images.other +fish_images: list[Image] | None = other_images.get('bright_colored_fish') + +if fish_images: + image = fish_images[0] # Use first image + data = image.data + description = image.description +``` + +### Encoding + +To open files using a specific encoding, you can use the `encoding` parameter. +This parameter is however only used for formats where the encoding is not +explicitly specified. 
+ +```python +TinyTag.get('a_file_with_gbk_encoding.mp3', encoding='gbk') +``` + +### File-like Objects + +To use a file-like object (e.g. BytesIO) instead of a file path, pass a +`file_obj` keyword argument: + +```python +TinyTag.get(file_obj=your_file_obj) +``` + +### Exceptions + + TinyTagException # Base class for exceptions + ParseError # Parsing an audio file failed + UnsupportedFormatError # File format is not supported + + +## Changelog + +### 2.1.1 (2025-04-23) + +- ID3: Stop removing 'b' character from strings +- Port unit tests from pytest to built-in unittest module + +### 2.1.0 (2025-02-23) + +- Opus: Calculate audio bitrate +- Opus: Take pre-skip into account when calculating the duration + +### 2.0.0 (2024-11-03) + +- **BREAKING:** Store 'disc', 'disc_total', 'track' and 'track_total' values as int instead of str +- **BREAKING:** 'as_dict()' method (previously undocumented) returns tag field values in list form +- **BREAKING:** TinyTagException no longer inherits LookupError +- **BREAKING:** TinyTag subclasses are now private +- **BREAKING:** Remove function to use custom audio file samples in tests +- **BREAKING:** Remove support for Python 2 +- **DEPRECATION:** Mark 'ignore_errors' parameter for TinyTag.get() as obsolete +- **DEPRECATION:** Mark 'audio_offset' attribute as obsolete +- **DEPRECATION:** Deprecate 'extra' dict in favor of 'other' dict with values in list form +- **DEPRECATION:** Deprecate 'get_image()' method in favor of 'images.any' property +- Add type hints to codebase +- Provide access to custom metadata fields through the 'other' dict +- Provide access to all available images +- Add more standard 'other' fields +- Use Flit as Python build backend instead of Setuptools +- ID3: Fix invalid sample rate/duration in some cases +- ID3: Fix reading of UTF-16 strings without BOM +- FLAC: Apply ID3 tags after Vorbis +- OGG/WMA: Set missing 'channels' field +- WMA: Set missing 'other.copyright' field +- WMA: Raise exception if file is 
invalid +- Various optimizations + +### 1.10.1 (2023-10-26) + +- Update 'extra' fields with data from other tags #188 +- ID3: Add missing 'extra.copyright' field + +### 1.10.0 (2023-10-18) + +- Add support for OGG FLAC format #182 +- Add support for OGG Speex format #181 +- Wave: support image loading +- Add support for file-like objects (BytesIO) #178 +- Add list of supported file extensions #177 +- Fix deprecations related to setuptools #176 +- Fix pathlib support in TinyTag.is_supported() +- Only remove zero bytes at the end of strings +- Stricter conditions in while loops +- OGG: Add stricter magic byte matching for OGG files +- Compatibility with Python 3.4 and 3.5 is no longer tested + +### 1.9.0 (2023-04-23) + +- Add bitdepth attribute for lossless audio #157 +- Add recognition of Audible formats #163 (thanks to snowskeleton) +- Add .m4v to list of supported file extensions #142 +- Aiff: Implement replacement for Python's aifc module #164 +- ID3: Only check for language in COMM and USLT frames #147 +- ID3: Read the correct number of bytes from Xing header #154 +- ID3: Add support for ID3v2.4 TDRC frame #156 (thanks to Uninen) +- M4A: Add description fields #168 (thanks to snowskeleton) +- RIFF: Handle tags containing extra zero-byte #141 +- Vorbis: Parse OGG cover art #144 (thanks to Pseurae) +- Vorbis: Support standard disctotal/tracktotal comments #171 +- Wave: Add proper support for padded IFF chunks + +### 1.8.1 (2022-03-12) [still mathiascode-edition] + +- MP3 ID3: Set correct file position if tag reading is disabled #119 (thanks to mathiascode) +- MP3: Fix incorrect calculation of duration for VBR encoded MP3s #128 (thanks to mathiascode) + +### 1.8.0 (2022-03-05) [mathiascode-edition] + +- Add support for ALAC audio files #130 (thanks to mathiascode) +- AIFF: Fixed bitrate calculation for certain files #129 (thanks to mathiascode) +- MP3: Do not round MP3 bitrates #131 (thanks to mathiascode) +- MP3 ID3: Support any language in COMM and USLT frames 
#135 (thanks to mathiascode) +- Performance: Don't use regex when parsing genre #136 (thanks to mathiascode) +- Disable tag parsing for all formats when requested #137 (thanks to mathiascode) +- M4A: Fix invalid bitrates in certain files #132 (thanks to mathiascode) +- WAV: Fix metadata parsing for certain files #133 (thanks to mathiascode) + +### 1.7.0 (2021-12-14) + +- fixed rare occasion of ID3v2 tags missing their first character, #106 +- allow overriding the default encoding of ID3 tags (e.g. `TinyTag.get(..., encoding='gbk')`) +- fixed calculation of bitrate for very short mp3 files, #99 +- utf-8 support for AIFF files, #123 +- fixed image parsing for id3v2 with images containing utf-16LE descriptions, #117 +- fixed ID3v1 tags overwriting ID3v2 tags, #121 +- Set correct file position if tag reading is disabled for ID3 (thanks to mathiascode) + +### 1.6.0 (2021-08-28) [aw-edition] + +- fixed handling of non-latin encoding types for images (thanks to aw-was-here) +- added support for ISRC data, available in `extra['isrc']` field (thanks to aw-was-here) +- added support for AIFF/AIFF-C (thanks to aw-was-here) +- fixed import deprecation warnings (thanks to idotobi) +- fixed exception for TinyTag misuse being different in different python versions (thanks to idotobi) +- added support for ID3 initial key tonality hint, available in `extra['initial_key']` +- added support for ID3 unsynchronized lyrics, available in `extra['lyrics']` +- added `extra` field, which may contain additional metadata not available in all file formats + +### 1.5.0 (2020-11-05) + +- fixed data type to always return str for disc, disc_total, track, track_total #97 (thanks to kostalski) +- fixed package install being reported as UNKNOWN for some python/pip variations #90 (thanks to russpoutine) +- Added automatic detection for certain MP4 file headers + +### 1.4.0 (2020-04-23) + +- detecting file types based on their magic header bytes, #85 +- fixed opus duration being wrong for files with
lower sample rate #81 +- implemented support for binary paths #72 +- always cast mp3 bitrates to int, so that CBR and VBR output behaves the same +- made __str__ deterministic and use json as output format + +### 1.3.0 (2020-03-09) + +- added option to ignore encoding errors `ignore_errors` #73 +- Improved text decoding for many malformed files + +### 1.2.2 (2019-04-13) + +- Improved stability when reading corrupted mp3 files + +### 1.2.1 (2019-04-13) + +- fixed wav files not correctly reporting the number of channels #61 + +### 1.2.0 (2019-04-13) + +- using setup.cfg instead of setup.py (thanks to scivision) +- added support for calling TinyTag.get with pathlib.Path (thanks to scivision) +- added appveyor windows test CI (thanks to scivision) +- using pytest instead of nosetest (thanks to scivision) + +### 1.1.0 (2019-04-13) + +- added new field "composer" (Thanks to Phil Borman) + +### 1.0.1 (2019-04-13) + +- fixed ID3 loading for files with corrupt header (thanks to Ian Homer) +- fixed parsing of duration in wav file (thanks to Ian Homer) + +### 1.0.0 (2018-12-12) + +- added comment field +- added wav-riff format support +- use MP4 parser for m4b files +- added simple cli tool +- fix parsing of FLAC files with ID3 header (thanks to minus7) +- added method `TinyTag.is_supported(filename)` + +### 0.19.0 (2018-02-11) + +- fixed corrupted images for some mp3s (#45) + +### 0.18.0 (2017-04-29) + +- fixed wrong bitrate and crash when parsing xing header + +### 0.17.0 (2016-10-02) + +- supporting ID3v2.2 images + +### 0.16.0 (2016-08-06) + +- MP4 cover image support + +### 0.15.2 (2016-08-06) + +- fixed crash for malformed MP4 files (#34) + +### 0.15.0 (2016-08-06) + +- fixed decoding of UTF-16LE ID3v2 Tags, improved overall stability + +### 0.14.0 (2016-06-05) + +- MP4/M4A and Opus support diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1167bc5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,122 @@ +# SPDX-FileCopyrightText: 2024 tinytag
Contributors +# SPDX-License-Identifier: MIT + +[build-system] +requires = ["flit_core>=3.2"] +build-backend = "flit_core.buildapi" + +[project] +name = "tinytag" +description = "Read audio file metadata" +authors = [ + {name = "Tom Wallroth"}, + {name = "Mat (mathiascode)"} +] +keywords = [ + "metadata", + "audio", + "music", + "mp3", + "m4a", + "wav", + "ogg", + "opus", + "flac", + "wma", + "aiff" +] +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: MIT License", + "Development Status :: 5 - Production/Stable", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Multimedia", + "Topic :: Multimedia :: Sound/Audio", + "Topic :: Multimedia :: Sound/Audio :: Analysis", + "Typing :: Typed" +] +license = {file = "LICENSE"} +readme = "README.md" +requires-python = ">=3.7" +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/tinytag/tinytag" + +[project.optional-dependencies] +tests = [ + "coverage", + "mypy", + "pycodestyle", + "pylint", + "pyright" +] + +[tool.flit.sdist] +exclude = [ + ".gitignore", + ".github/", + "tinytag/icons/", + "tinytag/tests/" +] + +[tool.pylint.master] +disable = [ + "invalid-name", + "too-many-arguments", + "too-many-boolean-expressions", + "too-many-branches", + "too-many-instance-attributes", + "too-many-lines", + "too-many-locals", + "too-many-positional-arguments", + "too-many-nested-blocks", + "too-many-return-statements", + "too-many-statements", + "too-few-public-methods", + "unknown-option-value" +] +enable = [ + 
"consider-using-augmented-assign" +] +load-plugins = [ + "pylint.extensions.bad_builtin", + "pylint.extensions.check_elif", + "pylint.extensions.code_style", + "pylint.extensions.comparison_placement", + "pylint.extensions.consider_refactoring_into_while_condition", + "pylint.extensions.consider_ternary_expression", + "pylint.extensions.dict_init_mutate", + "pylint.extensions.docstyle", + "pylint.extensions.dunder", + "pylint.extensions.empty_comment", + "pylint.extensions.eq_without_hash", + "pylint.extensions.for_any_all", + "pylint.extensions.no_self_use", + "pylint.extensions.overlapping_exceptions", + "pylint.extensions.private_import", + "pylint.extensions.redefined_loop_name", + "pylint.extensions.redefined_variable_type", + "pylint.extensions.set_membership", + "pylint.extensions.typing" +] +py-version = "3.7" + +[tool.mypy] +strict = true + +[tool.coverage.report] +exclude_lines = [ + "if TYPE_CHECKING:" +] diff --git a/runtests.py b/runtests.py deleted file mode 100755 index 85a5ba2..0000000 --- a/runtests.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python -import pytest - - -def runtests(): - pytest.main() - - -if __name__ == '__main__': - runtests() diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 89a86ea..0000000 --- a/setup.cfg +++ /dev/null @@ -1,76 +0,0 @@ -[metadata] -name = tinytag -author = Tom Wallroth -author-email = tomwallroth@gmail.com -url = https://github.com/devsnd/tinytag -description = Read music meta data and length of MP3, OGG, OPUS, MP4, M4A, FLAC, WMA and Wave files -keywords = - metadata - music -classifiers = - Programming Language :: Python - Programming Language :: Python :: 2.7 - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.4 - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - License :: OSI Approved :: MIT License - Development Status :: 5 - Production/Stable - Environment :: Web Environment - Intended 
Audience :: Developers - Operating System :: OS Independent - Topic :: Internet :: WWW/HTTP - Topic :: Multimedia - Topic :: Multimedia :: Sound/Audio - Topic :: Multimedia :: Sound/Audio :: Analysis -license = MIT -license-file = LICENSE -long-description = file: README.md -long-description-content-type = text/markdown - -[options] -python_requires = >= 2.7 -setup_requires = - setuptools >= 38.6 - pip >= 10 -include_package_data = True -packages = find: -install_requires = - -[options.extras_require] -tests = - pytest - pytest-cov - coveralls - flake8 - -[options.entry_points] -console_scripts = - -[flake8] -max-line-length = 132 -exclude = .git,__pycache__,.eggs/,doc/,docs/,build/,dist/,archive/,src/ -ignore = E501 - -[coverage:run] -cover_pylib = false -omit = - /home/travis/virtualenv/* - */site-packages/* - */bin/* - */src/* - -[coverage:report] -exclude_lines = - pragma: no cover - def __repr__ - except RuntimeError - except NotImplementedError - except ImportError - except FileNotFoundError - except CalledProcessError - logging.warning - logging.error - logging.critical - if __name__ == .__main__.: diff --git a/setup.py b/setup.py deleted file mode 100755 index 812ab51..0000000 --- a/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -from os.path import join -from setuptools import setup, find_packages - - -def get_version(): - with open(join("tinytag", "__init__.py")) as f: - version_line = next(line for line in f if line.startswith("__version__ =")) - return version_line.split("=")[1].strip().strip("\"'") - - -setup( - name="tinytag", - version=get_version(), - packages=find_packages(), - ) diff --git a/tinytag/__init__.py b/tinytag/__init__.py index 6d51173..1623ac5 100644 --- a/tinytag/__init__.py +++ b/tinytag/__init__.py @@ -1,10 +1,15 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -from .tinytag import TinyTag, TinyTagException, ID3, Ogg, Wave, Flac -import sys +# SPDX-FileCopyrightText: 2014-2024 tinytag Contributors +# 
SPDX-License-Identifier: MIT +"""Audio file metadata reader.""" -__version__ = '1.5.0' +__version__ = '2.1.1' -if __name__ == '__main__': - print(TinyTag.get(sys.argv[1])) +from .tinytag import ( + TinyTag, Image, Images, OtherFields, OtherImages, + TinyTagException, ParseError, UnsupportedFormatError +) +__all__ = ( + "TinyTag", "Image", "Images", "OtherFields", "OtherImages", + "TinyTagException", "ParseError", "UnsupportedFormatError" +) diff --git a/tinytag/__main__.py b/tinytag/__main__.py index 83ce9a1..35debcc 100755 --- a/tinytag/__main__.py +++ b/tinytag/__main__.py @@ -1,88 +1,114 @@ -import os -import json +# SPDX-FileCopyrightText: 2015-2024 tinytag Contributors +# SPDX-License-Identifier: MIT + +# pylint: disable=missing-module-docstring,protected-access + +from __future__ import annotations + import sys -from os.path import splitext + +from io import StringIO +from os.path import isfile, splitext from tinytag import TinyTag, TinyTagException -def usage(): + +def _usage() -> None: print('''tinytag [options] - + -h, --help Display help - + -i, --save-image Save the cover art to a file - + -f, --format json|csv|tsv|tabularcsv Specify how the output should be formatted - + -s, --skip-unsupported - Skip files that do not have a file extension supported by tinytag - - ''') + Skip files that do not have a file extension supported by tinytag + +''') -def pop_param(name, _default): + +def _pop_param(name: str, _default: str | None) -> str | None: if name in sys.argv: idx = sys.argv.index(name) sys.argv.pop(idx) return sys.argv.pop(idx) return _default -def pop_switch(name, _default): + +def _pop_switch(name: str) -> bool: if name in sys.argv: idx = sys.argv.index(name) sys.argv.pop(idx) return True return False -try: - display_help = pop_switch('--help', False) or pop_switch('-h', False) - if display_help: - usage() - sys.exit(0) - save_image_path = pop_param('--save-image', None) or pop_param('-i', None) - formatting = (pop_param('--format', None) or 
pop_param('-f', None)) or 'json' - skip_unsupported = pop_switch('--skip-unsupported', False) or pop_switch('-s', False) + +def _print_tag(tag: TinyTag, fmt: str, header_printed: bool = False) -> bool: + data = tag.as_dict() + if fmt == 'json': + import json # pylint: disable=import-outside-toplevel + print(json.dumps(data, ensure_ascii=False, indent=2)) + return header_printed + if fmt not in {'csv', 'tsv', 'tabularcsv'}: + return header_printed + import csv # pylint: disable=import-outside-toplevel + for field, value in data.items(): + if isinstance(value, str): + # use a more friendly separator for output + data[field] = value.replace('\x00', ';') + csv_file = StringIO() + delimiter = '\t' if fmt == 'tsv' else ',' + writer = csv.writer(csv_file, delimiter=delimiter, lineterminator='\n') + if fmt == 'tabularcsv': + if not header_printed: + writer.writerow(data.keys()) + header_printed = True + writer.writerow(data.values()) + value = csv_file.getvalue().strip() + else: + writer.writerows(data.items()) + value = csv_file.getvalue() + print(value) + return header_printed + + +def _run() -> int: + header_printed = False + image_path = _pop_param('--save-image', None) or _pop_param('-i', None) + fmt = (_pop_param('--format', None) or _pop_param('-f', None)) or 'json' + skip_unsupported = _pop_switch('--skip-unsupported') or _pop_switch('-s') filenames = sys.argv[1:] -except Exception as exc: - print(exc) - usage() - sys.exit(1) - -header_printed = False - -for i, filename in enumerate(filenames): - try: - if skip_unsupported: - if os.path.isdir(filename): - continue - if not TinyTag.is_supported(filename): - continue - tag = TinyTag.get(filename, image=save_image_path is not None) - if save_image_path: - # allow for saving the image of multiple files - actual_save_image_path = save_image_path - if len(filenames) > 1: - actual_save_image_path, ext = splitext(actual_save_image_path) - actual_save_image_path += '%05d' % i + ext - image = tag.get_image() - if image: - 
with open(actual_save_image_path, 'wb') as fh: - fh.write(image) - data = {'filename': filename} - data.update(tag.as_dict()) - if formatting == 'json': - print(json.dumps(data)) - elif formatting == 'csv': - print('\n'.join('%s,%s' % (k, v) for k, v in data.items())) - elif formatting == 'tsv': - print('\n'.join('%s\t%s' % (k, v) for k, v in data.items())) - elif formatting == 'tabularcsv': - if not header_printed: - print(','.join(k for k, v in data.items())) - header_printed = True - print(','.join('"%s"' % v for k, v in data.items())) - except TinyTagException as e: - sys.stderr.write('%s: %s\n' % (filename, str(e))) - sys.exit(1) + display_help = not filenames or _pop_switch('--help') or _pop_switch('-h') + if display_help: + _usage() + return 0 + + for i, filename in enumerate(filenames): + if (skip_unsupported + and not (TinyTag.is_supported(filename) and isfile(filename))): + continue + try: + tag = TinyTag.get(filename, image=image_path is not None) + if image_path: + # allow for saving the image of multiple files + actual_image_path = image_path + if len(filenames) > 1: + actual_image_path, ext = splitext(actual_image_path) + actual_image_path += f'{i:05d}{ext}' + image = tag.images.any + if image is not None: + with open(actual_image_path, 'wb') as file_handle: + file_handle.write(image.data) + header_printed = _print_tag(tag, fmt, header_printed) + except (OSError, TinyTagException) as exc: + sys.stderr.write(f'{filename}: {exc}\n') + return 1 + return 0 + + +if __name__ == '__main__': + sys.exit(_run()) diff --git a/tinytag/icons/icon.svg b/tinytag/icons/icon.svg new file mode 100644 index 0000000..601cb01 --- /dev/null +++ b/tinytag/icons/icon.svg @@ -0,0 +1,5 @@ + + diff --git a/tinytag/icons/icon_bg.svg b/tinytag/icons/icon_bg.svg new file mode 100644 index 0000000..3215d2f --- /dev/null +++ b/tinytag/icons/icon_bg.svg @@ -0,0 +1,5 @@ + + diff --git a/tinytag/icons/icon_bg_round.svg b/tinytag/icons/icon_bg_round.svg new file mode 100644 index 
0000000..a465a52 --- /dev/null +++ b/tinytag/icons/icon_bg_round.svg @@ -0,0 +1,5 @@ + + diff --git a/tinytag/py.typed b/tinytag/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tinytag/tests/custom_samples/instructions.txt b/tinytag/tests/custom_samples/instructions.txt deleted file mode 100644 index 23bd8c4..0000000 --- a/tinytag/tests/custom_samples/instructions.txt +++ /dev/null @@ -1,20 +0,0 @@ -You can easily check if tinytag does the right thing by placing test files in -this folder. - -The name of the test file will automatically create a test, when running pytest. - -For example the file - - some-funky-name-d=10.5-sr=44100.mp3 - -will run a test that checks if the file has a duration of 10.5 and a samplerate -of 44100 seconds. - -These are the prefixes that can be used for the expected values: - - sr - samplerate - d - duration - b - bitrate - c - channels - dn - disc number - dt - disc total diff --git a/tinytag/tests/please download the test samples from github.txt b/tinytag/tests/please download the test samples from github.txt deleted file mode 100644 index b76b820..0000000 --- a/tinytag/tests/please download the test samples from github.txt +++ /dev/null @@ -1,3 +0,0 @@ -If you installed tinytag from pip, it is missing the test samples needed to -run the test suite. please download the sources (including the test samples) -from github to run the test suite. 
See: https://github.com/devsnd/tinytag diff --git a/tinytag/tests/samples/REUSE.toml b/tinytag/tests/samples/REUSE.toml new file mode 100644 index 0000000..6726561 --- /dev/null +++ b/tinytag/tests/samples/REUSE.toml @@ -0,0 +1,6 @@ +version = 1 + +[[annotations]] +path = ["*"] +SPDX-FileCopyrightText = "NONE" +SPDX-License-Identifier = "CC0-1.0" diff --git a/tinytag/tests/samples/adpcm.wav b/tinytag/tests/samples/adpcm.wav new file mode 100644 index 0000000..d6a9008 Binary files /dev/null and b/tinytag/tests/samples/adpcm.wav differ diff --git a/tinytag/tests/samples/aiff_extra_tags.aiff b/tinytag/tests/samples/aiff_extra_tags.aiff new file mode 100644 index 0000000..9051b25 Binary files /dev/null and b/tinytag/tests/samples/aiff_extra_tags.aiff differ diff --git a/tinytag/tests/samples/aiff_with_image.aiff b/tinytag/tests/samples/aiff_with_image.aiff new file mode 100644 index 0000000..bbf89fb Binary files /dev/null and b/tinytag/tests/samples/aiff_with_image.aiff differ diff --git a/tinytag/tests/samples/alac_file.m4a b/tinytag/tests/samples/alac_file.m4a new file mode 100644 index 0000000..fd4ea5e Binary files /dev/null and b/tinytag/tests/samples/alac_file.m4a differ diff --git a/tinytag/tests/samples/chinese_id3.mp3 b/tinytag/tests/samples/chinese_id3.mp3 new file mode 100644 index 0000000..b81b0c1 Binary files /dev/null and b/tinytag/tests/samples/chinese_id3.mp3 differ diff --git a/tinytag/tests/samples/cover_img.mp3 b/tinytag/tests/samples/cover_img.mp3 deleted file mode 100644 index 4be6e72..0000000 Binary files a/tinytag/tests/samples/cover_img.mp3 and /dev/null differ diff --git a/tinytag/tests/samples/cut_off_titles.mp3 b/tinytag/tests/samples/cut_off_titles.mp3 new file mode 100644 index 0000000..c4432b4 Binary files /dev/null and b/tinytag/tests/samples/cut_off_titles.mp3 differ diff --git a/tinytag/tests/samples/detect_ogg_flac.x b/tinytag/tests/samples/detect_ogg_flac.x new file mode 100644 index 0000000..dd605c0 Binary files /dev/null and 
b/tinytag/tests/samples/detect_ogg_flac.x differ diff --git a/tinytag/tests/samples/detect_ogg_opus.x b/tinytag/tests/samples/detect_ogg_opus.x new file mode 100644 index 0000000..1d170dc Binary files /dev/null and b/tinytag/tests/samples/detect_ogg_opus.x differ diff --git a/tinytag/tests/samples/detect_ogg.x b/tinytag/tests/samples/detect_ogg_vorbis.x similarity index 100% rename from tinytag/tests/samples/detect_ogg.x rename to tinytag/tests/samples/detect_ogg_vorbis.x diff --git a/tinytag/tests/samples/flac_invalid_track_number.flac b/tinytag/tests/samples/flac_invalid_track_number.flac new file mode 100644 index 0000000..d32f29c Binary files /dev/null and b/tinytag/tests/samples/flac_invalid_track_number.flac differ diff --git a/tinytag/tests/samples/flac_with_image.flac b/tinytag/tests/samples/flac_with_image.flac index 176d376..2e3352d 100644 Binary files a/tinytag/tests/samples/flac_with_image.flac and b/tinytag/tests/samples/flac_with_image.flac differ diff --git a/tinytag/tests/samples/gsm_6_10.wav b/tinytag/tests/samples/gsm_6_10.wav new file mode 100644 index 0000000..b668840 Binary files /dev/null and b/tinytag/tests/samples/gsm_6_10.wav differ diff --git a/tinytag/tests/samples/id3_frames.mp3 b/tinytag/tests/samples/id3_frames.mp3 new file mode 100644 index 0000000..6342e3d Binary files /dev/null and b/tinytag/tests/samples/id3_frames.mp3 differ diff --git a/tinytag/tests/samples/id3_multiple_artists.mp3 b/tinytag/tests/samples/id3_multiple_artists.mp3 new file mode 100644 index 0000000..f1c7687 Binary files /dev/null and b/tinytag/tests/samples/id3_multiple_artists.mp3 differ diff --git a/tinytag/tests/samples/id3_xxx_lang.mp3 b/tinytag/tests/samples/id3_xxx_lang.mp3 new file mode 100644 index 0000000..921f6bd Binary files /dev/null and b/tinytag/tests/samples/id3_xxx_lang.mp3 differ diff --git a/tinytag/tests/samples/id3image_without_description.mp3 b/tinytag/tests/samples/id3image_without_description.mp3 deleted file mode 100644 index 
a4a8cb1..0000000 Binary files a/tinytag/tests/samples/id3image_without_description.mp3 and /dev/null differ diff --git a/tinytag/tests/samples/id3v1_does_not_overwrite_id3v2.mp3 b/tinytag/tests/samples/id3v1_does_not_overwrite_id3v2.mp3 new file mode 100644 index 0000000..a725b88 Binary files /dev/null and b/tinytag/tests/samples/id3v1_does_not_overwrite_id3v2.mp3 differ diff --git a/tinytag/tests/samples/id3v22_image.mp3 b/tinytag/tests/samples/id3v22_image.mp3 deleted file mode 100644 index 701219b..0000000 Binary files a/tinytag/tests/samples/id3v22_image.mp3 and /dev/null differ diff --git a/tinytag/tests/samples/id3v22_with_image.mp3 b/tinytag/tests/samples/id3v22_with_image.mp3 new file mode 100644 index 0000000..2118a69 Binary files /dev/null and b/tinytag/tests/samples/id3v22_with_image.mp3 differ diff --git a/tinytag/tests/samples/id3v24_genre_null_byte.mp3 b/tinytag/tests/samples/id3v24_genre_null_byte.mp3 new file mode 100644 index 0000000..8e8dd88 Binary files /dev/null and b/tinytag/tests/samples/id3v24_genre_null_byte.mp3 differ diff --git a/tinytag/tests/samples/image-text-encoding.mp3 b/tinytag/tests/samples/image-text-encoding.mp3 new file mode 100644 index 0000000..d37b2f1 Binary files /dev/null and b/tinytag/tests/samples/image-text-encoding.mp3 differ diff --git a/tinytag/tests/samples/invalid_sample_rate.aiff b/tinytag/tests/samples/invalid_sample_rate.aiff new file mode 100644 index 0000000..da088da Binary files /dev/null and b/tinytag/tests/samples/invalid_sample_rate.aiff differ diff --git a/tinytag/tests/samples/iso8859_with_image.m4a b/tinytag/tests/samples/iso8859_with_image.m4a deleted file mode 100644 index 0e56f5c..0000000 Binary files a/tinytag/tests/samples/iso8859_with_image.m4a and /dev/null differ diff --git a/tinytag/tests/samples/lossless.wma b/tinytag/tests/samples/lossless.wma new file mode 100644 index 0000000..89f730a Binary files /dev/null and b/tinytag/tests/samples/lossless.wma differ diff --git 
a/tinytag/tests/samples/mpeg4_desc_cmt.m4a b/tinytag/tests/samples/mpeg4_desc_cmt.m4a new file mode 100644 index 0000000..026de1d Binary files /dev/null and b/tinytag/tests/samples/mpeg4_desc_cmt.m4a differ diff --git a/tinytag/tests/samples/mpeg4_with_image.m4a b/tinytag/tests/samples/mpeg4_with_image.m4a new file mode 100644 index 0000000..0cbb769 Binary files /dev/null and b/tinytag/tests/samples/mpeg4_with_image.m4a differ diff --git a/tinytag/tests/samples/mpeg4_xa9des.m4a b/tinytag/tests/samples/mpeg4_xa9des.m4a new file mode 100644 index 0000000..44dc4e5 Binary files /dev/null and b/tinytag/tests/samples/mpeg4_xa9des.m4a differ diff --git a/tinytag/tests/samples/multi_value.m4a b/tinytag/tests/samples/multi_value.m4a new file mode 100644 index 0000000..9c618c6 Binary files /dev/null and b/tinytag/tests/samples/multi_value.m4a differ diff --git a/tinytag/tests/samples/mvhd_version_1.m4a b/tinytag/tests/samples/mvhd_version_1.m4a new file mode 100644 index 0000000..595680b Binary files /dev/null and b/tinytag/tests/samples/mvhd_version_1.m4a differ diff --git "a/tinytag/tests/samples/non_ascii_filename_\303\244\303\244\303\244.mp3" "b/tinytag/tests/samples/non_ascii_filename_\303\244\303\244\303\244.mp3" new file mode 100644 index 0000000..bbc9739 Binary files /dev/null and "b/tinytag/tests/samples/non_ascii_filename_\303\244\303\244\303\244.mp3" differ diff --git a/tinytag/tests/samples/ogg_with_image.ogg b/tinytag/tests/samples/ogg_with_image.ogg new file mode 100644 index 0000000..6cc4c88 Binary files /dev/null and b/tinytag/tests/samples/ogg_with_image.ogg differ diff --git a/tinytag/tests/samples/riff_extra_zero.wav b/tinytag/tests/samples/riff_extra_zero.wav new file mode 100644 index 0000000..93bacea Binary files /dev/null and b/tinytag/tests/samples/riff_extra_zero.wav differ diff --git a/tinytag/tests/samples/riff_extra_zero_2.wav b/tinytag/tests/samples/riff_extra_zero_2.wav new file mode 100644 index 0000000..0d7bfde Binary files /dev/null and 
b/tinytag/tests/samples/riff_extra_zero_2.wav differ diff --git a/tinytag/tests/samples/test.spx b/tinytag/tests/samples/test.spx new file mode 100644 index 0000000..524f62a Binary files /dev/null and b/tinytag/tests/samples/test.spx differ diff --git a/tinytag/tests/samples/test2.m4a b/tinytag/tests/samples/test2.m4a index 4b58dcd..8c1e15f 100644 Binary files a/tinytag/tests/samples/test2.m4a and b/tinytag/tests/samples/test2.m4a differ diff --git a/tinytag/tests/samples/test_flac.oga b/tinytag/tests/samples/test_flac.oga new file mode 100644 index 0000000..dd605c0 Binary files /dev/null and b/tinytag/tests/samples/test_flac.oga differ diff --git a/tinytag/tests/samples/test_with_image.aiff b/tinytag/tests/samples/test_with_image.aiff deleted file mode 100644 index 6dc6095..0000000 Binary files a/tinytag/tests/samples/test_with_image.aiff and /dev/null differ diff --git a/tinytag/tests/samples/utf16_no_bom.mp3 b/tinytag/tests/samples/utf16_no_bom.mp3 new file mode 100644 index 0000000..d82bc19 Binary files /dev/null and b/tinytag/tests/samples/utf16_no_bom.mp3 differ diff --git a/tinytag/tests/samples/vbr11.mp3 b/tinytag/tests/samples/vbr11.mp3 new file mode 100644 index 0000000..d893948 Binary files /dev/null and b/tinytag/tests/samples/vbr11.mp3 differ diff --git a/tinytag/tests/samples/vbr11stereo.mp3 b/tinytag/tests/samples/vbr11stereo.mp3 new file mode 100644 index 0000000..d539fc9 Binary files /dev/null and b/tinytag/tests/samples/vbr11stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr16.mp3 b/tinytag/tests/samples/vbr16.mp3 new file mode 100644 index 0000000..93b3fe0 Binary files /dev/null and b/tinytag/tests/samples/vbr16.mp3 differ diff --git a/tinytag/tests/samples/vbr16stereo.mp3 b/tinytag/tests/samples/vbr16stereo.mp3 new file mode 100644 index 0000000..f473e25 Binary files /dev/null and b/tinytag/tests/samples/vbr16stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr22.mp3 b/tinytag/tests/samples/vbr22.mp3 new file mode 100644 index 
0000000..c855b11 Binary files /dev/null and b/tinytag/tests/samples/vbr22.mp3 differ diff --git a/tinytag/tests/samples/vbr22stereo.mp3 b/tinytag/tests/samples/vbr22stereo.mp3 new file mode 100644 index 0000000..723253c Binary files /dev/null and b/tinytag/tests/samples/vbr22stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr32.mp3 b/tinytag/tests/samples/vbr32.mp3 new file mode 100644 index 0000000..893800c Binary files /dev/null and b/tinytag/tests/samples/vbr32.mp3 differ diff --git a/tinytag/tests/samples/vbr32stereo.mp3 b/tinytag/tests/samples/vbr32stereo.mp3 new file mode 100644 index 0000000..5e59008 Binary files /dev/null and b/tinytag/tests/samples/vbr32stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr44.mp3 b/tinytag/tests/samples/vbr44.mp3 new file mode 100644 index 0000000..1ed2295 Binary files /dev/null and b/tinytag/tests/samples/vbr44.mp3 differ diff --git a/tinytag/tests/samples/vbr44stereo.mp3 b/tinytag/tests/samples/vbr44stereo.mp3 new file mode 100644 index 0000000..e40c3f9 Binary files /dev/null and b/tinytag/tests/samples/vbr44stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr48.mp3 b/tinytag/tests/samples/vbr48.mp3 new file mode 100644 index 0000000..153edfc Binary files /dev/null and b/tinytag/tests/samples/vbr48.mp3 differ diff --git a/tinytag/tests/samples/vbr48stereo.mp3 b/tinytag/tests/samples/vbr48stereo.mp3 new file mode 100644 index 0000000..3505c17 Binary files /dev/null and b/tinytag/tests/samples/vbr48stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr8.mp3 b/tinytag/tests/samples/vbr8.mp3 new file mode 100644 index 0000000..89b983b Binary files /dev/null and b/tinytag/tests/samples/vbr8.mp3 differ diff --git a/tinytag/tests/samples/vbr8stereo.mp3 b/tinytag/tests/samples/vbr8stereo.mp3 new file mode 100644 index 0000000..614eb57 Binary files /dev/null and b/tinytag/tests/samples/vbr8stereo.mp3 differ diff --git a/tinytag/tests/samples/vbr_xing_header_short.mp3 b/tinytag/tests/samples/vbr_xing_header_short.mp3 
new file mode 100644 index 0000000..0c2d62f Binary files /dev/null and b/tinytag/tests/samples/vbr_xing_header_short.mp3 differ diff --git a/tinytag/tests/samples/wav_invalid_track_number.wav b/tinytag/tests/samples/wav_invalid_track_number.wav new file mode 100644 index 0000000..17140da Binary files /dev/null and b/tinytag/tests/samples/wav_invalid_track_number.wav differ diff --git a/tinytag/tests/samples/wav_with_image.wav b/tinytag/tests/samples/wav_with_image.wav new file mode 100644 index 0000000..8f15b98 Binary files /dev/null and b/tinytag/tests/samples/wav_with_image.wav differ diff --git a/tinytag/tests/samples/with_id3_header.flac b/tinytag/tests/samples/with_id3_header.flac deleted file mode 100644 index 0eca4ec..0000000 Binary files a/tinytag/tests/samples/with_id3_header.flac and /dev/null differ diff --git a/tinytag/tests/samples/wma_invalid_track_number.wma b/tinytag/tests/samples/wma_invalid_track_number.wma new file mode 100644 index 0000000..24780dd Binary files /dev/null and b/tinytag/tests/samples/wma_invalid_track_number.wma differ diff --git a/tinytag/tests/test_all.py b/tinytag/tests/test_all.py index 5c96f0c..ce8dadf 100644 --- a/tinytag/tests/test_all.py +++ b/tinytag/tests/test_all.py @@ -1,311 +1,1697 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# tests can be extended using other bigger files that are not going to be -# checked into git, by placing them into the custom_samples folder -# -# see custom_samples/instructions.txt -# - - -from __future__ import unicode_literals - -import io -import os -import shutil -import sys -import tempfile - -import pytest -import re - -from pytest import raises - -from tinytag import TinyTagException, TinyTag, ID3, Ogg, Wave, Flac -from tinytag.tinytag import Wma, MP4, Aiff - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = dict # python 2.6 and 3.2 compat - - -testfiles = OrderedDict([ - # MP3 - ('samples/vbri.mp3', {'extra': {'url': ''}, 'channels': 2, 
'samplerate': 44100, 'track_total': None, 'duration': 0.47020408163265304, 'album': 'I Can Walk On Water I Can Fly', 'year': '2007', 'title': 'I Can Walk On Water I Can Fly', 'artist': 'Basshunter', 'track': '01', 'filesize': 8192, 'audio_offset': 1007, 'genre': '(3)Dance', 'comment': '\ufeff\ufeffRipped by THSLIVE', 'composer': ''}), - ('samples/cbr.mp3', {'extra': {}, 'channels': 2, 'samplerate': 44100, 'track_total': None, 'duration': 0.49, 'album': 'I Can Walk On Water I Can Fly', 'year': '2007', 'title': 'I Can Walk On Water I Can Fly', 'artist': 'Basshunter', 'track': '01', 'filesize': 8186, 'audio_offset': 246, 'bitrate': 128.0, 'genre': 'Dance', 'comment': 'Ripped by THSLIVE'}), - # the output of the lame encoder was 185.4 bitrate, but this is good enough for now - ('samples/vbr_xing_header.mp3', {'extra': {}, 'bitrate': 186, 'channels': 1, 'samplerate': 44100, 'duration': 3.944489795918367, 'filesize': 91731, 'audio_offset': 441}), - ('samples/vbr_xing_header_2channel.mp3', {'extra': {}, 'filesize': 2000, 'album': "The Harpers' Masque", 'artist': 'Knodel and Valencia', 'audio_offset': 694, 'bitrate': 46, 'channels': 2, 'duration': 250.04408163265308, 'samplerate': 22050, 'title': 'Lochaber No More', 'year': '1992'}), - ('samples/id3v22-test.mp3', {'extra': {}, 'channels': 2, 'samplerate': 44100, 'track_total': '11', 'duration': 0.138, 'album': 'Hymns for the Exiled', 'year': '2004', 'title': 'cosmic american', 'artist': 'Anais Mitchell', 'track': '3', 'filesize': 5120, 'audio_offset': 2225, 'bitrate': 160.0, 'comment': 'Waterbug Records, www.anaismitchell.com'}), - ('samples/silence-44-s-v1.mp3', {'extra': {}, 'channels': 2, 'samplerate': 44100, 'genre': 'Darkwave', 'track_total': None, 'duration': 3.7355102040816326, 'album': 'Quod Libet Test Data', 'year': '2004', 'title': 'Silence', 'artist': 'piman', 'track': '2', 'filesize': 15070, 'audio_offset': 0, 'bitrate': 32.0, 'comment': ''}), - ('samples/id3v1-latin1.mp3', {'extra': {}, 'channels': None, 
'samplerate': 44100, 'genre': 'Rock', 'samplerate': None, 'album': 'The Young Americans', 'title': 'Play Dead', 'filesize': 256, 'track': '12', 'artist': 'Björk', 'track_total': None, 'year': '1993', 'comment': ' '}), - ('samples/UTF16.mp3', {'extra': {'text': 'MusicBrainz Artist Id664c3e0e-42d8-48c1-b209-1efca19c0325', 'url': 'WIKIPEDIA_RELEASEhttp://en.wikipedia.org/wiki/High_Violet'}, 'channels': None, 'samplerate': None, 'track_total': '11', 'track': '07', 'artist': 'The National', 'year': '2010', 'album': 'High Violet', 'title': 'Lemonworld', 'filesize': 20480, 'genre': 'Indie', 'comment': 'Track 7'}), - ('samples/utf-8-id3v2.mp3', {'extra': {}, 'channels': None, 'samplerate': 44100, 'genre': 'Acustico', 'track_total': '21', 'track': '01', 'filesize': 2119, 'title': 'Gran día', 'artist': 'Paso a paso', 'album': 'S/T', 'year': None, 'samplerate': None, 'disc': '', 'disc_total': '0'}), - ('samples/empty_file.mp3', {'extra': {}, 'channels': None, 'samplerate': None, 'track_total': None, 'album': None, 'year': None, 'title': None, 'track': None, 'artist': None, 'filesize': 0}), - ('samples/silence-44khz-56k-mono-1s.mp3', {'extra': {}, 'channels': 1, 'samplerate': 44100, 'duration': 1.018, 'samplerate': 44100, 'filesize': 7280, 'audio_offset': 0, 'bitrate': 56.0}), - ('samples/silence-22khz-mono-1s.mp3', {'extra': {}, 'channels': 1, 'samplerate': 22050, 'filesize': 4284, 'audio_offset': 0, 'bitrate': 32.0, 'duration': 1.0438932496075353}), - ('samples/id3v24-long-title.mp3', {'extra': {}, 'track': '1', 'disc_total': '1', 'album': 'The Double EP: A Sea of Split Peas', 'filesize': 10000, 'channels': None, 'track_total': '12', 'genre': 'AlternRock', 'title': 'Out of the Woodwork', 'artist': 'Courtney Barnett', 'albumartist': 'Courtney Barnett', 'samplerate': None, 'year': None, 'disc': '1', 'comment': 'Amazon.com Song ID: 240853806', 'composer': 'Courtney Barnett'}), - ('samples/utf16be.mp3', {'extra': {}, 'title': '52-girls', 'filesize': 2048, 'track': '6', 'album': 
'party mix', 'artist': 'The B52s', 'genre': 'Rock', 'albumartist': None, 'disc': None, 'channels': None}), - ('samples/id3v22_image.mp3', {'extra': {}, 'title': 'Kids (MGMT Cover) ', 'filesize': 35924, 'album': 'winniecooper.net ', 'artist': 'The Kooks', 'year': '2008', 'channels': None, 'genre': '.'}), - ('samples/id3v22.TCO.genre.mp3', {'extra': {}, 'filesize': 500, 'album': 'ARTPOP', 'artist': 'Lady GaGa', 'comment': 'engiTunPGAP0', 'genre': 'Pop', 'title': 'Applause'}), - ('samples/id3_comment_utf_16_with_bom.mp3', {'extra': {}, 'filesize': 19980, 'album': 'Ghosts I-IV', 'albumartist': 'Nine Inch Nails', 'artist': 'Nine Inch Nails', 'comment': '', 'disc': '1', 'disc_total': '2', 'title': '1 Ghosts I', 'track': '1', 'track_total': '36', 'year': '2008', 'comment': '3/4 time'}), - ('samples/id3_comment_utf_16_double_bom.mp3', {'extra': {'text': 'LABEL\ufeffUnclear'}, 'filesize': 512, 'album': 'The Embrace', 'artist': 'Johannes Heil & D.Diggler', 'comment': 'Unclear', 'title': 'The Embrace (Romano Alfieri Remix)', 'track': '04-johannes_heil_and_d.diggler-the_embrace_(romano_alfieri_remix)', 'year': '2012'}), - ('samples/id3_genre_id_out_of_bounds.mp3', {'extra': {}, 'filesize': 512, 'album': 'MECHANICAL ANIMALS', 'artist': 'Manson', 'comment': '', 'genre': '(255)', 'title': '01 GREAT BIG WHITE WORLD', 'track': 'Marilyn', 'year': '0'}), - - # OGG - ('samples/empty.ogg', {'extra': {}, 'track_total': None, 'duration': 3.684716553287982, 'album': None, '_max_samplenum': 162496, 'year': None, 'title': None, 'artist': None, 'track': None, '_tags_parsed': False, 'filesize': 4328, 'audio_offset': 0, 'bitrate': 109.375, 'samplerate': 44100}), - ('samples/multipagecomment.ogg', {'extra': {}, 'track_total': None, 'duration': 3.684716553287982, 'album': None, '_max_samplenum': 162496, 'year': None, 'title': None, 'artist': None, 'track': None, '_tags_parsed': False, 'filesize': 135694, 'audio_offset': 0, 'bitrate': 109.375, 'samplerate': 44100}), - 
('samples/multipage-setup.ogg', {'extra': {}, 'genre': 'JRock', 'track_total': None, 'duration': 4.128798185941043, 'album': 'Timeless', 'year': '2006', 'title': 'Burst', 'artist': 'UVERworld', 'track': '7', '_tags_parsed': False, 'filesize': 76983, 'audio_offset': 0, 'bitrate': 156.25, 'samplerate': 44100}), - ('samples/test.ogg', {'extra': {}, 'track_total': None, 'duration': 1.0, 'album': 'the boss', 'year': '2006', 'title': 'the boss', 'artist': 'james brown', 'track': '1', '_tags_parsed': False, 'filesize': 7467, 'audio_offset': 0, 'bitrate': 156.25, 'samplerate': 44100, 'comment': 'hello!'}), - ('samples/corrupt_metadata.ogg', {'extra': {}, 'filesize': 18648, 'audio_offset': 0, 'bitrate': 78.125, 'duration': 2.132358276643991, 'samplerate': 44100}), - ('samples/composer.ogg', {'extra': {}, 'filesize': 4480, 'album': 'An Album', 'artist': 'An Artist', 'audio_offset': 0, 'bitrate': 109.375, 'duration': 3.684716553287982, 'genre': 'Some Genre', 'samplerate': 44100, 'title': 'A Title', 'track': '2', 'year': '2007', 'composer': 'some composer'}), - - # OPUS - ('samples/test.opus', {'extra': {}, 'albumartist': 'Alstroemeria Records', 'samplerate': 48000, 'channels': 2, 'track': '1', 'disc': '1', 'title': 'Bad Apple!!', 'duration': 2.0, 'year': '2008.05.25', 'filesize': 10000, 'artist': 'nomico', 'album': 'Exserens - A selection of Alstroemeria Records', 'comment': 'ARCD0018 - Lovelight'}), - ('samples/8khz_5s.opus', {'extra': {}, 'filesize': 7251, 'channels': 1, 'samplerate': 48000, 'duration': 5.0}), - - # WAV - ('samples/test.wav', {'extra': {}, 'channels': 2, 'duration': 1.0, 'filesize': 176444, 'bitrate': 1378.125, 'samplerate': 44100, 'audio_offest': 36}), - ('samples/test3sMono.wav', {'extra': {}, 'channels': 1, 'duration': 3.0, 'filesize': 264644, 'bitrate': 689.0625, 'duration': 3.0, 'samplerate': 44100, 'audio_offest': 36}), - ('samples/test-tagged.wav', {'extra': {}, 'channels': 2, 'duration': 1.0, 'filesize': 176688, 'album': 'thealbum', 'artist': 
'theartisst', 'bitrate': 1378.125, 'genre': 'Acid', 'samplerate': 44100, 'title': 'thetitle', 'track': '66', 'audio_offest': 36, 'comment': 'hello', 'year': '2014'}), - ('samples/test-riff-tags.wav', {'extra': {}, 'channels': 2, 'duration': 1.0, 'filesize': 176540, 'album': None, 'artist': 'theartisst', 'bitrate': 1378.125, 'genre': 'Acid', 'samplerate': 44100, 'title': 'thetitle', 'track': None, 'audio_offest': 36, 'comment': 'hello', 'year': '2014'}), - ('samples/silence-22khz-mono-1s.wav', {'extra': {}, 'channels': 1, 'duration': 1.0, 'filesize': 48160, 'bitrate': 344.53125, 'samplerate': 22050, 'audio_offest': 4088}), - ('samples/id3_header_with_a_zero_byte.wav', {'extra': {}, 'channels': 1, 'duration': 1.0, 'filesize': 44280, 'bitrate': 344.53125, 'samplerate': 22050, 'audio_offest': 122, 'artist': 'Purpley', 'title': 'Test000', 'track': '17'}), - - # FLAC - ('samples/flac1sMono.flac', {'extra': {}, 'genre': 'Avantgarde', 'track_total': None, 'album': 'alb', 'year': '2014', 'duration': 1.0, 'title': 'track', 'track': '23', 'artist': 'art', 'channels': 1, 'filesize': 26632, 'bitrate': 208.0625, 'samplerate': 44100}), - ('samples/flac453sStereo.flac', {'extra': {}, 'channels': 2, 'track_total': None, 'album': None, 'year': None, 'duration': 453.51473922902494, 'title': None, 'track': None, 'artist': None, 'filesize': 84236, 'bitrate': 1.45109671875, 'samplerate': 44100}), - ('samples/flac1.5sStereo.flac', {'extra': {}, 'channels': 2, 'track_total': None, 'album': 'alb', 'year': '2014', 'duration': 1.4995238095238095, 'title': 'track', 'track': '23', 'artist': 'art', 'filesize': 59868, 'bitrate': 311.9115195300095, 'genre': 'Avantgarde', 'samplerate': 44100}), - ('samples/flac_application.flac', {'extra': {}, 'channels': 2, 'track_total': '11', 'album': 'Belle and Sebastian Write About Love', 'year': '2010-10-11', 'duration': 273.64, 'title': 'I Want the World to Stop', 'track': '4', 'artist': 'Belle and Sebastian', 'filesize': 13000, 'bitrate': 
0.37115370559859673, 'samplerate': 44100}), - ('samples/no-tags.flac', {'extra': {}, 'channels': 2, 'track_total': None, 'album': None, 'year': None, 'duration': 3.684716553287982, 'title': None, 'track': None, 'artist': None, 'filesize': 4692, 'bitrate': 9.94818718614612, 'samplerate': 44100}), - ('samples/variable-block.flac', {'extra': {}, 'channels': 2, 'album': 'Appleseed Original Soundtrack', 'year': '2004', 'duration': 261.68, 'title': 'DIVE FOR YOU', 'track': '01', 'track_total': '11', 'artist': 'Boom Boom Satellites', 'filesize': 10240, 'bitrate': 0.3057169061449098, 'disc': '1', 'genre': 'Anime Soundtrack', 'samplerate': 44100, 'composer': 'Boom Boom Satellites (Lyrics)', 'disc_total': '2'}), - ('samples/106-invalid-streaminfo.flac', {'extra': {}, 'filesize': 4692}), - ('samples/106-short-picture-block-size.flac', {'extra': {}, 'filesize': 4692, 'bitrate': 9.94818718614612, 'channels': 2, 'duration': 3.68, 'samplerate': 44100}), - ('samples/with_id3_header.flac', {'extra': {}, 'filesize': 64837, 'album': ' ', 'artist': '群星', 'disc': '0', 'title': 'A 梦 哆啦 机器猫 短信铃声', 'track': '0', 'bitrate': 1116.9186328125, 'channels': 1, 'duration': 0.45351473922902497, 'genre': 'genre', 'samplerate': 44100, 'year': '2018'}), - ('samples/with_padded_id3_header.flac', {'extra': {}, 'filesize': 16070, 'album': 'album', 'albumartist': None, 'artist': 'artist', 'audio_offset': None, 'bitrate': 276.830859375, 'channels': 1, 'comment': None, 'disc': None, 'disc_total': None, 'duration': 0.45351473922902497, 'genre': 'genre', 'samplerate': 44100, 'title': 'title', 'track': '1', 'track_total': None, 'year': '2018'}), - ('samples/with_padded_id3_header2.flac', {'extra': {}, 'filesize': 19522, 'album': 'Unbekannter Titel', 'albumartist': None, 'artist': 'Unbekannter Künstler', 'audio_offset': None, 'bitrate': 336.29695312499996, 'channels': 1, 'comment': None, 'disc': '1', 'disc_total': '1', 'duration': 0.45351473922902497, 'genre': 'genre', 'samplerate': 44100, 'title': 'Track01', 
'track': '01', 'track_total': '05', 'year': '2018'}), - ('samples/flac_with_image.flac', {'extra': {}, 'filesize': 80000, 'album': 'smilin´ in circles', 'artist': 'Andreas Kümmert', 'bitrate': 7.479655337482049, 'channels': 2, 'disc': '1', 'disc_total': '1', 'duration': 83.56, 'genre': 'Blues', 'samplerate': 44100, 'title': 'intro', 'track': '01', 'track_total': '8'}), - - # WMA - ('samples/test2.wma', {'extra': {}, 'samplerate': 44100, 'album': 'The Colour and the Shape', 'title': 'Doll', 'bitrate': 64.04, 'filesize': 5800, 'track': '1', 'albumartist': 'Foo Fighters', 'artist': 'Foo Fighters', 'duration': 86.406, 'track_total': None, 'year': '1997', 'genre': 'Alternative', 'comment': '', 'composer': 'Foo Fighters'}), - - # M4A/MP4 - ('samples/test.m4a', {'extra': {}, 'samplerate': 44100, 'duration': 314.97, 'bitrate': 256.0, 'channels': 2, 'genre': 'Pop', 'year': '2011', 'title': 'Nothing', 'album': 'Only Our Hearts To Lose', 'track_total': '11', 'track': '11', 'artist': 'Marian', 'filesize': 61432}), - ('samples/test2.m4a', {'extra': {}, 'bitrate': 256.0, 'track': '1', 'albumartist': "Millie Jackson - Get It Out 'cha System - 1978", 'duration': 167.78739229024944, 'filesize': 223365, 'channels': 2, 'year': '1978', 'artist': 'Millie Jackson', 'track_total': '9', 'disc_total': '1', 'genre': 'R&B/Soul', 'album': "Get It Out 'cha System", 'samplerate': 44100, 'disc': '1', 'title': 'Go Out and Get Some', 'comment': "Millie Jackson - Get It Out 'cha System - 1978", 'composer': "Millie Jackson - Get It Out 'cha System - 1978"}), - ('samples/iso8859_with_image.m4a', {'extra': {}, 'artist': 'Major Lazer', 'filesize': 57017, 'title': 'Cold Water (feat. Justin Bieber & M�)', 'album': 'Cold Water (feat. Justin Bieber & M�) - Single', 'year': '2016', 'samplerate': 44100, 'duration': 188.545, 'genre': 'Electronic;Music', 'albumartist': 'Major Lazer', 'channels': 2, 'bitrate': 303040.001, 'comment': '? 
2016 Mad Decent'}), - - # AIFF - ('samples/test-tagged.aiff', {'extra': {}, 'channels': 2, 'duration': 1.0, 'filesize': 177620, 'artist': 'theartist', 'bitrate': 1378.125, 'genre': 'Acid', 'samplerate': 44100, 'track': '1', 'title': 'thetitle', 'album': 'thealbum', 'audio_offset': 76, 'comment': 'hello', 'year': '2014', }), - ('samples/test.aiff', {'extra': {'copyright': '℗ 1992 Ace Records'}, 'channels': 2, 'duration': 0.0, 'filesize': 164, 'artist': None, 'bitrate': 1378.125, 'genre': None, 'samplerate': 44100, 'track': None, 'title': 'Go Out and Get Some', 'album': None, 'audio_offset': 156, 'comment': 'Millie Jackson - Get It Out \'cha System - 1978', }), - ('samples/pluck-pcm8.aiff', {'extra': {}, 'channels': 2, 'duration': 0.2999546485260771, 'filesize': 6892, 'artist': 'Serhiy Storchaka', 'title': 'Pluck', 'album': 'Python Test Suite', 'bitrate': 344.53125, 'samplerate': 11025, 'audio_offset': 116, 'comment': 'Audacity Pluck + Wahwah', }), - ('samples/M1F1-mulawC-AFsp.afc', {'extra': {}, 'channels': 2, 'duration': 2.936625, 'filesize': 47148, 'artist': None, 'title': None, 'album': None, 'bitrate': 250, 'samplerate': 8000, 'audio_offset': 154, 'comment': 'AFspdate: 2003-01-30 03:28:34 UTCuser: kabal@CAPELLAprogram: CopyAudio', }), +# SPDX-FileCopyrightText: 2019-2025 tinytag Contributors +# SPDX-License-Identifier: MIT -]) +# pylint: disable=missing-class-docstring,missing-function-docstring +# pylint: disable=missing-module-docstring,protected-access +# pylint: disable=too-many-public-methods -testfolder = os.path.join(os.path.dirname(__file__)) - -# load custom samples -custom_samples_folder = os.path.join(testfolder, 'custom_samples') -pattern_field_name_type = [ - (r'sr=(\d+)', 'samplerate', int), - (r'dn=(\d+)', 'disc', str), - (r'dt=(\d+)', 'disc_total', str), - (r'd=(\d+.?\d*)', 'duration', float), - (r'b=(\d+)', 'bitrate', int), - (r'c=(\d)', 'channels', int), -] -for filename in os.listdir(custom_samples_folder): - if filename == 'instructions.txt': 
- continue - if os.path.isdir(os.path.join(custom_samples_folder, filename)): - continue - expected_values = {} - for pattern, fieldname, _type in pattern_field_name_type: - match = re.findall(pattern, filename) - if match: - expected_values[fieldname] = _type(match[0]) - if expected_values: - testfiles[os.path.join('custom_samples', filename)] = expected_values - else: - # if there are no expected values, just try parsing the file - testfiles[os.path.join('custom_samples', filename)] = {} - -@pytest.mark.parametrize("testfile,expected", [ - pytest.param(testfile, expected) for testfile, expected in testfiles.items() -]) -def test_file_reading(testfile, expected): - filename = os.path.join(testfolder, testfile) - # print(filename) - tag = TinyTag.get(filename) - - for key, expected_val in expected.items(): - result = getattr(tag, key) - fmt_string = 'field "%s": got %s (%s) expected %s (%s)!' - fmt_values = (key, repr(result), type(result), repr(expected_val), type(expected_val)) - if key == 'duration' and result is not None and expected_val is not None: - # allow duration to be off by 100 ms and a maximum of 1% - if abs(result - expected_val) < 0.100: - if expected_val and min(result, expected_val) / max(result, expected_val) > 0.99: - continue - assert result == expected_val, fmt_string % fmt_values - undefined_in_fixture = {} - for key, val in tag.__dict__.items(): - if key.startswith('_') or val is None: - continue - if key not in expected: - undefined_in_fixture[key] = val - assert not undefined_in_fixture, 'Missing data in fixture \n%s' % str(undefined_in_fixture) -# -# def test_generator(): -# for testfile, expected in testfiles.items(): -# yield get_info, testfile, expected - - -def test_pathlib_compatibility(): - try: - import pathlib - except ImportError: - return - testfile = next(iter(testfiles.keys())) - filename = pathlib.Path(testfolder) / testfile - tag = TinyTag.get(filename) - -@pytest.mark.skipif(sys.platform == "win32", reason='Windows does not 
support binary paths') -def test_binary_path_compatibility(): - binary_file_path = os.path.join(os.path.dirname(__file__).encode('utf-8'), b'\x01.mp3') - testfile = os.path.join(testfolder, next(iter(testfiles.keys()))) - shutil.copy(testfile, binary_file_path) - assert os.path.exists(binary_file_path) - TinyTag.get(binary_file_path) - os.unlink(binary_file_path) - assert not os.path.exists(binary_file_path) - - -@pytest.mark.xfail(raises=TinyTagException) -def test_unsupported_extension(): - bogus_file = os.path.join(testfolder, 'samples/there_is_no_such_ext.bogus') - TinyTag.get(bogus_file) - -@pytest.mark.xfail(raises=NotImplementedError) -def test_unsubclassed_tinytag_duration(): - tag = TinyTag(None, 0) - tag._determine_duration(None) - -@pytest.mark.xfail(raises=NotImplementedError) -def test_unsubclassed_tinytag_parse_tag(): - tag = TinyTag(None, 0) - tag._parse_tag(None) - -def test_mp3_length_estimation(): - ID3.set_estimation_precision(0.7) - tag = TinyTag.get(os.path.join(testfolder, 'samples/silence-44-s-v1.mp3')) - assert 3.5 < tag.duration < 4.0 - -@pytest.mark.xfail(raises=TinyTagException) -def test_unexpected_eof(): - tag = ID3.get(os.path.join(testfolder, 'samples/incomplete.mp3')) - -@pytest.mark.xfail(raises=TinyTagException) -def test_invalid_flac_file(): - tag = Flac.get(os.path.join(testfolder, 'samples/silence-44-s-v1.mp3')) - -@pytest.mark.xfail(raises=TinyTagException) -def test_invalid_mp3_file(): - tag = ID3.get(os.path.join(testfolder, 'samples/flac1.5sStereo.flac')) - -@pytest.mark.xfail(raises=TinyTagException) -def test_invalid_ogg_file(): - tag = Ogg.get(os.path.join(testfolder, 'samples/flac1.5sStereo.flac')) - -@pytest.mark.xfail(raises=TinyTagException) -def test_invalid_wave_file(): - tag = Wave.get(os.path.join(testfolder, 'samples/flac1.5sStereo.flac')) - -@pytest.mark.xfail(raises=TinyTagException) -def test_invalid_aiff_file(): - tag = Aiff.get(os.path.join(testfolder, 'samples/ilbm.aiff')) - -def test_unpad(): - # make sure 
that unpad only removes trailing 0-bytes - assert TinyTag._unpad('foo\x00') == 'foo' - assert TinyTag._unpad('foo\x00bar\x00') == 'foobar' - -def test_mp3_image_loading(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/cover_img.mp3'), image=True) - image_data = tag.get_image() - assert image_data is not None - assert 140000 < len(image_data) < 150000, 'Image is %d bytes but should be around 145kb' % len(image_data) - assert image_data.startswith(b'\xff\xd8\xff\xe0'), 'The image data must start with a jpeg header' - -def test_mp3_id3v22_image_loading(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/id3v22_image.mp3'), image=True) - image_data = tag.get_image() - assert image_data is not None - assert 18000 < len(image_data) < 19000, 'Image is %d bytes but should be around 18.1kb' % len(image_data) - assert image_data.startswith(b'\xff\xd8\xff\xe0'), 'The image data must start with a jpeg header' - -def test_mp3_image_loading_without_description(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/id3image_without_description.mp3'), image=True) - image_data = tag.get_image() - assert image_data is not None - assert 28600 < len(image_data) < 28700, 'Image is %d bytes but should be around 28.6kb' % len(image_data) - assert image_data.startswith(b'\xff\xd8\xff\xe0'), 'The image data must start with a jpeg header' - -def test_mp3_utf_8_invalid_string_raises_exception(): - with raises(TinyTagException): - tag = TinyTag.get(os.path.join(testfolder, 'samples/utf-8-id3v2-invalid-string.mp3')) - -def test_mp3_utf_8_invalid_string_can_be_ignored(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/utf-8-id3v2-invalid-string.mp3'), ignore_errors=True) - # the title used to be Gran dia, but I replaced the first byte with 0xFF, which should be ignored here - assert tag.title == 'ran día' - -def test_mp4_image_loading(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/iso8859_with_image.m4a'), image=True) - image_data = tag.get_image() - assert 
image_data is not None - assert 20000 < len(image_data) < 25000, 'Image is %d bytes but should be around 22kb' % len(image_data) - -def test_flac_image_loading(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/flac_with_image.flac'), image=True) - image_data = tag.get_image() - assert image_data is not None - assert 70000 < len(image_data) < 80000, 'Image is %d bytes but should be around 75kb' % len(image_data) - -def test_aiff_image_loading(): - tag = TinyTag.get(os.path.join(testfolder, 'samples/test_with_image.aiff'), image=True) - image_data = tag.get_image() - assert image_data is not None - assert 15000 < len(image_data) < 25000, 'Image is %d bytes but should be around 20kb' % len(image_data) - -@pytest.mark.parametrize("testfile,expected", [ - pytest.param(testfile, expected) for testfile, expected in [ - ('samples/detect_mp3_id3.x', ID3), - ('samples/detect_mp3_fffb.x', ID3), - ('samples/detect_ogg.x', Ogg), - ('samples/detect_wav.x', Wave), - ('samples/detect_flac.x', Flac), - ('samples/detect_wma.x', Wma), - ('samples/detect_mp4_m4a.x', MP4), - ('samples/detect_aiff.x', Aiff), - ] +from __future__ import annotations + +import os.path + +from io import BytesIO, TextIOWrapper +from math import isclose +from pathlib import Path +from platform import python_implementation, system +from sys import stdout +from unittest import skipIf, TestCase + +from tinytag import ParseError, TinyTagException, UnsupportedFormatError +from tinytag import Images, OtherFields, TinyTag +from tinytag.tinytag import _ID3, _Ogg, _Wave, _Flac, _Wma, _MP4, _Aiff + +TYPE_CHECKING = False + +# Lazy imports for type checking +if TYPE_CHECKING: + from typing import Mapping, Union + ExpectedTag = Mapping[str, Union[str, float, OtherFields]] +else: + ExpectedTag = dict + +TEST_FILES: dict[str, ExpectedTag] = dict([ + ('vbri.mp3', { + 'other': OtherFields(), + 'channels': 2, + 'samplerate': 44100, + 'duration': 0.47020408163265304, + 'album': 'I Can Walk On Water I Can Fly', + 'year': 
'2007', + 'title': 'I Can Walk On Water I Can Fly', + 'artist': 'Basshunter', + 'track': 1, + 'filesize': 8192, + 'genre': 'Dance', + 'comment': 'Ripped by THSLIVE', + 'bitrate': 125.33333333333333, + }), + ('cbr.mp3', { + 'other': OtherFields(), + 'channels': 2, + 'samplerate': 44100, + 'duration': 0.48866995073891617, + 'album': 'I Can Walk On Water I Can Fly', + 'year': '2007', + 'title': 'I Can Walk On Water I Can Fly', + 'artist': 'Basshunter', + 'track': 1, + 'filesize': 8186, + 'bitrate': 128.0, + 'genre': 'Dance', + 'comment': 'Ripped by THSLIVE', + }), + ('vbr_xing_header.mp3', { + 'other': OtherFields(), + 'bitrate': 186.04383278145696, + 'channels': 1, + 'samplerate': 44100, + 'duration': 3.944489795918367, + 'filesize': 91731, + }), + ('vbr_xing_header_2channel.mp3', { + 'other': OtherFields({ + 'encoder_settings': [ + 'LAME 32bits version 3.99.5 (http://lame.sf.net)' + ], + 'tlen': ['249976'] + }), + 'filesize': 2000, + 'album': "The Harpers' Masque", + 'artist': 'Knodel and Valencia', + 'bitrate': 46.276128290848305, + 'channels': 2, + 'duration': 250.04408163265308, + 'samplerate': 22050, + 'title': 'Lochaber No More', + 'year': '1992', + }), + ('id3v22-test.mp3', { + 'other': OtherFields({ + 'encoded_by': ['iTunes v4.6'], + 'itunnorm': [ + ' 0000044E 00000061 00009B67 000044C3 00022478 00022182' + ' 00007FCC 00007E5C 0002245E 0002214E' + ], + 'itunes_cddb_1': [ + '9D09130B+174405+11+150+14097+27391+43983+65786+84877+99399+' + '113226+132452+146426+163829' + ], + 'itunes_cddb_tracknumber': ['3'], + }), + 'channels': 2, + 'samplerate': 44100, + 'track_total': 11, + 'duration': 0.13836297152858082, + 'album': 'Hymns for the Exiled', + 'year': '2004', + 'title': 'cosmic american', + 'artist': 'Anais Mitchell', + 'track': 3, + 'filesize': 5120, + 'bitrate': 160.0, + 'comment': 'Waterbug Records, www.anaismitchell.com', + }), + ('silence-44-s-v1.mp3', { + 'other': OtherFields(), + 'channels': 2, + 'samplerate': 44100, + 'genre': 'Darkwave', + 'duration': 
3.738712956446946, + 'album': 'Quod Libet Test Data', + 'year': '2004', + 'title': 'Silence', + 'artist': 'piman', + 'track': 2, + 'filesize': 15070, + 'bitrate': 32.0, + }), + ('id3v1-latin1.mp3', { + 'other': OtherFields(), + 'genre': 'Rock', + 'album': 'The Young Americans', + 'title': 'Play Dead', + 'filesize': 256, + 'track': 12, + 'artist': 'Björk', + 'year': '1993', + 'comment': ' ', + }), + ('UTF16.mp3', { + 'other': OtherFields({ + 'musicbrainz artist id': ['664c3e0e-42d8-48c1-b209-1efca19c0325'], + 'musicbrainz album id': ['25322466-a29b-417b-b560-399687b91ddd'], + 'musicbrainz album artist id': [ + '664c3e0e-42d8-48c1-b209-1efca19c0325' + ], + 'musicbrainz disc id': ['p.5xoyYRtCVFe2gt0mfTfsXrO9U-'], + 'musicip puid': ['6ff97581-1c73-fc05-b4e4-a4ccee12ec84'], + 'asin': ['B003KVNV4S'], + 'musicbrainz album status': ['Official'], + 'musicbrainz album type': ['Album'], + 'musicbrainz album release country': ['United States'], + 'ufid': [ + ('http://musicbrainz.org\x00' + 'cf639964-eabb-4c40-9673-c2117e456ea5') + ], + 'publisher': ['4AD'], + 'tdat': ['1105'], + 'wxxx': [ + 'WIKIPEDIA_RELEASE\x00http://en.wikipedia.org/wiki/High_Violet' + ], + 'media': ['Digital'], + 'tlen': ['203733'], + 'encoder_settings': [ + 'LAME 32bits version 3.98.4 (http://www.mp3dev.org/)' + ], + }), + 'track_total': 11, + 'track': 7, + 'artist': 'The National', + 'year': '2010', + 'album': 'High Violet', + 'title': 'Lemonworld', + 'filesize': 20480, + 'genre': 'Indie', + 'comment': 'Track 7', + }), + ('utf-8-id3v2.mp3', { + 'other': OtherFields(), + 'genre': 'Acustico', + 'track_total': 21, + 'track': 1, + 'filesize': 2119, + 'title': 'Gran día', + 'artist': 'Paso a paso', + 'album': 'S/T', + 'disc_total': 0, + 'year': '2003', + }), + ('empty_file.mp3', { + 'other': OtherFields(), + 'filesize': 0 + }), + ('incomplete.mp3', { + 'other': OtherFields(), + 'filesize': 3 + }), + ('silence-44khz-56k-mono-1s.mp3', { + 'other': OtherFields(), + 'channels': 1, + 'samplerate': 44100, + 
'duration': 1.0265261269342902, + 'filesize': 7280, + 'bitrate': 56.0, + }), + ('silence-22khz-mono-1s.mp3', { + 'other': OtherFields(), + 'channels': 1, + 'samplerate': 22050, + 'filesize': 4284, + 'bitrate': 32.0, + 'duration': 1.0438932496075353, + }), + ('id3v24-long-title.mp3', { + 'other': OtherFields({ + 'copyright': [ + '2013 Marathon Artists under exclsuive license from ' + 'Courtney Barnett' + ] + }), + 'track': 1, + 'disc_total': 1, + 'composer': 'Courtney Barnett', + 'album': 'The Double EP: A Sea of Split Peas', + 'filesize': 10000, + 'track_total': 12, + 'genre': 'AlternRock', + 'title': 'Out of the Woodwork', + 'artist': 'Courtney Barnett', + 'albumartist': 'Courtney Barnett', + 'disc': 1, + 'comment': 'Amazon.com Song ID: 240853806', + 'year': '2013', + }), + ('utf16be.mp3', { + 'other': OtherFields(), + 'title': '52-girls', + 'filesize': 2048, + 'track': 6, + 'album': 'party mix', + 'artist': 'The B52s', + 'genre': 'Rock', + 'year': '1981', + }), + ('id3v22.TCO.genre.mp3', { + 'other': OtherFields({ + 'encoded_by': ['iTunes 11.0.4'], + 'itunnorm': [ + ' 000019F0 00001E2A 00009F9A 0000C689 000312A1 00030C1A' + ' 0000902E 00008D36 00020882 000321D6' + ], + 'itunsmpb': [ + ' 00000000 00000210 000007B9 00000000008FB737 00000000' + ' 008242F1 00000000 00000000 00000000 00000000 00000000' + ' 00000000' + ], + 'itunpgap': ['0'], + }), + 'filesize': 500, + 'album': 'ARTPOP', + 'artist': 'Lady GaGa', + 'genre': 'Pop', + 'title': 'Applause', + }), + ('id3_comment_utf_16_with_bom.mp3', { + 'other': OtherFields({ + 'copyright': ['(c) 2008 nin'], + 'isrc': ['USTC40852229'], + 'bpm': ['60'], + 'url': ['www.nin.com'], + 'encoded_by': ['LAME 3.97'], + }), + 'filesize': 19980, + 'album': 'Ghosts I-IV', + 'albumartist': 'Nine Inch Nails', + 'artist': 'Nine Inch Nails', + 'disc': 1, + 'disc_total': 2, + 'title': '1 Ghosts I', + 'track': 1, + 'track_total': 36, + 'year': '2008', + 'comment': '3/4 time', + }), + ('id3_comment_utf_16_double_bom.mp3', { + 'other': 
OtherFields({ + 'label': ['Unclear'] + }), + 'filesize': 512, + 'album': 'The Embrace', + 'artist': 'Johannes Heil & D.Diggler', + 'comment': 'Unclear', + 'title': 'The Embrace (Romano Alfieri Remix)', + 'year': '2012', + }), + ('id3_genre_id_out_of_bounds.mp3', { + 'other': OtherFields(), + 'filesize': 512, + 'album': 'MECHANICAL ANIMALS', + 'artist': 'Manson', + 'genre': '(255)', + 'title': '01 GREAT BIG WHITE WORLD', + 'year': '0', + }), + ('image-text-encoding.mp3', { + 'other': OtherFields(), + 'channels': 1, + 'samplerate': 22050, + 'filesize': 11104, + 'title': 'image-encoding', + 'bitrate': 32.0, + 'duration': 1.0438932496075353, + }), + ('id3v1_does_not_overwrite_id3v2.mp3', { + 'other': OtherFields({ + 'love rating': ['L'], + 'publisher': ['Century Media'], + 'popm': ['MusicBee\x00Ä'] + }), + 'filesize': 1130, + 'album': 'Somewhere Far Beyond', + 'albumartist': 'Blind Guardian', + 'artist': 'Blind Guardian', + 'genre': 'Power Metal', + 'title': 'Time What Is Time', + 'track': 1, + 'year': '1992', + }), + ('non_ascii_filename_äää.mp3', { + 'other': OtherFields({ + 'encoder_settings': ['Lavf58.20.100'] + }), + 'filesize': 80919, + 'channels': 2, + 'duration': 5.067755102040817, + 'samplerate': 44100, + 'bitrate': 127.6701030927835, + }), + ('chinese_id3.mp3', { + 'other': OtherFields(), + 'filesize': 1000, + 'album': '½ÇÂäÖ®¸è', + 'albumartist': 'ËÕÔÆ', + 'artist': 'ËÕÔÆ', + 'bitrate': 128.0, + 'channels': 2, + 'duration': 0.052244897959183675, + 'genre': 'ÐÝÏÐÒôÀÖ', + 'samplerate': 44100, + 'title': '½ÇÂäÖ®¸è', + 'track': 1, + }), + ('cut_off_titles.mp3', { + 'other': OtherFields({ + 'encoder_settings': ['Lavf54.29.104'] + }), + 'filesize': 1000, + 'album': 'ERB', + 'artist': 'Epic Rap Battles Of History', + 'bitrate': 192.0, + 'channels': 2, + 'duration': 0.052244897959183675, + 'samplerate': 44100, + 'title': 'Tony Hawk VS Wayne Gretzky', + }), + ('id3_xxx_lang.mp3', { + 'other': OtherFields({ + 'script': ['Latn'], + 'acoustid id': 
['2dc0b571-a633-45b0-aa5e-f3d25e4e0020'], + 'musicbrainz album type': ['album'], + 'musicbrainz album artist id': [ + '078a9376-3c04-4280-b7d7-b20e158f345d' + ], + 'musicbrainz artist id': ['078a9376-3c04-4280-b7d7-b20e158f345d'], + 'barcode': ['724386668721'], + 'musicbrainz album id': ['38b555fe-24c7-37b3-ad1b-f6dea9f1aafa'], + 'musicbrainz release track id': [ + '7f7c31a5-0905-39ba-ba72-68db91d3b9da' + ], + 'catalog_number': ['7243 8 66687 2 1'], + 'musicbrainz release group id': [ + '0f21095a-e629-389c-981a-d9569e9673c9' + ], + 'musicbrainz album status': ['official'], + 'asin': ['B000641ZIQ'], + 'musicbrainz album release country': ['US'], + 'isrc': ['USVI20400513'], + 'lyrics': ['Don\'t fret, precious'], + 'replaygain_track_gain': ['-3.95 dB'], + 'replaygain_track_peak': ['0.999969'], + 'replaygain_album_gain': ['-8.26 dB'], + 'publisher': ['Virgin Records America'], + 'media': ['CD'], + 'tso2': ['Perfect Circle, A'], + 'ufid': [ + ('http://musicbrainz.org\x00' + 'd2b8f0e6-735a-42ee-adf0-7eca4e65cd72') + ], + 'tsop': ['Perfect Circle, A'], + 'tory': ['2004'], + 'originalyear': ['2004'], + 'tdat': ['0211'], + 'ipls': [ + ('producer\x00Billy Howerdel\x00' + 'producer\x00Maynard James Keenan\x00' + 'engineer\x00Billy Howerdel\x00engineer\x00Critter') + ], + }), + 'filesize': 6943, + 'album': 'eMOTIVe', + 'albumartist': 'A Perfect Circle', + 'artist': 'A Perfect Circle', + 'composer': 'Billy Howerdel/Maynard James Keenan', + 'bitrate': 192.0, + 'channels': 2, + 'duration': 0.13198711063372717, + 'genre': 'Rock', + 'samplerate': 44100, + 'title': 'Counting Bodies Like Sheep to the Rhythm of the War Drums', + 'track': 10, + 'comment': ' ', + 'disc': 1, + 'disc_total': 1, + 'track_total': 12, + 'year': '2004', + }), + ('vbr8.mp3', { + 'filesize': 9504, + 'bitrate': 8.25, + 'channels': 1, + 'duration': 9.216, + 'other': OtherFields(), + 'samplerate': 8000, + }), + ('vbr8stereo.mp3', { + 'filesize': 9504, + 'bitrate': 8.25, + 'channels': 2, + 'duration': 9.216, + 
'other': OtherFields(), + 'samplerate': 8000, + }), + ('vbr11.mp3', { + 'filesize': 9360, + 'bitrate': 8.143465909090908, + 'channels': 1, + 'duration': 9.195102040816327, + 'other': OtherFields(), + 'samplerate': 11025, + }), + ('vbr11stereo.mp3', { + 'filesize': 9360, + 'bitrate': 8.143465909090908, + 'channels': 2, + 'duration': 9.195102040816327, + 'other': OtherFields(), + 'samplerate': 11025, + }), + ('vbr16.mp3', { + 'filesize': 9432, + 'bitrate': 8.251968503937007, + 'channels': 1, + 'duration': 9.144, + 'other': OtherFields(), + 'samplerate': 16000, + }), + ('vbr16stereo.mp3', { + 'filesize': 9432, + 'bitrate': 8.251968503937007, + 'channels': 2, + 'duration': 9.144, + 'other': OtherFields(), + 'samplerate': 16000, + }), + ('vbr22.mp3', { + 'filesize': 9282, + 'bitrate': 8.145021489971347, + 'channels': 1, + 'duration': 9.11673469387755, + 'other': OtherFields(), + 'samplerate': 22050, + }), + ('vbr22stereo.mp3', { + 'filesize': 9282, + 'bitrate': 8.145021489971347, + 'channels': 2, + 'duration': 9.11673469387755, + 'other': OtherFields(), + 'samplerate': 22050, + }), + ('vbr32.mp3', { + 'filesize': 37008, + 'bitrate': 32.50592885375494, + 'channels': 1, + 'duration': 9.108, + 'other': OtherFields(), + 'samplerate': 32000, + }), + ('vbr32stereo.mp3', { + 'filesize': 37008, + 'bitrate': 32.50592885375494, + 'channels': 2, + 'duration': 9.108, + 'other': OtherFields(), + 'samplerate': 32000, + }), + ('vbr44.mp3', { + 'filesize': 36609, + 'bitrate': 32.21697198275862, + 'channels': 1, + 'duration': 9.09061224489796, + 'other': OtherFields(), + 'samplerate': 44100, + }), + ('vbr44stereo.mp3', { + 'filesize': 36609, + 'bitrate': 32.21697198275862, + 'channels': 2, + 'duration': 9.09061224489796, + 'other': OtherFields(), + 'samplerate': 44100, + }), + ('vbr48.mp3', { + 'filesize': 36672, + 'bitrate': 32.33862433862434, + 'channels': 1, + 'duration': 9.072, + 'other': OtherFields(), + 'samplerate': 48000, + }), + ('vbr48stereo.mp3', { + 'filesize': 36672, + 
'bitrate': 32.33862433862434, + 'channels': 2, + 'duration': 9.072, + 'other': OtherFields(), + 'samplerate': 48000, + }), + ('id3v24_genre_null_byte.mp3', { + 'other': OtherFields(), + 'filesize': 256, + 'album': '\u79d8\u5bc6', + 'albumartist': 'aiko', + 'artist': 'aiko', + 'disc': 1, + 'genre': 'Pop', + 'title': '\u661f\u306e\u306a\u3044\u4e16\u754c', + 'track': 10, + 'year': '2008', + }), + ('vbr_xing_header_short.mp3', { + 'filesize': 432, + 'bitrate': 24.0, + 'channels': 1, + 'duration': 0.144, + 'other': OtherFields(), + 'samplerate': 8000, + }), + ('id3_multiple_artists.mp3', { + 'other': OtherFields({ + 'artist': [ + 'artist2', + 'artist3', + 'artist4', + 'artist5', + 'artist6', + 'artist7', + ] + }), + 'filesize': 2007, + 'bitrate': 57.39124999999999, + 'channels': 1, + 'duration': 0.1306122448979592, + 'samplerate': 44100, + 'artist': 'artist1', + 'genre': 'something 1', + }), + ('id3_frames.mp3', { + 'filesize': 27576, + 'bitrate': 50.03636363636364, + 'channels': 1, + 'duration': 3.96, + 'samplerate': 16000, + 'other': OtherFields(), + }), + ('id3v22_with_image.mp3', { + 'other': OtherFields(), + 'filesize': 2311, + 'title': 'image', + }), + ('utf16_no_bom.mp3', { + 'other': OtherFields(), + 'filesize': 1069, + 'title': 'no bom test ë', + 'artist': 'no bom test 2 ë', + }), + ('empty.ogg', { + 'other': OtherFields(), + 'duration': 3.684716553287982, + 'filesize': 4328, + 'bitrate': 112.0, + 'samplerate': 44100, + 'channels': 2, + }), + ('multipage-setup.ogg', { + 'other': OtherFields({ + 'transcoded': ['mp3;241'], + 'replaygain_album_gain': ['-10.29 dB'], + 'replaygain_album_peak': ['1.50579047'], + 'replaygain_track_peak': ['1.17979193'], + 'replaygain_track_gain': ['-10.02 dB'], + }), + 'genre': 'JRock', + 'duration': 4.128798185941043, + 'album': 'Timeless', + 'year': '2006', + 'title': 'Burst', + 'artist': 'UVERworld', + 'track': 7, + 'filesize': 76983, + 'bitrate': 160.0, + 'samplerate': 44100, + 'comment': 'SRCL-6240', + 'channels': 2, + }), + 
('test.ogg', { + 'other': OtherFields(), + 'duration': 1.0, + 'album': 'the boss', + 'year': '2006', + 'title': 'the boss', + 'artist': 'james brown', + 'track': 1, + 'filesize': 7467, + 'bitrate': 160.0, + 'samplerate': 44100, + 'channels': 2, + 'comment': 'hello!', + }), + ('corrupt_metadata.ogg', { + 'other': OtherFields(), + 'filesize': 18648, + 'bitrate': 80.0, + 'duration': 2.132358276643991, + 'samplerate': 44100, + 'channels': 1, + }), + ('composer.ogg', { + 'other': OtherFields(), + 'filesize': 4480, + 'album': 'An Album', + 'artist': 'An Artist', + 'composer': 'some composer', + 'bitrate': 112.0, + 'duration': 3.684716553287982, + 'channels': 2, + 'genre': 'Some Genre', + 'samplerate': 44100, + 'title': 'A Title', + 'track': 2, + 'year': '2007', + 'comment': 'A Comment', + }), + ('ogg_with_image.ogg', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 0.1, + 'filesize': 5759, + 'bitrate': 96.0, + 'samplerate': 44100, + 'artist': 'Sample Artist', + 'title': 'Sample Title', + }), + ('test.opus', { + 'other': OtherFields({ + 'encoder': ['Lavc57.24.102 libopus'], + 'arrange': ['\u6771\u65b9'], + 'catalogid': ['ARCD0024'], + 'discid': ['A212230D'], + 'event': ['\u4f8b\u5927\u796d5'], + 'lyricist': ['Haruka'], + 'mastering': ['Hedonist'], + 'origin': ['\u6771\u65b9\u5e7b\u60f3\u90f7'], + 'originaltitle': ['Bad Apple!!'], + 'performer': ['Masayoshi Minoshima'], + 'vocal': ['nomico'], + }), + 'albumartist': 'Alstroemeria Records', + 'samplerate': 48000, + 'channels': 2, + 'track': 1, + 'disc': 1, + 'title': 'Bad Apple!!', + 'duration': 0.9935, + 'bitrate': 51.5832913940614, + 'year': '2008.05.25', + 'filesize': 10000, + 'artist': 'nomico', + 'album': 'Exserens - A selection of Alstroemeria Records', + 'comment': 'ARCD0018 - Lovelight', + 'disc_total': 1, + 'track_total': 13, + }), + ('8khz_5s.opus', { + 'other': OtherFields({ + 'encoder': ['opusenc from opus-tools 0.2'] + }), + 'filesize': 7251, + 'channels': 1, + 'samplerate': 48000, + 'duration': 5.0, + 
'bitrate': 9.5952 + }), + ('test_flac.oga', { + 'other': OtherFields({ + 'copyright': ['test3'], + 'isrc': ['test4'], + 'lyrics': ['test7'] + }), + 'filesize': 9273, + 'album': 'test2', + 'artist': 'test6', + 'comment': 'test5', + 'bitrate': 20.022488249118684, + 'duration': 3.705034013605442, + 'channels': 2, + 'genre': 'Acoustic', + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'test1', + 'track': 5, + 'year': '2023', + }), + ('test.spx', { + 'other': OtherFields(), + 'filesize': 7921, + 'channels': 1, + 'samplerate': 16000, + 'bitrate': -1, + 'duration': 2.1445625, + 'artist': 'test1', + 'title': 'test2', + 'comment': 'Encoded with Speex 1.2.0', + }), + ('test.wav', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 1.0, + 'filesize': 176444, + 'bitrate': 1411.2, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('test3sMono.wav', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 3.0, + 'filesize': 264644, + 'bitrate': 705.6, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('test-tagged.wav', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 1.0, + 'filesize': 176688, + 'album': 'thealbum', + 'artist': 'theartisst', + 'bitrate': 1411.2, + 'genre': 'Acid', + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'thetitle', + 'track': 66, + 'comment': 'hello', + 'year': '2014', + }), + ('test-riff-tags.wav', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 1.0, + 'filesize': 176540, + 'artist': 'theartisst', + 'bitrate': 1411.2, + 'genre': 'Acid', + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'thetitle', + 'comment': 'hello', + 'year': '2014', + }), + ('silence-22khz-mono-1s.wav', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 0.9991836734693877, + 'filesize': 48160, + 'bitrate': 352.8, + 'samplerate': 22050, + 'bitdepth': 16, + }), + ('id3_header_with_a_zero_byte.wav', { + 'other': OtherFields({ + 'title': ['Stacked'] + }), + 'channels': 1, + 'duration': 1.0, + 'filesize': 44280, + 'bitrate': 352.8, + 
'samplerate': 22050, + 'bitdepth': 16, + 'artist': 'Purpley', + 'title': 'Test000', + 'track': 17, + 'album': 'prototypes', + }), + ('adpcm.wav', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 12.167256235827665, + 'filesize': 268686, + 'bitrate': 176.4, + 'samplerate': 44100, + 'bitdepth': 4, + 'artist': 'test artist', + 'title': 'test title', + 'track': 1, + 'album': 'test album', + 'comment': 'test comment', + 'genre': 'test genre', + 'year': '1990', + }), + ('riff_extra_zero.wav', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 0.11609977324263039, + 'filesize': 20670, + 'bitrate': 1411.2, + 'samplerate': 44100, + 'bitdepth': 16, + 'artist': 'B.O.S.E.', + 'title': 'Mission Bass', + 'album': '808 Bass Express', + 'genre': 'Hip-Hop/Rap', + 'year': '1996', + 'track': 3, + }), + ('riff_extra_zero_2.wav', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 0.11609977324263039, + 'filesize': 20682, + 'bitrate': 1411.2, + 'samplerate': 44100, + 'bitdepth': 16, + 'artist': 'The Jimmy Castor Bunch', + 'title': 'It\'s Just Begun', + 'album': 'The Perfect Beats, Vol. 
4', + 'genre': 'Pop Electronica', + 'track': 7, + }), + ('wav_invalid_track_number.wav', { + 'other': OtherFields(), + 'filesize': 8908, + 'bitrate': 705.6, + 'duration': 0.1, + 'samplerate': 44100, + 'channels': 1, + 'bitdepth': 16, + }), + ('gsm_6_10.wav', { + 'other': OtherFields(), + 'bitdepth': 1, + 'bitrate': 44.1, + 'channels': 1, + 'duration': 0.16507936507936508, + 'filesize': 1246, + 'samplerate': 44100, + 'album': 'album', + 'artist': 'artist', + 'title': 'track', + 'track': 99, + 'year': '2010', + 'comment': 'some comment here', + 'genre': 'Bass', + }), + ('wav_with_image.wav', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 2.14475, + 'filesize': 22902, + 'bitrate': 64.0, + 'samplerate': 8000, + 'bitdepth': 8, + }), + ('flac1sMono.flac', { + 'other': OtherFields(), + 'genre': 'Avantgarde', + 'album': 'alb', + 'year': '2014', + 'duration': 1.0, + 'title': 'track', + 'track': 23, + 'artist': 'art', + 'channels': 1, + 'filesize': 26632, + 'bitrate': 213.056, + 'samplerate': 44100, + 'bitdepth': 16, + 'comment': 'hello', + }), + ('flac453sStereo.flac', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 453.51473922902494, + 'filesize': 84236, + 'bitrate': 1.4859230399999999, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('flac1.5sStereo.flac', { + 'other': OtherFields(), + 'channels': 2, + 'album': 'alb', + 'year': '2014', + 'duration': 1.4995238095238095, + 'title': 'track', + 'track': 23, + 'artist': 'art', + 'filesize': 59868, + 'bitrate': 319.39739599872973, + 'genre': 'Avantgarde', + 'samplerate': 44100, + 'bitdepth': 16, + 'comment': 'hello', + }), + ('flac_application.flac', { + 'other': OtherFields({ + 'replaygain_track_peak': ['0.9976'], + 'musicbrainz_albumartistid': [ + 'e5c7b94f-e264-473c-bb0f-37c85d4d5c70' + ], + 'musicbrainz_trackid': ['e65fb332-0c1e-4172-85e0-59cd37e5669e'], + 'replaygain_album_gain': ['-8.14 dB'], + 'labelid': ['RTRADLP480'], + 'musicbrainz_albumid': ['359a91e9-3bb3-4b60-a823-8aaa4bad1e36'], + 
'artistsort': ['Belle and Sebastian'], + 'replaygain_track_gain': ['-8.08 dB'], + 'replaygain_album_peak': ['1.0000'], + }), + 'channels': 2, + 'track_total': 11, + 'album': 'Belle and Sebastian Write About Love', + 'year': '2010-10-11', + 'duration': 273.64, + 'title': 'I Want the World to Stop', + 'track': 4, + 'artist': 'Belle and Sebastian', + 'filesize': 13000, + 'bitrate': 0.38006139453296306, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('no-tags.flac', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 3.684716553287982, + 'filesize': 4692, + 'bitrate': 10.186943678613627, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('variable-block.flac', { + 'other': OtherFields({ + 'discid': ['AA0B360B'], + 'japanese title': ['アップルシード オリジナル・サウンドトラック'], + 'organization': ['Sony Music Records (SRCP-371)'], + 'ripper': ['Exact Audio Copy 0.99pb5'], + 'replaygain_album_gain': ['-8.68 dB'], + 'replaygain_album_peak': ['1.000000'], + 'replaygain_track_gain': ['-9.61 dB'], + 'replaygain_track_peak': ['1.000000'], + }), + 'channels': 2, + 'album': 'Appleseed Original Soundtrack', + 'year': '2004', + 'duration': 261.68, + 'title': 'DIVE FOR YOU', + 'track': 1, + 'track_total': 11, + 'artist': 'Boom Boom Satellites', + 'filesize': 10240, + 'bitrate': 0.31305411189238763, + 'disc': 1, + 'genre': 'Anime Soundtrack', + 'samplerate': 44100, + 'bitdepth': 16, + 'disc_total': 2, + 'comment': 'Original Soundtrack', + 'composer': 'Boom Boom Satellites (Lyrics)', + }), + ('106-invalid-streaminfo.flac', { + 'other': OtherFields(), + 'filesize': 4692 + }), + ('106-short-picture-block-size.flac', { + 'other': OtherFields(), + 'filesize': 4692, + 'bitrate': 10.186943678613627, + 'channels': 2, + 'duration': 3.684716553287982, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('with_padded_id3_header.flac', { + 'other': OtherFields(), + 'filesize': 16070, + 'album': 'album', + 'artist': 'artist', + 'bitrate': 283.4748, + 'channels': 1, + 'duration': 0.45351473922902497, + 
'genre': 'genre', + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'title', + 'track': 1, + 'year': '2018', + 'comment': 'comment', + }), + ('with_padded_id3_header2.flac', { + 'other': OtherFields({ + 'tlen': ['297666'], + 'encoded_by': ['Exact Audio Copy (Sicherer Modus)'], + 'encoder_settings': [ + 'flac.exe -T "artist=Unbekannter Künstler" ' + '-T "title=Track01" -T "album=Unbekannter Titel" ' + '-T "date=" -T "tracknumber=01" -T "genre=" -5' + ], + 'artist': ['Unbekannter Künstler'], + 'album': ['Unbekannter Titel'], + 'title': ['Track01'], + }), + 'filesize': 19522, + 'album': 'album', + 'artist': 'artist', + 'bitrate': 344.36807999999996, + 'channels': 1, + 'disc': 1, + 'disc_total': 1, + 'duration': 0.45351473922902497, + 'genre': 'genre', + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'title', + 'track': 1, + 'track_total': 5, + 'year': '2018', + 'comment': 'comment', + }), + ('flac_invalid_track_number.flac', { + 'other': OtherFields(), + 'filesize': 235, + 'bitrate': 18.8, + 'channels': 1, + 'duration': 0.1, + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('flac_with_image.flac', { + 'other': OtherFields({ + 'artist': ['artist 2', 'artist 3'], + 'genre': ['genre 2'], + 'album': ['album 2'], + 'url': ['https://example.com'], + }), + 'filesize': 2824, + 'album': 'album 1', + 'artist': 'artist 1', + 'bitrate': 225.92, + 'channels': 1, + 'duration': 0.1, + 'genre': 'genre 1', + 'samplerate': 44100, + 'bitdepth': 16, + }), + ('test2.wma', { + 'other': OtherFields({ + '_track': ['0'], + 'mediaprimaryclassid': ['{D1607DBC-E323-4BE2-86A1-48A42A28441E}'], + 'encodingtime': ['128861118183900000'], + 'wmfsdkversion': ['11.0.5721.5145'], + 'wmfsdkneeded': ['0.0.0.0000'], + 'isvbr': ['1'], + 'peakvalue': ['30369'], + 'averagelevel': ['7291'], + }), + 'samplerate': 44100, + 'album': 'The Colour and the Shape', + 'title': 'Doll', + 'bitrate': 64.04, + 'filesize': 5800, + 'track': 1, + 'albumartist': 'Foo Fighters', + 'artist': 'Foo Fighters', + 'duration': 
83.406, + 'year': '1997', + 'genre': 'Alternative', + 'composer': 'Foo Fighters', + 'channels': 2, + }), + ('lossless.wma', { + 'other': OtherFields(), + 'samplerate': 44100, + 'bitrate': 667.296, + 'filesize': 2500, + 'bitdepth': 16, + 'duration': 43.133, + 'channels': 2, + }), + ('wma_invalid_track_number.wma', { + 'other': OtherFields({ + 'encoder_settings': ['Lavf60.16.100'] + }), + 'filesize': 3940, + 'bitrate': 128.0, + 'duration': 2.1409999999999996, + 'samplerate': 44100, + 'channels': 1, + }), + ('test.m4a', { + 'other': OtherFields({ + 'itunsmpb': [ + ' 00000000 00000840 000001DC 0000000000D3E9E4 00000000' + ' 00000000 00000000 00000000 00000000 00000000 00000000' + ' 00000000' + ], + 'itunnorm': [ + ' 00000358 0000032E 000020AE 000020D9 0003A228 00032A28' + ' 00007E20 00007E90 00007BFD 00009293' + ], + 'itunes_cddb_ids': ['11++'], + 'ufidhttp://www.cddb.com/id3/taginfo1.html': [ + '3CD3N48Q241232290U3387DD249F72E6B082B283425ADB9B0F324P1' + ], + 'bpm': ['0'], + 'encoded_by': ['iTunes 10.5'], + 'cpil': ['0'], + 'pgap': ['0'], + }), + 'samplerate': 44100, + 'duration': 314.97868480725623, + 'bitrate': 256.0, + 'channels': 2, + 'genre': 'Pop', + 'year': '2011', + 'title': 'Nothing', + 'album': 'Only Our Hearts To Lose', + 'track_total': 11, + 'track': 11, + 'artist': 'Marian', + 'filesize': 61432, + }), + ('mpeg4_with_image.m4a', { + 'other': OtherFields({ + 'publisher': ['test7'], + 'bpm': ['1'], + 'encoded_by': ['Lavf60.3.100'] + }), + 'artist': 'test1', + 'composer': 'test8', + 'filesize': 7371, + 'samplerate': 8000, + 'duration': 1.294, + 'channels': 1, + 'bitrate': 27.887, + }), + ('alac_file.m4a', { + 'other': OtherFields({ + 'copyright': ['© Hyperion Records Ltd, London'], + 'lyrics': ['Album notes:'], + 'upc': ['0034571177380'] + }), + 'artist': 'Howard Shelley', + 'filesize': 20000, + 'composer': 'Clementi, Muzio (1752-1832)', + 'title': 'Clementi: Piano Sonata in D major, Op 25 No 6 - Movement 2: ' + 'Un poco andante', + 'album': 'Clementi: The 
Complete Piano Sonatas, Vol. 4', + 'year': '2009', + 'track': 14, + 'track_total': 27, + 'disc': 1, + 'disc_total': 1, + 'samplerate': 44100, + 'duration': 166.62639455782312, + 'genre': 'Classical', + 'albumartist': 'Howard Shelley', + 'channels': 2, + 'bitrate': 436.743, + 'bitdepth': 16, + }), + ('mpeg4_desc_cmt.m4a', { + 'other': OtherFields({ + 'description': ['test description'], + 'encoded_by': ['Lavf59.27.100'] + }), + 'filesize': 32006, + 'bitrate': 101.038, + 'channels': 2, + 'comment': 'test comment', + 'duration': 2.36, + 'samplerate': 44100, + }), + ('mpeg4_xa9des.m4a', { + 'other': OtherFields({ + 'description': ['test description'] + }), + 'filesize': 2639, + 'comment': 'test comment', + 'duration': 727.1066666666667, + }), + ('test2.m4a', { + 'other': OtherFields({ + 'publisher': ['test7'], + 'bpm': ['99999'], + 'encoded_by': ['Lavf60.3.100'] + }), + 'artist': 'test1', + 'composer': 'test8', + 'filesize': 6260, + 'samplerate': 8000, + 'duration': 1.294, + 'channels': 1, + 'bitrate': 27.887, + }), + ('mvhd_version_1.m4a', { + 'other': OtherFields(), + 'title': '64-bit test', + 'filesize': 2048, + 'samplerate': 44100, + 'duration': 123251.6585941043, + 'channels': 2, + 'bitrate': 0.0, + }), + ('multi_value.m4a', { + 'other': OtherFields({ + 'artist': ['another artist', 'yet another artist'], + 'custom': ['value1', 'value2', 'value3'], + 'encoded_by': ['Lavf61.7.100'] + }), + 'artist': 'some artist', + 'title': 'some title', + 'album': 'some album', + 'filesize': 1995, + 'samplerate': 44100, + 'duration': 0.524, + 'channels': 1, + 'bitrate': 1.666, + }), + ('test-tagged.aiff', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 1.0, + 'filesize': 177620, + 'artist': 'theartist', + 'bitrate': 1411.2, + 'genre': 'Acid', + 'samplerate': 44100, + 'bitdepth': 16, + 'track': 1, + 'title': 'thetitle', + 'album': 'thealbum', + 'comment': 'hello', + 'year': '2014', + }), + ('test.aiff', { + 'other': OtherFields({ + 'copyright': ['℗ 1992 Ace Records'] + 
}), + 'channels': 2, + 'duration': 0.0, + 'filesize': 164, + 'bitrate': 1411.2, + 'samplerate': 44100, + 'bitdepth': 16, + 'title': 'Go Out and Get Some', + 'comment': 'Millie Jackson - Get It Out \'cha System - 1978', + }), + ('pluck-pcm8.aiff', { + 'other': OtherFields(), + 'channels': 2, + 'duration': 0.2999546485260771, + 'filesize': 6892, + 'artist': 'Serhiy Storchaka', + 'title': 'Pluck', + 'album': 'Python Test Suite', + 'bitrate': 176.4, + 'samplerate': 11025, + 'bitdepth': 8, + 'comment': 'Audacity Pluck + Wahwah', + 'year': '2013', + }), + ('M1F1-mulawC-AFsp.afc', { + 'other': OtherFields({ + 'comment': ['user: kabal@CAPELLA', 'program: CopyAudio'] + }), + 'channels': 2, + 'duration': 2.936625, + 'filesize': 47148, + 'bitrate': 256.0, + 'samplerate': 8000, + 'bitdepth': 16, + 'comment': 'AFspdate: 2003-01-30 03:28:34 UTC', + }), + ('invalid_sample_rate.aiff', { + 'other': OtherFields(), + 'channels': 1, + 'filesize': 4096, + 'bitdepth': 16, + }), + ('aiff_extra_tags.aiff', { + 'other': OtherFields({ + 'copyright': ['test'], + 'isrc': ['CC-XXX-YY-NNNNN'] + }), + 'channels': 1, + 'duration': 2.176, + 'filesize': 18532, + 'bitrate': 64.0, + 'samplerate': 8000, + 'bitdepth': 8, + 'title': 'song title', + 'artist': 'artist 1;artist 2', + }), + ('aiff_with_image.aiff', { + 'other': OtherFields(), + 'channels': 1, + 'duration': 2.176, + 'filesize': 21044, + 'bitrate': 64.0, + 'samplerate': 8000, + 'bitdepth': 8, + 'title': 'image', + }), ]) -def test_detect_magic_headers(testfile, expected): - filename = os.path.join(testfolder, testfile) - with io.open(filename, 'rb') as fh: - parser = TinyTag.get_parser_class(filename, fh) - assert parser == expected - -def test_show_hint_for_wrong_usage(): - with pytest.raises(Exception) as exc_info: - TinyTag('filename.mp3', 0) - assert exc_info.type == Exception - assert exc_info.value.args[0] == 'Use `TinyTag.get(filepath)` instead of `TinyTag(filepath)`' - - -def test_to_str(): - tag = TinyTag.get(os.path.join(testfolder, 
'samples/id3v22-test.mp3')) - assert str(tag) # since the dict is not ordered we cannot == 'somestring' - assert repr(tag) # since the dict is not ordered we cannot == 'somestring' - assert str(tag) == '{"album": "Hymns for the Exiled", "albumartist": null, "artist": "Anais Mitchell", "audio_offset": 2225, "bitrate": 160, "channels": 2, "comment": "Waterbug Records, www.anaismitchell.com", "composer": null, "disc": null, "disc_total": null, "duration": 0.13836297152858082, "extra": {}, "filesize": 5120, "genre": null, "samplerate": 44100, "title": "cosmic american", "track": "3", "track_total": "11", "year": "2004"}' + +SAMPLE_FOLDER = os.path.join(os.path.dirname(__file__), 'samples') + + +class TestAll(TestCase): + + @classmethod + def setUpClass(cls) -> None: + # Use utf-8 encoding for debug print() + if isinstance(stdout, TextIOWrapper): + stdout.reconfigure(encoding='utf-8') + + def compare_tag(self, + results: ExpectedTag, + expected: ExpectedTag, + file: str) -> None: + def error_fmt(value: str | float | list[str]) -> str: + return f'{repr(value)} ({type(value)})' + + def assert_complete_data(results: ExpectedTag | OtherFields, + expected: ExpectedTag | OtherFields) -> None: + missing_result_fields = set(expected) - set(results) + missing_expected_fields = set(results) - set(expected) + self.assertFalse( + missing_result_fields, + f'Missing fields in tag \n{missing_result_fields}') + self.assertFalse( + missing_expected_fields, + f'Missing fields in test case \n{missing_expected_fields}') + + def assert_values_match(path: str, + result_val: str | float | list[str], + expected_val: str | float | list[str]) -> None: + fmt_string = 'field "%s": got %s expected %s in %s!' 
+ fmt_values = ( + path, error_fmt(result_val), error_fmt(expected_val), file) + values_match = False + # lets not copy *all* the lyrics inside the fixture + if (path == 'other.lyrics' + and isinstance(expected_val, list) + and isinstance(result_val, list)): + values_match = result_val[0].startswith(expected_val[0]) + elif (isinstance(result_val, float) + and isinstance(expected_val, float)): + values_match = isclose(result_val, expected_val) + else: + values_match = result_val == expected_val + self.assertTrue(values_match, fmt_string % fmt_values) + + assert_complete_data(results, expected) + + for path, result_val in results.items(): + expected_val = expected[path] + if (isinstance(result_val, OtherFields) + and isinstance(expected_val, OtherFields)): + assert_complete_data(result_val, expected_val) + + for other_key, other_result_val in result_val.items(): + other_path = f"{path}.{other_key}" + assert_values_match( + other_path, other_result_val, + expected_val[other_key] + ) + elif (not isinstance(result_val, OtherFields) + and not isinstance(expected_val, OtherFields)): + assert_values_match(path, result_val, expected_val) + + def test_file_reading_all(self) -> None: + for testfile, expected in TEST_FILES.items(): + with self.subTest(testfile=testfile, expected=expected): + filename = os.path.join(SAMPLE_FOLDER, testfile) + tag = TinyTag.get( + filename, tags=True, duration=True, image=True) + results = { + key: val for key, val in tag.__dict__.items() + if not key.startswith('_') and key != 'filename' + and val is not None and not isinstance(val, Images) + } + self.compare_tag(results, expected, filename) + + def test_file_reading_tags(self) -> None: + for testfile, expected in TEST_FILES.items(): + with self.subTest(testfile=testfile, expected=expected): + filename = os.path.join(SAMPLE_FOLDER, testfile) + excluded_attrs = { + 'bitdepth', 'bitrate', 'channels', 'duration', 'samplerate' + } + tag = TinyTag.get(filename, tags=True, duration=False) + results = 
{ + key: val for key, val in tag.__dict__.items() + if not key.startswith('_') and key != 'filename' + and val is not None and not isinstance(val, Images) + } + filtered_expected = { + key: val for key, val in expected.items() + if key not in excluded_attrs + } + self.compare_tag(results, filtered_expected, filename) + assert tag.images.any is None + + def test_file_reading_duration(self) -> None: + for testfile, expected in TEST_FILES.items(): + with self.subTest(testfile=testfile, expected=expected): + filename = os.path.join(SAMPLE_FOLDER, testfile) + allowed_attrs = { + 'bitdepth', 'bitrate', 'channels', 'duration', + 'filesize', 'samplerate'} + tag = TinyTag.get(filename, tags=False, duration=True) + results = { + key: val for key, val in tag.__dict__.items() + if not key.startswith('_') and key != 'filename' + and val is not None and not isinstance(val, Images) + and not isinstance(val, OtherFields) + } + filtered_expected = { + key: val for key, val in expected.items() + if key in allowed_attrs + } + self.compare_tag(results, filtered_expected, filename) + assert tag.images.any is None + + def test_pathlib_compatibility(self) -> None: + testfile = next(iter(TEST_FILES.keys())) + filename = Path(SAMPLE_FOLDER) / testfile + TinyTag.get(filename) + self.assertTrue(TinyTag.is_supported(filename)) + + def test_file_obj_compatibility(self) -> None: + testfile = next(iter(TEST_FILES.keys())) + filename = os.path.join(SAMPLE_FOLDER, testfile) + with open(filename, 'rb') as file_handle: + tag = TinyTag.get(file_obj=file_handle) + file_handle.seek(0) + tag_bytesio = TinyTag.get(file_obj=BytesIO(file_handle.read())) + self.assertEqual(tag.filesize, tag_bytesio.filesize) + + @skipIf( + system() == 'Windows' and python_implementation() == 'PyPy', + reason='PyPy on Windows not supported' + ) + def test_binary_path_compatibility(self) -> None: + binary_file_path = os.path.join( + SAMPLE_FOLDER, 'non_ascii_filename_äää.mp3').encode('utf-8') + tag = 
TinyTag.get(binary_file_path) + self.assertEqual(tag.samplerate, 44100) + self.assertEqual(tag.other['encoder_settings'], ['Lavf58.20.100']) + + def test_unsupported_extension(self) -> None: + bogus_file = os.path.join(SAMPLE_FOLDER, 'there_is_no_such_ext.bogus') + with self.assertRaises(UnsupportedFormatError) as context: + TinyTag.get(bogus_file) + self.assertIsInstance(context.exception, TinyTagException) + + def test_override_encoding(self) -> None: + chinese_id3 = os.path.join(SAMPLE_FOLDER, 'chinese_id3.mp3') + tag = TinyTag.get(chinese_id3, encoding='gbk') + self.assertEqual(tag.artist, '苏云') + self.assertEqual(tag.album, '角落之歌') + + def test_unsubclassed_tinytag_load(self) -> None: + tag = TinyTag() + tag._load(tags=True, duration=True) + self.assertFalse(tag._tags_parsed) + + def test_unsubclassed_tinytag_duration(self) -> None: + tag = TinyTag() + with self.assertRaises(NotImplementedError): + tag._determine_duration(None) # type: ignore + + def test_unsubclassed_tinytag_parse_tag(self) -> None: + tag = TinyTag() + with self.assertRaises(NotImplementedError): + tag._parse_tag(None) # type: ignore + + def test_invalid_file(self) -> None: + for path, cls in ( + ('silence-44-s-v1.mp3', _Flac), + ('flac1.5sStereo.flac', _Ogg), + ('flac1.5sStereo.flac', _Wave), + ('flac1.5sStereo.flac', _Wma), + ('ilbm.aiff', _Aiff), + ): + with self.subTest(path=path, cls=cls): + with self.assertRaises(ParseError) as context: + cls.get(os.path.join(SAMPLE_FOLDER, path)) + self.assertIsInstance(context.exception, TinyTagException) + + def test_image_loading(self) -> None: + for path, expected_size, desc in ( + ('image-text-encoding.mp3', 5708, 'cover'), + ('id3v22_with_image.mp3', 1220, 'some image ë'), + ('mpeg4_with_image.m4a', 1220, None), + ('flac_with_image.flac', 1220, 'some image ë'), + ('wav_with_image.wav', 4627, 'some image ë'), + ('aiff_with_image.aiff', 1220, 'some image ë'), + ): + with self.subTest(path=path, expected_size=expected_size, + desc=desc): + tag = 
TinyTag.get( + os.path.join(SAMPLE_FOLDER, path), image=True) + image = tag.images.any + manual_image = tag.images.front_cover + if manual_image is None: + manual_image = tag.images.other['generic'][0] + assert image is not None + assert manual_image is not None + self.assertIn(image.name, {'front_cover', 'generic'}) + assert image.data is not None + self.assertEqual(image.data, manual_image.data) + with self.assertWarns(DeprecationWarning): + self.assertEqual(image.data, tag.get_image()) + image_size = len(image.data) + self.assertEqual( + image_size, expected_size, + (f'Image is {image_size} bytes but should be ' + f'{expected_size} bytes') + ) + self.assertTrue( + image.data.startswith(b'\xff\xd8\xff\xe0'), + 'The image data must start with a jpeg header' + ) + self.assertEqual(image.mime_type, 'image/jpeg') + self.assertEqual(image.description, desc) + + def test_image_loading_other(self) -> None: + tag = TinyTag.get( + os.path.join(SAMPLE_FOLDER, 'ogg_with_image.ogg'), image=True) + image = tag.images.other['bright_colored_fish'][0] + assert image.data is not None + assert tag.images.any is not None + self.assertEqual(tag.images.any.data, image.data) + with self.assertWarns(DeprecationWarning): + self.assertEqual(image.data, tag.get_image()) + self.assertEqual(image.mime_type, 'image/jpeg') + self.assertEqual(image.name, 'bright_colored_fish') + self.assertEqual(image.description, 'some image ë') + self.assertEqual(len(image.data), 1220) + self.assertEqual( + str(image), + "Image(name='bright_colored_fish', data=b'\\xff\\xd8\\xff\\xe0" + "\\x00\\x10JFIF\\x00\\x01\\x01\\x01\\x00H\\x00H\\x00\\x00\\xff" + "\\xe2\\x02\\xb0ICC_PROFILE\\x00\\x01\\x01\\x00\\x00\\x02" + "\\xa0lcm..', mime_type='image/jpeg', description='some image ë')" + ) + + def test_mp3_utf_8_invalid_string(self) -> None: + tag = TinyTag.get( + os.path.join(SAMPLE_FOLDER, 'utf-8-id3v2-invalid-string.mp3')) + # the title used to be Gran dia, but I replaced the first byte with + # 0xFF, which should 
be ignored here + self.assertEqual(tag.title, '�ran día') + + def test_detect_magic_headers(self) -> None: + for testfile, expected in ( + ('detect_mp3_id3.x', _ID3), + ('detect_mp3_fffb.x', _ID3), + ('detect_ogg_flac.x', _Ogg), + ('detect_ogg_opus.x', _Ogg), + ('detect_ogg_vorbis.x', _Ogg), + ('detect_wav.x', _Wave), + ('detect_flac.x', _Flac), + ('detect_wma.x', _Wma), + ('detect_mp4_m4a.x', _MP4), + ('detect_aiff.x', _Aiff), + ): + with self.subTest(testfile=testfile, expected=expected): + filename = os.path.join(SAMPLE_FOLDER, testfile) + tag = TinyTag.get(filename) + self.assertIsInstance(tag, expected) + + def test_show_hint_for_wrong_usage(self) -> None: + with self.assertRaises(ValueError) as context: + TinyTag.get() + self.assertIsInstance(context.exception, ValueError) + self.assertEqual( + str(context.exception), + 'Either filename or file_obj argument is required' + ) + + def test_deprecations(self) -> None: + file_path = os.path.join(SAMPLE_FOLDER, 'flac_with_image.flac') + with self.assertWarns(DeprecationWarning): + tag = TinyTag.get( + filename=file_path, image=True, ignore_errors=True) + with self.assertWarns(DeprecationWarning): + tag = TinyTag.get( + filename=file_path, image=True, ignore_errors=False) + with self.assertWarns(DeprecationWarning): + assert tag.audio_offset is None + with self.assertWarns(DeprecationWarning): + self.assertEqual(str(tag.extra), "{'url': 'https://example.com'}") + with self.assertWarns(DeprecationWarning): + assert tag.images.any is not None + self.assertEqual(tag.get_image(), tag.images.any.data) + + def test_str_vars(self) -> None: + tag = TinyTag.get( + os.path.join(SAMPLE_FOLDER, 'flac_with_image.flac'), image=True) + vars_str = str(vars(tag)) + self.assertIn( + "flac_with_image.flac', 'filesize': 2824, 'duration': 0.1, " + "'channels': 1, 'bitrate': 225.92, " + "'bitdepth': 16, 'samplerate': 44100, 'artist': 'artist 1', " + "'albumartist': None, 'composer': None, 'album': 'album 1', " + "'disc': None, 
'disc_total': None, 'title': None, 'track': None, " + "'track_total': None, 'genre': 'genre 1', 'year': None, " + "'comment': None, 'images': None: + tag = TinyTag.get( + os.path.join(SAMPLE_FOLDER, 'flac_with_image.flac'), image=True) + self.assertTrue(str(tag.as_dict()).endswith( + "flac_with_image.flac', 'filesize': 2824, 'duration': 0.1, " + "'channels': 1, 'bitrate': 225.92, " + "'bitdepth': 16, 'samplerate': 44100, 'artist': ['artist 1', " + "'artist 2', 'artist 3'], 'album': ['album 1', 'album 2'], " + "'genre': ['genre 1', 'genre 2'], 'url': ['https://example.com']}" + )) + self.assertEqual( + str(tag.images.as_dict()), + "{'front_cover': [Image(name='front_cover', data=b'\\xff\\xd8\\xff" + "\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x01\\x00H\\x00H\\x00\\x00" + "\\xff\\xe2\\x02\\xb0ICC_PROFILE\\x00\\x01\\x01\\x00\\x00\\x02" + "\\xa0lcm..', mime_type='image/jpeg', description='some image ë')]" + ", 'bright_colored_fish': [Image(name='bright_colored_fish', " + "data=b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x01" + "\\x00H\\x00H\\x00\\x00\\xff\\xe2\\x02\\xb0ICC_PROFILE\\x00\\x01" + "\\x01\\x00\\x00\\x02\\xa0lcm..', mime_type='image/jpeg', " + "description='some image ë')]}" + ) diff --git a/tinytag/tests/test_cli.py b/tinytag/tests/test_cli.py index 82cffd2..b8cce31 100644 --- a/tinytag/tests/test_cli.py +++ b/tinytag/tests/test_cli.py @@ -1,117 +1,122 @@ -import json -import os -import sys -from subprocess import check_output, CalledProcessError -from tempfile import NamedTemporaryFile - -import pytest - -project_folder = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -sample_folder = os.path.join(project_folder, 'tinytag', 'tests', 'samples') -mp3_with_image = os.path.join(sample_folder, 'id3image_without_description.mp3') -bogus_file = os.path.join(sample_folder, 'there_is_no_such_ext.bogus') -assert os.path.exists(mp3_with_image) - -tinytag_attributes = {'album', 'albumartist', 'artist', 'audio_offset', 'bitrate', 'channels', - 'comment', 
'composer', 'disc', 'disc_total', 'duration', 'extra', 'filesize', - 'filename', 'genre', 'samplerate', 'title', 'track', 'track_total', 'year'} - - -def run_cli(args): - output = check_output('python -m tinytag ' + args, cwd=project_folder, shell=True) - return output.decode('utf-8') - - -def file_size(filename): - return os.stat(filename).st_size - - -@pytest.mark.xfail(raises=CalledProcessError) -def test_wrong_params(): - assert 'tinytag [options] 0 - with open(temp_file.name, 'rb') as fh: - image_data = fh.read(20) - assert image_data.startswith(b'\xff') - assert b'JFIF' in image_data - - -@pytest.mark.skipif(sys.platform == "win32", reason="NamedTemporaryFile cant be reopened on windows") -def test_save_image_short_opt(): - temp_file = NamedTemporaryFile() - assert file_size(temp_file.name) == 0 - run_cli('-i %s %s' % (temp_file.name, mp3_with_image)) - assert file_size(temp_file.name) > 0 +# SPDX-FileCopyrightText: 2020-2024 tinytag Contributors +# SPDX-License-Identifier: MIT +# pylint: disable=missing-class-docstring,missing-function-docstring +# pylint: disable=missing-module-docstring -@pytest.mark.skipif(sys.platform == "win32", reason="NamedTemporaryFile cant be reopened on windows") -def test_save_image_bulk(): - temp_file = NamedTemporaryFile(suffix='.jpg') - temp_file_no_ext = temp_file.name[:-4] - assert file_size(temp_file.name) == 0 - run_cli('-i %s %s %s %s' % (temp_file.name, mp3_with_image, mp3_with_image, mp3_with_image)) - assert file_size(temp_file.name) == 0 - assert file_size(temp_file_no_ext + '00000.jpg') > 0 - assert file_size(temp_file_no_ext + '00001.jpg') > 0 - assert file_size(temp_file_no_ext + '00002.jpg') > 0 - - -def test_meta_data_output_default_json(): - output = run_cli(mp3_with_image) - data = json.loads(output) - assert data - assert set(data.keys()) == tinytag_attributes - - -def test_meta_data_output_format_json(): - output = run_cli('-f json ' + mp3_with_image) - data = json.loads(output) - assert data - assert 
set(data.keys()) == tinytag_attributes - - -def test_meta_data_output_format_csv(): - output = run_cli('-f csv ' + mp3_with_image) - lines = [line for line in output.split(os.linesep) if line] - assert all(',' in line for line in lines) - attributes = set(line.split(',')[0] for line in lines) - assert set(attributes) == tinytag_attributes - - -def test_meta_data_output_format_tsv(): - output = run_cli('-f tsv ' + mp3_with_image) - lines = [line for line in output.split(os.linesep) if line] - assert all('\t' in line for line in lines) - attributes = set(line.split('\t')[0] for line in lines) - assert set(attributes) == tinytag_attributes - - -def test_meta_data_output_format_tabularcsv(): - output = run_cli('-f tabularcsv ' + mp3_with_image) - header, line, rest = output.split(os.linesep) - assert set(header.split(',')) == tinytag_attributes - - -@pytest.mark.xfail(raises=CalledProcessError) -def test_fail_on_unsupported_file(): - run_cli(bogus_file) - - -def test_fail_skip_unsupported_file_long_opt(): - run_cli('--skip-unsupported ' + bogus_file) - +import json +import os -def test_fail_skip_unsupported_file_short_opt(): - run_cli('-s ' + bogus_file) +from subprocess import check_output, CalledProcessError, STDOUT +from sys import executable +from tempfile import NamedTemporaryFile +from unittest import TestCase + +PROJECT_FOLDER = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +SAMPLE_FOLDER = os.path.join(PROJECT_FOLDER, 'tinytag', 'tests', 'samples') +MP3_WITH_IMG = os.path.join(SAMPLE_FOLDER, 'image-text-encoding.mp3') +BOGUS_FILE = os.path.join(SAMPLE_FOLDER, 'there_is_no_such_ext.bogus') +TINYTAG_ATTRIBUTES = { + 'album', 'albumartist', 'artist', 'bitdepth', 'bitrate', + 'channels', 'comment', 'composer', 'disc', 'disc_total', 'duration', + 'filesize', 'filename', 'genre', 'samplerate', 'title', 'track', + 'track_total', 'year' +} + + +class TestCLI(TestCase): + + @staticmethod + def run_cli(args: str) -> str: + debug_env = 
str(os.environ.pop("TINYTAG_DEBUG", None)) + output = check_output( + f'{executable} -m tinytag ' + args, cwd=PROJECT_FOLDER, + shell=True, stderr=STDOUT) + if debug_env: + os.environ["TINYTAG_DEBUG"] = debug_env + return output.decode('utf-8') + + def test_wrong_params(self) -> None: + with self.assertRaises(CalledProcessError) as excinfo: + self.run_cli('-lol') + output = excinfo.exception.stdout.strip() + self.assertEqual( + output, b"-lol: [Errno 2] No such file or directory: '-lol'") + + def test_print_help(self) -> None: + self.assertIn('tinytag [options] None: + with NamedTemporaryFile() as temp_file: + self.assertEqual(os.path.getsize(temp_file.name), 0) + self.run_cli(f'--save-image {temp_file.name} {MP3_WITH_IMG}') + self.assertGreater(os.path.getsize(temp_file.name), 0) + with open(temp_file.name, 'rb') as file_handle: + image_data = file_handle.read(20) + self.assertTrue(image_data.startswith(b'\xff')) + self.assertIn(b'JFIF', image_data) + + def test_save_image_short_opt(self) -> None: + with NamedTemporaryFile() as temp_file: + self.assertEqual(os.path.getsize(temp_file.name), 0) + self.run_cli(f'-i {temp_file.name} {MP3_WITH_IMG}') + self.assertGreater(os.path.getsize(temp_file.name), 0) + + def test_save_image_bulk(self) -> None: + temp_name = None + with NamedTemporaryFile(suffix='.jpg') as temp_file: + temp_name = temp_file.name + temp_name_no_ext = temp_name[:-4] + self.assertEqual(os.path.getsize(temp_name), 0) + self.run_cli( + f'-i {temp_name} {MP3_WITH_IMG} {MP3_WITH_IMG} {MP3_WITH_IMG}') + self.assertFalse(os.path.isfile(temp_name)) + self.assertGreater(os.path.getsize(temp_name_no_ext + '00000.jpg'), 0) + self.assertGreater(os.path.getsize(temp_name_no_ext + '00001.jpg'), 0) + self.assertGreater(os.path.getsize(temp_name_no_ext + '00002.jpg'), 0) + + def test_meta_data_output_default_json(self) -> None: + output = self.run_cli(MP3_WITH_IMG) + data = json.loads(output) + self.assertTrue(data) + 
self.assertTrue(set(data.keys()).issubset(TINYTAG_ATTRIBUTES)) + + def test_meta_data_output_format_json(self) -> None: + output = self.run_cli('-f json ' + MP3_WITH_IMG) + data = json.loads(output) + self.assertTrue(data) + self.assertTrue(set(data.keys()).issubset(TINYTAG_ATTRIBUTES)) + + def test_meta_data_output_format_csv(self) -> None: + output = self.run_cli('-f csv ' + MP3_WITH_IMG) + lines = [line for line in output.split(os.linesep) if line] + self.assertTrue(all(',' in line for line in lines)) + attributes = set(line.split(',')[0] for line in lines) + self.assertTrue(set(attributes).issubset(TINYTAG_ATTRIBUTES)) + + def test_meta_data_output_format_tsv(self) -> None: + output = self.run_cli('-f tsv ' + MP3_WITH_IMG) + lines = [line for line in output.split(os.linesep) if line] + self.assertTrue(all('\t' in line for line in lines)) + attributes = set(line.split('\t')[0] for line in lines) + self.assertTrue(set(attributes).issubset(TINYTAG_ATTRIBUTES)) + + def test_meta_data_output_format_tabularcsv(self) -> None: + output = self.run_cli('-f tabularcsv ' + MP3_WITH_IMG) + header, _line, _rest = output.split(os.linesep) + self.assertTrue(set(header.split(',')).issubset(TINYTAG_ATTRIBUTES)) + + def test_meta_data_output_format_invalid(self) -> None: + output = self.run_cli('-f invalid ' + MP3_WITH_IMG) + self.assertFalse(output) + + def test_fail_on_unsupported_file(self) -> None: + with self.assertRaises(CalledProcessError): + self.run_cli(BOGUS_FILE) + + def test_fail_skip_unsupported_file_long_opt(self) -> None: + self.run_cli('--skip-unsupported ' + BOGUS_FILE) + + def test_fail_skip_unsupported_file_short_opt(self) -> None: + self.run_cli('-s ' + BOGUS_FILE) diff --git a/tinytag/tinytag.py b/tinytag/tinytag.py index 26d35d4..e901d5a 100644 --- a/tinytag/tinytag.py +++ b/tinytag/tinytag.py @@ -1,203 +1,267 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: 2014-2025 tinytag Contributors +# SPDX-License-Identifier: MIT -# 
tinytag - an audio meta info reader -# Copyright (c) 2014-2018 Tom Wallroth -# -# Sources on github: -# http://github.com/devsnd/tinytag/ +# tinytag - an audio file metadata reader +# http://github.com/tinytag/tinytag # MIT License -# Copyright (c) 2014-2019 Tom Wallroth +# Copyright (c) 2014-2025 Tom Wallroth, Mat (mathiascode), et al. -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - - -from __future__ import print_function - -import aifc -import json -import operator -from chunk import Chunk -from collections import OrderedDict, defaultdict -try: - from collections.abc import MutableMapping -except ImportError: - from collections import MutableMapping -import codecs -from functools import reduce -import struct -import os -import io -import sys +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +"""Audio file metadata reader.""" + +from __future__ import annotations +from binascii import a2b_base64 from io import BytesIO -import re - -DEBUG = os.environ.get('DEBUG', False) # some of the parsers can print debug info - - -class TinyTagException(LookupError): # inherit LookupError for backwards compat - pass - - -def _read(fh, nbytes): # helper function to check if we haven't reached EOF - b = fh.read(nbytes) - if len(b) < nbytes: - raise TinyTagException('Unexpected end of file') - return b - - -def stderr(*args): - sys.stderr.write('%s\n' % ' '.join(repr(arg) for arg in args)) - sys.stderr.flush() - - -def _bytes_to_int_le(b): - fmt = {1: ' None: + self.filename: str | None = None + self.filesize = 0 + + self.duration: float | None = None + self.channels: int | None = None + self.bitrate: float | None = None + self.bitdepth: int | None = None + self.samplerate: int | None = None + + self.artist: str | None = None + self.albumartist: str | None = None + self.composer: str | None = None + self.album: str | None = None + self.disc: int | None = None + self.disc_total: int | None = None + self.title: str | None = None + self.track: int | None = None + 
self.track_total: int | None = None + self.genre: str | None = None + self.year: str | None = None + self.comment: str | None = None + + self.images = Images() + self.other: _StringListDict = OtherFields() + + self._filehandler: BinaryIO | None = None + self._default_encoding: str | None = None # override for some formats + self._parse_duration = True + self._parse_tags = True self._load_image = False - self._image_data = None - self._ignore_errors = ignore_errors - - def as_dict(self): - return {k: v for k, v in self.__dict__.items() if not k.startswith('_')} + self._tags_parsed = False + self.__dict__: dict[str, str | float | Images | OtherFields | None] @classmethod - def is_supported(cls, filename): - return cls._get_parser_for_filename(filename) is not None + def get(cls, + filename: bytes | str | PathLike[Any] | None = None, + file_obj: BinaryIO | None = None, + tags: bool = True, + duration: bool = True, + image: bool = False, + encoding: str | None = None, + ignore_errors: bool | None = None) -> TinyTag: + """Return a tag object for an audio file.""" + should_close_file = file_obj is None + filename_str = None + if filename: + if should_close_file: + # pylint: disable=consider-using-with + file_obj = open(filename, 'rb') + filename_str = fsdecode(filename) + if file_obj is None: + raise ValueError( + 'Either filename or file_obj argument is required') + if ignore_errors is not None: + # pylint: disable=import-outside-toplevel + from warnings import warn + warn('ignore_errors argument is obsolete, and will be removed in ' + 'the future', DeprecationWarning, stacklevel=2) + try: + # pylint: disable=protected-access + file_obj.seek(0, SEEK_END) + filesize = file_obj.tell() + file_obj.seek(0) + parser_class = cls._get_parser_class(filename_str, file_obj) + tag = parser_class() + tag._filehandler = file_obj + tag._default_encoding = encoding + tag.filename = filename_str + tag.filesize = filesize + if filesize > 0: + try: + tag._load(tags=tags, 
duration=duration, image=image) + except Exception as exc: + raise ParseError(exc) from exc + return tag + finally: + if should_close_file: + file_obj.close() - def get_image(self): - return self._image_data + @classmethod + def is_supported(cls, filename: bytes | str | PathLike[Any]) -> bool: + """Check if a specific file is supported based on its file + extension.""" + filename_str = fsdecode(filename) + return cls._get_parser_for_filename(filename_str) is not None + + def as_dict(self) -> dict[str, str | float | list[str]]: + """Return a flat dictionary representation of available + metadata.""" + fields: dict[str, str | float | list[str]] = {} + for key, value in self.__dict__.items(): + if key.startswith('_'): + continue + if isinstance(value, Images): + continue + if not isinstance(value, OtherFields): + if value is None: + continue + if key != 'filename' and isinstance(value, str): + fields[key] = [value] + else: + fields[key] = value + continue + for other_key, other_values in value.items(): + other_fields = fields.get(other_key) + if not isinstance(other_fields, list): + other_fields = fields[other_key] = [] + other_fields += other_values + return fields @classmethod - def _get_parser_for_filename(cls, filename): - mapping = { - (b'.mp3',): ID3, - (b'.oga', b'.ogg', b'.opus'): Ogg, - (b'.wav',): Wave, - (b'.flac',): Flac, - (b'.wma',): Wma, - (b'.m4b', b'.m4a', b'.mp4'): MP4, - (b'.aiff', b'.aifc', b'.aif', b'.afc'): Aiff, - } - if not isinstance(filename, bytes): # convert filename to binary - filename = filename.encode('ASCII', errors='ignore').lower() - for ext, tagclass in mapping.items(): + def _get_parser_for_filename(cls, filename: str) -> type[TinyTag] | None: + if cls._file_extension_mapping is None: + cls._file_extension_mapping = { + ('.mp1', '.mp2', '.mp3'): _ID3, + ('.oga', '.ogg', '.opus', '.spx'): _Ogg, + ('.wav',): _Wave, + ('.flac',): _Flac, + ('.wma',): _Wma, + ('.m4b', '.m4a', '.m4r', '.m4v', '.mp4', + '.aax', '.aaxc'): _MP4, + ('.aiff', 
'.aifc', '.aif', '.afc'): _Aiff, + } + filename = filename.lower() + for ext, tagclass in cls._file_extension_mapping.items(): if filename.endswith(ext): return tagclass + return None @classmethod - def _get_parser_for_file_handle(cls, fh): + def _get_parser_for_file_handle( + cls, + filehandle: BinaryIO + ) -> type[TinyTag] | None: # https://en.wikipedia.org/wiki/List_of_file_signatures - magic_bytes_mapping = { - b'^ID3': ID3, - b'^\xff\xfb': ID3, - b'^OggS': Ogg, - b'^RIFF....WAVE': Wave, - b'^fLaC': Flac, - b'^\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C': Wma, - b'....ftypM4A': MP4, # https://www.file-recovery.com/m4a-signature-format.htm - b'\xff\xf1': MP4, # https://www.garykessler.net/library/file_sigs.html - b'^FORM....AIFF': Aiff, - b'^FORM....AIFC': Aiff, - } - header = fh.peek(max(len(sig) for sig in magic_bytes_mapping)) - for magic, parser in magic_bytes_mapping.items(): - if re.match(magic, header): - return parser + header = filehandle.read(35) + filehandle.seek(0) + if header.startswith(b'ID3') or header.startswith(b'\xff\xfb'): + return _ID3 + if header.startswith(b'fLaC'): + return _Flac + if ((header[4:8] == b'ftyp' + and header[8:11] in {b'M4A', b'M4B', b'aax'}) + or b'\xff\xf1' in header): + return _MP4 + if (header.startswith(b'OggS') + and (header[29:33] == b'FLAC' or header[29:35] == b'vorbis' + or header[28:32] == b'Opus' or header[29:34] == b'Speex')): + return _Ogg + if header.startswith(b'RIFF') and header[8:12] == b'WAVE': + return _Wave + if header.startswith(b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00' + b'\xAA\x00\x62\xCE\x6C'): + return _Wma + if header.startswith(b'FORM') and header[8:12] in {b'AIFF', b'AIFC'}: + return _Aiff + return None @classmethod - def get_parser_class(cls, filename, filehandle): - if cls != TinyTag: # if `get` is invoked on TinyTag, find parser by ext - return cls # otherwise use the class on which `get` was invoked - parser_class = cls._get_parser_for_filename(filename) - if 
parser_class is not None: - return parser_class + def _get_parser_class( + cls, + filename: str | None = None, + filehandle: BinaryIO | None = None + ) -> type[TinyTag]: + if cls != TinyTag: + return cls + if filename: + parser_class = cls._get_parser_for_filename(filename) + if parser_class is not None: + return parser_class # try determining the file type by magic byte header - parser_class = cls._get_parser_for_file_handle(filehandle) - if parser_class is not None: - return parser_class - raise TinyTagException('No tag reader found to support filetype! ') - - @classmethod - def get(cls, filename, tags=True, duration=True, image=False, ignore_errors=False): - try: # cast pathlib.Path to str - import pathlib - if isinstance(filename, pathlib.Path): - filename = str(filename.absolute()) - except ImportError: - pass - else: - filename = os.path.expanduser(filename) - size = os.path.getsize(filename) - if not size > 0: - return TinyTag(None, 0) - with io.open(filename, 'rb') as af: - parser_class = cls.get_parser_class(filename, af) - tag = parser_class(af, size, ignore_errors=ignore_errors) - tag.load(tags=tags, duration=duration, image=image) - tag.extra = dict(tag.extra) # turn default dict into dict so that it can throw KeyError - return tag - - def __str__(self): - return json.dumps(OrderedDict(sorted(self.as_dict().items()))) - - def __repr__(self): - return str(self) - - def load(self, tags, duration, image=False): + if filehandle: + parser_class = cls._get_parser_for_file_handle(filehandle) + if parser_class is not None: + return parser_class + raise UnsupportedFormatError( + 'No tag reader found to support file type') + + def _load(self, tags: bool, duration: bool, image: bool = False) -> None: + self._parse_tags = tags + self._parse_duration = duration self._load_image = image + if self._filehandler is None: + return if tags: self._parse_tag(self._filehandler) if duration: @@ -205,273 +269,532 @@ def load(self, tags, duration, image=False): 
self._filehandler.seek(0) self._determine_duration(self._filehandler) - def _set_field(self, fieldname, bytestring, transfunc=None): - """convienience function to set fields of the tinytag by name. - the payload (bytestring) can be changed using the transfunc""" - write_dest = self # write into the TinyTag by default - get_func = getattr - set_func = setattr - is_extra = fieldname.startswith('extra.') # but if it's marked as extra field - if is_extra: - fieldname = fieldname[6:] - write_dest = self.extra # write into the extra field instead - get_func = operator.getitem - set_func = operator.setitem - if get_func(write_dest, fieldname): # do not overwrite existing data + def _set_field(self, fieldname: str, value: str | float, + check_conflict: bool = True) -> None: + if fieldname.startswith(self._OTHER_PREFIX): + fieldname = fieldname[len(self._OTHER_PREFIX):] + if check_conflict and fieldname in self.__dict__: + fieldname = '_' + fieldname + if fieldname not in self.other: + self.other[fieldname] = [] + self.other[fieldname].append(str(value)) + if _DEBUG: + print(f'Adding value "{value} to field "{fieldname}"') return - value = bytestring if transfunc is None else transfunc(bytestring) - if DEBUG: - stderr('Setting field "%s" to "%s"' % (fieldname, value)) - if fieldname == 'genre': - genre_id = 255 - if value.isdigit(): # funky: id3v1 genre hidden in a id3v2 field - genre_id = int(value) - else: # funkier: the TCO may contain genres in parens, e.g. '(13)' - genre_in_parens = re.match('^\\((\\d+)\\)$', value) - if genre_in_parens: - genre_id = int(genre_in_parens.group(1)) - if 0 <= genre_id < len(ID3.ID3V1_GENRES): - value = ID3.ID3V1_GENRES[genre_id] - if fieldname in ("track", "disc"): - if type(value).__name__ in ('str', 'unicode') and '/' in value: - current, total = value.split('/')[:2] - set_func(write_dest, "%s_total" % fieldname, total) - else: - # Converting 'track', 'disk' to string for type consistency. 
- current = str(value) if isinstance(value, int) else value - set_func(write_dest, fieldname, current) - elif fieldname in ("track_total", "disc_total") and isinstance(value, int): - # Converting to string 'track_total', 'disc_total' for type consistency. - set_func(write_dest, fieldname, str(value)) - else: - set_func(write_dest, fieldname, value) + old_value = self.__dict__.get(fieldname) + new_value = value + if isinstance(new_value, str): + # First value goes in tag, others in tag.other + values = new_value.split('\x00') + for index, i_value in enumerate(values): + if index or old_value and i_value != old_value: + self._set_field( + self._OTHER_PREFIX + fieldname, i_value, + check_conflict=False) + continue + new_value = i_value + if old_value: + return + elif not new_value and old_value: + # Prioritize non-zero integer values + return + if _DEBUG: + print(f'Setting field "{fieldname}" to "{new_value!r}"') + self.__dict__[fieldname] = new_value - def _determine_duration(self, fh): - raise NotImplementedError() + def _determine_duration(self, fh: BinaryIO) -> None: + raise NotImplementedError - def _parse_tag(self, fh): - raise NotImplementedError() + def _parse_tag(self, fh: BinaryIO) -> None: + raise NotImplementedError - def update(self, other): + def _update(self, other: TinyTag) -> None: # update the values of this tag with the values from another tag - for key in ['track', 'track_total', 'title', 'artist', - 'album', 'albumartist', 'year', 'duration', - 'genre', 'disc', 'disc_total', 'comment', 'composer']: - if not getattr(self, key) and getattr(other, key): - setattr(self, key, getattr(other, key)) + for key, value in other.__dict__.items(): + if key.startswith('_'): + continue + if isinstance(value, OtherFields): + for other_key, other_values in other.other.items(): + for other_value in other_values: + self._set_field( + self._OTHER_PREFIX + other_key, other_value, + check_conflict=False) + elif isinstance(value, Images): + self.images._update(value) # 
pylint: disable=protected-access + elif value is not None: + self._set_field(key, value) @staticmethod - def _unpad(s): - # strings in mp3 and asf *may* be terminated with a zero byte at the end - return s.replace('\x00', '') - - -class MP4(TinyTag): - # see: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html - # and: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html - - class Parser: - # https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW34 - ATOM_DECODER_BY_TYPE = { - 0: lambda x: x, # 'reserved', - 1: lambda x: codecs.decode(x, 'utf-8', 'replace'), # UTF-8 - 2: lambda x: codecs.decode(x, 'utf-16', 'replace'), # UTF-16 - 3: lambda x: codecs.decode(x, 's/jis', 'replace'), # S/JIS - # 16: duration in millis - 13: lambda x: x, # JPEG - 14: lambda x: x, # PNG - 21: lambda x: struct.unpack('>b', x)[0], # BE Signed int - 22: lambda x: struct.unpack('>B', x)[0], # BE Unsigned int - 23: lambda x: struct.unpack('>f', x)[0], # BE Float32 - 24: lambda x: struct.unpack('>d', x)[0], # BE Float64 - # 27: lambda x: x, # BMP - # 28: lambda x: x, # QuickTime Metadata atom - 65: lambda x: struct.unpack('b', x)[0], # 8-bit Signed int - 66: lambda x: struct.unpack('>h', x)[0], # BE 16-bit Signed int - 67: lambda x: struct.unpack('>i', x)[0], # BE 32-bit Signed int - 74: lambda x: struct.unpack('>q', x)[0], # BE 64-bit Signed int - 75: lambda x: struct.unpack('B', x)[0], # 8-bit Unsigned int - 76: lambda x: struct.unpack('>H', x)[0], # BE 16-bit Unsigned int - 77: lambda x: struct.unpack('>I', x)[0], # BE 32-bit Unsigned int - 78: lambda x: struct.unpack('>Q', x)[0], # BE 64-bit Unsigned int - } - - @classmethod - def make_data_atom_parser(cls, fieldname): - def parse_data_atom(data_atom): - data_type = struct.unpack('>I', data_atom[:4])[0] - conversion = cls.ATOM_DECODER_BY_TYPE.get(data_type) - if conversion is None: - 
stderr('Cannot convert data type: %s' % data_type) - return {} # don't know how to convert data atom - # skip header & null-bytes, convert rest - return {fieldname: conversion(data_atom[8:])} - return parse_data_atom - - @classmethod - def make_number_parser(cls, fieldname1, fieldname2): - def _(data_atom): - number_data = data_atom[8:14] - numbers = struct.unpack('>HHH', number_data) - # for some reason the first number is always irrelevant. - return {fieldname1: numbers[1], fieldname2: numbers[2]} - return _ - - @classmethod - def parse_id3v1_genre(cls, data_atom): - # dunno why the genre is offset by -1 but that's how mutagen does it - idx = struct.unpack('>H', data_atom[8:])[0] - 1 - if idx < len(ID3.ID3V1_GENRES): - return {'genre': ID3.ID3V1_GENRES[idx]} - return {'genre': None} - - @classmethod - def parse_audio_sample_entry(cls, data): - # this atom also contains the esds atom: - # https://ffmpeg.org/doxygen/0.6/mov_8c-source.html - # http://xhelmboyx.tripod.com/formats/mp4-layout.txt - datafh = BytesIO(data) - datafh.seek(16, os.SEEK_CUR) # jump over version and flags - channels = struct.unpack('>H', datafh.read(2))[0] - datafh.seek(2, os.SEEK_CUR) # jump over bit_depth - datafh.seek(2, os.SEEK_CUR) # jump over QT compr id & pkt size - sr = struct.unpack('>I', datafh.read(4))[0] - esds_atom_size = struct.unpack('>I', data[28:32])[0] - esds_atom = BytesIO(data[36:36 + esds_atom_size]) - # http://sasperger.tistory.com/103 - esds_atom.seek(22, os.SEEK_CUR) # jump over most data... 
- esds_atom.seek(4, os.SEEK_CUR) # jump over max bitrate - avg_br = struct.unpack('>I', esds_atom.read(4))[0] / 1000.0 # kbit/s - return {'channels': channels, 'samplerate': sr, 'bitrate': avg_br} - - @classmethod - def parse_mvhd(cls, data): - # http://stackoverflow.com/a/3639993/1191373 - walker = BytesIO(data) - version = struct.unpack('b', walker.read(1))[0] - walker.seek(3, os.SEEK_CUR) # jump over flags - if version == 0: # uses 32 bit integers for timestamps - walker.seek(8, os.SEEK_CUR) # jump over create & mod times - time_scale = struct.unpack('>I', walker.read(4))[0] - duration = struct.unpack('>I', walker.read(4))[0] - else: # version == 1: # uses 64 bit integers for timestamps - walker.seek(16, os.SEEK_CUR) # jump over create & mod times - time_scale = struct.unpack('>I', walker.read(4))[0] - duration = struct.unpack('>q', walker.read(8))[0] - return {'duration': float(duration) / time_scale} - - @classmethod - def debug_atom(cls, data): - stderr(data) # use this function to inspect atoms in an atom tree - return {} - - # The parser tree: Each key is an atom name which is traversed if existing. - # Leaves of the parser tree are callables which receive the atom data. - # callables return {fieldname: value} which is updates the TinyTag. 
- META_DATA_TREE = {b'moov': {b'udta': {b'meta': {b'ilst': { - # see: http://atomicparsley.sourceforge.net/mpeg-4files.html - b'\xa9alb': {b'data': Parser.make_data_atom_parser('album')}, - b'\xa9ART': {b'data': Parser.make_data_atom_parser('artist')}, - b'aART': {b'data': Parser.make_data_atom_parser('albumartist')}, - # b'cpil': {b'data': Parser.make_data_atom_parser('compilation')}, - b'\xa9cmt': {b'data': Parser.make_data_atom_parser('comment')}, - b'disk': {b'data': Parser.make_number_parser('disc', 'disc_total')}, - b'\xa9wrt': {b'data': Parser.make_data_atom_parser('composer')}, - b'\xa9day': {b'data': Parser.make_data_atom_parser('year')}, - b'\xa9gen': {b'data': Parser.make_data_atom_parser('genre')}, - b'gnre': {b'data': Parser.parse_id3v1_genre}, - b'\xa9nam': {b'data': Parser.make_data_atom_parser('title')}, - b'trkn': {b'data': Parser.make_number_parser('track', 'track_total')}, - }}}}} - - # see: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html - AUDIO_DATA_TREE = { - b'moov': { - b'mvhd': Parser.parse_mvhd, - b'trak': {b'mdia': {b"minf": {b"stbl": {b"stsd": {b'mp4a': - Parser.parse_audio_sample_entry - }}}}} - } + def _unpad(s: str) -> str: + # certain strings *may* be terminated with a zero byte at the end + return s.strip('\x00') + + def get_image(self) -> bytes | None: + """Deprecated, use 'images.any' instead.""" + from warnings import warn # pylint: disable=import-outside-toplevel + warn('get_image() is deprecated, and will be removed in the future. 
' + "Use 'images.any' instead.", + DeprecationWarning, stacklevel=2) + image = self.images.any + return image.data if image is not None else None + + @property + def audio_offset(self) -> None: # pylint: disable=useless-return + """Obsolete.""" + from warnings import warn # pylint: disable=import-outside-toplevel + warn("'audio_offset' attribute is obsolete, and will be " + 'removed in the future', + DeprecationWarning, stacklevel=2) + return None + + @property + def extra(self) -> dict[str, str]: + """Deprecated, use 'other' instead.""" + from warnings import warn # pylint: disable=import-outside-toplevel + warn("'extra' attribute is deprecated, and will be " + "removed in the future. Use 'other' instead.", + DeprecationWarning, stacklevel=2) + extra_keys = {'copyright', 'initial_key', 'isrc', 'lyrics', 'url'} + return {k: v[0] for k, v in self.other.items() if k in extra_keys} + + +class Images: + """A class containing images embedded in an audio file.""" + _OTHER_PREFIX = 'other.' + + def __init__(self) -> None: + self.front_cover: Image | None = None + self.back_cover: Image | None = None + self.media: Image | None = None + + self.other: _ImageListDict = OtherImages() + self.__dict__: dict[str, Image | OtherImages | None] + + @property + def any(self) -> Image | None: + """Return a cover image. + If not present, fall back to any other available image. 
+ """ + for value in self.__dict__.values(): + if isinstance(value, OtherImages): + for other_images in value.values(): + for image in other_images: + return image + continue + if value is not None: + return value + return None + + def as_dict(self) -> dict[str, list[Image]]: + """Return a flat dictionary representation of available images.""" + images: dict[str, list[Image]] = {} + for key, value in self.__dict__.items(): + if not isinstance(value, OtherImages): + if value is not None: + images[key] = [value] + continue + for other_key, other_values in value.items(): + other_images = images.get(other_key) + if not isinstance(other_images, list): + other_images = images[other_key] = [] + other_images += other_values + return images + + def _set_field(self, fieldname: str, value: Image) -> None: + old_value = self.__dict__.get(fieldname) + if fieldname.startswith(self._OTHER_PREFIX) or old_value is not None: + fieldname = fieldname[len(self._OTHER_PREFIX):] + other_values = self.other.get(fieldname, []) + other_values.append(value) + if _DEBUG: + print(f'Setting other image field "{fieldname}"') + self.other[fieldname] = other_values + return + if _DEBUG: + print(f'Setting image field "{fieldname}"') + self.__dict__[fieldname] = value + + def _update(self, other: Images) -> None: + for key, value in other.__dict__.items(): + if isinstance(value, OtherImages): + for other_key, other_values in value.items(): + for image_other in other_values: + self._set_field( + self._OTHER_PREFIX + other_key, image_other) + continue + if value is not None: + self._set_field(key, value) + + +class Image: + """A class representing an image embedded in an audio file.""" + def __init__(self, + name: str, + data: bytes, + mime_type: str | None = None) -> None: + self.name = name + self.data = data + self.mime_type = mime_type + self.description: str | None = None + + def __repr__(self) -> str: + variables = vars(self).copy() + data = variables.get("data") + if data is not None: + 
variables["data"] = (data[:45] + b'..') if len(data) > 45 else data + data_str = ', '.join(f'{k}={v!r}' for k, v in variables.items()) + return f'{type(self).__name__}({data_str})' + + +class OtherFields(_StringListDict): + """A dictionary containing additional metadata fields of an audio file.""" + + +class OtherImages(_ImageListDict): + """A dictionary containing additional images embedded in an audio file.""" + + +class _MP4(TinyTag): + """MP4 Audio Parser. + + https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html + https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html + """ + + _CUSTOM_FIELD_NAME_MAPPING = { + 'artists': 'artist', + 'conductor': 'other.conductor', + 'discsubtitle': 'other.set_subtitle', + 'initialkey': 'other.initial_key', + 'isrc': 'other.isrc', + 'language': 'other.language', + 'lyricist': 'other.lyricist', + 'media': 'other.media', + 'website': 'other.url', + 'license': 'other.license', + 'barcode': 'other.barcode', + 'catalognumber': 'other.catalog_number', } - - IMAGE_DATA_TREE = {b'moov': {b'udta': {b'meta': {b'ilst': { - b'covr': {b'data': Parser.make_data_atom_parser('_image_data')}, - }}}}} - - VERSIONED_ATOMS = {b'meta', b'stsd'} # those have an extra 4 byte header - FLAGGED_ATOMS = {b'stsd'} # these also have an extra 4 byte header - - def _determine_duration(self, fh): - self._traverse_atoms(fh, path=self.AUDIO_DATA_TREE) - - def _parse_tag(self, fh): - self._traverse_atoms(fh, path=self.META_DATA_TREE) - if self._load_image: # A bit inefficient, we rewind the file - self._filehandler.seek(0) # to parse it again for the image - self._traverse_atoms(fh, path=self.IMAGE_DATA_TREE) - - def _traverse_atoms(self, fh, path, stop_pos=None, curr_path=None): - header_size = 8 - atom_header = fh.read(header_size) - while len(atom_header) == header_size: - atom_size = struct.unpack('>I', atom_header[:4])[0] - header_size + _IMAGE_MIME_TYPES = { + 13: 'image/jpeg', + 14: 
'image/png' + } + _UNPACK_FORMATS = { + 1: '>b', + 2: '>h', + 4: '>i', + 8: '>q' + } + _VERSIONED_ATOMS = {b'meta', b'stsd'} # those have an extra 4 byte header + _FLAGGED_ATOMS = {b'stsd'} # these also have an extra 4 byte header + _ILST_PATH = [b'ftyp', b'moov', b'udta', b'meta', b'ilst'] + + _audio_data_tree: _DataTreeDict | None = None + _meta_data_tree: _DataTreeDict | None = None + + def _determine_duration(self, fh: BinaryIO) -> None: + # https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html + if _MP4._audio_data_tree is None: + _MP4._audio_data_tree = { + b'moov': { + b'mvhd': _MP4._parse_mvhd, + b'trak': {b'mdia': {b"minf": {b"stbl": {b"stsd": { + b'mp4a': _MP4._parse_audio_sample_entry_mp4a, + b'alac': _MP4._parse_audio_sample_entry_alac + }}}}} + } + } + self._traverse_atoms(fh, path=_MP4._audio_data_tree) + + def _parse_tag(self, fh: BinaryIO) -> None: + # The parser tree: Each key is an atom name which is traversed if + # existing. Leaves of the parser tree are callables which receive + # the atom data. Callables return {fieldname: value} which is updates + # the TinyTag. 
+ if _MP4._meta_data_tree is None: + _MP4._meta_data_tree = {b'moov': {b'udta': {b'meta': {b'ilst': { + # http://atomicparsley.sourceforge.net/mpeg-4files.html + # https://metacpan.org/dist/Image-ExifTool/source/lib/Image/ExifTool/QuickTime.pm#L3093 + b'\xa9ART': {b'data': _MP4._data_parser('artist')}, + b'\xa9alb': {b'data': _MP4._data_parser('album')}, + b'\xa9cmt': {b'data': _MP4._data_parser('comment')}, + b'\xa9con': {b'data': _MP4._data_parser('other.conductor')}, + b'\xa9day': {b'data': _MP4._data_parser('year')}, + b'\xa9des': {b'data': _MP4._data_parser('other.description')}, + b'\xa9dir': {b'data': _MP4._data_parser('other.director')}, + b'\xa9gen': {b'data': _MP4._data_parser('genre')}, + b'\xa9lyr': {b'data': _MP4._data_parser('other.lyrics')}, + b'\xa9mvn': {b'data': _MP4._data_parser('movement')}, + b'\xa9nam': {b'data': _MP4._data_parser('title')}, + b'\xa9pub': {b'data': _MP4._data_parser('other.publisher')}, + b'\xa9too': {b'data': _MP4._data_parser('other.encoded_by')}, + b'\xa9wrt': {b'data': _MP4._data_parser('composer')}, + b'aART': {b'data': _MP4._data_parser('albumartist')}, + b'cprt': {b'data': _MP4._data_parser('other.copyright')}, + b'desc': {b'data': _MP4._data_parser('other.description')}, + b'disk': {b'data': _MP4._nums_parser('disc', 'disc_total')}, + b'gnre': {b'data': _MP4._parse_id3v1_genre}, + b'trkn': {b'data': _MP4._nums_parser('track', 'track_total')}, + b'tmpo': {b'data': _MP4._data_parser('other.bpm')}, + b'covr': {b'data': _MP4._parse_cover_image}, + b'----': _MP4._parse_custom_field, + }}}}} + self._traverse_atoms(fh, path=_MP4._meta_data_tree) + + def _traverse_atoms(self, + fh: BinaryIO, + path: _DataTreeDict, + stop_pos: int | None = None, + curr_path: list[bytes] | None = None) -> None: + header_len = 8 + atom_header = fh.read(header_len) + while len(atom_header) == header_len: + atom_size = unpack('>I', atom_header[:4])[0] - header_len atom_type = atom_header[4:] if curr_path is None: # keep track how we traversed in 
the tree curr_path = [atom_type] if atom_size <= 0: # empty atom, jump to next one - atom_header = fh.read(header_size) + atom_header = fh.read(header_len) continue - if DEBUG: - stderr('%s pos: %d atom: %s len: %d' % (' ' * 4 * len(curr_path), fh.tell() - header_size, atom_type, atom_size + header_size)) - if atom_type in self.VERSIONED_ATOMS: # jump atom version for now - fh.seek(4, os.SEEK_CUR) - if atom_type in self.FLAGGED_ATOMS: # jump atom flags for now - fh.seek(4, os.SEEK_CUR) + if _DEBUG: + print(f'{" " * 4 * len(curr_path)} ' + f'pos: {fh.tell() - header_len} ' + f'atom: {atom_type!r} len: {atom_size + header_len}') + if atom_type in self._VERSIONED_ATOMS: # jump atom version for now + fh.seek(4, SEEK_CUR) + if atom_type in self._FLAGGED_ATOMS: # jump atom flags for now + fh.seek(4, SEEK_CUR) sub_path = path.get(atom_type, None) # if the path leaf is a dict, traverse deeper into the tree: - if issubclass(type(sub_path), MutableMapping): + if isinstance(sub_path, dict): atom_end_pos = fh.tell() + atom_size self._traverse_atoms(fh, path=sub_path, stop_pos=atom_end_pos, curr_path=curr_path + [atom_type]) # if the path-leaf is a callable, call it on the atom data elif callable(sub_path): for fieldname, value in sub_path(fh.read(atom_size)).items(): - if DEBUG: - stderr(' ' * 4 * len(curr_path), 'FIELD: ', fieldname) - if fieldname: + if _DEBUG: + print(' ' * 4 * len(curr_path), 'FIELD: ', fieldname) + if isinstance(value, Image): + if self._load_image: + # pylint: disable=protected-access + self.images._set_field( + fieldname[len('images.'):], value) + elif isinstance(value, list): + for subval in value: + self._set_field(fieldname, subval) + else: self._set_field(fieldname, value) + # unknown data atom, try to parse it + elif curr_path == self._ILST_PATH: + atom_end_pos = fh.tell() + atom_size + field_name = self._OTHER_PREFIX + atom_type.decode( + 'utf-8', 'replace') + fh.seek(-header_len, SEEK_CUR) + self._traverse_atoms( + fh, + path={atom_type: 
{b'data': self._data_parser(field_name)}}, + stop_pos=atom_end_pos, curr_path=curr_path + [atom_type]) # if no action was specified using dict or callable, jump over atom else: - fh.seek(atom_size, os.SEEK_CUR) + fh.seek(atom_size, SEEK_CUR) # check if we have reached the end of this branch: if stop_pos and fh.tell() >= stop_pos: return # return to parent (next parent node in tree) - atom_header = fh.read(header_size) # read next atom + atom_header = fh.read(header_len) # read next atom + + @classmethod + def _data_parser(cls, fieldname: str) -> Callable[[bytes], dict[str, str]]: + def _parse_data_atom(data_atom: bytes) -> dict[str, str]: + data_type = unpack('>I', data_atom[:4])[0] + data = data_atom[8:] + value = None + if data_type == 1: # UTF-8 string + value = data.decode('utf-8', 'replace') + elif data_type == 21: # BE signed integer + fmts = cls._UNPACK_FORMATS + data_len = len(data) + if data_len in fmts: + value = str(unpack(fmts[data_len], data)[0]) + if value: + return {fieldname: value} + return {} + return _parse_data_atom + + @classmethod + def _nums_parser( + cls, fieldname1: str, fieldname2: str + ) -> Callable[[bytes], dict[str, int]]: + def _parse_nums(data_atom: bytes) -> dict[str, int]: + number_data = data_atom[8:14] + numbers = unpack('>3H', number_data) + # for some reason the first number is always irrelevant. 
+ return {fieldname1: numbers[1], fieldname2: numbers[2]} + return _parse_nums + + @classmethod + def _parse_id3v1_genre(cls, data_atom: bytes) -> dict[str, str]: + # dunno why genre is offset by -1 but that's how mutagen does it + idx = unpack('>H', data_atom[8:])[0] - 1 + result = {} + # pylint: disable=protected-access + if idx < len(_ID3._ID3V1_GENRES): + result['genre'] = _ID3._ID3V1_GENRES[idx] + return result + + @classmethod + def _parse_cover_image(cls, data_atom: bytes) -> dict[str, Image]: + data_type = unpack('>I', data_atom[:4])[0] + image = Image( + 'front_cover', data_atom[8:], cls._IMAGE_MIME_TYPES.get(data_type)) + return {'images.front_cover': image} + + @classmethod + def _read_extended_descriptor(cls, esds_atom: BinaryIO) -> None: + for _i in range(4): + if esds_atom.read(1) != b'\x80': + break + + @classmethod + def _parse_custom_field(cls, data: bytes) -> dict[str, list[str]]: + fh = BytesIO(data) + header_len = 8 + field_name = None + values = [] + atom_header = fh.read(header_len) + while len(atom_header) == header_len: + atom_size = unpack('>I', atom_header[:4])[0] - header_len + atom_type = atom_header[4:] + if atom_type == b'name': + atom_value = fh.read(atom_size)[4:].lower() + field_name = atom_value.decode('utf-8', 'replace') + # pylint: disable=protected-access + field_name = cls._CUSTOM_FIELD_NAME_MAPPING.get( + field_name, TinyTag._OTHER_PREFIX + field_name) + elif atom_type == b'data' and field_name: + data_atom = fh.read(atom_size) + parser = cls._data_parser(field_name) + atom_values = parser(data_atom) + if field_name in atom_values: + values.append(atom_values[field_name]) + else: + fh.seek(atom_size, SEEK_CUR) + atom_header = fh.read(header_len) # read next atom + if field_name and values: + return {field_name: values} + return {} + + @classmethod + def _parse_audio_sample_entry_mp4a(cls, data: bytes) -> dict[str, int]: + # this atom also contains the esds atom: + # https://ffmpeg.org/doxygen/0.6/mov_8c-source.html + # 
http://xhelmboyx.tripod.com/formats/mp4-layout.txt + # http://sasperger.tistory.com/103 + + # jump over version and flags + channels = unpack('>H', data[16:18])[0] + # jump over bit_depth, QT compr id & pkt size + sr = unpack('>I', data[22:26])[0] + + # ES Description Atom + esds_atom_size = unpack('>I', data[28:32])[0] + esds_atom = BytesIO(data[36:36 + esds_atom_size]) + esds_atom.seek(5, SEEK_CUR) # jump over version, flags and tag + + # ES Descriptor + cls._read_extended_descriptor(esds_atom) + esds_atom.seek(4, SEEK_CUR) # jump over ES id, flags and tag + + # Decoder Config Descriptor + cls._read_extended_descriptor(esds_atom) + esds_atom.seek(9, SEEK_CUR) + avg_br = unpack('>I', esds_atom.read(4))[0] / 1000 # kbit/s + return {'channels': channels, 'samplerate': sr, 'bitrate': avg_br} + @classmethod + def _parse_audio_sample_entry_alac(cls, data: bytes) -> dict[str, int]: + # https://github.com/macosforge/alac/blob/master/ALACMagicCookieDescription.txt + bitdepth = data[45] + channels = data[49] + avg_br, sr = unpack('>II', data[56:64]) + avg_br /= 1000 # kbit/s + return {'channels': channels, 'samplerate': sr, 'bitrate': avg_br, + 'bitdepth': bitdepth} -class ID3(TinyTag): - FRAME_ID_TO_FIELD = { # Mapping from Frame ID to a field of the TinyTag + @classmethod + def _parse_mvhd(cls, data: bytes) -> dict[str, float]: + # http://stackoverflow.com/a/3639993/1191373 + version = data[0] + # jump over flags, create & mod times + if version == 0: # uses 32 bit integers for timestamps + time_scale, duration = unpack('>II', data[12:20]) + else: # version == 1: # uses 64-bit integers for timestamps + time_scale, duration = unpack('>IQ', data[20:32]) + return {'duration': duration / time_scale} + + +class _ID3(TinyTag): + """MP3 Parser.""" + + _ID3_MAPPING = { + # Mapping from Frame ID to a field of the TinyTag + # https://exiftool.org/TagNames/ID3.html 'COMM': 'comment', 'COM': 'comment', - 'TRCK': 'track', 'TRK': 'track', - 'TYER': 'year', 'TYE': 'year', - 'TALB': 
'album', 'TAL': 'album', + 'TRCK': 'track', 'TRK': 'track', + 'TYER': 'year', 'TYE': 'year', 'TDRC': 'year', + 'TALB': 'album', 'TAL': 'album', 'TPE1': 'artist', 'TP1': 'artist', - 'TIT2': 'title', 'TT2': 'title', - 'TCON': 'genre', 'TCO': 'genre', - 'TPOS': 'disc', - 'TPE2': 'albumartist', 'TCOM': 'composer', - 'WXXX': 'extra.url', - 'TXXX': 'extra.text', - 'TKEY': 'extra.initial_key', - 'USLT': 'extra.lyrics', + 'TIT2': 'title', 'TT2': 'title', + 'TCON': 'genre', 'TCO': 'genre', + 'TPOS': 'disc', 'TPA': 'disc', + 'TPE2': 'albumartist', 'TP2': 'albumartist', + 'TCOM': 'composer', 'TCM': 'composer', + 'WOAR': 'other.url', 'WAR': 'other.url', + 'TSRC': 'other.isrc', 'TRC': 'other.isrc', + 'TCOP': 'other.copyright', 'TCR': 'other.copyright', + 'TBPM': 'other.bpm', 'TBP': 'other.bpm', + 'TKEY': 'other.initial_key', 'TKE': 'other.initial_key', + 'TLAN': 'other.language', 'TLA': 'other.language', + 'TPUB': 'other.publisher', 'TPB': 'other.publisher', + 'USLT': 'other.lyrics', 'ULT': 'other.lyrics', + 'TPE3': 'other.conductor', 'TP3': 'other.conductor', + 'TEXT': 'other.lyricist', 'TXT': 'other.lyricist', + 'TSST': 'other.set_subtitle', + 'TENC': 'other.encoded_by', 'TEN': 'other.encoded_by', + 'TSSE': 'other.encoder_settings', 'TSS': 'other.encoder_settings', + 'TMED': 'other.media', 'TMT': 'other.media', + 'WCOP': 'other.license', + } + _ID3_MAPPING_CUSTOM = { + 'artists': 'artist', + 'director': 'other.director', + 'license': 'other.license', + 'barcode': 'other.barcode', + 'catalognumber': 'other.catalog_number', + } + _IMAGE_FRAME_IDS = {'APIC', 'PIC'} + _CUSTOM_FRAME_IDS = {'TXXX', 'TXX'} + _IGNORED_FRAME_IDS = { + 'AENC', 'CRA', + 'ATXT', + 'CHAP', + 'COMR', + 'CRM', + 'CTOC', + 'ENCR', + 'GEOB', 'GEO', + 'GRID', + 'MCDI', 'MCI', + 'PRIV', + 'RGAD', + 'STC', 'SYTC' } - IMAGE_FRAME_IDS = {'APIC', 'PIC'} - PARSABLE_FRAME_IDS = set(FRAME_ID_TO_FIELD.keys()).union(IMAGE_FRAME_IDS) - _MAX_ESTIMATION_SEC = 30 + _ID3V1_TAG_SIZE = 128 + _MAX_ESTIMATION_SEC = 30.0 
_CBR_DETECTION_FRAME_COUNT = 5 _USE_XING_HEADER = True # much faster, but can be deactivated for testing - ID3V1_GENRES = [ + _ID3V1_GENRES = ( 'Blues', 'Classic Rock', 'Country', 'Dance', 'Disco', 'Funk', 'Grunge', 'Hip-Hop', 'Jazz', 'Metal', 'New Age', 'Oldies', 'Other', 'Pop', 'R&B', 'Rap', 'Reggae', 'Rock', 'Techno', 'Industrial', @@ -486,23 +809,20 @@ class ID3(TinyTag): 'Native American', 'Cabaret', 'New Wave', 'Psychadelic', 'Rave', 'Showtunes', 'Trailer', 'Lo-Fi', 'Tribal', 'Acid Punk', 'Acid Jazz', 'Polka', 'Retro', 'Musical', 'Rock & Roll', 'Hard Rock', - # Wimamp Extended Genres 'Folk', 'Folk-Rock', 'National Folk', 'Swing', 'Fast Fusion', 'Bebob', 'Latin', 'Revival', 'Celtic', 'Bluegrass', 'Avantgarde', 'Gothic Rock', 'Progressive Rock', 'Psychedelic Rock', 'Symphonic Rock', 'Slow Rock', - 'Big Band', 'Chorus', 'Easy Listening', 'Acoustic', 'Humour', 'Speech', - 'Chanson', 'Opera', 'Chamber Music', 'Sonata', 'Symphony', 'Booty Bass', - 'Primus', 'Porn Groove', 'Satire', 'Slow Jam', 'Club', 'Tango', 'Samba', - 'Folklore', 'Ballad', 'Power Ballad', 'Rhythmic Soul', 'Freestyle', - 'Duet', 'Punk Rock', 'Drum Solo', 'A capella', 'Euro-House', - 'Dance Hall', 'Goa', 'Drum & Bass', - - # according to https://de.wikipedia.org/wiki/Liste_der_ID3v1-Genres: + 'Big Band', 'Chorus', 'Easy listening', 'Acoustic', 'Humour', 'Speech', + 'Chanson', 'Opera', 'Chamber Music', 'Sonata', 'Symphony', + 'Booty Bass', 'Primus', 'Porn Groove', 'Satire', 'Slow Jam', 'Club', + 'Tango', 'Samba', 'Folklore', 'Ballad', 'Power Ballad', + 'Rhythmic Soul', 'Freestyle', 'Duet', 'Punk Rock', 'Drum Solo', + 'A capella', 'Euro-House', 'Dance Hall', 'Goa', 'Drum & Bass', 'Club-House', 'Hardcore Techno', 'Terror', 'Indie', 'BritPop', - '', # don't use ethnic slur ("Negerpunk", WTF!) 
- 'Polsk Punk', 'Beat', 'Christian Gangsta Rap', 'Heavy Metal', - 'Black Metal', 'Contemporary Christian', 'Christian Rock', + 'Afro-Punk', 'Polsk Punk', 'Beat', 'Christian Gangsta Rap', + 'Heavy Metal', 'Black Metal', 'Contemporary Christian', + 'Christian Rock', # WinAmp 1.91 'Merengue', 'Salsa', 'Thrash Metal', 'Anime', 'Jpop', 'Synthpop', # WinAmp 5.6 @@ -513,77 +833,121 @@ class ID3(TinyTag): 'Math Rock', 'New Romantic', 'Nu-Breakz', 'Post-Punk', 'Post-Rock', 'Psytrance', 'Shoegaze', 'Space Rock', 'Trop Rock', 'World Music', 'Neoclassical', 'Audiobook', 'Audio Theatre', 'Neue Deutsche Welle', - 'Podcast', 'Indie Rock', 'G-Funk', 'Dubstep', 'Garage Rock', 'Psybient', - ] - - def __init__(self, filehandler, filesize, *args, **kwargs): - TinyTag.__init__(self, filehandler, filesize, *args, **kwargs) - # save position after the ID3 tag for duration mesurement speedup - self._bytepos_after_id3v2 = 0 - - @classmethod - def set_estimation_precision(cls, estimation_in_seconds): - cls._MAX_ESTIMATION_SEC = estimation_in_seconds + 'Podcast', 'Indie Rock', 'G-Funk', 'Dubstep', 'Garage Rock', + 'Psybient', + ) + _ID3V2_2_IMAGE_FORMATS = { + 'bmp': 'image/bmp', + 'jpg': 'image/jpeg', + 'png': 'image/png', + } + _IMAGE_TYPES = ( + 'other.generic', + 'other.icon', + 'other.alt_icon', + 'front_cover', + 'back_cover', + 'other.leaflet', + 'media', + 'other.lead_artist', + 'other.artist', + 'other.conductor', + 'other.band', + 'other.composer', + 'other.lyricist', + 'other.recording_location', + 'other.during_recording', + 'other.during_performance', + 'other.screen_capture', + 'other.bright_colored_fish', + 'other.illustration', + 'other.band_logo', + 'other.publisher_logo', + ) + _UNKNOWN_IMAGE_TYPE = 'other.unknown' # see this page for the magic values used in mp3: # http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm - samplerates = [ - [11025, 12000, 8000], # MPEG 2.5 - [], # reserved - [22050, 24000, 16000], # MPEG 2 - [44100, 48000, 32000], # MPEG 1 - ] - v1l1 = [0, 
32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0] - v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0] - v1l3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0] - v2l1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0] - v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0] - v2l3 = v2l2 - bitrate_by_version_by_layer = [ - [None, v2l3, v2l2, v2l1], # MPEG Version 2.5 # note that the layers go - None, # reserved # from 3 to 1 by design. - [None, v2l3, v2l2, v2l1], # MPEG Version 2 # the first layer id is - [None, v1l3, v1l2, v1l1], # MPEG Version 1 # reserved - ] - samples_per_frame = 1152 # the default frame size for mp3 - channels_per_channel_mode = [ + _SAMPLE_RATES = ( + (11025, 12000, 8000), # MPEG 2.5 + (0, 0, 0), # reserved + (22050, 24000, 16000), # MPEG 2 + (44100, 48000, 32000), # MPEG 1 + ) + _V1L1 = (0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, + 448, 0) + _V1L2 = (0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, + 384, 0) + _V1L3 = (0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, + 320, 0) + _V2L1 = (0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, + 256, 0) + _V2L2 = (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0) + _V2L3 = _V2L2 + _NONE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + _BITRATE_VERSION_LAYERS = ( + # note that layers go from 3 to 1 by design, first layer id is reserved + (_NONE, _V2L3, _V2L2, _V2L1), # MPEG Version 2.5 + (_NONE, _NONE, _NONE, _NONE), # reserved + (_NONE, _V2L3, _V2L2, _V2L1), # MPEG Version 2 + (_NONE, _V1L3, _V1L2, _V1L1), # MPEG Version 1 + ) + _SAMPLES_PER_FRAME = 1152 # the default frame size for mp3 + _CHANNELS_PER_CHANNEL_MODE = ( 2, # 00 Stereo 2, # 01 Joint stereo (Stereo) 2, # 10 Dual channel (2 mono channels) 1, # 11 Single channel (Mono) - ] + ) + + def __init__(self) -> None: + super().__init__() + # save position after 
the ID3 tag for duration measurement speedup + self._bytepos_after_id3v2 = -1 @staticmethod - def _parse_xing_header(fh): + def _parse_xing_header(fh: BinaryIO) -> tuple[int, int]: # see: http://www.mp3-tech.org/programmer/sources/vbrheadersdk.zip - fh.seek(4, os.SEEK_CUR) # read over Xing header - header_flags = struct.unpack('>i', fh.read(4))[0] - frames = byte_count = toc = vbr_scale = None + fh.seek(4, SEEK_CUR) # read over Xing header + header_flags = unpack('>i', fh.read(4))[0] + frames = byte_count = 0 if header_flags & 1: # FRAMES FLAG - frames = struct.unpack('>i', fh.read(4))[0] + frames = unpack('>i', fh.read(4))[0] if header_flags & 2: # BYTES FLAG - byte_count = struct.unpack('>i', fh.read(4))[0] + byte_count = unpack('>i', fh.read(4))[0] if header_flags & 4: # TOC FLAG - toc = [struct.unpack('>i', fh.read(4))[0] for _ in range(100)] + fh.seek(100, SEEK_CUR) if header_flags & 8: # VBR SCALE FLAG - vbr_scale = struct.unpack('>i', fh.read(4))[0] - return frames, byte_count, toc, vbr_scale + fh.seek(4, SEEK_CUR) + return frames, byte_count - def _determine_duration(self, fh): - max_estimation_frames = (ID3._MAX_ESTIMATION_SEC * 44100) // ID3.samples_per_frame + def _determine_duration(self, fh: BinaryIO) -> None: + # if tag reading was disabled, find start position of audio data + if self._bytepos_after_id3v2 == -1: + self._parse_id3v2_header(fh) + + max_estimation_frames = ( + (self._MAX_ESTIMATION_SEC * 44100) // self._SAMPLES_PER_FRAME) frame_size_accu = 0 - header_bytes = 4 + audio_offset = 0 frames = 0 # count frames for determining mp3 duration bitrate_accu = 0 # add up bitrates to find average bitrate to detect - last_bitrates = [] # CBR mp3s (multiple frames with same bitrates) + last_bitrates = set() # CBR mp3s (multiple frames with same bitrates) # seek to first position after id3 tag (speedup for large header) + first_mpeg_id = None fh.seek(self._bytepos_after_id3v2) + file_offset = fh.tell() + walker = BytesIO(fh.read()) while True: # reading 
through garbage until 11 '1' sync-bits are found - b = fh.peek(4) - if len(b) < 4: + header = walker.read(4) + header_len = len(header) + walker.seek(-header_len, SEEK_CUR) + if header_len < 4: + if frames: + self.bitrate = bitrate_accu / frames break # EOF - sync, conf, bitrate_freq, rest = struct.unpack('BBBB', b[0:4]) + _sync, conf, bitrate_freq, rest = unpack('4B', header) br_id = (bitrate_freq >> 4) & 0x0F # biterate id sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id padding = 1 if bitrate_freq & 0x02 > 0 else 0 @@ -591,420 +955,703 @@ def _determine_duration(self, fh): layer_id = (conf >> 1) & 0x03 channel_mode = (rest >> 6) & 0x03 # check for eleven 1s, validate bitrate and sample rate - if not b[:2] > b'\xFF\xE0' or br_id > 14 or br_id == 0 or sr_id == 3 or layer_id == 0 or mpeg_id == 1: - idx = b.find(b'\xFF', 1) # invalid frame, find next sync header + if (header[:2] <= b'\xFF\xE0' + or (first_mpeg_id is not None and first_mpeg_id != mpeg_id) + or br_id > 14 or br_id == 0 or sr_id == 3 or layer_id == 0 + or mpeg_id == 1): + # invalid frame, find next sync header + idx = header.find(b'\xFF', 1) if idx == -1: - idx = len(b) # not found: jump over the current peek buffer - fh.seek(max(idx, 1), os.SEEK_CUR) + # not found: jump over the current peek buffer + idx = header_len + walker.seek(max(idx, 1), SEEK_CUR) continue - try: - self.channels = self.channels_per_channel_mode[channel_mode] - frame_bitrate = ID3.bitrate_by_version_by_layer[mpeg_id][layer_id][br_id] - self.samplerate = ID3.samplerates[mpeg_id][sr_id] - except (IndexError, TypeError): - raise TinyTagException('mp3 parsing failed') + if first_mpeg_id is None: + first_mpeg_id = mpeg_id + self.channels = self._CHANNELS_PER_CHANNEL_MODE[channel_mode] + frame_br = self._BITRATE_VERSION_LAYERS[mpeg_id][layer_id][br_id] + self.samplerate = samplerate = self._SAMPLE_RATES[mpeg_id][sr_id] + frame_length = (144000 * frame_br) // samplerate + padding # There might be a xing header in the first frame that 
contains # all the info we need, otherwise parse multiple frames to find the # accurate average bitrate - if frames == 0 and ID3._USE_XING_HEADER: - xing_header_offset = b.find(b'Xing') + if frames == 0 and self._USE_XING_HEADER: + walker_offset = walker.tell() + frame_content = walker.read(frame_length) + xing_header_offset = frame_content.find(b'Xing') if xing_header_offset != -1: - fh.seek(xing_header_offset, os.SEEK_CUR) - xframes, byte_count, toc, vbr_scale = ID3._parse_xing_header(fh) - if xframes and xframes != 0 and byte_count: - self.duration = xframes * ID3.samples_per_frame / float(self.samplerate) / self.channels - self.bitrate = int(byte_count * 8 / self.duration / 1000) - self.audio_offset = fh.tell() + walker.seek(walker_offset + xing_header_offset) + xframes, byte_count = self._parse_xing_header(walker) + if xframes > 0 and byte_count > 0: + # MPEG-2 Audio Layer III uses 576 samples per frame + samples_pf = self._SAMPLES_PER_FRAME + if mpeg_id <= 2: + samples_pf = 576 + self.duration = dur = xframes * samples_pf / samplerate + self.bitrate = byte_count * 8 / dur / 1000 return - continue + walker.seek(walker_offset) - frames += 1 # it's most probably an mp3 frame - bitrate_accu += frame_bitrate + frames += 1 # it's most probably a mp3 frame + bitrate_accu += frame_br if frames == 1: - self.audio_offset = fh.tell() - if frames <= ID3._CBR_DETECTION_FRAME_COUNT: - last_bitrates.append(frame_bitrate) - fh.seek(4, os.SEEK_CUR) # jump over peeked bytes + audio_offset = file_offset + walker.tell() + if frames <= self._CBR_DETECTION_FRAME_COUNT: + last_bitrates.add(frame_br) - frame_length = (144000 * frame_bitrate) // self.samplerate + padding frame_size_accu += frame_length # if bitrate does not change over time its probably CBR - is_cbr = (frames == ID3._CBR_DETECTION_FRAME_COUNT and - len(set(last_bitrates)) == 1) + is_cbr = (frames == self._CBR_DETECTION_FRAME_COUNT + and len(last_bitrates) == 1) if frames == max_estimation_frames or is_cbr: # try to 
estimate duration - fh.seek(-128, 2) # jump to last byte (leaving out id3v1 tag) - audio_stream_size = fh.tell() - self.audio_offset - est_frame_count = audio_stream_size / (frame_size_accu / float(frames)) - samples = est_frame_count * ID3.samples_per_frame - self.duration = samples / float(self.samplerate) - self.bitrate = int(bitrate_accu / frames) + stream_size = ( + self.filesize - audio_offset - self._ID3V1_TAG_SIZE) + est_frame_count = stream_size / (frame_size_accu / frames) + samples = est_frame_count * self._SAMPLES_PER_FRAME + self.duration = samples / samplerate + self.bitrate = bitrate_accu / frames return if frame_length > 1: # jump over current frame body - fh.seek(frame_length - header_bytes, os.SEEK_CUR) + walker.seek(frame_length, SEEK_CUR) if self.samplerate: - self.duration = frames * ID3.samples_per_frame / float(self.samplerate) + self.duration = frames * self._SAMPLES_PER_FRAME / self.samplerate - def _parse_tag(self, fh): + def _parse_tag(self, fh: BinaryIO) -> None: self._parse_id3v2(fh) - attrs = ['track', 'track_total', 'title', 'artist', 'album', 'albumartist', 'year', 'genre'] - has_all_tags = all(getattr(self, attr) for attr in attrs) - if not has_all_tags and self.filesize > 128: - fh.seek(-128, os.SEEK_END) # try parsing id3v1 in last 128 bytes + if self.filesize >= self._ID3V1_TAG_SIZE: + # try parsing id3v1 at the end of file + fh.seek(self.filesize - self._ID3V1_TAG_SIZE) self._parse_id3v1(fh) - def _parse_id3v2(self, fh): + def _parse_id3v2_header(self, fh: BinaryIO) -> tuple[int, bool, int]: + size = major = 0 + extended = False # for info on the specs, see: http://id3.org/Developer%20Information - header = struct.unpack('3sBBB4B', _read(fh, 10)) - tag = codecs.decode(header[0], 'ISO-8859-1') + header = fh.read(10) # check if there is an ID3v2 tag at the beginning of the file - if tag == 'ID3': - major, rev = header[1:3] - if DEBUG: - stderr('Found id3 v2.%s' % major) - # unsync = (header[3] & 0x80) > 0 - extended = (header[3] & 
0x40) > 0 - # experimental = (header[3] & 0x20) > 0 - # footer = (header[3] & 0x10) > 0 - size = self._calc_size(header[4:8], 7) - self._bytepos_after_id3v2 = size - end_pos = fh.tell() + size - parsed_size = 0 - if extended: # just read over the extended header. - size_bytes = struct.unpack('4B', _read(fh, 6)[0:4]) - extd_size = self._calc_size(size_bytes, 7) - fh.seek(extd_size - 6, os.SEEK_CUR) # jump over extended_header - while parsed_size < size: - frame_size = self._parse_frame(fh, id3version=major) - if frame_size == 0: - break - parsed_size += frame_size - fh.seek(end_pos, os.SEEK_SET) - - def _parse_id3v1(self, fh): - if fh.read(3) == b'TAG': # check if this is an ID3 v1 tag - def asciidecode(x): - return self._unpad(codecs.decode(x, 'latin1')) - fields = fh.read(30 + 30 + 30 + 4 + 30 + 1) - self._set_field('title', fields[:30], transfunc=asciidecode) - self._set_field('artist', fields[30:60], transfunc=asciidecode) - self._set_field('album', fields[60:90], transfunc=asciidecode) - self._set_field('year', fields[90:94], transfunc=asciidecode) - comment = fields[94:124] - if b'\x00\x00' < comment[-2:] < b'\x01\x00': - self._set_field('track', str(ord(comment[-1:]))) - comment = comment[:-2] - self._set_field('comment', comment, transfunc=asciidecode) + if header.startswith(b'ID3'): + major = header[3] + if _DEBUG: + print(f'Found id3 v2.{major}') + extended = (header[5] & 0x40) > 0 + size = self._unsynchsafe(unpack('4B', header[6:10])) + self._bytepos_after_id3v2 = size + return size, extended, major + + def _parse_id3v2(self, fh: BinaryIO) -> None: + size, extended, major = self._parse_id3v2_header(fh) + if size <= 0: + return + end_pos = fh.tell() + size + parsed_size = 0 + if extended: # just read over the extended header. 
+ extd_size = self._unsynchsafe(unpack('4B', fh.read(6)[:4])) + fh.seek(extd_size - 6, SEEK_CUR) # jump over extended_header + while parsed_size < size: + frame_size = self._parse_frame(fh, size, id3version=major) + if frame_size == 0: + break + parsed_size += frame_size + fh.seek(end_pos) + + def _parse_id3v1(self, fh: BinaryIO) -> None: + if fh.read(3) != b'TAG': # check if this is an ID3 v1 tag + return + + def asciidecode(x: bytes) -> str: + return self._unpad( + x.decode(self._default_encoding or 'latin1', 'replace')) + # Only set fields that were not set by ID3v2 tags, as ID3v1 + # tags are more likely to be outdated or have encoding issues + fields = fh.read(30 + 30 + 30 + 4 + 30 + 1) + if not self.title: + value = asciidecode(fields[:30]) + if value: + self._set_field('title', value) + if not self.artist: + value = asciidecode(fields[30:60]) + if value: + self._set_field('artist', value) + if not self.album: + value = asciidecode(fields[60:90]) + if value: + self._set_field('album', value) + if not self.year: + value = asciidecode(fields[90:94]) + if value: + self._set_field('year', value) + comment = fields[94:124] + if b'\x00\x00' < comment[-2:] < b'\x01\x00': + if self.track is None: + self._set_field('track', ord(comment[-1:])) + comment = comment[:-2] + if not self.comment: + value = asciidecode(comment) + if value: + self._set_field('comment', value) + if not self.genre: genre_id = ord(fields[124:125]) - if genre_id < len(ID3.ID3V1_GENRES): - self.genre = ID3.ID3V1_GENRES[genre_id] + if genre_id < len(self._ID3V1_GENRES): + self._set_field('genre', self._ID3V1_GENRES[genre_id]) + + def __parse_custom_field(self, content: str) -> bool: + custom_field_name, separator, value = content.partition('\x00') + custom_field_name_lower = custom_field_name.lower() + value = value.lstrip('\ufeff') + if custom_field_name_lower and separator and value: + field_name = self._ID3_MAPPING_CUSTOM.get( + custom_field_name_lower, + self._OTHER_PREFIX + 
custom_field_name_lower) + self._set_field(field_name, value) + return True + return False - def _parse_frame(self, fh, id3version=False): + @classmethod + def _create_tag_image(cls, + data: bytes, + pic_type: int, + mime_type: str | None = None, + description: str | None = None) -> tuple[str, Image]: + field_name = cls._UNKNOWN_IMAGE_TYPE + if 0 <= pic_type <= len(cls._IMAGE_TYPES): + field_name = cls._IMAGE_TYPES[pic_type] + name = field_name + if field_name.startswith(cls._OTHER_PREFIX): + name = field_name[len(cls._OTHER_PREFIX):] + image = Image(name, data) + if mime_type: + image.mime_type = mime_type + if description: + image.description = description + return field_name, image + + def _parse_frame(self, + fh: BinaryIO, + total_size: int, + id3version: int | None = None) -> int: # ID3v2.2 especially ugly. see: http://id3.org/id3v2-00 - frame_header_size = 6 if id3version == 2 else 10 + header_len = 6 if id3version == 2 else 10 frame_size_bytes = 3 if id3version == 2 else 4 - binformat = '3s3B' if id3version == 2 else '4s4B2B' - bits_per_byte = 7 if id3version == 4 else 8 # only id3v2.4 is synchsafe - frame_header_data = fh.read(frame_header_size) - if len(frame_header_data) != frame_header_size: + is_synchsafe_int = id3version == 4 + header = fh.read(header_len) + if len(header) != header_len: + return 0 + frame_id = self._decode_string(header[:frame_size_bytes]) + frame_size: int + if frame_size_bytes == 3: + frame_size = unpack('>I', b'\x00' + header[3:6])[0] + elif is_synchsafe_int: + frame_size = self._unsynchsafe(unpack('4B', header[4:8])) + else: + frame_size = unpack('>I', header[4:8])[0] + if _DEBUG: + print(f'Found id3 Frame {frame_id} at ' + f'{fh.tell()}-{fh.tell() + frame_size} of {self.filesize}') + if frame_size > total_size: + # invalid frame size, stop here return 0 - frame = struct.unpack(binformat, frame_header_data) - frame_id = self._decode_string(frame[0]) - frame_size = self._calc_size(frame[1:1+frame_size_bytes], bits_per_byte) - if 
DEBUG: - stderr('Found id3 Frame %s at %d-%d of %d' % (frame_id, fh.tell(), fh.tell() + frame_size, self.filesize)) - if frame_size > 0: - # flags = frame[1+frame_size_bytes:] # dont care about flags. - if frame_id not in ID3.PARSABLE_FRAME_IDS: # jump over unparsable frames - fh.seek(frame_size, os.SEEK_CUR) + content = fh.read(frame_size) + fieldname = self._ID3_MAPPING.get(frame_id) + should_set_field = True + if fieldname: + if not self._parse_tags: + return frame_size + language = fieldname in {'comment', 'other.lyrics'} + value = self._decode_string(content, language) + if not value: return frame_size - content = fh.read(frame_size) - fieldname = ID3.FRAME_ID_TO_FIELD.get(frame_id) - if fieldname: - self._set_field(fieldname, content, self._decode_string) - elif frame_id in self.IMAGE_FRAME_IDS and self._load_image: + if fieldname == "comment": + # check if comment is a key-value pair (used by iTunes) + should_set_field = not self.__parse_custom_field(value) + elif fieldname in {'track', 'disc'}: + if '/' in value: + value, total = value.split('/')[:2] + if total.isdecimal(): + self._set_field(f'{fieldname}_total', int(total)) + if value.isdecimal(): + self._set_field(fieldname, int(value)) + should_set_field = False + elif fieldname == 'genre': + genre_id = 255 + # funky: id3v1 genre hidden in a id3v2 field + if value.isdecimal(): + genre_id = int(value) + # funkier: the TCO may contain genres in parens, e.g '(13)' + elif value.startswith('('): + end_pos = value.find(')') + parens_text = value[1:end_pos] + if end_pos > 0 and parens_text.isdecimal(): + genre_id = int(parens_text) + if 0 <= genre_id < len(self._ID3V1_GENRES): + value = self._ID3V1_GENRES[genre_id] + if should_set_field: + self._set_field(fieldname, value) + elif frame_id in self._CUSTOM_FRAME_IDS: + # custom fields + if self._parse_tags: + value = self._decode_string(content) + if value: + self.__parse_custom_field(value) + elif frame_id in self._IMAGE_FRAME_IDS: + if self._load_image: # See 
section 4.14: http://id3.org/id3v2.4.0-frames + encoding = content[:1] if frame_id == 'PIC': # ID3 v2.2: - desc_end_pos = content.index(b'\x00', 1) + 1 + imgformat = self._decode_string(content[1:4]).lower() + mime_type = self._ID3V2_2_IMAGE_FORMATS.get(imgformat) + # skip encoding (1), imgformat (3), pictype(1) + desc_start_pos = 5 else: # ID3 v2.3+ - mimetype_end_pos = content.index(b'\x00', 1) + 1 - desc_start_pos = mimetype_end_pos + 1 # jump over picture type - desc_end_pos = content.index(b'\x00', desc_start_pos) + 1 - if content[desc_end_pos:desc_end_pos+1] == b'\x00': - desc_end_pos += 1 # the description ends with 1 or 2 null bytes - self._image_data = content[desc_end_pos:] - return frame_size - return 0 - - def _decode_string(self, bytestr): - try: # it's not my fault, this is the spec. - first_byte = bytestr[:1] - if first_byte == b'\x00': # ISO-8859-1 - bytestr = bytestr[1:] - encoding = 'ISO-8859-1' - elif first_byte == b'\x01': # UTF-16 with BOM - bytestr = bytestr[1:] - if bytestr[:5] == b'eng\xff\xfe': - bytestr = bytestr[3:] # remove language (but leave BOM) - if bytestr[:5] == b'eng\xfe\xff': - bytestr = bytestr[3:] # remove language (but leave BOM) - if bytestr[:4] == b'eng\x00': - bytestr = bytestr[4:] # remove language - if bytestr[:1] == b'\x00': - bytestr = bytestr[1:] # strip optional additional null byte - # read byte order mark to determine endianess - encoding = 'UTF-16be' if bytestr[0:2] == b'\xfe\xff' else 'UTF-16le' - # strip the bom and optional null bytes - bytestr = bytestr[2:] if len(bytestr) % 2 == 0 else bytestr[2:-1] - # remove ADDITIONAL EXTRA BOM :facepalm: - if bytestr[:4] == b'\x00\x00\xff\xfe': - bytestr = bytestr[4:] - elif first_byte == b'\x02': # UTF-16LE - # strip optional null byte, if byte count uneven - bytestr = bytestr[1:-1] if len(bytestr) % 2 == 0 else bytestr[1:] - encoding = 'UTF-16le' - elif first_byte == b'\x03': # UTF-8 - bytestr = bytestr[1:] - encoding = 'UTF-8' - else: - bytestr = bytestr - encoding = 
'ISO-8859-1' # wild guess - if bytestr[:4] == b'eng\x00': - bytestr = bytestr[4:] # remove language - errors = 'ignore' if self._ignore_errors else 'strict' - return self._unpad(codecs.decode(bytestr, encoding, errors)) - except UnicodeDecodeError: - raise TinyTagException('Error decoding ID3 Tag!') - - def _calc_size(self, bytestr, bits_per_byte): - # length of some mp3 header fields is described by 7 or 8-bit-bytes - return reduce(lambda accu, elem: (accu << bits_per_byte) + elem, bytestr, 0) - - -class Ogg(TinyTag): - def __init__(self, filehandler, filesize, *args, **kwargs): - TinyTag.__init__(self, filehandler, filesize, *args, **kwargs) - self._tags_parsed = False - self._max_samplenum = 0 # maximum sample position ever read + mime_end_pos = content.index(b'\x00', 1) + mime_type = self._decode_string( + content[1:mime_end_pos]).lower() + # skip mtype, pictype(1) + desc_start_pos = mime_end_pos + 2 + pic_type = content[desc_start_pos - 1] + # latin1 and utf-8 are 1 byte + if encoding in {b'\x00', b'\x03'}: + desc_end_pos = content.find(b'\x00', desc_start_pos) + 1 + else: + desc_end_pos = 0 + for i in range(desc_start_pos, len(content), 2): + if content[i:i + 2] == b'\x00\x00': + desc_end_pos = i + 2 + break + desc = self._decode_string( + encoding + content[desc_start_pos:desc_end_pos]) + field_name, image = self._create_tag_image( + content[desc_end_pos:], pic_type, mime_type, desc) + # pylint: disable=protected-access + self.images._set_field(field_name, image) + elif frame_id not in self._IGNORED_FRAME_IDS: + # unknown, try to add to other dict + if self._parse_tags: + value = self._decode_string(content) + if value: + self._set_field( + self._OTHER_PREFIX + frame_id.lower(), value) + return frame_size + + def _decode_string(self, value: bytes, language: bool = False) -> str: + default_encoding = 'ISO-8859-1' + if self._default_encoding: + default_encoding = self._default_encoding + # it's not my fault, this is the spec. 
+ first_byte = value[:1] + if first_byte == b'\x00': # ISO-8859-1 + value = value[1:] + encoding = default_encoding + elif first_byte == b'\x01': # UTF-16 with BOM + value = value[1:] + # remove language (but leave BOM) + if language: + if value[3:5] in {b'\xfe\xff', b'\xff\xfe'}: + value = value[3:] + if value[:3].isalpha(): + value = value[3:] # remove language + # strip optional additional null bytes + value = value.lstrip(b'\x00') + # read byte order mark to determine endianness + encoding = ('UTF-16be' if value.startswith(b'\xfe\xff') + else 'UTF-16le') + # strip the bom if it exists + if value.startswith(b'\xfe\xff') or value.startswith(b'\xff\xfe'): + value = value[2:] if len(value) % 2 == 0 else value[2:-1] + # remove ADDITIONAL OTHER BOM :facepalm: + if value.startswith(b'\x00\x00\xff\xfe'): + value = value[4:] + elif first_byte == b'\x02': # UTF-16 without BOM + # strip optional null byte, if byte count uneven + value = value[1:-1] if len(value) % 2 == 0 else value[1:] + encoding = 'UTF-16be' + elif first_byte == b'\x03': # UTF-8 + value = value[1:] + encoding = 'UTF-8' + else: + encoding = default_encoding # wild guess + if language and value[:3].isalpha(): + value = value[3:] # remove language + return self._unpad(value.decode(encoding, 'replace')) + + @staticmethod + def _unsynchsafe(ints: tuple[int, ...]) -> int: + return (ints[0] << 21) + (ints[1] << 14) + (ints[2] << 7) + ints[3] + + +class _Ogg(TinyTag): + """OGG Parser.""" + + _VORBIS_MAPPING = { + 'album': 'album', + 'albumartist': 'albumartist', + 'title': 'title', + 'artist': 'artist', + 'artists': 'artist', + 'author': 'artist', + 'date': 'year', + 'tracknumber': 'track', + 'tracktotal': 'track_total', + 'totaltracks': 'track_total', + 'discnumber': 'disc', + 'disctotal': 'disc_total', + 'totaldiscs': 'disc_total', + 'genre': 'genre', + 'description': 'comment', + 'comment': 'comment', + 'comments': 'comment', + 'composer': 'composer', + 'bpm': 'other.bpm', + 'copyright': 'other.copyright', + 
'isrc': 'other.isrc', + 'lyrics': 'other.lyrics', + 'publisher': 'other.publisher', + 'language': 'other.language', + 'director': 'other.director', + 'website': 'other.url', + 'conductor': 'other.conductor', + 'lyricist': 'other.lyricist', + 'discsubtitle': 'other.set_subtitle', + 'setsubtitle': 'other.set_subtitle', + 'initialkey': 'other.initial_key', + 'key': 'other.initial_key', + 'encodedby': 'other.encoded_by', + 'encodersettings': 'other.encoder_settings', + 'media': 'other.media', + 'license': 'other.license', + 'barcode': 'other.barcode', + 'catalognumber': 'other.catalog_number', + } + + def __init__(self) -> None: + super().__init__() + self._granule_pos = 0 + self._pre_skip = 0 # number of samples to skip in opus stream + self._audio_size: int | None = None # size of opus audio stream - def _determine_duration(self, fh): - max_page_size = 65536 # https://xiph.org/ogg/doc/libogg/ogg_page.html + def _determine_duration(self, fh: BinaryIO) -> None: if not self._tags_parsed: self._parse_tag(fh) # determine sample rate - fh.seek(0) # and rewind to start - if self.filesize > max_page_size: - fh.seek(-max_page_size, 2) # go to last possible page position - while True: - b = fh.peek(4) - if len(b) == 0: - return # EOF - if b[:4] == b'OggS': # look for an ogg header - for _ in self._parse_pages(fh): - pass # parse all remaining pages - self.duration = self._max_samplenum / float(self.samplerate) - else: - idx = b.find(b'OggS') # try to find header in peeked data - seekpos = idx if idx != -1 else len(b) - 3 - fh.seek(max(seekpos, 1), os.SEEK_CUR) - - def _parse_tag(self, fh): - page_start_pos = fh.tell() # set audio_offest later if its audio data + if self.duration is not None or not self.samplerate: + return # either ogg flac or invalid file + self.duration = max( + (self._granule_pos - self._pre_skip) / self.samplerate, 0 + ) + if self._audio_size is None or not self.duration: + return # not an opus file + self.bitrate = self._audio_size * 8 / self.duration / 
1000 + + def _parse_tag(self, fh: BinaryIO) -> None: + check_flac_second_packet = False + check_speex_second_packet = False for packet in self._parse_pages(fh): - walker = BytesIO(packet) - if packet[0:7] == b"\x01vorbis": - (channels, self.samplerate, max_bitrate, bitrate, - min_bitrate) = struct.unpack(" None: # for the spec, see: http://xiph.org/vorbis/doc/v-comment.html # discnumber tag based on: https://en.wikipedia.org/wiki/Vorbis_comment # https://sno.phy.queensu.ca/~phil/exiftool/TagNames/Vorbis.html - comment_type_to_attr_mapping = { - 'album': 'album', - 'albumartist': 'albumartist', - 'title': 'title', - 'artist': 'artist', - 'date': 'year', - 'tracknumber': 'track', - 'totaltracks': 'track_total', - 'discnumber': 'disc', - 'totaldiscs': 'disc_total', - 'genre': 'genre', - 'description': 'comment', - 'composer': 'composer', - } - vendor_length = struct.unpack('I', fh.read(4))[0] - fh.seek(vendor_length, os.SEEK_CUR) # jump over vendor - elements = struct.unpack('I', fh.read(4))[0] - for i in range(elements): - length = struct.unpack('I', fh.read(4))[0] - try: - keyvalpair = codecs.decode(fh.read(length), 'UTF-8') - except UnicodeDecodeError: - continue + if has_vendor: + vendor_length = unpack('I', fh.read(4))[0] + fh.seek(vendor_length, SEEK_CUR) # jump over vendor + elements = unpack('I', fh.read(4))[0] + for _i in range(elements): + length = unpack('I', fh.read(4))[0] + keyvalpair = fh.read(length).decode('utf-8', 'replace') if '=' in keyvalpair: key, value = keyvalpair.split('=', 1) - if DEBUG: - stderr('Found Vorbis Comment', key, value[:64]) - fieldname = comment_type_to_attr_mapping.get(key.lower()) - if fieldname: - self._set_field(fieldname, value) + key_lower = key.lower() + if key_lower == "metadata_block_picture": + if self._load_image: + if _DEBUG: + print('Found Vorbis Image', key, value[:64]) + # pylint: disable=protected-access + fieldname, fieldvalue = _Flac._parse_image( + BytesIO(a2b_base64(value))) + self.images._set_field(fieldname, 
fieldvalue) + else: + if _DEBUG: + print('Found Vorbis Comment', key, value[:64]) + fieldname = self._VORBIS_MAPPING.get( + key_lower, self._OTHER_PREFIX + key_lower) + if fieldname in { + 'track', 'disc', 'track_total', 'disc_total' + }: + if fieldname in {'track', 'disc'} and '/' in value: + value, total = value.split('/')[:2] + if total.isdecimal(): + self._set_field( + f'{fieldname}_total', int(total)) + if value.isdecimal(): + self._set_field(fieldname, int(value)) + elif value: + self._set_field(fieldname, value) - def _parse_pages(self, fh): + def _parse_pages(self, fh: BinaryIO) -> Iterator[bytearray]: # for the spec, see: https://wiki.xiph.org/Ogg - previous_page = b'' # contains data from previous (continuing) pages - header_data = fh.read(27) # read ogg page header - while len(header_data) != 0: - header = struct.unpack('<4sBBqIIiB', header_data) + packet_data = bytearray() + current_serial = None + last_granule_pos = 0 + last_audio_size = 0 + header_len = 27 + page_header = fh.read(header_len) # read ogg page header + while len(page_header) == header_len: + version = page_header[4] + if page_header[:4] != b'OggS' or version != 0: + raise ParseError('Invalid OGG header') # https://xiph.org/ogg/doc/framing.html - oggs, version, flags, pos, serial, pageseq, crc, segments = header - self._max_samplenum = max(self._max_samplenum, pos) - if oggs != b'OggS' or version != 0: - raise TinyTagException('Not a valid ogg file!') - segsizes = struct.unpack('B'*segments, fh.read(segments)) - total = 0 - for segsize in segsizes: # read all segments - total += segsize - if total < 255: # less than 255 bytes means end of page - yield previous_page + fh.read(total) - previous_page = b'' - total = 0 - if total != 0: - if total % 255 == 0: - previous_page += fh.read(total) + header_type = page_header[5] + eos = header_type & 0x04 + granule_pos, serial = unpack(' 0: + if eos: + self._granule_pos = granule_pos else: - yield previous_page + fh.read(total) - previous_page = b'' 
- header_data = fh.read(27) + self._granule_pos = last_granule_pos + last_granule_pos = granule_pos + segments = page_header[26] + seg_sizes = unpack('B' * segments, fh.read(segments)) + read_size = 0 + audio_size = 0 + for seg_size in seg_sizes: # read all segments + read_size += seg_size + if self._audio_size is not None: + audio_size += seg_size + # less than 255 bytes means end of packet + if seg_size < 255 and serial_match and not self._tags_parsed: + packet_data += fh.read(read_size) + yield packet_data + packet_data.clear() + read_size = 0 + if read_size: + if not serial_match or self._tags_parsed: + fh.seek(read_size, SEEK_CUR) + else: # packet continues on next page + packet_data += fh.read(read_size) + if serial_match and self._audio_size is not None: + if eos: + self._audio_size += last_audio_size + audio_size + else: + self._audio_size += last_audio_size + last_audio_size = audio_size + page_header = fh.read(header_len) + +class _Wave(TinyTag): + """WAVE Parser. -class Wave(TinyTag): - # https://sno.phy.queensu.ca/~phil/exiftool/TagNames/RIFF.html - riff_mapping = { + https://sno.phy.queensu.ca/~phil/exiftool/TagNames/RIFF.html + """ + + _RIFF_MAPPING = { b'INAM': 'title', b'TITL': 'title', + b'IPRD': 'album', b'IART': 'artist', + b'IBPM': 'other.bpm', b'ICMT': 'comment', + b'IMUS': 'composer', + b'ICOP': 'other.copyright', b'ICRD': 'year', b'IGNR': 'genre', + b'ILNG': 'other.language', + b'ISRC': 'other.isrc', + b'IPUB': 'other.publisher', + b'IPRT': 'track', + b'ITRK': 'track', b'TRCK': 'track', - b'PRT1': 'track', - b'PRT2': 'track_number', + b'IBSU': 'other.url', b'YEAR': 'year', - # riff format is lacking the composer field. 
+ b'IWRI': 'other.lyricist', + b'IENC': 'other.encoded_by', + b'IMED': 'other.media', } - def __init__(self, filehandler, filesize, *args, **kwargs): - TinyTag.__init__(self, filehandler, filesize, *args, **kwargs) - self._duration_parsed = False - - def _determine_duration(self, fh): - # see: https://ccrma.stanford.edu/courses/422/projects/WaveFormat/ - # and: https://en.wikipedia.org/wiki/WAV - riff, size, fformat = struct.unpack('4sI4s', fh.read(12)) - if riff != b'RIFF' or fformat != b'WAVE': - raise TinyTagException('not a wave file!') - bitdepth = 16 # assume 16bit depth (CD quality) - chunk_header = fh.read(8) - while len(chunk_header) == 8: - subchunkid, subchunksize = struct.unpack('4sI', chunk_header) - if subchunkid == b'fmt ': - _, self.channels, self.samplerate = struct.unpack('HHI', fh.read(8)) - _, _, bitdepth = struct.unpack(' None: + if not self._tags_parsed: + self._parse_tag(fh) + + def _parse_tag(self, fh: BinaryIO) -> None: + # http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html + # https://en.wikipedia.org/wiki/WAV + header = fh.read(12) + if header[:4] != b'RIFF' or header[8:12] != b'WAVE': + raise ParseError('Invalid WAV header') + if self._parse_duration: + self.bitdepth = 16 # assume 16bit depth (CD quality) + header_len = 8 + chunk_header = fh.read(header_len) + while len(chunk_header) == header_len: + subchunk_id = chunk_header[:4] + subchunk_size = unpack('I', chunk_header[4:])[0] + # IFF chunks are padded to an even number of bytes + subchunk_size += subchunk_size % 2 + if subchunk_id == b'fmt ' and self._parse_duration: + chunk = fh.read(subchunk_size) + _format_tag, channels, samplerate = unpack(' None: + if not self._tags_parsed: + self._parse_tag(fh) - def _determine_duration(self, fh, skip_tags=False): + def _parse_tag(self, fh: BinaryIO) -> None: + id3 = None + header = fh.read(4) + if header.startswith(b'ID3'): # parse ID3 header if it exists + fh.seek(-4, SEEK_CUR) + # pylint: disable=protected-access + id3 = 
_ID3() + id3._parse_tags = self._parse_tags + id3._load_image = self._load_image + id3._parse_id3v2(fh) + header = fh.read(4) # after ID3 should be fLaC + if header[:4] != b'fLaC': + raise ParseError('Invalid FLAC header') # for spec, see https://xiph.org/flac/ogg_mapping.html - header_data = fh.read(4) - while len(header_data): - meta_header = struct.unpack('B3B', header_data) - block_type = meta_header[0] & 0x7f - is_last_block = meta_header[0] & 0x80 - size = _bytes_to_int(meta_header[1:4]) + header_len = 4 + block_header = fh.read(header_len) + while len(block_header) == header_len: + block_type = block_header[0] & 0x7f + is_last_block = block_header[0] & 0x80 + size = unpack('>I', b'\x00' + block_header[1:])[0] # http://xiph.org/flac/format.html#metadata_block_streaminfo - if block_type == Flac.METADATA_STREAMINFO: - stream_info_header = fh.read(size) - if len(stream_info_header) < 34: # invalid streaminfo - return - header = struct.unpack('HH3s3s8B16s', stream_info_header) - # From the ciph documentation: + if block_type == self._STREAMINFO and self._parse_duration: + head = fh.read(size) + if len(head) < 34: # invalid streaminfo + break + # From the xiph documentation: # py | # ---------------------------------------------- # H | <16> The minimum block size (in samples) @@ -1016,284 +1663,260 @@ def _determine_duration(self, fh, skip_tags=False): # | <5> (bits per sample)-1. # | <36> Total samples in stream. # 16s| <128> MD5 signature - # min_blk, max_blk, min_frm, max_frm = header[0:4] - # min_frm = _bytes_to_int(struct.unpack('3B', min_frm)) - # max_frm = _bytes_to_int(struct.unpack('3B', max_frm)) # channels--. 
bits total samples # |----- samplerate -----| |-||----| |---------~ ~----| # 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 # #---4---# #---5---# #---6---# #---7---# #--8-~ ~-12-# - self.samplerate = _bytes_to_int(header[4:7]) >> 4 - self.channels = ((header[6] >> 1) & 0x07) + 1 - # bit_depth = ((header[6] & 1) << 4) + ((header[7] & 0xF0) >> 4) - # bit_depth = (bit_depth + 1) - total_sample_bytes = [(header[7] & 0x0F)] + list(header[8:12]) - total_samples = _bytes_to_int(total_sample_bytes) - self.duration = float(total_samples) / self.samplerate - if self.duration > 0: - self.bitrate = self.filesize / self.duration * 8 / 1024 - elif block_type == Flac.METADATA_VORBIS_COMMENT and not skip_tags: - oggtag = Ogg(fh, 0) - oggtag._parse_vorbis_comment(fh) - self.update(oggtag) - elif block_type == Flac.METADATA_PICTURE and self._load_image: - # https://xiph.org/flac/format.html#metadata_block_picture - pic_type, mime_len = struct.unpack('>2I', fh.read(8)) - mime = fh.read(mime_len) - description_len = struct.unpack('>I', fh.read(4))[0] - description = fh.read(description_len) - width, height, depth, colors, pic_len = struct.unpack('>5I', fh.read(20)) - self._image_data = fh.read(pic_len) - elif block_type >= 127: - return # invalid block type + sr = unpack('>I', b'\x00' + head[10:13])[0] >> 4 + self.channels = ((head[12] >> 1) & 0x07) + 1 + self.bitdepth = ( + ((head[12] & 1) << 4) + ((head[13] & 0xF0) >> 4) + 1) + tot_samples_b = bytes([head[13] & 0x0F]) + head[14:18] + tot_samples = unpack('>Q', b'\x00\x00\x00' + tot_samples_b)[0] + self.duration = duration = tot_samples / sr + self.samplerate = sr + if duration > 0: + self.bitrate = self.filesize * 8 / duration / 1000 + elif block_type == self._VORBIS_COMMENT and self._parse_tags: + # pylint: disable=protected-access + walker = BytesIO(fh.read(size)) + oggtag = _Ogg() + oggtag._parse_vorbis_comment(walker) + self._update(oggtag) + elif block_type == self._PICTURE and self._load_image: + fieldname, value = 
self._parse_image(fh) + # pylint: disable=protected-access + self.images._set_field(fieldname, value) else: - if DEBUG: - stderr('Unknown FLAC block type', block_type) - fh.seek(size, 1) # seek over this block - + fh.seek(size, SEEK_CUR) # seek over this block if is_last_block: - return - header_data = fh.read(4) - - -class Wma(TinyTag): - ASF_CONTENT_DESCRIPTION_OBJECT = b'3&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel' - ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT = b'@\xa4\xd0\xd2\x07\xe3\xd2\x11\x97\xf0\x00\xa0\xc9^\xa8P' - STREAM_BITRATE_PROPERTIES_OBJECT = b'\xceu\xf8{\x8dF\xd1\x11\x8d\x82\x00`\x97\xc9\xa2\xb2' - ASF_FILE_PROPERTY_OBJECT = b'\xa1\xdc\xab\x8cG\xa9\xcf\x11\x8e\xe4\x00\xc0\x0c Se' - ASF_STREAM_PROPERTIES_OBJECT = b'\x91\x07\xdc\xb7\xb7\xa9\xcf\x11\x8e\xe6\x00\xc0\x0c Se' - STREAM_TYPE_ASF_AUDIO_MEDIA = b'@\x9ei\xf8M[\xcf\x11\xa8\xfd\x00\x80_\\D+' - # see: - # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx - # and (japanese, but none the less helpful) - # http://uguisu.skr.jp/Windows/format_asf.html - - def __init__(self, filehandler, filesize, *args, **kwargs): - TinyTag.__init__(self, filehandler, filesize, *args, **kwargs) - self.__tag_parsed = False - - def _determine_duration(self, fh): - if not self.__tag_parsed: + break + block_header = fh.read(header_len) + if id3 is not None: # apply ID3 tags after vorbis + self._update(id3) + self._tags_parsed = True + + @classmethod + def _parse_image(cls, fh: BinaryIO) -> tuple[str, Image]: + # https://xiph.org/flac/format.html#metadata_block_picture + pic_type, mime_type_len = unpack('>II', fh.read(8)) + mime_type = fh.read(mime_type_len).decode('utf-8', 'replace') + description_len = unpack('>I', fh.read(4))[0] + description = fh.read(description_len).decode('utf-8', 'replace') + fh.seek(16, SEEK_CUR) # jump over width, height, depth, colors + pic_len = unpack('>I', fh.read(4))[0] + # pylint: disable=protected-access + return _ID3._create_tag_image( + 
fh.read(pic_len), pic_type, mime_type, description) + + +class _Wma(TinyTag): + """WMA Parser. + + http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx + http://uguisu.skr.jp/Windows/format_asf.html + """ + + _ASF_MAPPING = { + 'WM/ARTISTS': 'artist', + 'WM/TrackNumber': 'track', + 'WM/PartOfSet': 'disc', + 'WM/Year': 'year', + 'WM/AlbumArtist': 'albumartist', + 'WM/Genre': 'genre', + 'WM/AlbumTitle': 'album', + 'WM/Composer': 'composer', + 'WM/Publisher': 'other.publisher', + 'WM/BeatsPerMinute': 'other.bpm', + 'WM/InitialKey': 'other.initial_key', + 'WM/Lyrics': 'other.lyrics', + 'WM/Language': 'other.language', + 'WM/Director': 'other.director', + 'WM/AuthorURL': 'other.url', + 'WM/ISRC': 'other.isrc', + 'WM/Conductor': 'other.conductor', + 'WM/Writer': 'other.lyricist', + 'WM/SetSubTitle': 'other.set_subtitle', + 'WM/EncodedBy': 'other.encoded_by', + 'WM/EncodingSettings': 'other.encoder_settings', + 'WM/Media': 'other.media', + 'WM/Barcode': 'other.barcode', + 'WM/CatalogNo': 'other.catalog_number', + } + _UNPACK_FORMATS = { + 1: ' None: + if not self._tags_parsed: self._parse_tag(fh) - def read_blocks(self, fh, blocks): - # blocks are a list(tuple('fieldname', byte_count, cast_int), ...) 
- decoded = {} - for block in blocks: - val = fh.read(block[1]) - if block[2]: - val = _bytes_to_int_le(val) - decoded[block[0]] = val - return decoded - - def __bytes_to_guid(self, obj_id_bytes): - return '-'.join([ - hex(_bytes_to_int_le(obj_id_bytes[:-12]))[2:].zfill(6), - hex(_bytes_to_int_le(obj_id_bytes[-12:-10]))[2:].zfill(4), - hex(_bytes_to_int_le(obj_id_bytes[-10:-8]))[2:].zfill(4), - hex(_bytes_to_int(obj_id_bytes[-8:-6]))[2:].zfill(4), - hex(_bytes_to_int(obj_id_bytes[-6:]))[2:].zfill(12), - ]) - - def __decode_string(self, bytestring): - return self._unpad(codecs.decode(bytestring, 'utf-16')) - - def __decode_ext_desc(self, value_type, value): - """ decode ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT values""" - if value_type == 0: # Unicode string - return self.__decode_string(value) - elif value_type == 1: # BYTE array - return value - elif 1 < value_type < 6: # DWORD / QWORD / WORD - return _bytes_to_int_le(value) - - def _parse_tag(self, fh): - self.__tag_parsed = True - guid = fh.read(16) # 128 bit GUID - if guid != b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel': - return # not a valid ASF container! see: http://www.garykessler.net/library/file_sigs.html - struct.unpack('Q', fh.read(8))[0] # size - struct.unpack('I', fh.read(4))[0] # obj_count - if fh.read(2) != b'\x01\x02': - # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc521913958 - return # not a valid asf header! 
- while True: - object_id = fh.read(16) - object_size = _bytes_to_int_le(fh.read(8)) + def _parse_tag(self, fh: BinaryIO) -> None: + # http://www.garykessler.net/library/file_sigs.html + # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc521913958 + header = fh.read(30) + if (header[:16] != b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel' + or header[-1:] != b'\x02'): + raise ParseError('Invalid WMA header') + header_len = 24 + object_header = fh.read(header_len) + while len(object_header) == header_len: + object_size = unpack(' self.filesize: break # invalid object, stop parsing. - if object_id == Wma.ASF_CONTENT_DESCRIPTION_OBJECT: - len_blocks = self.read_blocks(fh, [ - ('title_length', 2, True), - ('author_length', 2, True), - ('copyright_length', 2, True), - ('description_length', 2, True), - ('rating_length', 2, True), - ]) - data_blocks = self.read_blocks(fh, [ - ('title', len_blocks['title_length'], False), - ('artist', len_blocks['author_length'], False), - ('', len_blocks['copyright_length'], True), - ('comment', len_blocks['description_length'], False), - ('', len_blocks['rating_length'], True), - ]) - for field_name, bytestring in data_blocks.items(): - if field_name: - self._set_field(field_name, bytestring, self.__decode_string) - elif object_id == Wma.ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT: - mapping = { - 'WM/TrackNumber': 'track', - 'WM/PartOfSet': 'disc', - 'WM/Year': 'year', - 'WM/AlbumArtist': 'albumartist', - 'WM/Genre': 'genre', - 'WM/AlbumTitle': 'album', - 'WM/Composer': 'composer', + object_id = object_header[:16] + if object_id == self._ASF_CONTENT_DESC and self._parse_tags: + walker = BytesIO(fh.read(object_size - header_len)) + (title_length, author_length, + copyright_length, description_length, + rating_length) = unpack('<5H', walker.read(10)) + data_blocks = { + 'title': title_length, + 'artist': author_length, + 'other.copyright': copyright_length, + 'comment': description_length, 
+ '_rating': rating_length, } - # see: http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc509555195 - descriptor_count = _bytes_to_int_le(fh.read(2)) + for i_field_name, length in data_blocks.items(): + value = self._unpad( + walker.read(length).decode('utf-16', 'replace')) + if not i_field_name.startswith('_') and value: + self._set_field(i_field_name, value) + elif object_id == self._ASF_EXT_CONTENT_DESC and self._parse_tags: + # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc509555195 + walker = BytesIO(fh.read(object_size - header_len)) + descriptor_count = unpack(' None: + header = fh.read(12) + if header[:4] != b'FORM' or header[8:12] not in {b'AIFC', b'AIFF'}: + raise ParseError('Invalid AIFF header') + header_len = 8 + chunk_header = fh.read(header_len) + while len(chunk_header) == header_len: + subchunk_id = chunk_header[:4] + subchunk_size = unpack('>I', chunk_header[4:])[0] + # IFF chunks are padded to an even number of bytes + subchunk_size += subchunk_size % 2 + if subchunk_id in self._AIFF_MAPPING and self._parse_tags: + value = self._unpad( + fh.read(subchunk_size).decode('utf-8', 'replace')) + self._set_field(self._AIFF_MAPPING[subchunk_id], value) + elif subchunk_id == b'COMM' and self._parse_duration: + chunk = fh.read(subchunk_size) + channels, num_frames, bitdepth = unpack('>hLh', chunk[:8]) + self.channels, self.bitdepth = channels, bitdepth + try: + # Extended precision + exp, mantissa = unpack('>HQ', chunk[8:18]) + sr = int(mantissa * (2 ** (exp - 0x3FFF - 63))) + duration = num_frames / sr + bitrate = sr * channels * bitdepth / 1000 + self.samplerate, self.duration, self.bitrate = ( + sr, duration, bitrate) + except OverflowError: + pass + elif subchunk_id in {b'id3 ', b'ID3 '} and self._parse_tags: + # pylint: disable=protected-access + id3 = _ID3() + id3._filehandler = fh + id3._load(tags=True, duration=False, image=self._load_image) + 
self._update(id3) + else: # some other chunk, just skip the data + fh.seek(subchunk_size, SEEK_CUR) + chunk_header = fh.read(header_len) + self._tags_parsed = True - chunkname = chunk.getname() - if chunkname == b'NAME': - # "Name Chunk text contains the name of the sampled sound." - self.title = self._unpad(chunk.read().decode('ascii')) - elif chunkname == b'AUTH': - # "Author Chunk text contains one or more author names. An author in - # this case is the creator of a sampled sound." - self.artist = self._unpad(chunk.read().decode('ascii')) - elif chunkname == b'ANNO': - # "Annotation Chunk text contains a comment. Use of this chunk is - # discouraged within FORM AIFC." Some tools: "hold my beer" - self._set_field('comment', self._unpad(chunk.read().decode('ascii'))) - elif chunkname == b'(c) ': - # "The Copyright Chunk contains a copyright notice for the sound. text - # contains a date followed by the copyright owner. The chunk ID '[c] ' - # serves as the copyright character. " Some tools: "hold my beer" - field = chunk.read().decode('utf-8') - self._set_field('extra.copyright', field) - elif chunkname == b'ID3 ': - super(Aiff, self)._parse_tag(fh) - elif chunkname == b'SSND': - # probably the closest equivalent, but this isn't particular viable - # for AIFF - self.audio_offset = fh.tell() - chunk.skip() - else: - chunk.skip() + def _determine_duration(self, fh: BinaryIO) -> None: + if not self._tags_parsed: + self._parse_tag(fh)