diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03fc854..be76d96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ jobs: name: cargo check runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: @@ -21,11 +22,16 @@ jobs: name: cargo test runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable + - name: set up PATH on Windows + # Needed to use GNU's patch.exe instead of Strawberry Perl patch + if: runner.os == 'Windows' + run: echo "C:\Program Files\Git\usr\bin" >> $env:GITHUB_PATH - run: cargo test fmt: @@ -41,6 +47,7 @@ jobs: name: cargo clippy -- -D warnings runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: @@ -49,11 +56,28 @@ jobs: - run: rustup component add clippy - run: cargo clippy -- -D warnings + gnu-testsuite: + name: GNU test suite + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo build --release + # do not fail, the report is merely informative (at least until all tests pass reliably) + - run: ./tests/run-upstream-testsuite.sh release || true + env: + TERM: xterm + - uses: actions/upload-artifact@v4 + with: + name: test-results.json + path: tests/test-results.json + - run: ./tests/print-test-results.sh tests/test-results.json + coverage: name: Code Coverage runs-on: ${{ matrix.job.os }} strategy: - fail-fast: true + fail-fast: false matrix: job: - { os: ubuntu-latest , features: unix } @@ -83,6 +107,10 @@ jobs: - name: rust toolchain ~ install uses: dtolnay/rust-toolchain@nightly + - name: set up PATH on Windows + # Needed to use GNU's patch.exe instead of Strawberry Perl patch + if: runner.os == 'Windows' + run: echo "C:\Program Files\Git\usr\bin" >> $env:GITHUB_PATH 
- name: Test run: cargo test ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-fail-fast env: @@ -122,10 +150,9 @@ jobs: grcov . --output-type lcov --output-path "${COVERAGE_REPORT_FILE}" --branch --ignore build.rs --ignore "vendor/*" --ignore "/*" --ignore "[a-zA-Z]:/*" --excl-br-line "^\s*((debug_)?assert(_eq|_ne)?!|#\[derive\()" echo "report=${COVERAGE_REPORT_FILE}" >> $GITHUB_OUTPUT - name: Upload coverage results (to Codecov.io) - uses: codecov/codecov-action@v3 - # if: steps.vars.outputs.HAS_CODECOV_TOKEN + uses: codecov/codecov-action@v4 with: - # token: ${{ secrets.CODECOV_TOKEN }} + token: ${{ secrets.CODECOV_TOKEN }} file: ${{ steps.coverage.outputs.report }} ## flags: IntegrationTests, UnitTests, ${{ steps.vars.outputs.CODECOV_FLAGS }} flags: ${{ steps.vars.outputs.CODECOV_FLAGS }} diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml new file mode 100644 index 0000000..589b952 --- /dev/null +++ b/.github/workflows/fuzzing.yml @@ -0,0 +1,72 @@ +name: Fuzzing + +# spell-checker:ignore fuzzer + +on: + pull_request: + push: + branches: + - main + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + fuzz-build: + name: Build the fuzzers + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + - name: Install `cargo-fuzz` + run: cargo install cargo-fuzz + - uses: Swatinem/rust-cache@v2 + with: + shared-key: "cargo-fuzz-cache-key" + cache-directories: "fuzz/target" + - name: Run `cargo-fuzz build` + run: cargo +nightly fuzz build + + fuzz-run: + needs: fuzz-build + name: Run the fuzzers + runs-on: ubuntu-latest + timeout-minutes: 5 + env: + RUN_FOR: 60 + strategy: + matrix: + test-target: + - { name: fuzz_ed, should_pass: true } + - { name: fuzz_normal, should_pass: true } + - { name: fuzz_patch, should_pass: true } + steps: + - uses: actions/checkout@v4 + - uses: 
dtolnay/rust-toolchain@nightly + - name: Install `cargo-fuzz` + run: cargo install cargo-fuzz + - uses: Swatinem/rust-cache@v2 + with: + shared-key: "cargo-fuzz-cache-key" + cache-directories: "fuzz/target" + - name: Restore Cached Corpus + uses: actions/cache/restore@v4 + with: + key: corpus-cache-${{ matrix.test-target.name }} + path: | + fuzz/corpus/${{ matrix.test-target.name }} + - name: Run ${{ matrix.test-target.name }} for XX seconds + shell: bash + continue-on-error: ${{ !matrix.test-target.should_pass }} + run: | + cargo +nightly fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0 + - name: Save Corpus Cache + uses: actions/cache/save@v4 + with: + key: corpus-cache-${{ matrix.test-target.name }} + path: | + fuzz/corpus/${{ matrix.test-target.name }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..502a5ff --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,266 @@ +# Copyright 2022-2023, axodotdev +# SPDX-License-Identifier: MIT or Apache-2.0 +# +# CI that: +# +# * checks for a Git Tag that looks like a release +# * builds artifacts with cargo-dist (archives, installers, hashes) +# * uploads those artifacts to temporary workflow zip +# * on success, uploads the artifacts to a Github Release +# +# Note that the Github Release will be created with a generated +# title/body based on your changelogs. + +name: Release + +permissions: + contents: write + +# This task will run whenever you push a git tag that looks like a version +# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. +# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where +# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION +# must be a Cargo-style SemVer Version (must have at least major.minor.patch). 
+# +# If PACKAGE_NAME is specified, then the announcement will be for that +# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). +# +# If PACKAGE_NAME isn't specified, then the announcement will be for all +# (cargo-dist-able) packages in the workspace with that version (this mode is +# intended for workspaces with only one dist-able package, or with all dist-able +# packages versioned/released in lockstep). +# +# If you push multiple tags at once, separate instances of this workflow will +# spin up, creating an independent announcement for each one. However Github +# will hard limit this to 3 tags per commit, as it will assume more tags is a +# mistake. +# +# If there's a prerelease-style suffix to the version, then the release(s) +# will be marked as a prerelease. +on: + push: + tags: + - '**[0-9]+.[0-9]+.[0-9]+*' + pull_request: + +jobs: + # Run 'cargo dist plan' (or host) to determine what tasks we need to do + plan: + runs-on: ubuntu-latest + outputs: + val: ${{ steps.plan.outputs.manifest }} + tag: ${{ !github.event.pull_request && github.ref_name || '' }} + tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} + publishing: ${{ !github.event.pull_request }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cargo-dist + # we specify bash to get pipefail; it guards against the `curl` command + # failing. otherwise `sh` won't catch that `curl` returned non-0 + shell: bash + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.12.0/cargo-dist-installer.sh | sh" + # sure would be cool if github gave us proper conditionals... + # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible + # functionality based on whether this is a pull_request, and whether it's from a fork. 
+ # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* + # but also really annoying to build CI around when it needs secrets to work right.) + - id: plan + run: | + cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json + echo "cargo dist ran successfully" + cat plan-dist-manifest.json + echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" + - name: "Upload dist-manifest.json" + uses: actions/upload-artifact@v4 + with: + name: artifacts-plan-dist-manifest + path: plan-dist-manifest.json + + # Build and packages all the platform-specific things + build-local-artifacts: + name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) + # Let the initial task tell us to not run (currently very blunt) + needs: + - plan + if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} + strategy: + fail-fast: false + # Target platforms/runners are computed by cargo-dist in create-release. 
+ # Each member of the matrix has the following arguments: + # + # - runner: the github runner + # - dist-args: cli flags to pass to cargo dist + # - install-dist: expression to run to install cargo-dist on the runner + # + # Typically there will be: + # - 1 "global" task that builds universal installers + # - N "local" tasks that build each platform's binaries and platform-specific installers + matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} + runs-on: ${{ matrix.runner }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: swatinem/rust-cache@v2 + - name: Install cargo-dist + run: ${{ matrix.install_dist }} + # Get the dist-manifest + - name: Fetch local artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + - name: Install dependencies + run: | + ${{ matrix.packages_install }} + - name: Build artifacts + run: | + # Actually do builds and make zips and whatnot + cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json + echo "cargo dist ran successfully" + - id: cargo-dist + name: Post-build + # We force bash here just because github makes it really hard to get values up + # to "real" actions without writing to env-vars, and writing to env-vars has + # inconsistent syntax between shell and powershell. + shell: bash + run: | + # Parse out what we just built and upload it to scratch storage + echo "paths<<EOF" >> "$GITHUB_OUTPUT" + jq --raw-output ".artifacts[]?.path | select( . 
!= null )" dist-manifest.json >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + + cp dist-manifest.json "$BUILD_MANIFEST_NAME" + - name: "Upload artifacts" + uses: actions/upload-artifact@v4 + with: + name: artifacts-build-local-${{ join(matrix.targets, '_') }} + path: | + ${{ steps.cargo-dist.outputs.paths }} + ${{ env.BUILD_MANIFEST_NAME }} + + # Build and package all the platform-agnostic(ish) things + build-global-artifacts: + needs: + - plan + - build-local-artifacts + runs-on: "ubuntu-20.04" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cargo-dist + shell: bash + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.12.0/cargo-dist-installer.sh | sh" + # Get all the local artifacts for the global tasks to use (for e.g. checksums) + - name: Fetch local artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + - id: cargo-dist + shell: bash + run: | + cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json + echo "cargo dist ran successfully" + + # Parse out what we just built and upload it to scratch storage + echo "paths<<EOF" >> "$GITHUB_OUTPUT" + jq --raw-output ".artifacts[]?.path | select( . 
!= null )" dist-manifest.json >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + + cp dist-manifest.json "$BUILD_MANIFEST_NAME" + - name: "Upload artifacts" + uses: actions/upload-artifact@v4 + with: + name: artifacts-build-global + path: | + ${{ steps.cargo-dist.outputs.paths }} + ${{ env.BUILD_MANIFEST_NAME }} + # Determines if we should publish/announce + host: + needs: + - plan + - build-local-artifacts + - build-global-artifacts + # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) + if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + runs-on: "ubuntu-20.04" + outputs: + val: ${{ steps.host.outputs.manifest }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cargo-dist + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.12.0/cargo-dist-installer.sh | sh" + # Fetch artifacts from scratch-storage + - name: Fetch artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + # This is a harmless no-op for Github Releases, hosting for that happens in "announce" + - id: host + shell: bash + run: | + cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json + echo "artifacts uploaded and released successfully" + cat dist-manifest.json + echo "manifest=$(jq -c "." 
dist-manifest.json)" >> "$GITHUB_OUTPUT" + - name: "Upload dist-manifest.json" + uses: actions/upload-artifact@v4 + with: + # Overwrite the previous copy + name: artifacts-dist-manifest + path: dist-manifest.json + + # Create a Github Release while uploading all files to it + announce: + needs: + - plan + - host + # use "always() && ..." to allow us to wait for all publish jobs while + # still allowing individual publish jobs to skip themselves (for prereleases). + # "host" however must run to completion, no skipping allowed! + if: ${{ always() && needs.host.result == 'success' }} + runs-on: "ubuntu-20.04" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: "Download Github Artifacts" + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: artifacts + merge-multiple: true + - name: Cleanup + run: | + # Remove the granular manifests + rm -f artifacts/*-dist-manifest.json + - name: Create Github Release + uses: ncipollo/release-action@v1 + with: + tag: ${{ needs.plan.outputs.tag }} + name: ${{ fromJson(needs.host.outputs.val).announcement_title }} + body: ${{ fromJson(needs.host.outputs.val).announcement_github_body }} + prerelease: ${{ fromJson(needs.host.outputs.val).announcement_is_prerelease }} + artifacts: "artifacts/*" diff --git a/Cargo.lock b/Cargo.lock index 2ae28d9..d70d307 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,18 +2,275 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "assert_cmd" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "bstr" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.15.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" + +[[package]] +name = "cc" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "diffutils" -version = "0.3.0" +version = "0.4.0" dependencies = [ + "assert_cmd", + "chrono", "diff", + "predicates", "pretty_assertions", + "regex", + "same-file", + "tempfile", + "unicode-width", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = 
"memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "predicates" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", ] [[package]] @@ -26,6 +283,305 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = 
"2.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys", +] + +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98532992affa02e52709d5b4d145a3668ae10d9081eea4a7f26f719a8476f71" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7269c1442e75af9fa59290383f7665b828efc76c429cc0b7f2ecb33cf51ebae" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f70ab2cebf332b7ecbdd98900c2da5298a8c862472fb35c75fc297eabb9d89b8" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "679f235acf6b1639408c0f6db295697a19d103b0cdc88146aa1b992c580c647d" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3480ac194b55ae274a7e135c21645656825da4a7f5b6e9286291b2113c94a78b" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c46bab241c121402d1cb47d028ea3680ee2f359dcc287482dcf7fdddc73363" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc885a4332ee1afb9a1bacf11514801011725570d35675abc229ce7e3afe4d20" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9e440c60457f84b0bee09208e62acc7ade264b38c4453f6312b8c9ab1613e73c" + [[package]] name = "yansi" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 984e4d9..0e8dab9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "diffutils" -version = "0.3.0" +version = "0.4.0" edition = "2021" description = "A CLI app for generating diff files" license = "MIT OR Apache-2.0" @@ -15,7 +15,32 @@ name = "diffutils" path = "src/main.rs" [dependencies] -diff = "0.1.10" +chrono = "0.4.38" +diff = "0.1.13" +regex = "1.10.4" +same-file = "1.0.6" +unicode-width = "0.1.11" [dev-dependencies] pretty_assertions = "1" +assert_cmd = "2.0.14" +predicates = "3.1.0" +tempfile = "3.10.1" + +# The profile that 'cargo dist' will build with +[profile.dist] +inherits = "release" +lto = "thin" + +# Config for 'cargo dist' +[workspace.metadata.dist] +# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.12.0" +# CI backends to support +ci = ["github"] +# The installers to generate for each app +installers = [] +# Target platforms to build apps for (Rust target-triple syntax) +targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] +# Publish jobs to run in CI +pr-run-mode = "plan" diff --git a/README.md b/README.md index 009f66a..552df09 100644 --- a/README.md +++ b/README.md @@ -1,58 +1,56 @@ -The goal of this package is to be a dropped in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) in Rust. 
+[![Crates.io](https://img.shields.io/crates/v/diffutils.svg)](https://crates.io/crates/diffutils) +[![Discord](https://img.shields.io/badge/discord-join-7289DA.svg?logo=discord&longCache=true&style=flat)](https://discord.gg/wQVJbvJ) +[![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/uutils/diffutils/blob/main/LICENSE) +[![dependency status](https://deps.rs/repo/github/uutils/diffutils/status.svg)](https://deps.rs/repo/github/uutils/diffutils) +[![CodeCov](https://codecov.io/gh/uutils/diffutils/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/diffutils) + +The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) in Rust. Based on the incomplete diff generator in https://github.com/rust-lang/rust/blob/master/src/tools/compiletest/src/runtest.rs, and made to be compatible with GNU's diff and patch tools. + +## Installation + +Ensure you have Rust installed on your system. You can install Rust through [rustup](https://rustup.rs/). + +Clone the repository and build the project using Cargo: + +```bash +git clone https://github.com/uutils/diffutils.git +cd diffutils +cargo build --release ``` -~/diffutils$ cargo run -- diff -u3 Cargo.lock Cargo.toml + +## Example + +```bash + +cat <<EOF >fruits_old.txt +Apple +Banana +Cherry +EOF + +cat <<EOF >fruits_new.txt +Apple +Fig +Cherry +EOF + +$ cargo run -- -u fruits_old.txt fruits_new.txt Finished dev [unoptimized + debuginfo] target(s) in 0.00s - Running `target/debug/diff -u3 Cargo.lock Cargo.toml` ---- Cargo.lock -+++ Cargo.toml -@@ -1,39 +1,7 @@ --# This file is automatically @generated by Cargo. --# It is not intended for manual editing.
--version = 3 -- --[[package]] --name = "context-diff" --version = "0.1.0" --dependencies = [ -- "diff 0.1.12", --] -- --[[package]] --name = "diff" --version = "0.1.0" --dependencies = [ -- "context-diff", -- "normal-diff", -- "unified-diff", --] -- --[[package]] --name = "diff" --version = "0.1.12" --source = "registry+https://github.com/rust-lang/crates.io-index" --checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" -- --[[package]] --name = "normal-diff" --version = "0.1.0" --dependencies = [ -- "diff 0.1.12", --] -- --[[package]] --name = "unified-diff" --version = "0.3.0" --dependencies = [ -- "diff 0.1.12", -+[workspace] -+members = [ -+ "lib/unified-diff", -+ "lib/context-diff", -+ "lib/normal-diff", -+ "bin/diff", - ] + Running `target/debug/diffutils -u fruits_old.txt fruits_new.txt` +--- fruits_old.txt ++++ fruits_new.txt +@@ -1,3 +1,3 @@ + Apple +-Banana ++Fig + Cherry + ``` + +## License + +diffutils is licensed under the MIT and Apache Licenses - see the `LICENSE-MIT` or `LICENSE-APACHE` files for details diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index c1bebc9..650e1d4 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,7 +9,7 @@ edition = "2018" cargo-fuzz = true [dependencies] -libfuzzer-sys = "0.3" +libfuzzer-sys = "0.4" diffutils = { path = "../" } # Prevent this from interfering with workspaces diff --git a/fuzz/fuzz_targets/fuzz_ed.rs b/fuzz/fuzz_targets/fuzz_ed.rs index e46908d..7c38fda 100644 --- a/fuzz/fuzz_targets/fuzz_ed.rs +++ b/fuzz/fuzz_targets/fuzz_ed.rs @@ -1,11 +1,19 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutils::{ed_diff, normal_diff, unified_diff}; +use diffutilslib::ed_diff; +use diffutilslib::ed_diff::DiffError; +use diffutilslib::params::Params; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { + let mut output = ed_diff::diff(expected, actual, 
&Params::default())?; + writeln!(&mut output, "w {filename}").unwrap(); + Ok(output) +} + fuzz_target!(|x: (Vec, Vec)| { let (mut from, mut to) = x; from.push(b'\n'); @@ -30,7 +38,7 @@ fuzz_target!(|x: (Vec, Vec)| { } else { return; } - let diff = ed_diff::diff_w(&from, &to, "target/fuzz.file").unwrap(); + let diff = diff_w(&from, &to, "target/fuzz.file").unwrap(); File::create("target/fuzz.file.original") .unwrap() .write_all(&from) diff --git a/fuzz/fuzz_targets/fuzz_normal.rs b/fuzz/fuzz_targets/fuzz_normal.rs index 4e114d2..6b1e6b9 100644 --- a/fuzz/fuzz_targets/fuzz_normal.rs +++ b/fuzz/fuzz_targets/fuzz_normal.rs @@ -1,7 +1,8 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutils::{normal_diff, unified_diff}; +use diffutilslib::normal_diff; +use diffutilslib::params::Params; use std::fs::{self, File}; use std::io::Write; @@ -21,7 +22,7 @@ fuzz_target!(|x: (Vec, Vec)| { } else { return }*/ - let diff = normal_diff::diff(&from, &to); + let diff = normal_diff::diff(&from, &to, &Params::default()); File::create("target/fuzz.file.original") .unwrap() .write_all(&from) diff --git a/fuzz/fuzz_targets/fuzz_patch.rs b/fuzz/fuzz_targets/fuzz_patch.rs index c190d76..4dea4b5 100644 --- a/fuzz/fuzz_targets/fuzz_patch.rs +++ b/fuzz/fuzz_targets/fuzz_patch.rs @@ -1,7 +1,8 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutils::{normal_diff, unified_diff}; +use diffutilslib::params::Params; +use diffutilslib::unified_diff; use std::fs::{self, File}; use std::io::Write; use std::process::Command; @@ -22,10 +23,13 @@ fuzz_target!(|x: (Vec, Vec, u8)| { }*/ let diff = unified_diff::diff( &from, - "a/fuzz.file", &to, - "target/fuzz.file", - context as usize, + &Params { + from: "a/fuzz.file".into(), + to: "target/fuzz.file".into(), + context_count: context as usize, + ..Default::default() + } ); File::create("target/fuzz.file.original") .unwrap() diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null 
+++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} diff --git a/src/context_diff.rs b/src/context_diff.rs index 1c9d44f..e276ce5 100644 --- a/src/context_diff.rs +++ b/src/context_diff.rs @@ -6,6 +6,10 @@ use std::collections::VecDeque; use std::io::Write; +use crate::params::Params; +use crate::utils::do_write_line; +use crate::utils::get_modification_time; + #[derive(Debug, PartialEq)] pub enum DiffLine { Context(Vec), @@ -41,7 +45,12 @@ impl Mismatch { } // Produces a diff between the expected output and actual output. -fn make_diff(expected: &[u8], actual: &[u8], context_size: usize) -> Vec { +fn make_diff( + expected: &[u8], + actual: &[u8], + context_size: usize, + stop_early: bool, +) -> Vec { let mut line_number_expected = 1; let mut line_number_actual = 1; let mut context_queue: VecDeque<&[u8]> = VecDeque::with_capacity(context_size); @@ -191,6 +200,10 @@ fn make_diff(expected: &[u8], actual: &[u8], context_size: usize) -> Vec Vec Vec { - let mut output = format!("*** {expected_filename}\t\n--- {actual_filename}\t\n").into_bytes(); - let diff_results = make_diff(expected, actual, context_size); +pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { + let from_modified_time = get_modification_time(¶ms.from.to_string_lossy()); + let to_modified_time = get_modification_time(¶ms.to.to_string_lossy()); + let mut output = format!( + "*** {0}\t{1}\n--- {2}\t{3}\n", + params.from.to_string_lossy(), + from_modified_time, + params.to.to_string_lossy(), + to_modified_time + ) + .into_bytes(); + let diff_results = make_diff(expected, actual, params.context_count, params.brief); if diff_results.is_empty() { return Vec::new(); - }; + } + if params.brief { + return output; + } for result in diff_results { let mut line_number_expected = result.line_number_expected; let mut line_number_actual = result.line_number_actual; @@ -301,17 +320,20 @@ pub fn diff( 
match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "- ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } } @@ -328,17 +350,20 @@ pub fn diff( match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "+ ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } } @@ -404,8 +429,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = - diff(&alef, "a/alef", &bet, &format!("{target}/alef"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alef".into(), + to: (&format!("{target}/alef")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/ab.diff")) .unwrap() .write_all(&diff) @@ -422,7 +455,7 @@ mod tests { .stdin(File::open(&format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alef")).unwrap(); @@ -477,8 +510,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = - diff(&alef, "a/alef_", &bet, &format!("{target}/alef_"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alef_".into(), + to: (&format!("{target}/alef_")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/ab_.diff")) .unwrap() .write_all(&diff) @@ -495,7 +536,7 @@ mod tests { .stdin(File::open(&format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alef_")).unwrap(); @@ -553,8 +594,16 @@ mod tests { }; // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = - diff(&alef, "a/alefx", &bet, &format!("{target}/alefx"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alefx".into(), + to: (&format!("{target}/alefx")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/abx.diff")) .unwrap() .write_all(&diff) @@ -571,7 +620,7 @@ mod tests { .stdin(File::open(&format!("{target}/abx.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefx")).unwrap(); @@ -632,8 +681,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = - diff(&alef, "a/alefr", &bet, &format!("{target}/alefr"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alefr".into(), + to: (&format!("{target}/alefr")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/abr.diff")) .unwrap() .write_all(&diff) @@ -650,7 +707,7 @@ mod tests { .stdin(File::open(&format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefr")).unwrap(); @@ -662,4 +719,78 @@ mod tests { } } } + + #[test] + fn test_stop_early() { + use crate::assert_diff_eq; + + let from_filename = "foo"; + let from = ["a", "b", "c", ""].join("\n"); + let to_filename = "bar"; + let to = ["a", "d", "c", ""].join("\n"); + + let diff_full = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + ..Default::default() + }, + ); + + let expected_full = [ + "*** 
foo\tTIMESTAMP", + "--- bar\tTIMESTAMP", + "***************", + "*** 1,3 ****", + " a", + "! b", + " c", + "--- 1,3 ----", + " a", + "! d", + " c", + "", + ] + .join("\n"); + assert_diff_eq!(diff_full, expected_full); + + let diff_brief = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + brief: true, + ..Default::default() + }, + ); + + let expected_brief = ["*** foo\tTIMESTAMP", "--- bar\tTIMESTAMP", ""].join("\n"); + assert_diff_eq!(diff_brief, expected_brief); + + let nodiff_full = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + ..Default::default() + }, + ); + assert!(nodiff_full.is_empty()); + + let nodiff_brief = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + brief: true, + ..Default::default() + }, + ); + assert!(nodiff_brief.is_empty()); + } } diff --git a/src/ed_diff.rs b/src/ed_diff.rs index 2f8c0dd..304a26f 100644 --- a/src/ed_diff.rs +++ b/src/ed_diff.rs @@ -5,6 +5,9 @@ use std::io::Write; +use crate::params::Params; +use crate::utils::do_write_line; + #[derive(Debug, PartialEq)] struct Mismatch { pub line_number_expected: usize, @@ -42,7 +45,7 @@ impl Mismatch { } // Produces a diff between the expected output and actual output. 
-fn make_diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { +fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result, DiffError> { let mut line_number_expected = 1; let mut line_number_actual = 1; let mut results = Vec::new(); @@ -94,6 +97,10 @@ fn make_diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> } } } + if stop_early && !results.is_empty() { + // Optimization: stop analyzing the files as soon as there are any differences + return Ok(results); + } } if !mismatch.actual.is_empty() || !mismatch.expected.is_empty() { @@ -103,9 +110,13 @@ fn make_diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> Ok(results) } -pub fn diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { +pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Result, DiffError> { let mut output = Vec::new(); - let diff_results = make_diff(expected, actual)?; + let diff_results = make_diff(expected, actual, params.brief)?; + if params.brief && !diff_results.is_empty() { + write!(&mut output, "\0").unwrap(); + return Ok(output); + } let mut lines_offset = 0; for result in diff_results { let line_number_expected: isize = result.line_number_expected as isize + lines_offset; @@ -122,6 +133,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { expected_count + line_number_expected - 1 ) .unwrap(), + (1, _) => writeln!(&mut output, "{line_number_expected}c").unwrap(), _ => writeln!( &mut output, "{},{}c", @@ -136,7 +148,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { if actual == b"." 
{ writeln!(&mut output, "..\n.\ns/.//\na").unwrap(); } else { - output.write_all(actual).unwrap(); + do_write_line(&mut output, actual, params.expand_tabs, params.tabsize).unwrap(); writeln!(&mut output).unwrap(); } } @@ -151,11 +163,20 @@ mod tests { use super::*; use pretty_assertions::assert_eq; pub fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { - let mut output = diff(expected, actual)?; + let mut output = diff(expected, actual, &Params::default())?; writeln!(&mut output, "w {filename}").unwrap(); Ok(output) } + #[test] + fn test_basic() { + let from = b"a\n"; + let to = b"b\n"; + let diff = diff(from, to, &Params::default()).unwrap(); + let expected = ["1c", "b", ".", ""].join("\n"); + assert_eq!(diff, expected.as_bytes()); + } + #[test] fn test_permutations() { let target = "target/ed-diff/"; @@ -167,9 +188,8 @@ mod tests { for &d in &[0, 1, 2] { for &e in &[0, 1, 2] { for &f in &[0, 1, 2] { - use std::fs::{self, File}; + use std::fs::File; use std::io::Write; - use std::process::Command; let mut alef = Vec::new(); let mut bet = Vec::new(); alef.write_all(if a == 0 { b"a\n" } else { b"b\n" }) @@ -205,7 +225,7 @@ mod tests { // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
let diff = diff_w(&alef, &bet, &format!("{target}/alef")).unwrap(); - File::create("target/ab.ed") + File::create(&format!("{target}/ab.ed")) .unwrap() .write_all(&diff) .unwrap(); @@ -215,16 +235,20 @@ mod tests { fb.write_all(&bet[..]).unwrap(); let _ = fa; let _ = fb; - let output = Command::new("ed") - .arg(&format!("{target}/alef")) - .stdin(File::open("target/ab.ed").unwrap()) - .output() - .unwrap(); - assert!(output.status.success(), "{:?}", output); - //println!("{}", String::from_utf8_lossy(&output.stdout)); - //println!("{}", String::from_utf8_lossy(&output.stderr)); - let alef = fs::read(&format!("{target}/alef")).unwrap(); - assert_eq!(alef, bet); + #[cfg(not(windows))] // there's no ed on windows + { + use std::process::Command; + let output = Command::new("ed") + .arg(&format!("{target}/alef")) + .stdin(File::open(&format!("{target}/ab.ed")).unwrap()) + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = std::fs::read(&format!("{target}/alef")).unwrap(); + assert_eq!(alef, bet); + } } } } @@ -244,9 +268,8 @@ mod tests { for &d in &[0, 1, 2] { for &e in &[0, 1, 2] { for &f in &[0, 1, 2] { - use std::fs::{self, File}; + use std::fs::File; use std::io::Write; - use std::process::Command; let mut alef = Vec::new(); let mut bet = Vec::new(); alef.write_all(if a == 0 { b"\n" } else { b"b\n" }).unwrap(); @@ -275,27 +298,31 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = diff_w(&alef, &bet, "target/alef_").unwrap(); - File::create("target/ab_.ed") + let diff = diff_w(&alef, &bet, &format!("{target}/alef_")).unwrap(); + File::create(&format!("{target}/ab_.ed")) .unwrap() .write_all(&diff) .unwrap(); - let mut fa = File::create("target/alef_").unwrap(); + let mut fa = File::create(&format!("{target}/alef_")).unwrap(); fa.write_all(&alef[..]).unwrap(); let mut fb = File::create(&format!("{target}/bet_")).unwrap(); fb.write_all(&bet[..]).unwrap(); let _ = fa; let _ = fb; - let output = Command::new("ed") - .arg("target/alef_") - .stdin(File::open("target/ab_.ed").unwrap()) - .output() - .unwrap(); - assert!(output.status.success(), "{:?}", output); - //println!("{}", String::from_utf8_lossy(&output.stdout)); - //println!("{}", String::from_utf8_lossy(&output.stderr)); - let alef = fs::read("target/alef_").unwrap(); - assert_eq!(alef, bet); + #[cfg(not(windows))] // there's no ed on windows + { + use std::process::Command; + let output = Command::new("ed") + .arg(&format!("{target}/alef_")) + .stdin(File::open(&format!("{target}/ab_.ed")).unwrap()) + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = std::fs::read(&format!("{target}/alef_")).unwrap(); + assert_eq!(alef, bet); + } } } } @@ -315,9 +342,8 @@ mod tests { for &d in &[0, 1, 2] { for &e in &[0, 1, 2] { for &f in &[0, 1, 2] { - use std::fs::{self, File}; + use std::fs::File; use std::io::Write; - use std::process::Command; let mut alef = Vec::new(); let mut bet = Vec::new(); alef.write_all(if a == 0 { b"a\n" } else { b"f\n" }) @@ -353,7 +379,7 @@ mod tests { // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
let diff = diff_w(&alef, &bet, &format!("{target}/alefr")).unwrap(); - File::create("target/abr.ed") + File::create(&format!("{target}/abr.ed")) .unwrap() .write_all(&diff) .unwrap(); @@ -363,16 +389,20 @@ mod tests { fb.write_all(&bet[..]).unwrap(); let _ = fa; let _ = fb; - let output = Command::new("ed") - .arg(&format!("{target}/alefr")) - .stdin(File::open("target/abr.ed").unwrap()) - .output() - .unwrap(); - assert!(output.status.success(), "{:?}", output); - //println!("{}", String::from_utf8_lossy(&output.stdout)); - //println!("{}", String::from_utf8_lossy(&output.stderr)); - let alef = fs::read(&format!("{target}/alefr")).unwrap(); - assert_eq!(alef, bet); + #[cfg(not(windows))] // there's no ed on windows + { + use std::process::Command; + let output = Command::new("ed") + .arg(&format!("{target}/alefr")) + .stdin(File::open(&format!("{target}/abr.ed")).unwrap()) + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = std::fs::read(&format!("{target}/alefr")).unwrap(); + assert_eq!(alef, bet); + } } } } @@ -380,4 +410,40 @@ mod tests { } } } + + #[test] + fn test_stop_early() { + let from = ["a", "b", "c", ""].join("\n"); + let to = ["a", "d", "c", ""].join("\n"); + + let diff_full = diff(from.as_bytes(), to.as_bytes(), &Params::default()).unwrap(); + let expected_full = ["2c", "d", ".", ""].join("\n"); + assert_eq!(diff_full, expected_full.as_bytes()); + + let diff_brief = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + brief: true, + ..Default::default() + }, + ) + .unwrap(); + let expected_brief = "\0".as_bytes(); + assert_eq!(diff_brief, expected_brief); + + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), &Params::default()).unwrap(); + assert!(nodiff_full.is_empty()); + + let nodiff_brief = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + brief: true, + 
..Default::default() + }, + ) + .unwrap(); + assert!(nodiff_brief.is_empty()); + } } diff --git a/src/lib.rs b/src/lib.rs index a78b64d..0bb911b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,10 @@ pub mod context_diff; pub mod ed_diff; +pub mod macros; pub mod normal_diff; +pub mod params; pub mod unified_diff; +pub mod utils; // Re-export the public functions/types you need pub use context_diff::diff as context_diff; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..90a4eaa --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,25 @@ +// asserts equality of the actual diff and expected diff +// considering datetime variations +// +// It replaces the modification time in the actual diff +// with placeholder "TIMESTAMP" and then asserts the equality +// +// For example: +// let brief = "*** fruits_old.txt\t2024-03-24 23:43:05.189597645 +0530\n +// --- fruits_new.txt\t2024-03-24 23:35:08.922581904 +0530\n"; +// +// replaced = "*** fruits_old.txt\tTIMESTAMP\n +// --- fruits_new.txt\tTIMESTAMP\n"; +#[macro_export] +macro_rules! assert_diff_eq { + ($actual:expr, $expected:expr) => {{ + use regex::Regex; + use std::str; + + let diff = str::from_utf8(&$actual).unwrap(); + let re = Regex::new(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ [+-]\d{4}").unwrap(); + let actual = re.replacen(diff, 2, "TIMESTAMP"); + + assert_eq!(actual, $expected); + }}; +} diff --git a/src/main.rs b/src/main.rs index eaa7d55..f1bd1e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,58 +3,94 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code.
-use crate::params::{parse_params, Format, Params}; +use crate::params::{parse_params, Format}; use std::env; - +use std::ffi::OsString; use std::fs; -use std::io::{self, Write}; +use std::io::{self, Read, Write}; +use std::process::{exit, ExitCode}; mod context_diff; mod ed_diff; +mod macros; mod normal_diff; mod params; mod unified_diff; +mod utils; -fn main() -> Result<(), String> { +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +fn main() -> ExitCode { let opts = env::args_os(); - let Params { - from, - to, - context_count, - format, - } = parse_params(opts)?; + let params = parse_params(opts).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }); + // if from and to are the same file, no need to perform any comparison + let maybe_report_identical_files = || { + if params.report_identical_files { + println!( + "Files {} and {} are identical", + params.from.to_string_lossy(), + params.to.to_string_lossy(), + ); + } + }; + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + maybe_report_identical_files(); + return ExitCode::SUCCESS; + } // read files - let from_content = match fs::read(&from) { + fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } + } + let from_content = match read_file_contents(¶ms.from) { Ok(from_content) => from_content, Err(e) => { - return Err(format!("Failed to read from-file: {e}")); + eprintln!("Failed to read from-file: {e}"); + return ExitCode::from(2); } }; - let to_content = match fs::read(&to) { + let to_content = match read_file_contents(¶ms.to) { Ok(to_content) => to_content, Err(e) => { - return Err(format!("Failed to 
read from-file: {e}")); + eprintln!("Failed to read to-file: {e}"); + return ExitCode::from(2); } }; // run diff - let result: Vec = match format { - Format::Normal => normal_diff::diff(&from_content, &to_content), - Format::Unified => unified_diff::diff( - &from_content, - &from.to_string_lossy(), - &to_content, - &to.to_string_lossy(), - context_count, - ), - Format::Context => context_diff::diff( - &from_content, - &from.to_string_lossy(), - &to_content, - &to.to_string_lossy(), - context_count, - ), - Format::Ed => ed_diff::diff(&from_content, &to_content)?, + let result: Vec = match params.format { + Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), + Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), + Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), + Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }), }; - io::stdout().write_all(&result).unwrap(); - Ok(()) + if params.brief && !result.is_empty() { + println!( + "Files {} and {} differ", + params.from.to_string_lossy(), + params.to.to_string_lossy() + ); + } else { + io::stdout().write_all(&result).unwrap(); + } + if result.is_empty() { + maybe_report_identical_files(); + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } } diff --git a/src/normal_diff.rs b/src/normal_diff.rs index eda9d8c..d6f8297 100644 --- a/src/normal_diff.rs +++ b/src/normal_diff.rs @@ -5,6 +5,9 @@ use std::io::Write; +use crate::params::Params; +use crate::utils::do_write_line; + #[derive(Debug, PartialEq)] struct Mismatch { pub line_number_expected: usize, @@ -29,7 +32,7 @@ impl Mismatch { } // Produces a diff between the expected output and actual output. 
-fn make_diff(expected: &[u8], actual: &[u8]) -> Vec { +fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec { let mut line_number_expected = 1; let mut line_number_actual = 1; let mut results = Vec::new(); @@ -100,6 +103,10 @@ fn make_diff(expected: &[u8], actual: &[u8]) -> Vec { } } } + if stop_early && !results.is_empty() { + // Optimization: stop analyzing the files as soon as there are any differences + return results; + } } if !mismatch.actual.is_empty() || !mismatch.expected.is_empty() { @@ -110,9 +117,15 @@ fn make_diff(expected: &[u8], actual: &[u8]) -> Vec { } #[must_use] -pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { +pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { + // See https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Normal.html + // for details on the syntax of the normal format. let mut output = Vec::new(); - let diff_results = make_diff(expected, actual); + let diff_results = make_diff(expected, actual, params.brief); + if params.brief && !diff_results.is_empty() { + write!(&mut output, "\0").unwrap(); + return output; + } for result in diff_results { let line_number_expected = result.line_number_expected; let line_number_actual = result.line_number_actual; @@ -121,6 +134,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { match (expected_count, actual_count) { (0, 0) => unreachable!(), (0, _) => writeln!( + // 'a' stands for "Add lines" &mut output, "{}a{},{}", line_number_expected - 1, @@ -129,6 +143,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { ) .unwrap(), (_, 0) => writeln!( + // 'd' stands for "Delete lines" &mut output, "{},{}d{}", line_number_expected, @@ -136,7 +151,33 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { line_number_actual - 1 ) .unwrap(), + (1, 1) => writeln!( + // 'c' stands for "Change lines" + // exactly one line replaced by one line + &mut output, + "{line_number_expected}c{line_number_actual}" + ) + .unwrap(), + (1, _) => writeln!( + 
// one line replaced by multiple lines + &mut output, + "{}c{},{}", + line_number_expected, + line_number_actual, + actual_count + line_number_actual - 1 + ) + .unwrap(), + (_, 1) => writeln!( + // multiple lines replaced by one line + &mut output, + "{},{}c{}", + line_number_expected, + expected_count + line_number_expected - 1, + line_number_actual + ) + .unwrap(), _ => writeln!( + // general case: multiple lines replaced by multiple lines &mut output, "{},{}c{},{}", line_number_expected, @@ -148,7 +189,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { } for expected in &result.expected { write!(&mut output, "< ").unwrap(); - output.write_all(expected).unwrap(); + do_write_line(&mut output, expected, params.expand_tabs, params.tabsize).unwrap(); writeln!(&mut output).unwrap(); } if result.expected_missing_nl { @@ -159,7 +200,7 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { } for actual in &result.actual { write!(&mut output, "> ").unwrap(); - output.write_all(actual).unwrap(); + do_write_line(&mut output, actual, params.expand_tabs, params.tabsize).unwrap(); writeln!(&mut output).unwrap(); } if result.actual_missing_nl { @@ -173,6 +214,18 @@ pub fn diff(expected: &[u8], actual: &[u8]) -> Vec { mod tests { use super::*; use pretty_assertions::assert_eq; + + #[test] + fn test_basic() { + let mut a = Vec::new(); + a.write_all(b"a\n").unwrap(); + let mut b = Vec::new(); + b.write_all(b"b\n").unwrap(); + let diff = diff(&a, &b, &Params::default()); + let expected = b"1c1\n< a\n---\n> b\n".to_vec(); + assert_eq!(diff, expected); + } + #[test] fn test_permutations() { let target = "target/normal-diff/"; @@ -221,7 +274,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = diff(&alef, &bet); + let diff = diff(&alef, &bet, &Params::default()); File::create(&format!("{target}/ab.diff")) .unwrap() .write_all(&diff) @@ -238,7 +291,7 @@ mod tests { .stdin(File::open(&format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alef")).unwrap(); @@ -313,7 +366,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet); + let diff = diff(&alef, &bet, &Params::default()); File::create(&format!("{target}/abn.diff")) .unwrap() .write_all(&diff) @@ -331,7 +384,7 @@ mod tests { .stdin(File::open(&format!("{target}/abn.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefn")).unwrap(); @@ -387,7 +440,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet); + let diff = diff(&alef, &bet, &Params::default()); File::create(&format!("{target}/ab_.diff")) .unwrap() .write_all(&diff) @@ -404,7 +457,7 @@ mod tests { .stdin(File::open(&format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alef_")).unwrap(); @@ -465,7 +518,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = diff(&alef, &bet); + let diff = diff(&alef, &bet, &Params::default()); File::create(&format!("{target}/abr.diff")) .unwrap() .write_all(&diff) @@ -482,7 +535,7 @@ mod tests { .stdin(File::open(&format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefr")).unwrap(); @@ -494,4 +547,38 @@ mod tests { } } } + + #[test] + fn test_stop_early() { + let from = ["a", "b", "c"].join("\n"); + let to = ["a", "d", "c"].join("\n"); + + let diff_full = diff(from.as_bytes(), to.as_bytes(), &Params::default()); + let expected_full = ["2c2", "< b", "---", "> d", ""].join("\n"); + assert_eq!(diff_full, expected_full.as_bytes()); + + let diff_brief = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + brief: true, + ..Default::default() + }, + ); + let expected_brief = "\0".as_bytes(); + assert_eq!(diff_brief, expected_brief); + + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), &Params::default()); + assert!(nodiff_full.is_empty()); + + let nodiff_brief = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + brief: true, + ..Default::default() + }, + ); + assert!(nodiff_brief.is_empty()); + } } diff --git a/src/params.rs b/src/params.rs index b118be7..7e9cc78 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,7 +1,10 @@ use std::ffi::{OsStr, OsString}; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +use regex::Regex; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum Format { + #[default] Normal, Unified, Context, @@ -25,6 +28,25 @@ pub struct Params { pub to: OsString, pub format: Format, pub context_count: usize, + pub report_identical_files: bool, + pub brief: bool, + pub expand_tabs: bool, + pub tabsize: usize, +} + +impl Default for Params { + fn default() -> Self { + Self 
{ + from: OsString::default(), + to: OsString::default(), + format: Format::default(), + context_count: 3, + report_identical_files: false, + brief: false, + expand_tabs: false, + tabsize: 8, + } + } } pub fn parse_params>(opts: I) -> Result { @@ -34,24 +56,53 @@ pub fn parse_params>(opts: I) -> Result ".to_string()); }; + let mut params = Params::default(); let mut from = None; let mut to = None; let mut format = None; - let mut context_count = 3; + let tabsize_re = Regex::new(r"^--tabsize=(?\d+)$").unwrap(); while let Some(param) = opts.next() { if param == "--" { break; } if param == "-" { if from.is_none() { - from = Some(OsString::from("/dev/stdin")); + from = Some(param); } else if to.is_none() { - to = Some(OsString::from("/dev/stdin")); + to = Some(param); } else { return Err(format!("Usage: {} ", exe.to_string_lossy())); } continue; } + if param == "-s" || param == "--report-identical-files" { + params.report_identical_files = true; + continue; + } + if param == "-q" || param == "--brief" { + params.brief = true; + continue; + } + if param == "-t" || param == "--expand-tabs" { + params.expand_tabs = true; + continue; + } + if tabsize_re.is_match(param.to_string_lossy().as_ref()) { + // Because param matches the regular expression, + // it is safe to assume it is valid UTF-8. 
+ let param = param.into_string().unwrap(); + let tabsize_str = tabsize_re + .captures(param.as_str()) + .unwrap() + .name("num") + .unwrap() + .as_str(); + params.tabsize = match tabsize_str.parse::() { + Ok(num) => num, + Err(_) => return Err(format!("invalid tabsize «{tabsize_str}»")), + }; + continue; + } let p = osstr_bytes(¶m); if p.first() == Some(&b'-') && p.get(1) != Some(&b'-') { let mut bit = p[1..].iter().copied().peekable(); @@ -60,10 +111,10 @@ pub fn parse_params>(opts: I) -> Result { - context_count = (b - b'0') as usize; + params.context_count = (b - b'0') as usize; while let Some(b'0'..=b'9') = bit.peek() { - context_count *= 10; - context_count += (bit.next().unwrap() - b'0') as usize; + params.context_count *= 10; + params.context_count += (bit.next().unwrap() - b'0') as usize; } } b'c' => { @@ -97,7 +148,7 @@ pub fn parse_params>(opts: I) -> Result>(opts: I) -> Result ", exe.to_string_lossy())); } } - let from = if let Some(from) = from { + params.from = if let Some(from) = from { from } else if let Some(param) = opts.next() { param } else { return Err(format!("Usage: {} ", exe.to_string_lossy())); }; - let to = if let Some(to) = to { + params.to = if let Some(to) = to { to } else if let Some(param) = opts.next() { param } else { return Err(format!("Usage: {} ", exe.to_string_lossy())); }; - let format = format.unwrap_or(Format::Normal); - Ok(Params { - from, - to, - format, - context_count, - }) + params.format = format.unwrap_or(Format::default()); + Ok(params) } #[cfg(test)] @@ -148,8 +194,7 @@ mod tests { Ok(Params { from: os("foo"), to: os("bar"), - format: Format::Normal, - context_count: 3, + ..Default::default() }), parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) ); @@ -161,7 +206,7 @@ mod tests { from: os("foo"), to: os("bar"), format: Format::Ed, - context_count: 3, + ..Default::default() }), parse_params([os("diff"), os("-e"), os("foo"), os("bar")].iter().cloned()) ); @@ -174,6 +219,7 @@ mod tests { to: os("bar"), 
format: Format::Unified, context_count: 54, + ..Default::default() }), parse_params( [os("diff"), os("-u54"), os("foo"), os("bar")] @@ -187,6 +233,7 @@ mod tests { to: os("bar"), format: Format::Unified, context_count: 54, + ..Default::default() }), parse_params( [os("diff"), os("-U54"), os("foo"), os("bar")] @@ -200,6 +247,7 @@ mod tests { to: os("bar"), format: Format::Unified, context_count: 54, + ..Default::default() }), parse_params( [os("diff"), os("-U"), os("54"), os("foo"), os("bar")] @@ -213,6 +261,7 @@ mod tests { to: os("bar"), format: Format::Context, context_count: 54, + ..Default::default() }), parse_params( [os("diff"), os("-c54"), os("foo"), os("bar")] @@ -222,18 +271,226 @@ mod tests { ); } #[test] + fn report_identical_files() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + report_identical_files: true, + ..Default::default() + }), + parse_params([os("diff"), os("-s"), os("foo"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + report_identical_files: true, + ..Default::default() + }), + parse_params( + [ + os("diff"), + os("--report-identical-files"), + os("foo"), + os("bar"), + ] + .iter() + .cloned() + ) + ); + } + #[test] + fn brief() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + brief: true, + ..Default::default() + }), + parse_params([os("diff"), os("-q"), os("foo"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + brief: true, + ..Default::default() + }), + parse_params( + [os("diff"), os("--brief"), os("foo"), os("bar"),] + .iter() + .cloned() + ) + ); + } + #[test] + fn 
expand_tabs() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + ); + for option in ["-t", "--expand-tabs"] { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + expand_tabs: true, + ..Default::default() + }), + parse_params( + [os("diff"), os(option), os("foo"), os("bar")] + .iter() + .cloned() + ) + ); + } + } + #[test] + fn tabsize() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + tabsize: 0, + ..Default::default() + }), + parse_params( + [os("diff"), os("--tabsize=0"), os("foo"), os("bar")] + .iter() + .cloned() + ) + ); + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + tabsize: 42, + ..Default::default() + }), + parse_params( + [os("diff"), os("--tabsize=42"), os("foo"), os("bar")] + .iter() + .cloned() + ) + ); + assert!(parse_params( + [os("diff"), os("--tabsize"), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize="), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + assert!(parse_params( + [ + os("diff"), + os("--tabsize=92233720368547758088"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + ) + .is_err()); + } + #[test] fn double_dash() { assert_eq!( Ok(Params { from: os("-g"), to: os("-h"), - format: Format::Normal, - context_count: 3, + ..Default::default() }), parse_params([os("diff"), 
os("--"), os("-g"), os("-h")].iter().cloned()) ); } #[test] + fn default_to_stdin() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("-"), + ..Default::default() + }), + parse_params([os("diff"), os("foo"), os("-")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("-"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("diff"), os("-"), os("bar")].iter().cloned()) + ); + assert_eq!( + Ok(Params { + from: os("-"), + to: os("-"), + ..Default::default() + }), + parse_params([os("diff"), os("-"), os("-")].iter().cloned()) + ); + assert!(parse_params([os("diff"), os("foo"), os("bar"), os("-")].iter().cloned()).is_err()); + assert!(parse_params([os("diff"), os("-"), os("-"), os("-")].iter().cloned()).is_err()); + } + #[test] + fn missing_arguments() { + assert!(parse_params([os("diff")].iter().cloned()).is_err()); + assert!(parse_params([os("diff"), os("foo")].iter().cloned()).is_err()); + } + #[test] fn unknown_argument() { assert!( parse_params([os("diff"), os("-g"), os("foo"), os("bar")].iter().cloned()).is_err() @@ -245,4 +502,15 @@ mod tests { fn empty() { assert!(parse_params([].iter().cloned()).is_err()); } + #[test] + fn conflicting_output_styles() { + for (arg1, arg2) in [("-u", "-c"), ("-u", "-e"), ("-c", "-u"), ("-c", "-U42")] { + assert!(parse_params( + [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] + .iter() + .cloned() + ) + .is_err()); + } + } } diff --git a/src/unified_diff.rs b/src/unified_diff.rs index 2be092b..11299d7 100644 --- a/src/unified_diff.rs +++ b/src/unified_diff.rs @@ -6,6 +6,10 @@ use std::collections::VecDeque; use std::io::Write; +use crate::params::Params; +use crate::utils::do_write_line; +use crate::utils::get_modification_time; + #[derive(Debug, PartialEq)] pub enum DiffLine { Context(Vec), @@ -32,7 +36,12 @@ impl Mismatch { } // Produces a diff between the expected output and actual output. 
-fn make_diff(expected: &[u8], actual: &[u8], context_size: usize) -> Vec { +fn make_diff( + expected: &[u8], + actual: &[u8], + context_size: usize, + stop_early: bool, +) -> Vec { let mut line_number_expected = 1; let mut line_number_actual = 1; let mut context_queue: VecDeque<&[u8]> = VecDeque::with_capacity(context_size); @@ -180,6 +189,10 @@ fn make_diff(expected: &[u8], actual: &[u8], context_size: usize) -> Vec Vec Vec { - let mut output = format!("--- {expected_filename}\t\n+++ {actual_filename}\t\n").into_bytes(); - let diff_results = make_diff(expected, actual, context_size); +pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { + let from_modified_time = get_modification_time(¶ms.from.to_string_lossy()); + let to_modified_time = get_modification_time(¶ms.to.to_string_lossy()); + let mut output = format!( + "--- {0}\t{1}\n+++ {2}\t{3}\n", + params.from.to_string_lossy(), + from_modified_time, + params.to.to_string_lossy(), + to_modified_time + ) + .into_bytes(); + let diff_results = make_diff(expected, actual, params.context_count, params.brief); if diff_results.is_empty() { return Vec::new(); - }; + } + if params.brief { + return output; + } for result in diff_results { let mut line_number_expected = result.line_number_expected; let mut line_number_actual = result.line_number_actual; @@ -358,17 +377,20 @@ pub fn diff( match line { DiffLine::Expected(e) => { write!(output, "-").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Context(c) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&c).expect("write to Vec is infallible"); + do_write_line(&mut output, &c, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Actual(r) => { write!(output, 
"+",).expect("write to Vec is infallible"); - output.write_all(&r).expect("write to Vec is infallible"); + do_write_line(&mut output, &r, params.expand_tabs, params.tabsize) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::MissingNL => { @@ -434,8 +456,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = - diff(&alef, "a/alef", &bet, &format!("{target}/alef"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alef".into(), + to: (&format!("{target}/alef")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/ab.diff")) .unwrap() .write_all(&diff) @@ -469,7 +499,7 @@ mod tests { .unwrap(); println!("{}", String::from_utf8_lossy(&output.stdout)); println!("{}", String::from_utf8_lossy(&output.stderr)); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); let alef = fs::read(&format!("{target}/alef")).unwrap(); assert_eq!(alef, bet); } @@ -542,8 +572,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = - diff(&alef, "a/alefn", &bet, &format!("{target}/alefn"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alefn".into(), + to: (&format!("{target}/alefn")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/abn.diff")) .unwrap() .write_all(&diff) @@ -559,7 +597,7 @@ mod tests { .stdin(File::open(&format!("{target}/abn.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefn")).unwrap(); @@ -630,8 +668,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = - diff(&alef, "a/alef_", &bet, &format!("{target}/alef_"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alef_".into(), + to: (&format!("{target}/alef_")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/ab_.diff")) .unwrap() .write_all(&diff) @@ -647,7 +693,7 @@ mod tests { .stdin(File::open(&format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alef_")).unwrap(); @@ -703,8 +749,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = - diff(&alef, "a/alefx", &bet, &format!("{target}/alefx"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alefx".into(), + to: (&format!("{target}/alefx")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/abx.diff")) .unwrap() .write_all(&diff) @@ -720,7 +774,7 @@ mod tests { .stdin(File::open(&format!("{target}/abx.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefx")).unwrap(); @@ -781,8 +835,16 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = - diff(&alef, "a/alefr", &bet, &format!("{target}/alefr"), 2); + let diff = diff( + &alef, + &bet, + &Params { + from: "a/alefr".into(), + to: (&format!("{target}/alefr")).into(), + context_count: 2, + ..Default::default() + }, + ); File::create(&format!("{target}/abr.diff")) .unwrap() .write_all(&diff) @@ -798,7 +860,7 @@ mod tests { .stdin(File::open(&format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); - assert!(output.status.success(), "{:?}", output); + assert!(output.status.success(), "{output:?}"); //println!("{}", String::from_utf8_lossy(&output.stdout)); //println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(&format!("{target}/alefr")).unwrap(); @@ -810,4 +872,74 @@ mod tests { } } } + + #[test] + fn test_stop_early() { + use crate::assert_diff_eq; + + let from_filename = "foo"; + let from = ["a", "b", "c", ""].join("\n"); + let to_filename = "bar"; + let to = ["a", "d", "c", ""].join("\n"); + + let diff_full = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + ..Default::default() + }, + ); + + let expected_full = [ + "--- foo\tTIMESTAMP", + "+++ bar\tTIMESTAMP", + "@@ -1,3 +1,3 @@", + " a", + "-b", + "+d", + " c", + "", + ] + .join("\n"); + assert_diff_eq!(diff_full, expected_full); + + let diff_brief = diff( + from.as_bytes(), + to.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + brief: true, + ..Default::default() + }, + ); + + let expected_brief = ["--- foo\tTIMESTAMP", "+++ bar\tTIMESTAMP", ""].join("\n"); + assert_diff_eq!(diff_brief, expected_brief); + + let nodiff_full = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + ..Default::default() + }, + ); + assert!(nodiff_full.is_empty()); + + let nodiff_brief = diff( + from.as_bytes(), + from.as_bytes(), + &Params { + from: from_filename.into(), + to: to_filename.into(), + brief: true, + 
..Default::default() + }, + ); + assert!(nodiff_brief.is_empty()); + } } diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..561f2b9 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,179 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use std::io::Write; + +use unicode_width::UnicodeWidthStr; + +/// Replace tabs by spaces in the input line. +/// Correctly handle multi-bytes characters. +/// This assumes that line does not contain any line breaks (if it does, the result is undefined). +#[must_use] +pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { + let tab = b'\t'; + let ntabs = line.iter().filter(|c| **c == tab).count(); + if ntabs == 0 { + return line.to_vec(); + } + let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut offset = 0; + + let mut iter = line.split(|c| *c == tab).peekable(); + while let Some(chunk) = iter.next() { + match String::from_utf8(chunk.to_vec()) { + Ok(s) => offset += UnicodeWidthStr::width(s.as_str()), + Err(_) => offset += chunk.len(), + } + result.extend_from_slice(chunk); + if iter.peek().is_some() { + result.resize(result.len() + tabsize - offset % tabsize, b' '); + offset = 0; + } + } + + result +} + +/// Write a single line to an output stream, expanding tabs to space if necessary. +/// This assumes that line does not contain any line breaks +/// (if it does and tabs are to be expanded to spaces, the result is undefined). 
+pub fn do_write_line( + output: &mut Vec, + line: &[u8], + expand_tabs: bool, + tabsize: usize, +) -> std::io::Result<()> { + if expand_tabs { + output.write_all(do_expand_tabs(line, tabsize).as_slice()) + } else { + output.write_all(line) + } +} + +/// Retrieves the modification time of the input file specified by file path +/// If an error occurs, it returns the current system time +pub fn get_modification_time(file_path: &str) -> String { + use chrono::{DateTime, Local}; + use std::fs; + use std::time::SystemTime; + + let modification_time: SystemTime = fs::metadata(file_path) + .and_then(|m| m.modified()) + .unwrap_or(SystemTime::now()); + + let modification_time: DateTime = modification_time.into(); + let modification_time: String = modification_time + .format("%Y-%m-%d %H:%M:%S%.9f %z") + .to_string(); + + modification_time +} + +#[cfg(test)] +mod tests { + use super::*; + + mod expand_tabs { + use super::*; + use pretty_assertions::assert_eq; + + fn assert_tab_expansion(line: &str, tabsize: usize, expected: &str) { + assert_eq!( + do_expand_tabs(line.as_bytes(), tabsize), + expected.as_bytes() + ); + } + + #[test] + fn basics() { + assert_tab_expansion("foo barr baz", 8, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 8, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 5, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 2, "foo barr baz"); + } + + #[test] + fn multibyte_chars() { + assert_tab_expansion("foo\tépée\tbaz", 8, "foo épée baz"); + assert_tab_expansion("foo\t😉\tbaz", 5, "foo 😉 baz"); + + // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining + // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms + // it is displayed as a single emoji and should have a print size of 2 columns, + // but terminal emulators tend to not support this, and display the two emojis + // side by side, thus accounting for a print size of 4 columns. 
+ assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); + } + + #[test] + fn invalid_utf8() { + // [240, 240, 152, 137] is an invalid UTF-8 sequence, so it is handled as 4 bytes + assert_eq!( + do_expand_tabs(&[240, 240, 152, 137, 9, 102, 111, 111], 8), + &[240, 240, 152, 137, 32, 32, 32, 32, 102, 111, 111] + ); + } + } + + mod write_line { + use super::*; + use pretty_assertions::assert_eq; + + fn assert_line_written(line: &str, expand_tabs: bool, tabsize: usize, expected: &str) { + let mut output: Vec = Vec::new(); + assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs, tabsize).is_ok()); + assert_eq!(output, expected.as_bytes()); + } + + #[test] + fn basics() { + assert_line_written("foo bar baz", false, 8, "foo bar baz"); + assert_line_written("foo bar\tbaz", false, 8, "foo bar\tbaz"); + assert_line_written("foo bar\tbaz", true, 8, "foo bar baz"); + } + } + + mod modification_time { + use super::*; + + #[test] + fn set_time() { + use chrono::{DateTime, Local}; + use std::time::SystemTime; + use tempfile::NamedTempFile; + + let temp = NamedTempFile::new().unwrap(); + // set file modification time equal to current time + let current = SystemTime::now(); + let _ = temp.as_file().set_modified(current); + + // format current time + let current: DateTime = current.into(); + let current: String = current.format("%Y-%m-%d %H:%M:%S%.9f %z").to_string(); + + // verify + assert_eq!( + current, + get_modification_time(&temp.path().to_string_lossy()) + ); + } + + #[test] + fn invalid_file() { + use chrono::{DateTime, Local}; + use std::time::SystemTime; + + let invalid_file = "target/utils/invalid-file"; + + // store current time before calling `get_modification_time` + // Because the file is invalid, it will return SystemTime::now() + // which will be greater than previously saved time + let current_time: DateTime = SystemTime::now().into(); + let m_time: DateTime = get_modification_time(invalid_file).parse().unwrap(); + + assert!(m_time > current_time); + } 
+ } +}
diff --git a/tests/integration.rs b/tests/integration.rs
new file mode 100644
index 0000000..853ba4d
--- /dev/null
+++ b/tests/integration.rs
@@ -0,0 +1,244 @@
+// This file is part of the uutils diffutils package.
+//
+// For the full copyright and license information, please view the LICENSE-*
+// files that was distributed with this source code.
+
+use assert_cmd::cmd::Command;
+use diffutilslib::assert_diff_eq;
+use predicates::prelude::*;
+use std::io::Write;
+use tempfile::NamedTempFile;
+
+// Integration tests for the diffutils command
+
+// An unrecognized option must exit with status 2 and print a usage message on stderr.
+#[test]
+fn unknown_param() -> Result<(), Box<dyn std::error::Error>> {
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("--foobar");
+    cmd.assert()
+        .code(predicate::eq(2))
+        .failure()
+        .stderr(predicate::str::starts_with("Usage: "));
+    Ok(())
+}
+
+// "foo.txt" (expected not to exist) as either input must fail with status 2, naming the bad side.
+#[test]
+fn cannot_read_files() -> Result<(), Box<dyn std::error::Error>> {
+    let file = NamedTempFile::new()?;
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("foo.txt").arg(file.path());
+    cmd.assert()
+        .code(predicate::eq(2))
+        .failure()
+        .stderr(predicate::str::starts_with("Failed to read from-file"));
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg(file.path()).arg("foo.txt");
+    cmd.assert()
+        .code(predicate::eq(2))
+        .failure()
+        .stderr(predicate::str::starts_with("Failed to read to-file"));
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("foo.txt").arg("foo.txt");
+    cmd.assert()
+        .code(predicate::eq(2))
+        .failure()
+        .stderr(predicate::str::starts_with("Failed to read from-file"));
+
+    Ok(())
+}
+
+// Comparing a file with itself must exit with status 0 and empty stdout, with each format option.
+#[test]
+fn no_differences() -> Result<(), Box<dyn std::error::Error>> {
+    let file = NamedTempFile::new()?;
+    for option in ["", "-u", "-c", "-e"] {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        if !option.is_empty() {
+            cmd.arg(option);
+        }
+        cmd.arg(file.path()).arg(file.path());
+        cmd.assert()
+            .code(predicate::eq(0))
+            .success()
+            .stdout(predicate::str::is_empty());
+    }
+    Ok(())
+}
+
+// With -s, inputs with equal content must be reported as identical on stdout, with status 0.
+#[test]
+fn no_differences_report_identical_files() -> Result<(), Box<dyn std::error::Error>> {
+    // same file
+    let mut file1 = NamedTempFile::new()?;
+    file1.write_all("foo\n".as_bytes())?;
+    for option in ["", "-u", "-c", "-e"] {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        if !option.is_empty() {
+            cmd.arg(option);
+        }
+        cmd.arg("-s").arg(file1.path()).arg(file1.path());
+        cmd.assert()
+            .code(predicate::eq(0))
+            .success()
+            .stdout(predicate::eq(format!(
+                "Files {} and {} are identical\n",
+                file1.path().to_string_lossy(),
+                file1.path().to_string_lossy(),
+            )));
+    }
+    // two files with the same content
+    let mut file2 = NamedTempFile::new()?;
+    file2.write_all("foo\n".as_bytes())?;
+    for option in ["", "-u", "-c", "-e"] {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        if !option.is_empty() {
+            cmd.arg(option);
+        }
+        cmd.arg("-s").arg(file1.path()).arg(file2.path());
+        cmd.assert()
+            .code(predicate::eq(0))
+            .success()
+            .stdout(predicate::eq(format!(
+                "Files {} and {} are identical\n",
+                file1.path().to_string_lossy(),
+                file2.path().to_string_lossy(),
+            )));
+    }
+    Ok(())
+}
+
+// Differing inputs must exit with status 1 and non-empty stdout, with each format option.
+#[test]
+fn differences() -> Result<(), Box<dyn std::error::Error>> {
+    let mut file1 = NamedTempFile::new()?;
+    file1.write_all("foo\n".as_bytes())?;
+    let mut file2 = NamedTempFile::new()?;
+    file2.write_all("bar\n".as_bytes())?;
+    for option in ["", "-u", "-c", "-e"] {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        if !option.is_empty() {
+            cmd.arg(option);
+        }
+        cmd.arg(file1.path()).arg(file2.path());
+        cmd.assert()
+            .code(predicate::eq(1))
+            .failure()
+            .stdout(predicate::str::is_empty().not());
+    }
+    Ok(())
+}
+
+// With -q, differing inputs must only be reported as "Files A and B differ", with status 1.
+#[test]
+fn differences_brief() -> Result<(), Box<dyn std::error::Error>> {
+    let mut file1 = NamedTempFile::new()?;
+    file1.write_all("foo\n".as_bytes())?;
+    let mut file2 = NamedTempFile::new()?;
+    file2.write_all("bar\n".as_bytes())?;
+    for option in ["", "-u", "-c", "-e"] {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        if !option.is_empty() {
+            cmd.arg(option);
+        }
+        cmd.arg("-q").arg(file1.path()).arg(file2.path());
+        cmd.assert()
+            .code(predicate::eq(1))
+            .failure()
+            .stdout(predicate::eq(format!(
+                "Files {} and {} differ\n",
+                file1.path().to_string_lossy(),
+                file2.path().to_string_lossy()
+            )));
+    }
+    Ok(())
+}
+
+// With -e, inputs lacking a trailing newline must fail with status 2 and an error on stderr.
+#[test]
+fn missing_newline() -> Result<(), Box<dyn std::error::Error>> {
+    let mut file1 = NamedTempFile::new()?;
+    file1.write_all("foo".as_bytes())?;
+    let mut file2 = NamedTempFile::new()?;
+    file2.write_all("bar".as_bytes())?;
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("-e").arg(file1.path()).arg(file2.path());
+    cmd.assert()
+        .code(predicate::eq(2))
+        .failure()
+        .stderr(predicate::str::starts_with("No newline at end of file"));
+    Ok(())
+}
+
+// "-" (and /dev/stdin on Unix) must be read from stdin and named as such in the diff headers.
+#[test]
+fn read_from_stdin() -> Result<(), Box<dyn std::error::Error>> {
+    let mut file1 = NamedTempFile::new()?;
+    file1.write_all("foo\n".as_bytes())?;
+    let mut file2 = NamedTempFile::new()?;
+    file2.write_all("bar\n".as_bytes())?;
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("-u")
+        .arg(file1.path())
+        .arg("-")
+        .write_stdin("bar\n");
+    cmd.assert().code(predicate::eq(1)).failure();
+
+    let output = cmd.output().unwrap().stdout;
+    assert_diff_eq!(
+        output,
+        format!(
+            "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n",
+            file1.path().to_string_lossy()
+        )
+    );
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("-u")
+        .arg("-")
+        .arg(file2.path())
+        .write_stdin("foo\n");
+    cmd.assert().code(predicate::eq(1)).failure();
+
+    let output = cmd.output().unwrap().stdout;
+    assert_diff_eq!(
+        output,
+        format!(
+            "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n",
+            file2.path().to_string_lossy()
+        )
+    );
+
+    let mut cmd = Command::cargo_bin("diffutils")?;
+    cmd.arg("-u").arg("-").arg("-");
+    cmd.assert()
+        .code(predicate::eq(0))
+        .success()
+        .stdout(predicate::str::is_empty());
+
+    #[cfg(unix)]
+    {
+        let mut cmd = Command::cargo_bin("diffutils")?;
+        cmd.arg("-u")
+            .arg(file1.path())
+            .arg("/dev/stdin")
+            .write_stdin("bar\n");
+        cmd.assert().code(predicate::eq(1)).failure();
+
+        let output = cmd.output().unwrap().stdout;
+        assert_diff_eq!(
+            output,
+            format!(
+                "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n",
+                file1.path().to_string_lossy()
+            )
+        );
+    }
+
+    Ok(())
+}
diff --git a/tests/print-test-results.sh b/tests/print-test-results.sh
new file mode 100755
index 0000000..23136d6
--- /dev/null
+++ b/tests/print-test-results.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Print the test results written to a JSON file by run-upstream-testsuite.sh
+# in a markdown format. The printout includes the name of the test, the result,
+# the URL to the test script and the contents of stdout and stderr.
+# It can be used verbatim as the description when filing an issue for a test
+# with an unexpected result.
+
+json="test-results.json"
+[[ -n $1 ]] && json="$1"
+
+# Wrap the given text in a markdown fenced code block. printf '%s' is used
+# rather than `echo -e` so that backslash sequences occurring in the captured
+# test output are printed verbatim instead of being interpreted as escapes.
+codeblock () { printf '```\n%s\n```\n' "$1"; }
+
+jq -c '.tests[]' "$json" | while read -r test
+do
+  name=$(echo "$test" | jq -r '.test')
+  echo "# test: $name"
+  result=$(echo "$test" | jq -r '.result')
+  echo "result: $result"
+  url=$(echo "$test" | jq -r '.url')
+  echo "url: $url"
+  if [[ "$result" != "SKIP" ]]
+  then
+    stdout=$(echo "$test" | jq -r '.stdout' | base64 -d)
+    if [[ -n "$stdout" ]]
+    then
+      echo "## stdout"
+      codeblock "$stdout"
+    fi
+    stderr=$(echo "$test" | jq -r '.stderr' | base64 -d)
+    if [[ -n "$stderr" ]]
+    then
+      echo "## stderr"
+      codeblock "$stderr"
+    fi
+  fi
+  echo ""
+done
diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh
new file mode 100755
index 0000000..cb59834
--- /dev/null
+++ b/tests/run-upstream-testsuite.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+
+# Run the GNU upstream test suite for diffutils against a local build of the
+# Rust implementation, print out a summary of the test results, and write a
+# JSON file ('test-results.json') containing detailed information about the
+# test run.
+
+# The JSON file contains metadata about the test run, and for each test the
+# result as well as the contents of stdout, stderr, and of all the files
+# written by the test script, if any (excluding subdirectories).
+
+# The script takes a shortcut to fetch only the test suite from the upstream
+# repository and carefully avoids running the autotools machinery which is
+# time-consuming and resource-intensive, and doesn't offer the option to not
+# build the upstream binaries. As a consequence, the environment in which the
+# tests are run might not match exactly that used when the upstream tests are
+# run through the autotools.
+
+# By default it expects a release build of the diffutils binary, but a
+# different build profile can be specified as an argument
+# (e.g. 'dev' or 'test').
+# Unless overridden by the $TESTS environment variable, all tests in the test
+# suite will be run. Tests targeting a command that is not yet implemented
+# (e.g. cmp, diff3 or sdiff) are skipped.
+
+scriptpath=$(dirname "$(readlink -f "$0")")
+rev=$(git rev-parse HEAD)
+
+# Allow passing a specific profile as parameter (default to "release")
+profile="release"
+[[ -n $1 ]] && profile="$1"
+
+# Verify that the diffutils binary was built for the requested profile
+binary="$scriptpath/../target/$profile/diffutils"
+if [[ ! -x "$binary" ]]
+then
+  echo "Missing build for profile $profile"
+  exit 1
+fi
+
+# Work in a temporary directory, removed by the EXIT trap however we leave
+tempdir=$(mktemp -d) || exit 1
+trap 'rm -rf "$tempdir"' EXIT
+cd "$tempdir" || exit 1
+
+# Check out the upstream test suite
+gitserver="https://git.savannah.gnu.org"
+testsuite="$gitserver/git/diffutils.git"
+echo "Fetching upstream test suite from $testsuite"
+git clone -n --depth=1 --filter=tree:0 "$testsuite" &> /dev/null
+cd diffutils || { echo "Failed to fetch the upstream test suite" >&2; exit 1; }
+git sparse-checkout set --no-cone tests &> /dev/null
+git checkout &> /dev/null
+upstreamrev=$(git rev-parse HEAD)
+
+# Ensure that calling `diff` invokes the built `diffutils` binary instead of
+# the upstream `diff` binary that is most likely installed on the system
+mkdir src
+cd src || exit 1
+ln -s "$binary" diff
+cd ../tests || exit 1
+
+if [[ -n "$TESTS" ]]
+then
+  tests="$TESTS"
+else
+  # Get a list of all upstream tests (default if $TESTS isn't set)
+  echo -e '\n\nprinttests:\n\t@echo "${TESTS}"' >> Makefile.am
+  tests=$(make -f Makefile.am printtests)
+fi
+total=$(echo "$tests" | wc -w)
+echo "Running $total tests"
+export LC_ALL=C
+export KEEP=yes
+exitcode=0
+timestamp=$(date -Iseconds)
+urlroot="$gitserver/cgit/diffutils.git/tree/tests/"
+passed=0
+failed=0
+skipped=0
+json="" # accumulates one JSON object per test; never inherit it from the environment
+normal="$(tput sgr0)"
+for test in $tests
+do
+  result="FAIL"
+  url="$urlroot$test?id=$upstreamrev"
+  # Run only the tests that invoke `diff`,
+  # because other binaries aren't implemented yet
+  if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test"
+  then
+    sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1
+    json+="{\"test\":\"$test\",\"result\":\"$result\","
+    json+="\"url\":\"$url\","
+    json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\","
+    json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\","
+    json+="\"files\":{"
+    # Glob the test's gt-* directory instead of `cd`-ing into it, so that a test
+    # which never created it cannot make files from tests/ leak into the report.
+    # Subdirectory contents are not included (not much value added in doing so).
+    for file in gt-"$test".*/*
+    do
+      [[ -f "$file" ]] && json+="\"${file##*/}\":\"$(base64 -w0 < "$file")\","
+    done
+    json="${json%,}}},"
+    [[ "$result" = "PASS" ]] && (( passed++ ))
+    [[ "$result" = "FAIL" ]] && (( failed++ ))
+  else
+    result="SKIP"
+    (( skipped++ ))
+    json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"},"
+  fi
+  color=2 # green
+  [[ "$result" = "FAIL" ]] && color=1 # red
+  [[ "$result" = "SKIP" ]] && color=3 # yellow
+  printf " %-40s %s%s%s\n" "$test" "$(tput setaf "$color")" "$result" "$normal"
+done
+echo ""
+echo -n "Summary: TOTAL: $total / "
+echo -n "$(tput setaf 2)PASS$normal: $passed / "
+echo -n "$(tput setaf 1)FAIL$normal: $failed / "
+echo "$(tput setaf 3)SKIP$normal: $skipped"
+echo ""
+
+json="\"tests\":[${json%,}]"
+metadata="\"timestamp\":\"$timestamp\","
+metadata+="\"revision\":\"$rev\","
+metadata+="\"upstream-revision\":\"$upstreamrev\","
+if [[ -n "$GITHUB_ACTIONS" ]]
+then
+  metadata+="\"branch\":\"$GITHUB_REF\","
+fi
+json="{$metadata $json}"
+
+# Write the report next to this script (the EXIT trap removes the tempdir)
+cd "$scriptpath" || exit 1
+
+resultsfile="test-results.json"
+echo "$json" | jq . > "$resultsfile"
+echo "Results written to $scriptpath/$resultsfile"
+
+exit $exitcode