diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 5faffabf..8faa9907 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -11,26 +11,27 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Cache (rust) uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true - if: matrix.os != 'macos-latest' # Cache causes errors on macOS with: path: | - ~/.cargo/registry - ~/.cargo/git + ~/.cargo target key: ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}- - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Build pdu run: | @@ -68,18 +69,17 @@ jobs: - name: Cache (pnpm) uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true with: path: ~/.pnpm-store/v3 - key: pnpm-${{ matrix.vars.id }}-${{ hashFiles('**/pnpm-lock.yaml') }} + key: pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | - pnpm-${{ matrix.vars.id }}-${{ hashFiles('**/pnpm-lock.yaml') }} - pnpm-${{ matrix.vars.id }}- + pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} pnpm- - name: Setup pnpm - uses: pnpm/action-setup@v4.0.0 + uses: pnpm/action-setup@v4.1.0 with: version: '7.9.0' run_install: 'true' diff --git a/.github/workflows/clippy.yaml b/.github/workflows/clippy.yaml index f8b0bff8..50b51276 100644 --- a/.github/workflows/clippy.yaml +++ b/.github/workflows/clippy.yaml @@ -18,26 +18,28 @@ jobs: - macos-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - 
name: Cache uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true if: matrix.os != 'macos-latest' # Cache causes errors on macOS with: path: | - ~/.cargo/registry - ~/.cargo/git + ~/.cargo target key: ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}- - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) --component clippy -y - name: Use clippy to lint code (dev) env: diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 750de635..58de85ef 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -20,13 +20,17 @@ jobs: - macos-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy, rustfmt + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Test (dev) + shell: bash env: FMT: 'true' LINT: 'true' @@ -38,6 +42,7 @@ jobs: run: ./test.sh - name: Test (release) + shell: bash env: FMT: 'true' LINT: 'true' @@ -61,14 +66,18 @@ jobs: - x86_64-unknown-linux-musl steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain 
$(cat rust-toolchain) -y - name: Install compilation target run: rustup target add ${{ matrix.target }} - - name: Build run: cargo build --target ${{ matrix.target }} --release --all-features @@ -93,9 +102,14 @@ jobs: - x86_64-apple-darwin steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Install compilation target run: rustup target add ${{ matrix.target }} @@ -125,9 +139,14 @@ jobs: - x86_64-pc-windows-msvc steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Install compilation target run: rustup target add ${{ matrix.target }} @@ -161,7 +180,7 @@ jobs: release_tag: ${{ steps.release_type.outputs.release_tag }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install APT packages run: sudo apt install -y python3 python3-toml @@ -194,7 +213,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Upload Tab-Completion file for Bash uses: actions/upload-release-asset@v1.0.2 @@ -271,10 +290,10 @@ jobs: - x86_64-apple-darwin steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Download artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: name: pdu-${{ matrix.target }} @@ -315,10 +334,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Download all artifacts - uses: actions/download-artifact@v4 + uses: 
actions/download-artifact@v5 with: path: ./downloads @@ -376,9 +395,14 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Login run: cargo login ${{ secrets.CRATE_AUTH_TOKEN }} @@ -406,26 +430,28 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Cache (rust) uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true if: matrix.os != 'macos-latest' # Cache causes errors on macOS with: path: | - ~/.cargo/registry - ~/.cargo/git + ~/.cargo target key: ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} ${{ github.job }}-Linux-${{ hashFiles('rust-toolchain') }}- - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Build pdu run: | @@ -435,7 +461,7 @@ jobs: - name: Install dust env: REPO: https://github.com/bootandy/dust - VERSION: '1.1.1' + VERSION: '1.2.1' run: | mkdir -p DUST.tmp archive_name="dust-v${VERSION}-x86_64-unknown-linux-gnu" @@ -463,7 +489,7 @@ jobs: - name: Install dua env: REPO: https://github.com/Byron/dua-cli - VERSION: '2.29.4' + VERSION: '2.30.1' run: | mkdir -p DUA.tmp archive_name="dua-v${VERSION}-x86_64-unknown-linux-musl" @@ -478,7 +504,7 @@ jobs: - name: Install gdu env: REPO: https://github.com/dundee/gdu - VERSION: '5.29.0' + VERSION: '5.31.0' run: | mkdir 
-p GDU.tmp curl -L "${REPO}/releases/download/v${VERSION}/gdu_linux_amd64.tgz" > tmp.gdu.tar.gz @@ -534,7 +560,7 @@ jobs: - name: Cache (pnpm) uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true with: path: ~/.pnpm-store/v3 @@ -545,7 +571,7 @@ jobs: pnpm- - name: Setup pnpm - uses: pnpm/action-setup@v4.0.0 + uses: pnpm/action-setup@v4.1.0 with: version: '7.9.0' run_install: 'true' diff --git a/.github/workflows/fmt.yaml b/.github/workflows/fmt.yaml index 19335436..9f5e4a63 100644 --- a/.github/workflows/fmt.yaml +++ b/.github/workflows/fmt.yaml @@ -11,11 +11,14 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - - uses: dtolnay/rust-toolchain@stable - with: - components: rustfmt + - name: Install Rust + shell: bash + run: | + installer=$(mktemp -d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) --component rustfmt -y - name: Check code formatting run: cargo fmt -- --check diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index db61efa1..e6829096 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,26 +19,31 @@ jobs: - macos-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Cache uses: actions/cache@v4 - timeout-minutes: 1 + timeout-minutes: 2 continue-on-error: true if: matrix.os != 'macos-latest' # Cache causes errors on macOS with: path: | - ~/.cargo/registry - ~/.cargo/git + ~/.cargo target key: ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}-${{ hashFiles('**/Cargo.lock') }} ${{ github.job }}-${{ runner.os }}-${{ hashFiles('rust-toolchain') }}- - - uses: dtolnay/rust-toolchain@stable + - name: Install Rust + shell: bash + run: | + installer=$(mktemp 
-d)/install-rustup + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer + bash $installer --default-toolchain $(cat rust-toolchain) -y - name: Test (dev) + shell: bash env: FMT: 'false' LINT: 'false' @@ -50,6 +55,7 @@ jobs: run: ./test.sh - name: Test (release) + shell: bash env: RUSTFLAGS: '-D warnings' RUSTDOCFLAGS: '-D warnings' diff --git a/.gitignore b/.gitignore index 0d50de6e..153806f6 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ tmp.* .vscode *.sublime-* node_modules +/perf.data +/perf.data.* +flamegraph.svg diff --git a/Cargo.lock b/Cargo.lock index f3a1407b..a4cc95db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,66 +1,74 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "anstream" -version = "0.6.11" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.3" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.2" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "once_cell_polyfill", + "windows-sys 0.59.0", ] [[package]] name = "assert-cmp" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "737bf4aa6df38f69a17efc233b4d0343cc5aa0d2c3b53e7007bd4c9866038ffd" +checksum = "73a4ebf3897cf94a964581808aa2aba1cedf55ac6df387c2db0d90acffc5bf48" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" -version = "2.4.2" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "build-fs-tree" @@ -77,15 +85,15 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "clap" -version = "4.5.20" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = 
"ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" dependencies = [ "clap_builder", "clap_derive", @@ -105,48 +113,48 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim 0.11.0", + "strsim 0.11.1", ] [[package]] name = "clap_complete" -version = "4.5.36" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bc73de94bc81e52f3bebec71bc4463e9748f7a59166663e32044669577b0e2" +checksum = "a5abde44486daf70c5be8b8f8f1b66c49f86236edf6fa2abadb4d961c4c6229a" dependencies = [ "clap", ] [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", ] [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "command-extra" @@ -162,24 +170,18 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" [[package]] name = 
"convert_case" -version = "0.6.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" dependencies = [ "unicode-segmentation", ] -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -196,9 +198,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "darling" @@ -206,8 +208,18 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f2c43f534ea4b0b049015d00269734195e6d3f0f6635cb692251aca6f9f8b3c" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.12.4", + "darling_macro 0.12.4", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", ] [[package]] @@ -224,17 +236,56 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "darling_core" +version = "0.20.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.11.1", + "syn 2.0.103", +] + [[package]] name = "darling_macro" version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29b5acf0dea37a7f66f7b25d2c5e93fd46f8f6968b1a5d7a3e02e97768afc95a" dependencies = [ - "darling_core", + "darling_core 0.12.4", "quote", "syn 1.0.109", ] +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.103", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "derive_builder" version = "0.10.2" @@ -250,7 +301,7 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66e616858f6187ed828df7c64a6d71720d83767a7f19740b2d1b6fe6327b36e5" dependencies = [ - "darling", + "darling 0.12.4", "proc-macro2", "quote", "syn 1.0.109", @@ -268,15 +319,15 @@ dependencies = [ [[package]] name = "derive_more" -version = "0.99.17" +version = "0.99.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" dependencies = [ "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", - "syn 1.0.109", + "syn 2.0.103", ] [[package]] @@ -285,7 +336,16 @@ version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" dependencies = [ - "derive_more-impl", + "derive_more-impl 1.0.0", +] + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl 2.0.1", ] [[package]] @@ -294,13 +354,37 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ - "convert_case 0.6.0", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", + "unicode-xid", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "convert_case 0.7.1", + "proc-macro2", + "quote", + "syn 2.0.103", "unicode-xid", ] +[[package]] +name = "derive_setters" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae5c625eda104c228c06ecaf988d1c60e542176bd7a490e60eeda3493244c0c9" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.103", +] + [[package]] name = "diff" version = "0.1.13" @@ -309,24 +393,24 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "either" -version = "1.10.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -335,7 +419,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0b9289d76691c7084d8830f1d0a29ddefbad768f8b5f276e012840bb0fca610" dependencies = [ - "derive_more 0.99.17", + "derive_more 0.99.20", "itertools 0.10.5", ] @@ -347,20 +431,27 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "getrandom" -version = "0.2.12" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", + "r-efi", "wasi", ] [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] name = "heck" @@ -376,14 +467,26 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" -version = "2.2.2" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.4", ] +[[package]] +name = "into-sorted" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01b0686852f0dc0a48a830e141633c8a411078dbe98c7a817d2f42ec4c2e936" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -395,30 +498,40 @@ dependencies = [ [[package]] name = "itertools" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "lock_api" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] [[package]] name = "maplit" @@ -428,9 +541,15 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "normalize-path" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5438dd2b2ff4c6df6e1ce22d825ed2fa93ee2922235cc45186991717f0a892d" [[package]] name = "ntapi" @@ -441,9 +560,40 @@ dependencies = [ "winapi", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "parallel-disk-usage" -version = "0.11.0" +version = "0.21.0" dependencies = [ "assert-cmp", "build-fs-tree", @@ -451,10 +601,14 @@ dependencies = [ "clap-utilities", 
"clap_complete", "command-extra", - "derive_more 1.0.0", + "dashmap", + "derive_more 2.0.1", + "derive_setters", "fmt-iter", - "itertools 0.13.0", + "into-sorted", + "itertools 0.14.0", "maplit", + "normalize-path", "pipe-trait", "pretty_assertions", "rand", @@ -469,6 +623,19 @@ dependencies = [ "zero-copy-pads", ] +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "pipe-trait" version = "0.4.0" @@ -477,9 +644,12 @@ checksum = "c1be1ec9e59f0360aefe84efa6f699198b685ab0d5718081e9f72aa2344289e2" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] [[package]] name = "pretty_assertions" @@ -493,38 +663,43 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.35" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rand" -version = "0.8.5" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" -version = "0.3.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", @@ -532,9 +707,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.4" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom", ] @@ -559,71 +734,86 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +dependencies = [ + "bitflags", +] + [[package]] name = "rounded-div" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464c8fb0a126d6a0326baf6abf1aa62c2da0d5780aa781a81451d64f543f5e2f" +checksum = "e162464649a7f0550e372a6f7b4d0cae6b74163a2e2e8b2ed6517909445b4b90" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = 
"cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.31" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.21" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", ] [[package]] name = 
"serde_json" -version = "1.0.132" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" dependencies = [ "itoa", "memchr", @@ -644,6 +834,12 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "smart-default" version = "0.7.1" @@ -652,7 +848,7 @@ checksum = "0eb01866308440fc64d6c44d9e86c5cc17adfe33c4d6eed55da9145044d0ffc1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", ] [[package]] @@ -663,9 +859,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" @@ -680,9 +876,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.86" +version = "2.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" dependencies = [ "proc-macro2", "quote", @@ -691,23 +887,23 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.32.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b5ae3f4f7d64646c46c4cae4e3f01d1c5d255c7406fdd7c7f999a94e488791" +checksum = "252800745060e7b9ffb7b2badbd8b31cfa4aa2e61af879d0a3bf2a317c20217d" dependencies = [ - "core-foundation-sys", "libc", "memchr", "ntapi", - 
"rayon", + "objc2-core-foundation", + "objc2-io-kit", "windows", ] [[package]] name = "terminal_size" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" +checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" dependencies = [ "rustix", "windows-sys 0.59.0", @@ -727,29 +923,29 @@ checksum = "f1ecd6317fa97541e6cfcdf79c1917ccb8f7a10ec9ece27aaadf5b2b6ccad3e8" [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", ] [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -759,9 +955,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-xid" @@ -777,15 
+973,18 @@ checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" [[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] [[package]] name = "winapi" @@ -811,64 +1010,104 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.57.0" +version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-link", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ "windows-core", - "windows-targets", ] [[package]] name = "windows-core" -version = "0.57.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", + "windows-link", "windows-result", - "windows-targets", + "windows-strings", +] + 
+[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", ] [[package]] name = "windows-implement" -version = "0.57.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", ] [[package]] name = "windows-interface" -version = "0.57.0" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.103", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core", + "windows-link", ] [[package]] name = "windows-result" -version = "0.1.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-targets", + "windows-link", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "windows-strings" +version = "0.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-targets", + "windows-link", ] [[package]] @@ -877,7 +1116,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", ] [[package]] @@ -886,14 +1134,39 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows-threading" 
+version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link", ] [[package]] @@ -902,48 +1175,105 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "yansi" version = "1.0.1" @@ -957,7 +1287,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5649a5dce1370c707880332f781f6566883736a41861a5749890f4671d5746b6" dependencies = [ 
"derive_builder", - "derive_more 0.99.17", + "derive_more 0.99.20", "fmt-iter", "unicode-width", ] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.103", +] diff --git a/Cargo.toml b/Cargo.toml index 8d0712d1..c0c8ea66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "parallel-disk-usage" description = "Highly parallelized, blazing fast directory tree analyzer" -version = "0.11.0" +version = "0.21.0" authors = ["khai96_ "] edition = "2021" readme = "README.md" @@ -49,27 +49,31 @@ cli = ["clap/derive", "clap_complete", "clap-utilities", "json"] cli-completions = ["cli"] [dependencies] -pipe-trait = "^0.4.0" -smart-default = "^0.7.1" -derive_more = { version = "^1.0.0", features = ["full"] } -rayon = "^1.10.0" -text-block-macros = "^0.2.0" -rounded-div = "^0.1.2" -fmt-iter = "^0.2.1" -itertools = "^0.13.0" -assert-cmp = "^0.2.1" -zero-copy-pads = "^0.2.0" -terminal_size = "^0.4.0" -clap = { version = "^4.3.8", optional = true } -clap_complete = { version = "^4.5.36", optional = true } -clap-utilities = { version = "^0.2.0", optional = true } -serde = { version = "^1.0.214", optional = true } -serde_json = { version = "^1.0.132", optional = true } -sysinfo = "0.32.0" +assert-cmp = "0.3.0" +clap = { version = "4.5.42", optional = true } +clap_complete = { version = "4.5.55", optional = true } +clap-utilities = { version = "0.2.0", optional = true } +dashmap = "6.1.0" +derive_more = { version = "2.0.1", features = ["full"] } +derive_setters = "0.1.8" +fmt-iter = "0.2.1" +into-sorted = "0.0.3" 
+itertools = "0.14.0" +pipe-trait = "0.4.0" +rayon = "1.10.0" +rounded-div = "0.1.2" +serde = { version = "1.0.219", optional = true } +serde_json = { version = "1.0.142", optional = true } +smart-default = "0.7.1" +sysinfo = "0.36.1" +terminal_size = "0.4.0" +text-block-macros = "0.2.0" +zero-copy-pads = "0.2.0" [dev-dependencies] -build-fs-tree = "^0.7.1" -command-extra = "^1.0.0" -maplit = "^1.0.2" -pretty_assertions = "^1.4.1" -rand = "^0.8.5" +build-fs-tree = "0.7.1" +command-extra = "1.0.0" +maplit = "1.0.2" +normalize-path = "0.2.1" +pretty_assertions = "1.4.1" +rand = "0.9.2" diff --git a/README.md b/README.md index 5a4ea036..891accc5 100644 --- a/README.md +++ b/README.md @@ -14,28 +14,29 @@ Highly parallelized, blazing fast directory tree analyzer. ## Benchmark -The benchmark was generated by [a GitHub Workflow](https://github.com/KSXGitHub/parallel-disk-usage/blob/0.5.2/.github/workflows/deploy.yaml#L476-L658) and uploaded to the release page. +The benchmark was generated by [a GitHub Workflow](https://github.com/KSXGitHub/parallel-disk-usage/blob/0.20.0/.github/workflows/deploy.yaml#L431-L601) and uploaded to the release page.
Programs -* `pdu` v0.8.1 -* [`dust`](https://github.com/bootandy/dust) v0.8.1 -* [`dua`](https://github.com/Byron/dua-cli) v2.19.2 +* `pdu` v0.20.0 +* [`dust`](https://github.com/bootandy/dust) v1.2.1 +* [`dua`](https://github.com/Byron/dua-cli) v2.30.1 * [`ncdu`](https://dev.yorhel.nl/ncdu) -* [`gdu`](https://github.com/dundee/gdu) v5.15.0 +* [`gdu`](https://github.com/dundee/gdu) v5.31.0 * `du`
- + +
benchmark results (lower is better)
-[_(See more)_](https://github.com/KSXGitHub/parallel-disk-usage-0.10.0-benchmarks/blob/master/tmp.benchmark-report.CHARTS.md) +[_(See more)_](https://github.com/KSXGitHub/parallel-disk-usage-0.20.0-benchmarks/blob/master/tmp.benchmark-report.CHARTS.md) ## Demo @@ -47,16 +48,18 @@ The benchmark was generated by [a GitHub Workflow](https://github.com/KSXGitHub/ ## Features -* Fast. +* Very fast. * Relative comparison of separate files. * Extensible via the library crate or JSON interface. -* Optional progress report. +* Unbiased regarding hardlinks: All hardlinks are treated as equally real. +* Optional hardlink detection and deduplication (would make `pdu` proportionally slower). +* Optional progress report (would make `pdu` slightly slower). * Customize tree depth. * Customize chart size. ## Limitations -* Ignorant of hard links: All hard links are counted as real files. +* Ignorant of reflinks (from COW filesystems such as BTRFS and ZFS). * Do not follow symbolic links. * Do not differentiate filesystem: Mounted folders are counted as normal folders. * The runtime is optimized at the expense of binary size. 
@@ -164,14 +167,6 @@ Follow the [installation instruction](https://github.com/KSXGitHub/pacman-repo#i sudo pacman -S parallel-disk-usage ``` --> -#### From [Arch Linux CN Repository](https://github.com/archlinuxcn/repo) - -Follow the [installation instruction](https://github.com/archlinuxcn/repo#usage) then run the following command: - -```sh -sudo pacman -S parallel-disk-usage -``` - ## Distributions [![Packaging Status](https://repology.org/badge/vertical-allrepos/parallel-disk-usage.svg)](https://repology.org/project/parallel-disk-usage/versions) diff --git a/ci/github-actions/benchmark/matrix.ts b/ci/github-actions/benchmark/matrix.ts index 8ec028e9..41059394 100644 --- a/ci/github-actions/benchmark/matrix.ts +++ b/ci/github-actions/benchmark/matrix.ts @@ -39,7 +39,7 @@ export function parseSelfBenchmarkCategory(category: SelfBenchmarkCategory) { } export const RELEASED_PDU_VERSIONS = [ - '0.10.0', + '0.20.0', ] as const export const ACCEPTABLE_PERFORMANCE_REGRESSION = 1.1 // 10% @@ -93,7 +93,7 @@ export const COMPETING_BENCHMARK_MATRIX: readonly CompetingBenchmarkCategory[] = id: 'apparent-size', pduCliArgs: ['--quantity=apparent-size'], competitors: [ - ['dust', '--apparent-size'], + ['dust', '--no-progress', '--apparent-size'], ['dua', '--count-hard-links', '--apparent-size'], ['ncdu', '-o', '/dev/stdout', '-0'], ['gdu', '--show-apparent-size', '--non-interactive', '--no-progress'], @@ -104,18 +104,29 @@ export const COMPETING_BENCHMARK_MATRIX: readonly CompetingBenchmarkCategory[] = id: 'block-size', pduCliArgs: ['--quantity=block-size'], competitors: [ - ['dust'], + ['dust', '--no-progress'], ['dua', '--count-hard-links'], ['ncdu', '-o', '/dev/stdout', '-0'], ['gdu', '--non-interactive', '--no-progress'], ['du', '--count-links'], ], }, + { + id: 'deduplicate-hardlinks', + pduCliArgs: ['--deduplicate-hardlinks'], + competitors: [ + ['dust', '--no-progress'], + ['dua'], + ['ncdu', '-o', '/dev/stdout', '-0'], + ['gdu', '--non-interactive', '--no-progress'], + 
['du'], + ], + }, { id: 'top-down', pduCliArgs: ['--top-down'], competitors: [ - ['dust', '--reverse'], + ['dust', '--no-progress', '--reverse'], ], }, { @@ -160,6 +171,7 @@ export const COMPETING_BENCHMARK_MATRIX: readonly CompetingBenchmarkCategory[] = id: 'progress', pduCliArgs: ['--progress'], competitors: [ + ['dust'], ['ncdu', '-o', '/dev/stdout', '-1'], ['gdu', '--non-interactive'], ], diff --git a/exports/completion.bash b/exports/completion.bash index 39c2935b..8b06a03b 100644 --- a/exports/completion.bash +++ b/exports/completion.bash @@ -1,12 +1,16 @@ _pdu() { local i cur prev opts cmd COMPREPLY=() - cur="${COMP_WORDS[COMP_CWORD]}" - prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ "${BASH_VERSINFO[0]}" -ge 4 ]]; then + cur="$2" + else + cur="${COMP_WORDS[COMP_CWORD]}" + fi + prev="$3" cmd="" opts="" - for i in ${COMP_WORDS[@]} + for i in "${COMP_WORDS[@]:0:COMP_CWORD}" do case "${cmd},${i}" in ",$1") @@ -19,7 +23,7 @@ _pdu() { case "${cmd}" in pdu) - opts="-h -V --json-input --json-output --bytes-format --top-down --align-right --quantity --max-depth --total-width --column-width --min-ratio --no-sort --silent-errors --progress --threads --help --version [FILES]..." + opts="-b -H -q -d -w -m -s -p -h -V --json-input --json-output --bytes-format --detect-links --dedupe-links --deduplicate-hardlinks --top-down --align-right --quantity --depth --max-depth --width --total-width --column-width --min-ratio --no-sort --no-errors --silent-errors --progress --threads --omit-json-shared-details --omit-json-shared-summary --help --version [FILES]..." 
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 @@ -29,18 +33,42 @@ _pdu() { COMPREPLY=($(compgen -W "plain metric binary" -- "${cur}")) return 0 ;; + -b) + COMPREPLY=($(compgen -W "plain metric binary" -- "${cur}")) + return 0 + ;; --quantity) COMPREPLY=($(compgen -W "apparent-size block-size block-count" -- "${cur}")) return 0 ;; + -q) + COMPREPLY=($(compgen -W "apparent-size block-size block-count" -- "${cur}")) + return 0 + ;; --max-depth) COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + --depth) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; + -d) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --total-width) COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + --width) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; + -w) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --column-width) COMPREPLY=($(compgen -f "${cur}")) return 0 @@ -49,6 +77,10 @@ _pdu() { COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + -m) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --threads) COMPREPLY=($(compgen -f "${cur}")) return 0 diff --git a/exports/completion.elv b/exports/completion.elv index 9655ea8a..d3cda52c 100644 --- a/exports/completion.elv +++ b/exports/completion.elv @@ -18,20 +18,36 @@ set edit:completion:arg-completer[pdu] = {|@words| } var completions = [ &'pdu'= { + cand -b 'How to display the numbers of bytes' cand --bytes-format 'How to display the numbers of bytes' + cand -q 'Aspect of the files/directories to be measured' cand --quantity 'Aspect of the files/directories to be measured' - cand --max-depth 'Maximum depth to display the data (must be greater than 0)' + cand -d 'Maximum depth to display the data. Could be either "inf" or a positive integer' + cand --max-depth 'Maximum depth to display the data. Could be either "inf" or a positive integer' + cand --depth 'Maximum depth to display the data. 
Could be either "inf" or a positive integer' + cand -w 'Width of the visualization' cand --total-width 'Width of the visualization' + cand --width 'Width of the visualization' cand --column-width 'Maximum widths of the tree column and width of the bar column' + cand -m 'Minimal size proportion required to appear' cand --min-ratio 'Minimal size proportion required to appear' - cand --threads 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a number' + cand --threads 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer' cand --json-input 'Read JSON data from stdin' cand --json-output 'Print JSON data instead of an ASCII chart' + cand -H 'Detect and subtract the sizes of hardlinks from their parent directory totals' + cand --deduplicate-hardlinks 'Detect and subtract the sizes of hardlinks from their parent directory totals' + cand --detect-links 'Detect and subtract the sizes of hardlinks from their parent directory totals' + cand --dedupe-links 'Detect and subtract the sizes of hardlinks from their parent directory totals' cand --top-down 'Print the tree top-down instead of bottom-up' cand --align-right 'Set the root of the bars to the right' - cand --no-sort 'Preserve order of entries' + cand --no-sort 'Do not sort the branches in the tree' + cand -s 'Prevent filesystem error messages from appearing in stderr' cand --silent-errors 'Prevent filesystem error messages from appearing in stderr' + cand --no-errors 'Prevent filesystem error messages from appearing in stderr' + cand -p 'Report progress being made at the expense of performance' cand --progress 'Report progress being made at the expense of performance' + cand --omit-json-shared-details 'Do not output `.shared.details` in the JSON output' + cand --omit-json-shared-summary 'Do not output `.shared.summary` in the JSON output' cand -h 'Print help (see more with ''--help'')' cand --help 'Print help (see more with ''--help'')' cand -V 
'Print version' diff --git a/exports/completion.fish b/exports/completion.fish index 9532ca3d..41cc6448 100644 --- a/exports/completion.fish +++ b/exports/completion.fish @@ -1,16 +1,23 @@ -complete -c pdu -l bytes-format -d 'How to display the numbers of bytes' -r -f -a "{plain\t'Display plain number of bytes without units',metric\t'Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on',binary\t'Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on'}" -complete -c pdu -l quantity -d 'Aspect of the files/directories to be measured' -r -f -a "{apparent-size\t'Measure apparent sizes',block-size\t'Measure block sizes (block-count * 512B)',block-count\t'Count numbers of blocks'}" -complete -c pdu -l max-depth -d 'Maximum depth to display the data (must be greater than 0)' -r -complete -c pdu -l total-width -d 'Width of the visualization' -r +complete -c pdu -s b -l bytes-format -d 'How to display the numbers of bytes' -r -f -a "plain\t'Display plain number of bytes without units' +metric\t'Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on' +binary\t'Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on'" +complete -c pdu -s q -l quantity -d 'Aspect of the files/directories to be measured' -r -f -a "apparent-size\t'Measure apparent sizes' +block-size\t'Measure block sizes (block-count * 512B)' +block-count\t'Count numbers of blocks'" +complete -c pdu -s d -l max-depth -l depth -d 'Maximum depth to display the data. Could be either "inf" or a positive integer' -r +complete -c pdu -s w -l total-width -l width -d 'Width of the visualization' -r complete -c pdu -l column-width -d 'Maximum widths of the tree column and width of the bar column' -r -complete -c pdu -l min-ratio -d 'Minimal size proportion required to appear' -r -complete -c pdu -l threads -d 'Set the maximum number of threads to spawn. 
Could be either "auto", "max", or a number' -r +complete -c pdu -s m -l min-ratio -d 'Minimal size proportion required to appear' -r +complete -c pdu -l threads -d 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer' -r complete -c pdu -l json-input -d 'Read JSON data from stdin' complete -c pdu -l json-output -d 'Print JSON data instead of an ASCII chart' +complete -c pdu -s H -l deduplicate-hardlinks -l detect-links -l dedupe-links -d 'Detect and subtract the sizes of hardlinks from their parent directory totals' complete -c pdu -l top-down -d 'Print the tree top-down instead of bottom-up' complete -c pdu -l align-right -d 'Set the root of the bars to the right' -complete -c pdu -l no-sort -d 'Preserve order of entries' -complete -c pdu -l silent-errors -d 'Prevent filesystem error messages from appearing in stderr' -complete -c pdu -l progress -d 'Report progress being made at the expense of performance' +complete -c pdu -l no-sort -d 'Do not sort the branches in the tree' +complete -c pdu -s s -l silent-errors -l no-errors -d 'Prevent filesystem error messages from appearing in stderr' +complete -c pdu -s p -l progress -d 'Report progress being made at the expense of performance' +complete -c pdu -l omit-json-shared-details -d 'Do not output `.shared.details` in the JSON output' +complete -c pdu -l omit-json-shared-summary -d 'Do not output `.shared.summary` in the JSON output' complete -c pdu -s h -l help -d 'Print help (see more with \'--help\')' complete -c pdu -s V -l version -d 'Print version' diff --git a/exports/completion.ps1 b/exports/completion.ps1 index ceb23c68..8814bf76 100644 --- a/exports/completion.ps1 +++ b/exports/completion.ps1 @@ -21,20 +21,36 @@ Register-ArgumentCompleter -Native -CommandName 'pdu' -ScriptBlock { $completions = @(switch ($command) { 'pdu' { + [CompletionResult]::new('-b', '-b', [CompletionResultType]::ParameterName, 'How to display the numbers of bytes') 
[CompletionResult]::new('--bytes-format', '--bytes-format', [CompletionResultType]::ParameterName, 'How to display the numbers of bytes') + [CompletionResult]::new('-q', '-q', [CompletionResultType]::ParameterName, 'Aspect of the files/directories to be measured') [CompletionResult]::new('--quantity', '--quantity', [CompletionResultType]::ParameterName, 'Aspect of the files/directories to be measured') - [CompletionResult]::new('--max-depth', '--max-depth', [CompletionResultType]::ParameterName, 'Maximum depth to display the data (must be greater than 0)') + [CompletionResult]::new('-d', '-d', [CompletionResultType]::ParameterName, 'Maximum depth to display the data. Could be either "inf" or a positive integer') + [CompletionResult]::new('--max-depth', '--max-depth', [CompletionResultType]::ParameterName, 'Maximum depth to display the data. Could be either "inf" or a positive integer') + [CompletionResult]::new('--depth', '--depth', [CompletionResultType]::ParameterName, 'Maximum depth to display the data. Could be either "inf" or a positive integer') + [CompletionResult]::new('-w', '-w', [CompletionResultType]::ParameterName, 'Width of the visualization') [CompletionResult]::new('--total-width', '--total-width', [CompletionResultType]::ParameterName, 'Width of the visualization') + [CompletionResult]::new('--width', '--width', [CompletionResultType]::ParameterName, 'Width of the visualization') [CompletionResult]::new('--column-width', '--column-width', [CompletionResultType]::ParameterName, 'Maximum widths of the tree column and width of the bar column') + [CompletionResult]::new('-m', '-m', [CompletionResultType]::ParameterName, 'Minimal size proportion required to appear') [CompletionResult]::new('--min-ratio', '--min-ratio', [CompletionResultType]::ParameterName, 'Minimal size proportion required to appear') - [CompletionResult]::new('--threads', '--threads', [CompletionResultType]::ParameterName, 'Set the maximum number of threads to spawn. 
Could be either "auto", "max", or a number') + [CompletionResult]::new('--threads', '--threads', [CompletionResultType]::ParameterName, 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer') [CompletionResult]::new('--json-input', '--json-input', [CompletionResultType]::ParameterName, 'Read JSON data from stdin') [CompletionResult]::new('--json-output', '--json-output', [CompletionResultType]::ParameterName, 'Print JSON data instead of an ASCII chart') + [CompletionResult]::new('-H', '-H ', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') + [CompletionResult]::new('--deduplicate-hardlinks', '--deduplicate-hardlinks', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') + [CompletionResult]::new('--detect-links', '--detect-links', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') + [CompletionResult]::new('--dedupe-links', '--dedupe-links', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') [CompletionResult]::new('--top-down', '--top-down', [CompletionResultType]::ParameterName, 'Print the tree top-down instead of bottom-up') [CompletionResult]::new('--align-right', '--align-right', [CompletionResultType]::ParameterName, 'Set the root of the bars to the right') - [CompletionResult]::new('--no-sort', '--no-sort', [CompletionResultType]::ParameterName, 'Preserve order of entries') + [CompletionResult]::new('--no-sort', '--no-sort', [CompletionResultType]::ParameterName, 'Do not sort the branches in the tree') + [CompletionResult]::new('-s', '-s', [CompletionResultType]::ParameterName, 'Prevent filesystem error messages from appearing in stderr') [CompletionResult]::new('--silent-errors', '--silent-errors', [CompletionResultType]::ParameterName, 
'Prevent filesystem error messages from appearing in stderr') + [CompletionResult]::new('--no-errors', '--no-errors', [CompletionResultType]::ParameterName, 'Prevent filesystem error messages from appearing in stderr') + [CompletionResult]::new('-p', '-p', [CompletionResultType]::ParameterName, 'Report progress being made at the expense of performance') [CompletionResult]::new('--progress', '--progress', [CompletionResultType]::ParameterName, 'Report progress being made at the expense of performance') + [CompletionResult]::new('--omit-json-shared-details', '--omit-json-shared-details', [CompletionResultType]::ParameterName, 'Do not output `.shared.details` in the JSON output') + [CompletionResult]::new('--omit-json-shared-summary', '--omit-json-shared-summary', [CompletionResultType]::ParameterName, 'Do not output `.shared.summary` in the JSON output') [CompletionResult]::new('-h', '-h', [CompletionResultType]::ParameterName, 'Print help (see more with ''--help'')') [CompletionResult]::new('--help', '--help', [CompletionResultType]::ParameterName, 'Print help (see more with ''--help'')') [CompletionResult]::new('-V', '-V ', [CompletionResultType]::ParameterName, 'Print version') diff --git a/exports/completion.zsh b/exports/completion.zsh index cd296575..dec1cef4 100644 --- a/exports/completion.zsh +++ b/exports/completion.zsh @@ -15,24 +15,44 @@ _pdu() { local context curcontext="$curcontext" state line _arguments "${_arguments_options[@]}" : \ +'-b+[How to display the numbers of bytes]:BYTES_FORMAT:((plain\:"Display plain number of bytes without units" +metric\:"Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on" +binary\:"Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on"))' \ '--bytes-format=[How to display the numbers of bytes]:BYTES_FORMAT:((plain\:"Display plain number of bytes without units" metric\:"Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on" binary\:"Use binary scale, i.e. 
1K = 1024B, 1M = 1024K, and so on"))' \ +'-q+[Aspect of the files/directories to be measured]:QUANTITY:((apparent-size\:"Measure apparent sizes" +block-size\:"Measure block sizes (block-count * 512B)" +block-count\:"Count numbers of blocks"))' \ '--quantity=[Aspect of the files/directories to be measured]:QUANTITY:((apparent-size\:"Measure apparent sizes" block-size\:"Measure block sizes (block-count * 512B)" block-count\:"Count numbers of blocks"))' \ -'--max-depth=[Maximum depth to display the data (must be greater than 0)]:MAX_DEPTH:_default' \ +'-d+[Maximum depth to display the data. Could be either "inf" or a positive integer]:MAX_DEPTH:_default' \ +'--max-depth=[Maximum depth to display the data. Could be either "inf" or a positive integer]:MAX_DEPTH:_default' \ +'--depth=[Maximum depth to display the data. Could be either "inf" or a positive integer]:MAX_DEPTH:_default' \ +'(--column-width)-w+[Width of the visualization]:TOTAL_WIDTH:_default' \ '(--column-width)--total-width=[Width of the visualization]:TOTAL_WIDTH:_default' \ +'(--column-width)--width=[Width of the visualization]:TOTAL_WIDTH:_default' \ '*--column-width=[Maximum widths of the tree column and width of the bar column]:TREE_WIDTH:_default:TREE_WIDTH:_default' \ +'-m+[Minimal size proportion required to appear]:MIN_RATIO:_default' \ '--min-ratio=[Minimal size proportion required to appear]:MIN_RATIO:_default' \ -'--threads=[Set the maximum number of threads to spawn. Could be either "auto", "max", or a number]:THREADS:_default' \ -'(--quantity)--json-input[Read JSON data from stdin]' \ +'--threads=[Set the maximum number of threads to spawn. 
Could be either "auto", "max", or a positive integer]:THREADS:_default' \ +'(-q --quantity -H --deduplicate-hardlinks)--json-input[Read JSON data from stdin]' \ '--json-output[Print JSON data instead of an ASCII chart]' \ +'-H[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ +'--deduplicate-hardlinks[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ +'--detect-links[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ +'--dedupe-links[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ '--top-down[Print the tree top-down instead of bottom-up]' \ '--align-right[Set the root of the bars to the right]' \ -'--no-sort[Preserve order of entries]' \ +'--no-sort[Do not sort the branches in the tree]' \ +'-s[Prevent filesystem error messages from appearing in stderr]' \ '--silent-errors[Prevent filesystem error messages from appearing in stderr]' \ +'--no-errors[Prevent filesystem error messages from appearing in stderr]' \ +'-p[Report progress being made at the expense of performance]' \ '--progress[Report progress being made at the expense of performance]' \ +'--omit-json-shared-details[Do not output \`.shared.details\` in the JSON output]' \ +'--omit-json-shared-summary[Do not output \`.shared.summary\` in the JSON output]' \ '-h[Print help (see more with '\''--help'\'')]' \ '--help[Print help (see more with '\''--help'\'')]' \ '-V[Print version]' \ diff --git a/rust-toolchain b/rust-toolchain index 71fae54f..59be5921 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.82.0 +1.88.0 diff --git a/src/app.rs b/src/app.rs index 1282dd53..7fad5b51 100644 --- a/src/app.rs +++ b/src/app.rs @@ -4,24 +4,24 @@ pub use sub::Sub; use crate::{ args::{Args, Quantity, Threads}, - get_size::GetApparentSize, - json_data::{JsonData, UnitAndTree}, + bytes_format::BytesFormat, + get_size::{GetApparentSize, GetSize}, + hardlink, + json_data::{JsonData, 
JsonDataBody, JsonShared, JsonTree}, reporter::{ErrorOnlyReporter, ErrorReport, ProgressAndErrorReporter, ProgressReport}, runtime_error::RuntimeError, - size::{self, Bytes}, - visualizer::{BarAlignment, Direction, Visualizer}, + size, + visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, }; use clap::Parser; use hdd::any_path_is_in_hdd; use pipe_trait::Pipe; use std::{io::stdin, time::Duration}; +use sub::JsonOutputParam; use sysinfo::Disks; #[cfg(unix)] -use crate::{ - get_size::{GetBlockCount, GetBlockSize}, - size::Blocks, -}; +use crate::get_size::{GetBlockCount, GetBlockSize}; /// The main application. pub struct App { @@ -38,7 +38,7 @@ impl App { } /// Run the application. - pub fn run(self) -> Result<(), RuntimeError> { + pub fn run(mut self) -> Result<(), RuntimeError> { // DYNAMIC DISPATCH POLICY: // // Errors rarely occur, therefore, using dynamic dispatch to report errors have an acceptable @@ -57,55 +57,95 @@ impl App { bytes_format, top_down, align_right, - max_depth, .. } = self.args; let direction = Direction::from_top_down(top_down); let bar_alignment = BarAlignment::from_align_right(align_right); - let unit_and_tree = stdin() + let body = stdin() .pipe(serde_json::from_reader::<_, JsonData>) .map_err(RuntimeError::DeserializationFailure)? - .unit_and_tree; + .body; - macro_rules! 
visualize { - ($reflection:expr, $bytes_format: expr) => {{ - let data_tree = $reflection + trait VisualizeJsonTree: size::Size + Into + Send { + fn visualize_json_tree( + tree: JsonTree, + bytes_format: Self::DisplayFormat, + column_width_distribution: ColumnWidthDistribution, + direction: Direction, + bar_alignment: BarAlignment, + ) -> Result { + let JsonTree { tree, shared } = tree; + + let data_tree = tree .par_try_into_tree() .map_err(|error| RuntimeError::InvalidInputReflection(error.to_string()))?; - Visualizer { + let visualizer = Visualizer { data_tree: &data_tree, - bytes_format: $bytes_format, + bytes_format, + column_width_distribution, + direction, + bar_alignment, + }; + + let JsonShared { details, summary } = shared; + let summary = summary.or_else(|| details.map(|details| details.summarize())); + + let visualization = if let Some(summary) = summary { + let summary = summary.display(bytes_format); + // visualizer already ends with "\n" + format!("{visualizer}{summary}\n") + } else { + visualizer.to_string() + }; + + Ok(visualization) + } + } + + impl + Send> VisualizeJsonTree for Size {} + + macro_rules! visualize { + ($tree:expr, $bytes_format:expr) => { + VisualizeJsonTree::visualize_json_tree( + $tree, + $bytes_format, column_width_distribution, direction, bar_alignment, - max_depth, - } - .to_string() - }}; + ) + }; } - let visualization = match unit_and_tree { - UnitAndTree::Bytes(reflection) => visualize!(reflection, bytes_format), - UnitAndTree::Blocks(reflection) => visualize!(reflection, ()), - }; + let visualization = match body { + JsonDataBody::Bytes(tree) => visualize!(tree, bytes_format), + JsonDataBody::Blocks(tree) => visualize!(tree, ()), + }?; print!("{visualization}"); // it already ends with "\n", println! isn't needed here. 
return Ok(()); } + #[cfg(not(unix))] + if self.args.deduplicate_hardlinks { + return crate::runtime_error::UnsupportedFeature::DeduplicateHardlink + .pipe(RuntimeError::UnsupportedFeature) + .pipe(Err); + } + let threads = match self.args.threads { Threads::Auto => { let disks = Disks::new_with_refreshed_list(); if any_path_is_in_hdd::(&self.args.files, &disks) { eprintln!("warning: HDD detected, the thread limit will be set to 1"); + eprintln!("hint: You can pass --threads=max to disable this behavior"); Some(1) } else { None } } Threads::Max => None, - Threads::Fixed(threads) => Some(threads), + Threads::Fixed(threads) => Some(threads.get()), }; if let Some(threads) = threads { @@ -115,47 +155,134 @@ impl App { .unwrap_or_else(|_| eprintln!("warning: Failed to set thread limit to {threads}")); } + if cfg!(unix) && self.args.deduplicate_hardlinks && self.args.files.len() > 1 { + // Hardlinks deduplication doesn't work properly if there are more than 1 paths pointing to + // the same tree or if a path points to a subtree of another path. Therefore, we must find + // and remove such overlapping paths before they cause problems. 
+ use overlapping_arguments::{remove_overlapping_paths, RealApi}; + remove_overlapping_paths::(&mut self.args.files); + } + let report_error = if self.args.silent_errors { ErrorReport::SILENT } else { ErrorReport::TEXT }; - #[allow(clippy::extra_unused_type_parameters)] - fn error_only_reporter( - report_error: fn(ErrorReport), - ) -> ErrorOnlyReporter { - ErrorOnlyReporter::new(report_error) + trait GetSizeUtils: GetSize { + const INSTANCE: Self; + const QUANTITY: Quantity; + fn formatter(bytes_format: BytesFormat) -> ::DisplayFormat; + } + + impl GetSizeUtils for GetApparentSize { + const INSTANCE: Self = GetApparentSize; + const QUANTITY: Quantity = Quantity::ApparentSize; + #[inline] + fn formatter(bytes_format: BytesFormat) -> BytesFormat { + bytes_format + } } - fn progress_and_error_reporter( - report_error: fn(ErrorReport), - ) -> ProgressAndErrorReporter + #[cfg(unix)] + impl GetSizeUtils for GetBlockSize { + const INSTANCE: Self = GetBlockSize; + const QUANTITY: Quantity = Quantity::BlockSize; + #[inline] + fn formatter(bytes_format: BytesFormat) -> BytesFormat { + bytes_format + } + } + + #[cfg(unix)] + impl GetSizeUtils for GetBlockCount { + const INSTANCE: Self = GetBlockCount; + const QUANTITY: Quantity = Quantity::BlockCount; + #[inline] + fn formatter(_: BytesFormat) {} + } + + trait CreateReporter: GetSizeUtils { + type Reporter; + fn create_reporter(report_error: fn(ErrorReport)) -> Self::Reporter; + } + + impl CreateReporter for SizeGetter where - Size: size::Size + Into + Send + Sync, - ProgressReport: Default + 'static, - u64: Into, + Self: GetSizeUtils, { - ProgressAndErrorReporter::new( - ProgressReport::TEXT, - Duration::from_millis(100), - report_error, - ) + type Reporter = ErrorOnlyReporter; + #[inline] + fn create_reporter(report_error: fn(ErrorReport)) -> Self::Reporter { + ErrorOnlyReporter::new(report_error) + } + } + + impl CreateReporter for SizeGetter + where + Self: GetSizeUtils, + Self::Size: Into + Send + Sync, + ProgressReport: 
Default + 'static, + u64: Into, + { + type Reporter = ProgressAndErrorReporter; + #[inline] + fn create_reporter(report_error: fn(ErrorReport)) -> Self::Reporter { + ProgressAndErrorReporter::new( + ProgressReport::TEXT, + Duration::from_millis(100), + report_error, + ) + } + } + + trait CreateHardlinksHandler: + CreateReporter + { + type HardlinksHandler: hardlink::RecordHardlinks + + sub::HardlinkSubroutines; + fn create_hardlinks_handler() -> Self::HardlinksHandler; + } + + impl CreateHardlinksHandler + for SizeGetter + where + Self: CreateReporter, + Self::Size: Send + Sync, + { + type HardlinksHandler = hardlink::HardlinkIgnorant; + #[inline] + fn create_hardlinks_handler() -> Self::HardlinksHandler { + hardlink::HardlinkIgnorant + } + } + + #[cfg(unix)] + impl CreateHardlinksHandler + for SizeGetter + where + Self: CreateReporter, + Self::Size: Send + Sync + 'static, + Self::Reporter: crate::reporter::Reporter, + { + type HardlinksHandler = hardlink::HardlinkAware; + #[inline] + fn create_hardlinks_handler() -> Self::HardlinksHandler { + hardlink::HardlinkAware::new() + } } macro_rules! run { ($( $(#[$variant_attrs:meta])* - { - $size:ty => $format:expr; - $quantity:ident => $size_getter:ident; - $progress:literal => $create_reporter:ident; - } + $size_getter:ident, $progress:literal, $hardlinks:ident; )*) => { match self.args {$( $(#[$variant_attrs])* Args { - quantity: Quantity::$quantity, + quantity: <$size_getter as GetSizeUtils>::QUANTITY, progress: $progress, + #[cfg(unix)] deduplicate_hardlinks: $hardlinks, + #[cfg(not(unix))] deduplicate_hardlinks: _, files, json_output, bytes_format, @@ -164,15 +291,18 @@ impl App { max_depth, min_ratio, no_sort, + omit_json_shared_details, + omit_json_shared_summary, .. 
} => Sub { direction: Direction::from_top_down(top_down), bar_alignment: BarAlignment::from_align_right(align_right), - size_getter: $size_getter, - reporter: $create_reporter::<$size>(report_error), - bytes_format: $format(bytes_format), + size_getter: <$size_getter as GetSizeUtils>::INSTANCE, + hardlinks_handler: <$size_getter as CreateHardlinksHandler<{ cfg!(unix) && $hardlinks }, $progress>>::create_hardlinks_handler(), + reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error), + bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format), files, - json_output, + json_output: JsonOutputParam::from_cli_flags(json_output, omit_json_shared_details, omit_json_shared_summary), column_width_distribution, max_depth, min_ratio, @@ -183,48 +313,22 @@ impl App { } run! { - { - Bytes => |x| x; - ApparentSize => GetApparentSize; - false => error_only_reporter; - } - - { - Bytes => |x| x; - ApparentSize => GetApparentSize; - true => progress_and_error_reporter; - } - - #[cfg(unix)] - { - Bytes => |x| x; - BlockSize => GetBlockSize; - false => error_only_reporter; - } - - #[cfg(unix)] - { - Bytes => |x| x; - BlockSize => GetBlockSize; - true => progress_and_error_reporter; - } - - #[cfg(unix)] - { - Blocks => |_| (); - BlockCount => GetBlockCount; - false => error_only_reporter; - } - - #[cfg(unix)] - { - Blocks => |_| (); - BlockCount => GetBlockCount; - true => progress_and_error_reporter; - } + GetApparentSize, false, false; + GetApparentSize, true, false; + #[cfg(unix)] GetBlockSize, false, false; + #[cfg(unix)] GetBlockSize, true, false; + #[cfg(unix)] GetBlockCount, false, false; + #[cfg(unix)] GetBlockCount, true, false; + #[cfg(unix)] GetApparentSize, false, true; + #[cfg(unix)] GetApparentSize, true, true; + #[cfg(unix)] GetBlockSize, false, true; + #[cfg(unix)] GetBlockSize, true, true; + #[cfg(unix)] GetBlockCount, false, true; + #[cfg(unix)] GetBlockCount, true, true; } } } mod hdd; mod mount_point; +mod 
overlapping_arguments; diff --git a/src/app/hdd.rs b/src/app/hdd.rs index 58f6a844..4d29cf49 100644 --- a/src/app/hdd.rs +++ b/src/app/hdd.rs @@ -19,14 +19,17 @@ pub struct RealApi; impl Api for RealApi { type Disk = Disk; + #[inline] fn get_disk_kind(disk: &Self::Disk) -> DiskKind { disk.kind() } + #[inline] fn get_mount_point(disk: &Self::Disk) -> &Path { disk.mount_point() } + #[inline] fn canonicalize(path: &Path) -> io::Result { canonicalize(path) } @@ -52,101 +55,4 @@ fn path_is_in_hdd(path: &Path, disks: &[Api::Disk]) -> bool { } #[cfg(test)] -mod tests { - use super::{any_path_is_in_hdd, path_is_in_hdd, Api}; - use pipe_trait::Pipe; - use pretty_assertions::assert_eq; - use std::path::{Path, PathBuf}; - use sysinfo::DiskKind; - - /// Fake disk for [`Api`]. - struct Disk { - kind: DiskKind, - mount_point: &'static str, - } - - impl Disk { - fn new(kind: DiskKind, mount_point: &'static str) -> Self { - Self { kind, mount_point } - } - } - - /// Mocked implementation of [`Api`] for testing purposes. 
- struct MockedApi; - impl Api for MockedApi { - type Disk = Disk; - - fn get_disk_kind(disk: &Self::Disk) -> DiskKind { - disk.kind - } - - fn get_mount_point(disk: &Self::Disk) -> &Path { - Path::new(disk.mount_point) - } - - fn canonicalize(path: &Path) -> std::io::Result { - path.to_path_buf().pipe(Ok) - } - } - - #[test] - fn test_any_path_in_hdd() { - let disks = &[ - Disk::new(DiskKind::SSD, "/"), - Disk::new(DiskKind::HDD, "/home"), - Disk::new(DiskKind::HDD, "/mnt/hdd-data"), - Disk::new(DiskKind::SSD, "/mnt/ssd-data"), - Disk::new(DiskKind::HDD, "/mnt/hdd-data/repo"), - ]; - - let cases: &[(&[&str], bool)] = &[ - (&[], false), - (&["/"], false), - (&["/home"], true), - (&["/mnt"], false), - (&["/mnt/ssd-data"], false), - (&["/mnt/hdd-data"], true), - (&["/mnt/hdd-data/repo"], true), - (&["/etc/fstab"], false), - (&["/home/usr/file"], true), - (&["/home/data/repo/test"], true), - (&["/usr/share"], false), - (&["/mnt/ssd-data/test"], false), - (&["/etc/fstab", "/home/user/file"], true), - (&["/mnt/hdd-data/file", "/mnt/hdd-data/repo/test"], true), - (&["/usr/share", "/mnt/ssd-data/test"], false), - ( - &["/etc/fstab", "/home/user", "/mnt/hdd-data", "/usr/share"], - true, - ), - ]; - - for (paths, in_hdd) in cases { - let paths: Vec<_> = paths.iter().map(PathBuf::from).collect(); - println!("CASE: {paths:?} → {in_hdd:?}"); - assert_eq!(any_path_is_in_hdd::(&paths, disks), *in_hdd); - } - } - - #[test] - fn test_path_in_hdd() { - let disks = &[ - Disk::new(DiskKind::SSD, "/"), - Disk::new(DiskKind::HDD, "/home"), - Disk::new(DiskKind::HDD, "/mnt/hdd-data"), - Disk::new(DiskKind::SSD, "/mnt/ssd-data"), - Disk::new(DiskKind::HDD, "/mnt/hdd-data/repo"), - ]; - - for (path, in_hdd) in [ - ("/etc/fstab", false), - ("/mnt/", false), - ("/mnt/hdd-data/repo/test", true), - ("/mnt/hdd-data/test/test", true), - ("/mnt/ssd-data/test/test", false), - ] { - println!("CASE: {path} → {in_hdd:?}"); - assert_eq!(path_is_in_hdd::(Path::new(path), disks), in_hdd); - } - } -} 
+mod test; diff --git a/src/app/hdd/test.rs b/src/app/hdd/test.rs new file mode 100644 index 00000000..88d70a4d --- /dev/null +++ b/src/app/hdd/test.rs @@ -0,0 +1,96 @@ +use super::{any_path_is_in_hdd, path_is_in_hdd, Api}; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::path::{Path, PathBuf}; +use sysinfo::DiskKind; + +/// Fake disk for [`Api`]. +struct Disk { + kind: DiskKind, + mount_point: &'static str, +} + +impl Disk { + fn new(kind: DiskKind, mount_point: &'static str) -> Self { + Self { kind, mount_point } + } +} + +/// Mocked implementation of [`Api`] for testing purposes. +struct MockedApi; +impl Api for MockedApi { + type Disk = Disk; + + fn get_disk_kind(disk: &Self::Disk) -> DiskKind { + disk.kind + } + + fn get_mount_point(disk: &Self::Disk) -> &Path { + Path::new(disk.mount_point) + } + + fn canonicalize(path: &Path) -> std::io::Result { + path.to_path_buf().pipe(Ok) + } +} + +#[test] +fn test_any_path_in_hdd() { + let disks = &[ + Disk::new(DiskKind::SSD, "/"), + Disk::new(DiskKind::HDD, "/home"), + Disk::new(DiskKind::HDD, "/mnt/hdd-data"), + Disk::new(DiskKind::SSD, "/mnt/ssd-data"), + Disk::new(DiskKind::HDD, "/mnt/hdd-data/repo"), + ]; + + let cases: &[(&[&str], bool)] = &[ + (&[], false), + (&["/"], false), + (&["/home"], true), + (&["/mnt"], false), + (&["/mnt/ssd-data"], false), + (&["/mnt/hdd-data"], true), + (&["/mnt/hdd-data/repo"], true), + (&["/etc/fstab"], false), + (&["/home/usr/file"], true), + (&["/home/data/repo/test"], true), + (&["/usr/share"], false), + (&["/mnt/ssd-data/test"], false), + (&["/etc/fstab", "/home/user/file"], true), + (&["/mnt/hdd-data/file", "/mnt/hdd-data/repo/test"], true), + (&["/usr/share", "/mnt/ssd-data/test"], false), + ( + &["/etc/fstab", "/home/user", "/mnt/hdd-data", "/usr/share"], + true, + ), + ]; + + for (paths, in_hdd) in cases { + let paths: Vec<_> = paths.iter().map(PathBuf::from).collect(); + println!("CASE: {paths:?} → {in_hdd:?}"); + assert_eq!(any_path_is_in_hdd::(&paths, 
disks), *in_hdd); + } +} + +#[test] +fn test_path_in_hdd() { + let disks = &[ + Disk::new(DiskKind::SSD, "/"), + Disk::new(DiskKind::HDD, "/home"), + Disk::new(DiskKind::HDD, "/mnt/hdd-data"), + Disk::new(DiskKind::SSD, "/mnt/ssd-data"), + Disk::new(DiskKind::HDD, "/mnt/hdd-data/repo"), + ]; + + for (path, in_hdd) in [ + ("/etc/fstab", false), + ("/mnt/", false), + ("/mnt/hdd-data/repo/test", true), + ("/mnt/hdd-data/test/test", true), + ("/mnt/ssd-data/test/test", false), + ] { + println!("CASE: {path} → {in_hdd:?}"); + assert_eq!(path_is_in_hdd::(Path::new(path), disks), in_hdd); + } +} diff --git a/src/app/mount_point.rs b/src/app/mount_point.rs index 6691af43..c0dbcea7 100644 --- a/src/app/mount_point.rs +++ b/src/app/mount_point.rs @@ -12,7 +12,7 @@ pub fn find_mount_point<'a>( } #[cfg(test)] -mod tests { +mod test { use super::find_mount_point; use pretty_assertions::assert_eq; use std::path::Path; diff --git a/src/app/overlapping_arguments.rs b/src/app/overlapping_arguments.rs new file mode 100644 index 00000000..f8ff31f6 --- /dev/null +++ b/src/app/overlapping_arguments.rs @@ -0,0 +1,126 @@ +use pipe_trait::Pipe; +use std::{ + collections::HashSet, + fs::{canonicalize, symlink_metadata}, + io, + mem::take, + path::PathBuf, +}; + +/// Mockable APIs to interact with the system. +pub trait Api { + type Argument; + type RealPath: Eq; + type RealPathError; + fn canonicalize(path: &Self::Argument) -> Result; + fn is_real_dir(path: &Self::Argument) -> bool; + fn starts_with(a: &Self::RealPath, b: &Self::RealPath) -> bool; +} + +/// Implementation of [`Api`] that interacts with the real system. 
+pub struct RealApi; +impl Api for RealApi { + type Argument = PathBuf; + type RealPath = PathBuf; + type RealPathError = io::Error; + + #[inline] + fn canonicalize(path: &Self::Argument) -> Result { + canonicalize(path) + } + + #[inline] + fn is_real_dir(path: &Self::Argument) -> bool { + path.pipe(symlink_metadata) + .is_ok_and(|metadata| !metadata.is_symlink() && metadata.is_dir()) + } + + #[inline] + fn starts_with(a: &Self::RealPath, b: &Self::RealPath) -> bool { + a.starts_with(b) + } +} + +/// Hardlinks deduplication doesn't work properly if there are more than 1 paths pointing to +/// the same tree or if a path points to a subtree of another path. Therefore, we must find +/// and remove such overlapping paths before they cause problems. +pub fn remove_overlapping_paths(arguments: &mut Vec) { + let to_remove = find_overlapping_paths_to_remove::(arguments); + remove_items_from_vec_by_indices(arguments, &to_remove); +} + +/// Find overlapping paths in a list of arguments to remove and return their indices. +/// +/// Prefer keeping the containing tree over the subtree (returning the index of the subtree). +/// +/// Prefer keeping the first instance of the path over the later instances (returning the indices of +/// the later instances). 
+pub fn find_overlapping_paths_to_remove( + arguments: &[Api::Argument], +) -> HashSet { + let real_paths: Vec<_> = arguments + .iter() + .map(|path| { + Api::is_real_dir(path) + .then(|| Api::canonicalize(path)) + .and_then(Result::ok) + }) + .collect(); + assert_eq!(arguments.len(), real_paths.len()); + + let mut to_remove = HashSet::new(); + for left_index in 0..arguments.len() { + for right_index in (left_index + 1)..arguments.len() { + if let (Some(left), Some(right)) = (&real_paths[left_index], &real_paths[right_index]) { + // both paths are the same, remove the second one + if left == right { + to_remove.insert(right_index); + continue; + } + + // `left` starts with `right` means `left` is subtree of `right`, remove `left` + if Api::starts_with(left, right) { + to_remove.insert(left_index); + continue; + } + + // `right` starts with `left` means `right` is subtree of `left`, remove `right` + if Api::starts_with(right, left) { + to_remove.insert(right_index); + continue; + } + } + } + } + to_remove +} + +/// Remove elements from a vector by indices. +pub fn remove_items_from_vec_by_indices(vec: &mut Vec, indices: &HashSet) { + // Optimization: If there is no element to remove then there is nothing to do. + if indices.is_empty() { + return; + } + + // Optimization: If there is only 1 element to remove, shifting elements would be cheaper than reallocate a whole array. + if indices.len() == 1 { + let index = *indices.iter().next().unwrap(); + vec.remove(index); + return; + } + + // Default: If there are more than 1 elements to remove, just copy the whole array without them. 
+ *vec = vec + .pipe(take) + .into_iter() + .enumerate() + .filter(|(index, _)| !indices.contains(index)) + .map(|(_, item)| item) + .collect(); +} + +#[cfg(test)] +mod test_remove_items_from_vec_by_indices; +#[cfg(unix)] +#[cfg(test)] +mod test_remove_overlapping_paths; diff --git a/src/app/overlapping_arguments/test_remove_items_from_vec_by_indices.rs b/src/app/overlapping_arguments/test_remove_items_from_vec_by_indices.rs new file mode 100644 index 00000000..b2aae115 --- /dev/null +++ b/src/app/overlapping_arguments/test_remove_items_from_vec_by_indices.rs @@ -0,0 +1,31 @@ +use super::remove_items_from_vec_by_indices; +use maplit::hashset; +use pretty_assertions::assert_eq; +use std::collections::HashSet; + +#[test] +fn remove_nothing() { + let original = vec![31, 54, 22, 81, 67, 45, 52, 20, 85, 66, 27, 84]; + let mut modified = original.clone(); + remove_items_from_vec_by_indices(&mut modified, &HashSet::new()); + assert_eq!(modified, original); +} + +#[test] +fn remove_single() { + let original = vec![31, 54, 22, 81, 67, 45, 52, 20, 85, 66, 27, 84]; + let mut modified = original.clone(); + remove_items_from_vec_by_indices(&mut modified, &hashset! { 3 }); + assert_eq!(&modified[..3], &original[..3]); + assert_eq!(&modified[3..], &original[4..]); +} + +#[test] +fn remove_multiple() { + let original = vec![31, 54, 22, 81, 67, 45, 52, 20, 85, 66, 27, 84]; + let mut modified = original.clone(); + remove_items_from_vec_by_indices(&mut modified, &hashset! 
{ 3, 4, 5, 7 }); + assert_eq!(&modified[..3], &original[..3]); + assert_eq!(&modified[3..4], &original[6..7]); + assert_eq!(&modified[4..], &original[8..]); +} diff --git a/src/app/overlapping_arguments/test_remove_overlapping_paths.rs b/src/app/overlapping_arguments/test_remove_overlapping_paths.rs new file mode 100644 index 00000000..7218fdb5 --- /dev/null +++ b/src/app/overlapping_arguments/test_remove_overlapping_paths.rs @@ -0,0 +1,256 @@ +use super::{remove_overlapping_paths, Api}; +use normalize_path::NormalizePath; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::{convert::Infallible, path::PathBuf}; + +const MOCKED_CURRENT_DIR: &str = "/home/user/current-dir"; + +const MOCKED_SYMLINKS: &[(&str, &str)] = &[ + ("/home/user/current-dir/link-to-current-dir", "."), + ("/home/user/current-dir/link-to-parent-dir", ".."), + ("/home/user/current-dir/link-to-root", "/"), + ("/home/user/current-dir/link-to-bin", "/usr/bin"), + ("/home/user/current-dir/link-to-foo", "foo"), + ("/home/user/current-dir/link-to-bar", "bar"), + ("/home/user/current-dir/link-to-012", "0/1/2"), +]; + +fn resolve_symlink(absolute_path: PathBuf) -> PathBuf { + assert!( + absolute_path.is_absolute(), + "absolute_path should be absolute: {absolute_path:?}", + ); + for &(link_path, link_target) in MOCKED_SYMLINKS { + let link_path = PathBuf::from(link_path); + assert!( + link_path.is_absolute(), + "link_path should be absolute: {link_path:?}", + ); + let Some(parent) = link_path.parent() else { + panic!("Cannot get parent of {link_path:?}"); + }; + if let Ok(suffix) = absolute_path.strip_prefix(&link_path) { + return parent + .join(link_target) + .join(suffix) + .normalize() + .pipe(resolve_symlink); + } + } + absolute_path +} + +/// Mocked implementation of [`Api`] for testing purposes. 
+struct MockedApi; +impl Api for MockedApi { + type Argument = &'static str; + type RealPath = PathBuf; + type RealPathError = Infallible; + + fn canonicalize(path: &Self::Argument) -> Result { + MOCKED_CURRENT_DIR + .pipe(PathBuf::from) + .join(path) + .normalize() + .pipe(resolve_symlink) + .pipe(Ok) + } + + fn is_real_dir(path: &Self::Argument) -> bool { + let path = MOCKED_CURRENT_DIR.pipe(PathBuf::from).join(path); + MOCKED_SYMLINKS + .iter() + .all(|(link, _)| PathBuf::from(link).normalize() != path) + } + + fn starts_with(a: &Self::RealPath, b: &Self::RealPath) -> bool { + a.starts_with(b) + } +} + +#[test] +fn remove_nothing() { + let original = vec!["foo", "bar", "abc/def", "0/1/2"]; + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = original; + assert_eq!(actual, expected); +} + +#[test] +fn remove_duplicated_arguments() { + let original = dbg!(vec![ + "foo", + "bar", + "abc/def", + "foo", + "0/1/2", + "./bar", + "./abc/./def", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["foo", "bar", "abc/def", "0/1/2"]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "foo", + "./bar", + "bar", + "./abc/./def", + "abc/def", + "foo", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["foo", "./bar", "./abc/./def", "0/1/2"]; + assert_eq!(actual, expected); +} + +#[test] +fn remove_overlapping_sub_paths() { + let original = vec![ + "foo/child", + "foo", + "bar", + "abc/def", + "0/1/2", + "bar/child", + "0/1/2/3", + ]; + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["foo", "bar", "abc/def", "0/1/2"]; + assert_eq!(actual, expected); +} + +#[test] +fn remove_all_except_current_dir() { + let original = dbg!(vec!["foo", "bar", ".", "abc/def", "0/1/2"]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + 
let expected = vec!["."]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "foo", + "bar", + ".", + "abc/def", + "0/1/2", + MOCKED_CURRENT_DIR, + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["."]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "foo", + "bar", + MOCKED_CURRENT_DIR, + ".", + "abc/def", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec![MOCKED_CURRENT_DIR]; + assert_eq!(actual, expected); +} + +#[test] +fn remove_all_except_parent_dir() { + let original = dbg!(vec!["foo", "bar", "..", "abc/def", ".", "0/1/2"]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec![".."]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "foo", + "/home/user", + "bar", + "..", + "abc/def", + ".", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["/home/user"]; + assert_eq!(actual, expected); +} + +#[test] +fn remove_overlapping_real_paths() { + let original = dbg!(vec![ + "foo", + "bar", + "abc/def", + "link-to-foo/child", + "link-to-bar/a/b/c", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["foo", "bar", "abc/def", "0/1/2"]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "link-to-foo/child", + "link-to-bar/a/b/c", + "foo", + "bar", + "abc/def", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["foo", "bar", "abc/def", "0/1/2"]; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "link-to-current-dir/foo", + "foo", + "bar", + "abc/def", + "link-to-current-dir/bar", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = vec!["link-to-current-dir/foo", "bar", 
"abc/def", "0/1/2"]; + assert_eq!(actual, expected); +} + +#[test] +fn do_not_remove_symlinks() { + let original = dbg!(vec![ + "foo", + "bar", + "abc/def", + "link-to-foo", + "link-to-bar", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = original; + assert_eq!(actual, expected); + + let original = dbg!(vec![ + "foo/child", + "bar", + "abc/def", + "link-to-foo", + "link-to-bar", + "0/1/2", + ]); + let mut actual = original.clone(); + remove_overlapping_paths::(&mut actual); + let expected = original; + assert_eq!(actual, expected); +} diff --git a/src/app/sub.rs b/src/app/sub.rs index 8c97cd32..3500a5f3 100644 --- a/src/app/sub.rs +++ b/src/app/sub.rs @@ -1,9 +1,10 @@ use crate::{ - args::Fraction, - data_tree::{DataTree, DataTreeReflection}, + args::{Depth, Fraction}, + data_tree::DataTree, fs_tree_builder::FsTreeBuilder, get_size::GetSize, - json_data::{BinaryVersion, JsonData, SchemaVersion, UnitAndTree}, + hardlink::{DeduplicateSharedSize, HardlinkIgnorant, RecordHardlinks}, + json_data::{BinaryVersion, JsonData, JsonDataBody, JsonShared, JsonTree, SchemaVersion}, os_string_display::OsStringDisplay, reporter::ParallelReporter, runtime_error::RuntimeError, @@ -11,21 +12,23 @@ use crate::{ status_board::GLOBAL_STATUS_BOARD, visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, }; +use pipe_trait::Pipe; use serde::Serialize; -use std::{io::stdout, iter::once, num::NonZeroUsize, path::PathBuf}; +use std::{io::stdout, iter::once, path::PathBuf}; /// The sub program of the main application. -pub struct Sub +pub struct Sub where Report: ParallelReporter + Sync, Size: size::Size + Into + Serialize + Send + Sync, SizeGetter: GetSize + Copy + Sync, - DataTreeReflection: Into, + HardlinksHandler: RecordHardlinks + HardlinkSubroutines + Sync, + JsonTree: Into, { /// List of files and/or directories. pub files: Vec, /// Print JSON data instead of an ASCII chart. 
- pub json_output: bool, + pub json_output: Option, /// Format to be used to [`display`](size::Size::display) the sizes returned by [`size_getter`](Self::size_getter). pub bytes_format: Size::DisplayFormat, /// The direction of the visualization. @@ -35,9 +38,11 @@ where /// Distribution and number of characters/blocks can be placed in a line. pub column_width_distribution: ColumnWidthDistribution, /// Maximum number of levels that should be visualized. - pub max_depth: NonZeroUsize, + pub max_depth: Depth, /// [Get the size](GetSize) of files/directories. pub size_getter: SizeGetter, + /// Handle to detect, record, and deduplicate hardlinks. + pub hardlinks_handler: HardlinksHandler, /// Reports measurement progress. pub reporter: Report, /// Minimal size proportion required to appear. @@ -46,12 +51,13 @@ where pub no_sort: bool, } -impl Sub +impl Sub where Size: size::Size + Into + Serialize + Send + Sync, Report: ParallelReporter + Sync, SizeGetter: GetSize + Copy + Sync, - DataTreeReflection: Into, + HardlinksHandler: RecordHardlinks + HardlinkSubroutines + Sync, + JsonTree: Into, { /// Run the sub program. 
pub fn run(self) -> Result<(), RuntimeError> { @@ -64,11 +70,14 @@ where column_width_distribution, max_depth, size_getter, + hardlinks_handler, reporter, min_ratio, no_sort, } = self; + let max_depth = max_depth.get(); + let mut iter = files .into_iter() .map(|root| -> DataTree { @@ -76,6 +85,8 @@ where reporter: &reporter, root, size_getter, + hardlinks_recorder: &hardlinks_handler, + max_depth, } .into() }); @@ -85,22 +96,25 @@ where } else { return Sub { files: vec![".".into()], + hardlinks_handler, reporter, ..self } .run(); }; - // ExactSizeIterator::is_empty is unstable - let data_tree = if iter.len() == 0 { + let only_one_arg = iter.len() == 0; // ExactSizeIterator::is_empty is unstable + let data_tree = if only_one_arg { data_tree } else { let children: Vec<_> = once(data_tree).chain(iter).collect(); - DataTree::dir( - OsStringDisplay::os_string_from("(total)"), - Size::default(), - children, - ) + + // This name is for hardlinks deduplication to work correctly as empty string is considered to be the start of any path. + // It would be changed into "(total)" later. 
+ let fake_root_name = OsStringDisplay::os_string_from(""); + + DataTree::dir(fake_root_name, Size::default(), children) + .into_par_retained(|_, depth| depth + 1 < max_depth) }; if reporter.destroy().is_err() { @@ -108,7 +122,7 @@ where } let min_ratio: f32 = min_ratio.into(); - let data_tree = { + let (data_tree, deduplication_record) = { let mut data_tree = data_tree; if min_ratio > 0.0 { data_tree.par_cull_insignificant_data(min_ratio); @@ -116,24 +130,61 @@ where if !no_sort { data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse()); } - data_tree + let deduplication_record = hardlinks_handler.deduplicate(&mut data_tree); + if !only_one_arg { + assert_eq!(data_tree.name().as_os_str().to_str(), Some("")); + *data_tree.name_mut() = OsStringDisplay::os_string_from("(total)"); + } + (data_tree, deduplication_record) }; GLOBAL_STATUS_BOARD.clear_line(0); - if json_output { - let unit_and_tree: UnitAndTree = data_tree + if let Some(json_output) = json_output { + let JsonOutputParam { + shared_details, + shared_summary, + } = json_output; + let tree = data_tree .into_reflection() // I really want to use std::mem::transmute here but can't. .par_convert_names_to_utf8() // TODO: allow non-UTF8 somehow. - .expect("convert all names from raw string to UTF-8") - .into(); + .expect("convert all names from raw string to UTF-8"); + + let deduplication_result = if !shared_details && !shared_summary { + Ok(JsonShared::default()) + } else { + // `try` expression would be extremely useful right now but it sadly requires nightly + || -> Result<_, RuntimeError> { + let mut shared = deduplication_record + .map_err(HardlinksHandler::convert_error)? + .pipe(HardlinksHandler::json_report)? 
+ .unwrap_or_default(); + if !shared_details { + shared.details = None; + } + if !shared_summary { + shared.summary = None; + } + Ok(shared) + }() + }; + + // errors caused by failing deduplication shouldn't prevent the JSON data from being printed + let (shared, deduplication_result) = match deduplication_result { + Ok(shared) => (shared, Ok(())), + Err(error) => (JsonShared::default(), Err(error)), + }; + + let json_tree = JsonTree { tree, shared }; let json_data = JsonData { schema_version: SchemaVersion, binary_version: Some(BinaryVersion::current()), - unit_and_tree, + body: json_tree.into(), }; + return serde_json::to_writer(stdout(), &json_data) - .map_err(RuntimeError::SerializationFailure); + .map_err(RuntimeError::SerializationFailure) + .or(deduplication_result); } let visualizer = Visualizer { @@ -142,10 +193,74 @@ where direction, bar_alignment, column_width_distribution, - max_depth, }; print!("{visualizer}"); // visualizer already ends with "\n", println! isn't needed here. + + let deduplication_record = deduplication_record.map_err(HardlinksHandler::convert_error)?; + HardlinksHandler::print_report(deduplication_record, bytes_format)?; + + Ok(()) + } +} + +/// Value to pass to [`Sub::json_output`] to decide how much details should be +/// put in the output JSON object. +#[derive(Debug, Clone, Copy)] +pub struct JsonOutputParam { + /// Whether to include `.shared.details` in the JSON output. + pub shared_details: bool, + /// Whether to include `.shared.summary` in the JSON output. + pub shared_summary: bool, +} + +impl JsonOutputParam { + /// Infer from the CLI flags. + pub(super) fn from_cli_flags( + output_json: bool, + omit_shared_details: bool, + omit_shared_summary: bool, + ) -> Option { + output_json.then_some(JsonOutputParam { + shared_details: !omit_shared_details, + shared_summary: !omit_shared_summary, + }) + } +} + +/// Subroutines used by [`Sub`] to deduplicate sizes of detected hardlinks and report about it. 
+pub trait HardlinkSubroutines: DeduplicateSharedSize { + /// Convert the error to runtime error. + fn convert_error(error: Self::Error) -> RuntimeError; + /// Handle the report. + fn print_report( + report: Self::Report, + bytes_format: Size::DisplayFormat, + ) -> Result<(), RuntimeError>; + /// Create a JSON serializable object from the report. + fn json_report(report: Self::Report) -> Result>, RuntimeError>; +} + +impl HardlinkSubroutines for HardlinkIgnorant +where + DataTree: Send, + Size: size::Size + Sync, +{ + #[inline] + fn convert_error(error: Self::Error) -> RuntimeError { + match error {} + } + + #[inline] + fn print_report((): Self::Report, _: Size::DisplayFormat) -> Result<(), RuntimeError> { Ok(()) } + + #[inline] + fn json_report((): Self::Report) -> Result>, RuntimeError> { + Ok(None) + } } + +#[cfg(unix)] +mod unix_ext; diff --git a/src/app/sub/unix_ext.rs b/src/app/sub/unix_ext.rs new file mode 100644 index 00000000..12fd8bf8 --- /dev/null +++ b/src/app/sub/unix_ext.rs @@ -0,0 +1,33 @@ +use super::HardlinkSubroutines; +use crate::{ + data_tree::DataTree, hardlink::HardlinkAware, json_data::JsonShared, + os_string_display::OsStringDisplay, runtime_error::RuntimeError, size, +}; +use pipe_trait::Pipe; + +impl HardlinkSubroutines for HardlinkAware +where + DataTree: Send, + Size: size::Size + Sync, +{ + fn convert_error(error: Self::Error) -> RuntimeError { + match error {} + } + + fn print_report( + report: Self::Report, + bytes_format: Size::DisplayFormat, + ) -> Result<(), RuntimeError> { + let summary = report.summarize(); + if summary.inodes > 0 { + print!("{}", summary.display(bytes_format)); // the summary already ends with "\n", println! isn't needed here. 
+ } + Ok(()) + } + + fn json_report(report: Self::Report) -> Result>, RuntimeError> { + let summary = report.summarize().pipe(Some); + let details = report.into_reflection().pipe(Some); + Ok(Some(JsonShared { details, summary })) + } +} diff --git a/src/args.rs b/src/args.rs index 840e9dfa..db6698c8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,24 +1,30 @@ +pub mod depth; pub mod fraction; pub mod quantity; pub mod threads; +pub use depth::Depth; pub use fraction::Fraction; pub use quantity::Quantity; pub use threads::Threads; use crate::{bytes_format::BytesFormat, visualizer::ColumnWidthDistribution}; use clap::{ColorChoice, Parser}; -use std::{num::NonZeroUsize, path::PathBuf}; +use derive_setters::Setters; +use smart_default::SmartDefault; +use std::path::PathBuf; use terminal_size::{terminal_size, Width}; use text_block_macros::text_block; /// The CLI arguments. -#[derive(Debug, Clone, Parser)] +#[derive(Debug, SmartDefault, Setters, Clone, Parser)] #[clap( name = "pdu", version, + about = "Summarize disk usage of the set of files, recursively for directories.", + long_about = text_block! { "Summarize disk usage of the set of files, recursively for directories." "" @@ -27,21 +33,22 @@ use text_block_macros::text_block; }, after_help = text_block! { - "EXAMPLES:" + "Examples:" " $ pdu" " $ pdu path/to/file/or/directory" " $ pdu file.txt dir/" " $ pdu --quantity=apparent-size" + " $ pdu --deduplicate-hardlinks" " $ pdu --bytes-format=plain" " $ pdu --bytes-format=binary" " $ pdu --min-ratio=0" " $ pdu --min-ratio=0.05" - " $ pdu --min-ratio=0 --json-output | jq" - " $ pdu --min-ratio=0 < disk-usage.json" + " $ pdu --min-ratio=0 --max-depth=inf --json-output | jq" + " $ pdu --json-input < disk-usage.json" }, after_long_help = text_block! 
{ - "EXAMPLES:" + "Examples:" " Show disk usage chart of current working directory" " $ pdu" "" @@ -54,6 +61,9 @@ use text_block_macros::text_block; " Show chart in apparent sizes instead of block sizes" " $ pdu --quantity=apparent-size" "" + " Detect and subtract the sizes of hardlinks from their parent nodes" + " $ pdu --deduplicate-hardlinks" + "" " Show sizes in plain numbers instead of metric units" " $ pdu --bytes-format=plain" "" @@ -67,20 +77,25 @@ use text_block_macros::text_block; " $ pdu --min-ratio=0.05" "" " Show disk usage data as JSON instead of chart" - " $ pdu --min-ratio=0 --json-output | jq" + " $ pdu --min-ratio=0 --max-depth=inf --json-output | jq" "" " Visualize existing JSON representation of disk usage data" - " $ pdu --min-ratio=0 < disk-usage.json" + " $ pdu --json-input < disk-usage.json" }, color = ColorChoice::Never, )] +#[setters(prefix = "with_")] +#[non_exhaustive] pub struct Args { /// List of files and/or directories. pub files: Vec, /// Read JSON data from stdin. - #[clap(long, conflicts_with = "quantity")] + #[clap( + long, + conflicts_with_all = ["quantity", "deduplicate_hardlinks"] + )] pub json_input: bool, /// Print JSON data instead of an ASCII chart. @@ -88,9 +103,15 @@ pub struct Args { pub json_output: bool, /// How to display the numbers of bytes. - #[clap(long, value_enum, default_value_t = BytesFormat::MetricUnits)] + #[clap(long, short, value_enum, default_value_t = BytesFormat::MetricUnits)] + #[default(BytesFormat::MetricUnits)] pub bytes_format: BytesFormat, + /// Detect and subtract the sizes of hardlinks from their parent directory totals. + #[clap(long, short = 'H', visible_aliases = ["detect-links", "dedupe-links"])] + #[cfg_attr(not(unix), clap(hide = true))] + pub deduplicate_hardlinks: bool, + /// Print the tree top-down instead of bottom-up. #[clap(long)] pub top_down: bool, @@ -100,15 +121,22 @@ pub struct Args { pub align_right: bool, /// Aspect of the files/directories to be measured. 
- #[clap(long, value_enum, default_value_t = Quantity::DEFAULT)] + #[clap(long, short, value_enum, default_value_t = Quantity::DEFAULT)] + #[default(Quantity::DEFAULT)] pub quantity: Quantity, - /// Maximum depth to display the data (must be greater than 0). - #[clap(long, default_value = "10")] - pub max_depth: NonZeroUsize, + /// Maximum depth to display the data. Could be either "inf" or a positive integer. + #[clap(long, short = 'd', default_value = "10", visible_alias = "depth")] + #[default(_code = "10.try_into().unwrap()")] + pub max_depth: Depth, /// Width of the visualization. - #[clap(long, conflicts_with = "column_width")] + #[clap( + long, + short = 'w', + conflicts_with = "column_width", + visible_alias = "width" + )] pub total_width: Option, /// Maximum widths of the tree column and width of the bar column. @@ -116,24 +144,32 @@ pub struct Args { pub column_width: Option>, /// Minimal size proportion required to appear. - #[clap(long, default_value = "0.01")] + #[clap(long, short, default_value = "0.01")] pub min_ratio: Fraction, - /// Preserve order of entries. + /// Do not sort the branches in the tree. #[clap(long)] pub no_sort: bool, /// Prevent filesystem error messages from appearing in stderr. - #[clap(long)] + #[clap(long, short, visible_alias = "no-errors")] pub silent_errors: bool, /// Report progress being made at the expense of performance. - #[clap(long)] + #[clap(long, short)] pub progress: bool, - /// Set the maximum number of threads to spawn. Could be either "auto", "max", or a number. + /// Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer. #[clap(long, default_value_t = Threads::Auto)] pub threads: Threads, + + /// Do not output `.shared.details` in the JSON output. + #[clap(long, requires = "json_output", requires = "deduplicate_hardlinks")] + pub omit_json_shared_details: bool, + + /// Do not output `.shared.summary` in the JSON output. 
+ #[clap(long, requires = "json_output", requires = "deduplicate_hardlinks")] + pub omit_json_shared_summary: bool, } impl Args { diff --git a/src/args/depth.rs b/src/args/depth.rs new file mode 100644 index 00000000..d4d5a4b1 --- /dev/null +++ b/src/args/depth.rs @@ -0,0 +1,53 @@ +use derive_more::{Display, Error}; +use std::{ + num::{NonZeroU64, ParseIntError, TryFromIntError}, + str::FromStr, +}; + +const INFINITE: &str = "inf"; + +/// Maximum depth of the tree. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Display)] +pub enum Depth { + #[display("{INFINITE}")] + Infinite, + Finite(NonZeroU64), +} + +impl Depth { + /// Convert depth into something comparable. + pub(crate) fn get(self) -> u64 { + match self { + Depth::Infinite => u64::MAX, + Depth::Finite(value) => value.get(), + } + } +} + +/// Error that occurs when parsing a string as [`Depth`]. +#[derive(Debug, Display, Clone, PartialEq, Eq, Error)] +#[non_exhaustive] +pub enum FromStrError { + #[display("Value is neither {INFINITE:?} nor a positive integer: {_0}")] + InvalidSyntax(ParseIntError), +} + +impl FromStr for Depth { + type Err = FromStrError; + fn from_str(text: &str) -> Result { + let text = text.trim(); + if text == INFINITE { + return Ok(Depth::Infinite); + } + text.parse() + .map_err(FromStrError::InvalidSyntax) + .map(Depth::Finite) + } +} + +impl TryFrom for Depth { + type Error = TryFromIntError; + fn try_from(value: u64) -> Result { + value.try_into().map(Depth::Finite) + } +} diff --git a/src/args/fraction.rs b/src/args/fraction.rs index 0e651008..4145b74f 100644 --- a/src/args/fraction.rs +++ b/src/args/fraction.rs @@ -41,7 +41,7 @@ impl TryFrom for Fraction { } } -/// Error that occurs when converting a string into an instance of [`Fraction`]. +/// Error that occurs when parsing a string as [`Fraction`]. 
#[derive(Debug, Display, Clone, PartialEq, Eq, Error)] pub enum FromStrError { ParseFloatError(ParseFloatError), diff --git a/src/args/quantity.rs b/src/args/quantity.rs index 549f45e0..a51225d7 100644 --- a/src/args/quantity.rs +++ b/src/args/quantity.rs @@ -6,12 +6,15 @@ use clap::ValueEnum; #[cfg_attr(feature = "cli", derive(ValueEnum))] pub enum Quantity { /// Measure apparent sizes. + #[cfg_attr(feature = "cli", clap(alias = "len"))] ApparentSize, /// Measure block sizes (block-count * 512B). #[cfg(unix)] + #[cfg_attr(feature = "cli", clap(alias = "blksize"))] BlockSize, /// Count numbers of blocks. #[cfg(unix)] + #[cfg_attr(feature = "cli", clap(alias = "blocks"))] BlockCount, } diff --git a/src/args/threads.rs b/src/args/threads.rs index dbf0c768..44dc44aa 100644 --- a/src/args/threads.rs +++ b/src/args/threads.rs @@ -1,5 +1,8 @@ use derive_more::{Display, Error}; -use std::{num::ParseIntError, str::FromStr}; +use std::{ + num::{NonZeroUsize, ParseIntError}, + str::FromStr, +}; const AUTO: &str = "auto"; const MAX: &str = "max"; @@ -12,11 +15,12 @@ pub enum Threads { Auto, #[display("{MAX}")] Max, - Fixed(usize), + Fixed(NonZeroUsize), } -/// Error that occurs when converting a string to an instance of [`Threads`]. +/// Error that occurs when parsing a string to as [`Threads`]. 
#[derive(Debug, Display, Clone, PartialEq, Eq, Error)] +#[non_exhaustive] pub enum FromStrError { #[display("Value is neither {AUTO:?}, {MAX:?}, nor a number: {_0}")] InvalidSyntax(ParseIntError), @@ -24,15 +28,14 @@ pub enum FromStrError { impl FromStr for Threads { type Err = FromStrError; - fn from_str(value: &str) -> Result { - let value = value.trim(); - match value { + fn from_str(text: &str) -> Result { + let text = text.trim(); + match text { AUTO => return Ok(Threads::Auto), MAX => return Ok(Threads::Max), _ => {} }; - value - .parse() + text.parse() .map_err(FromStrError::InvalidSyntax) .map(Threads::Fixed) } diff --git a/src/bytes_format.rs b/src/bytes_format.rs index c3104d56..a3601d93 100644 --- a/src/bytes_format.rs +++ b/src/bytes_format.rs @@ -19,7 +19,11 @@ pub enum BytesFormat { /// Display the value as-is. #[cfg_attr( feature = "cli", - clap(name = "plain", help = "Display plain number of bytes without units") + clap( + name = "plain", + help = "Display plain number of bytes without units", + alias = "1" + ) )] PlainNumber, /// Display the value with a unit suffix in [metric scale](formatter::METRIC). @@ -28,6 +32,7 @@ pub enum BytesFormat { clap( name = "metric", help = "Use metric scale, i.e. 1K = 1000B, 1M = 1000K, and so on", + alias = "1000" ) )] MetricUnits, @@ -37,6 +42,7 @@ pub enum BytesFormat { clap( name = "binary", help = "Use binary scale, i.e. 1K = 1024B, 1M = 1024K, and so on", + alias = "1024" ) )] BinaryUnits, diff --git a/src/bytes_format/formatter.rs b/src/bytes_format/formatter.rs index 9605b70b..11a1ae7a 100644 --- a/src/bytes_format/formatter.rs +++ b/src/bytes_format/formatter.rs @@ -9,16 +9,19 @@ pub struct Formatter { impl Formatter { /// Create a new formatter. + #[inline] pub const fn new(scale_base: u64) -> Self { Formatter { scale_base } } /// Multiplication factor. + #[inline] pub const fn scale_base(self) -> u64 { self.scale_base } /// Get scale in number. 
+ #[inline] pub const fn scale(self, exp: u32) -> u64 { self.scale_base().pow(exp) } diff --git a/src/data_tree.rs b/src/data_tree.rs index f2be4107..6c68ff6e 100644 --- a/src/data_tree.rs +++ b/src/data_tree.rs @@ -32,3 +32,6 @@ mod constructors; mod getters; mod retain; mod sort; + +#[cfg(unix)] // for now, it is only available on unix +mod hardlink; diff --git a/src/data_tree/constructors.rs b/src/data_tree/constructors.rs index 383773cb..3b95367a 100644 --- a/src/data_tree/constructors.rs +++ b/src/data_tree/constructors.rs @@ -3,6 +3,7 @@ use crate::size; impl DataTree { /// Create a tree representation of a directory. + #[inline] pub fn dir(name: Name, inode_size: Size, children: Vec) -> Self { let size = inode_size + children.iter().map(DataTree::size).sum(); DataTree { @@ -13,15 +14,17 @@ impl DataTree { } /// Create a tree representation of a file. + #[inline] pub fn file(name: Name, size: Size) -> Self { DataTree { name, size, - children: Vec::with_capacity(0), + children: Vec::new(), } } /// Create a directory constructor of fixed inode size. + #[inline] pub fn fixed_size_dir_constructor(inode_size: Size) -> impl Fn(Name, Vec) -> Self where Size: Copy, diff --git a/src/data_tree/getters.rs b/src/data_tree/getters.rs index 92ffeba9..cfedad10 100644 --- a/src/data_tree/getters.rs +++ b/src/data_tree/getters.rs @@ -3,21 +3,25 @@ use crate::size; impl DataTree { /// Extract name + #[inline] pub fn name(&self) -> &Name { &self.name } /// Get mutable reference to name. 
+ #[inline] pub fn name_mut(&mut self) -> &mut Name { &mut self.name } /// Extract total disk usage + #[inline] pub fn size(&self) -> Size { self.size } /// Extract children + #[inline] pub fn children(&self) -> &Vec { &self.children } diff --git a/src/data_tree/hardlink.rs b/src/data_tree/hardlink.rs new file mode 100644 index 00000000..8cbf93b8 --- /dev/null +++ b/src/data_tree/hardlink.rs @@ -0,0 +1,45 @@ +use super::DataTree; +use crate::size; +use assert_cmp::debug_assert_op; +use rayon::prelude::*; +use std::{ffi::OsStr, path::Path}; + +impl DataTree +where + Self: Send, + Name: AsRef, + Size: size::Size + Sync, +{ + /// Reduce the size of the directories that have hardlinks. + #[cfg_attr(not(unix), expect(unused))] + pub(crate) fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) { + if hardlink_info.is_empty() { + return; + } + + let prefix = self.name().as_ref(); + let sub_hardlink_info: Vec<(Size, Vec<&Path>)> = hardlink_info + .iter() + .filter(|(_, link_paths)| link_paths.len() > 1) + .map(|(size, link_paths)| { + let link_suffices: Vec<&Path> = link_paths + .iter() + .map(|link_path| link_path.strip_prefix(prefix)) + .filter_map(Result::ok) + .collect(); + (*size, link_suffices) + }) + .filter(|(_, link_paths)| link_paths.len() > 1) + .collect(); + + for (size, link_suffices) in &sub_hardlink_info { + let number_of_links = link_suffices.len(); + debug_assert_op!(number_of_links > 1); + self.size -= *size * (number_of_links - 1); + } + + self.children + .par_iter_mut() + .for_each(|child| child.par_deduplicate_hardlinks(&sub_hardlink_info)) + } +} diff --git a/src/data_tree/reflection.rs b/src/data_tree/reflection.rs index ffe3163b..8288b4b2 100644 --- a/src/data_tree/reflection.rs +++ b/src/data_tree/reflection.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; /// Intermediate format used for construction and inspection of /// [`DataTree`](crate::data_tree::DataTree)'s internal content. 
/// -/// Unlike `Tree` where the fields are all private, the fields of `TreeReflection` +/// Unlike `DataTree` where the fields are all private, the fields of `Reflection` /// are all public to allow construction in tests. /// /// **Conversion between `DataTree` and `Reflection`:** @@ -44,16 +44,14 @@ pub struct Reflection { #[derive(Debug, Clone, PartialEq, Eq)] #[non_exhaustive] pub enum ConversionError { - /// When a node's size is less than the sum of its children. + /// When a node's size is less than one of its children. ExcessiveChildren { /// Path from root to the node. path: VecDeque, /// Size hold by the node. size: Size, - /// Children of the node. - children: Vec>, - /// Sum of size hold by children of the node. - children_sum: Size, + /// The child whose size was greater than that of the node. + child: Reflection, }, } @@ -65,19 +63,16 @@ where fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), Error> { use ConversionError::*; match self { - ExcessiveChildren { - path, - size, - children_sum, - .. - } => { + ExcessiveChildren { path, size, child } => { let path = path .iter() .map(PathBuf::from) .fold(PathBuf::new(), |acc, x| acc.join(x)); write!( formatter, - "ExcessiveChildren: {path:?}: {size:?} is less than {children_sum:?}", + "ExcessiveChildren: {path:?} ({size:?}) is less than a child named {child_name:?} ({child_size:?})", + child_name = child.name, + child_size = child.size, ) } } diff --git a/src/data_tree/reflection/convert.rs b/src/data_tree/reflection/convert.rs index 97ebae91..ff360800 100644 --- a/src/data_tree/reflection/convert.rs +++ b/src/data_tree/reflection/convert.rs @@ -19,6 +19,7 @@ impl From> for Reflection DataTree { /// Create reflection. 
+ #[inline] pub fn into_reflection(self) -> Reflection { self.into() } diff --git a/src/data_tree/reflection/par_methods.rs b/src/data_tree/reflection/par_methods.rs index 1cbdb6e9..786c1dad 100644 --- a/src/data_tree/reflection/par_methods.rs +++ b/src/data_tree/reflection/par_methods.rs @@ -15,14 +15,14 @@ where size, children, } = self; - let children_sum = children.iter().map(|child| child.size).sum(); - if size < children_sum { - return Err(ConversionError::ExcessiveChildren { - path: once(name).collect(), - size, - children, - children_sum, - }); + let excess_child = children + .iter() + .enumerate() + .find(|(_, child)| child.size > size); + if let Some((index, _)) = excess_child { + let path = once(name).collect(); + let child = keep_one(children, index).expect("excess child"); + return Err(ConversionError::ExcessiveChildren { path, size, child }); } let children: Result, _> = children .into_par_iter() @@ -33,16 +33,10 @@ where Err(ConversionError::ExcessiveChildren { mut path, size, - children, - children_sum, + child, }) => { path.push_front(name); - return Err(ConversionError::ExcessiveChildren { - path, - size, - children, - children_sum, - }); + return Err(ConversionError::ExcessiveChildren { path, size, child }); } }; Ok(DataTree { @@ -94,3 +88,10 @@ where }) } } + +/// Extract an item at `index` if it exists. Then drop all remaining items. +#[inline] +fn keep_one(vec: Vec, index: usize) -> Option { + // Worry not about performance, for `std::vec::IntoIter::advanced_by` is overridden with O(1) algorithm! + vec.into_iter().nth(index) +} diff --git a/src/data_tree/retain.rs b/src/data_tree/retain.rs index f4564afe..2de05127 100644 --- a/src/data_tree/retain.rs +++ b/src/data_tree/retain.rs @@ -7,16 +7,30 @@ where Self: Send, Size: size::Size, { - /// Recursively cull all descendants that do not satisfy given `predicate`, in parallel. 
- pub fn par_retain(&mut self, predicate: impl Fn(&Self) -> bool + Copy + Sync) { - self.children.retain(predicate); + /// Internal function to be used by [`Self::par_retain`]. + fn par_retain_with_depth( + &mut self, + current_depth: u64, + predicate: impl Fn(&Self, u64) -> bool + Copy + Sync, + ) { + self.children + .retain(|child| predicate(child, current_depth)); + let next_depth = current_depth + 1; self.children .par_iter_mut() - .for_each(|child| child.par_retain(predicate)); + .for_each(|child| child.par_retain_with_depth(next_depth, predicate)) + } + + /// Recursively cull all descendants that do not satisfy given `predicate`, in parallel. + pub fn par_retain(&mut self, predicate: impl Fn(&Self, u64) -> bool + Copy + Sync) { + self.par_retain_with_depth(0, predicate) } /// Process the tree via [`par_retain`](Self::par_retain) method. - pub fn into_par_retained(mut self, predicate: impl Fn(&Self) -> bool + Copy + Sync) -> Self { + pub fn into_par_retained( + mut self, + predicate: impl Fn(&Self, u64) -> bool + Copy + Sync, + ) -> Self { self.par_retain(predicate); self } @@ -28,7 +42,7 @@ where Size: Into, { let minimal = self.size().into() as f32 * min_ratio; - self.par_retain(|descendant| descendant.size().into() as f32 >= minimal); + self.par_retain(|descendant, _| descendant.size().into() as f32 >= minimal); } /// Process the tree via [`par_cull_insignificant_data`](Self::par_cull_insignificant_data) method. 
diff --git a/src/data_tree/retain/test.rs b/src/data_tree/retain/test.rs index f91ed0d6..2c8e7fef 100644 --- a/src/data_tree/retain/test.rs +++ b/src/data_tree/retain/test.rs @@ -156,7 +156,7 @@ fn edge_cases() { ), ], ) - .into_par_retained(|descendant| descendant.name().starts_with('!').not()) + .into_par_retained(|descendant, _| descendant.name().starts_with('!').not()) .into_reflection(); let expected = dir( "root", diff --git a/src/data_tree/sort.rs b/src/data_tree/sort.rs index 4844328f..0a441a10 100644 --- a/src/data_tree/sort.rs +++ b/src/data_tree/sort.rs @@ -13,10 +13,11 @@ where self.children .par_iter_mut() .for_each(|child| child.par_sort_by(compare)); - self.children.sort_by(compare); + self.children.sort_unstable_by(compare); } /// Process the tree via [`par_sort_by`](Self::par_sort_by) method. + #[inline] pub fn into_par_sorted( mut self, compare: impl Fn(&Self, &Self) -> Ordering + Copy + Sync, diff --git a/src/fs_tree_builder.rs b/src/fs_tree_builder.rs index 6d33be7d..bddcfbab 100644 --- a/src/fs_tree_builder.rs +++ b/src/fs_tree_builder.rs @@ -1,6 +1,7 @@ use super::{ data_tree::DataTree, get_size::GetSize, + hardlink::{RecordHardlinks, RecordHardlinksArgument}, os_string_display::OsStringDisplay, reporter::{error_report::Operation::*, ErrorReport, Event, Reporter}, size, @@ -24,42 +25,54 @@ use std::{ /// os_string_display::OsStringDisplay, /// reporter::{ErrorOnlyReporter, ErrorReport}, /// size::Bytes, +/// hardlink::HardlinkIgnorant, /// }; /// let builder = FsTreeBuilder { /// root: std::env::current_dir().unwrap(), +/// hardlinks_recorder: &HardlinkIgnorant, /// size_getter: GetApparentSize, -/// reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), +/// reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), +/// max_depth: 10, /// }; /// let data_tree: DataTree = builder.into(); /// ``` #[derive(Debug)] -pub struct FsTreeBuilder +pub struct FsTreeBuilder<'a, Size, SizeGetter, HardlinksRecorder, Report> where - Report: Reporter + Sync, + 
Report: Reporter + Sync + ?Sized, Size: size::Size + Send + Sync, SizeGetter: GetSize + Sync, + HardlinksRecorder: RecordHardlinks + Sync + ?Sized, { /// Root of the directory tree. pub root: PathBuf, /// Returns size of an item. pub size_getter: SizeGetter, + /// Handle to detect and record hardlinks. + pub hardlinks_recorder: &'a HardlinksRecorder, /// Reports progress to external system. - pub reporter: Report, + pub reporter: &'a Report, + /// Deepest level of descendent display in the graph. The sizes beyond the max depth still count toward total. + pub max_depth: u64, } -impl From> +impl<'a, Size, SizeGetter, HardlinksRecorder, Report> + From> for DataTree where - Report: Reporter + Sync, + Report: Reporter + Sync + ?Sized, Size: size::Size + Send + Sync, SizeGetter: GetSize + Sync, + HardlinksRecorder: RecordHardlinks + Sync + ?Sized, { /// Create a [`DataTree`] from an [`FsTreeBuilder`]. - fn from(builder: FsTreeBuilder) -> Self { + fn from(builder: FsTreeBuilder) -> Self { let FsTreeBuilder { root, size_getter, + hardlinks_recorder, reporter, + max_depth, } = builder; TreeBuilder:: { @@ -68,7 +81,7 @@ where path: root, get_info: |path| { - let stats = match symlink_metadata(path) { + let (is_dir, size) = match symlink_metadata(path) { Err(error) => { reporter.report(Event::EncounterError(ErrorReport { operation: SymlinkMetadata, @@ -80,10 +93,21 @@ where children: Vec::new(), }; } - Ok(stats) => stats, + Ok(stats) => { + // `stats` should be dropped ASAP to avoid piling up kernel memory usage + let is_dir = stats.is_dir(); + let size = size_getter.get_size(&stats); + reporter.report(Event::ReceiveData(size)); + hardlinks_recorder + .record_hardlinks(RecordHardlinksArgument::new( + path, &stats, size, reporter, + )) + .ok(); // ignore the error for now + (is_dir, size) + } }; - let children: Vec<_> = if stats.file_type().is_dir() { + let children: Vec<_> = if is_dir { match read_dir(path) { Err(error) => { reporter.report(Event::EncounterError(ErrorReport { 
@@ -111,13 +135,12 @@ where Vec::new() }; - let size = size_getter.get_size(&stats); - reporter.report(Event::ReceiveData(size)); - Info { size, children } }, join_path: |prefix, name| prefix.join(&name.0), + + max_depth, } .into() } diff --git a/src/get_size.rs b/src/get_size.rs index 6dfc7670..235fef8c 100644 --- a/src/get_size.rs +++ b/src/get_size.rs @@ -17,6 +17,7 @@ pub trait GetSize { pub struct GetApparentSize; impl GetSize for GetApparentSize { type Size = Bytes; + #[inline] fn get_size(&self, metadata: &Metadata) -> Self::Size { metadata.len().into() } @@ -29,6 +30,7 @@ pub struct GetBlockSize; #[cfg(unix)] impl GetSize for GetBlockSize { type Size = Bytes; + #[inline] fn get_size(&self, metadata: &Metadata) -> Self::Size { (metadata.blocks() * 512).into() } @@ -41,6 +43,7 @@ pub struct GetBlockCount; #[cfg(unix)] impl GetSize for GetBlockCount { type Size = Blocks; + #[inline] fn get_size(&self, metadata: &Metadata) -> Self::Size { metadata.blocks().into() } diff --git a/src/hardlink.rs b/src/hardlink.rs new file mode 100644 index 00000000..87809d94 --- /dev/null +++ b/src/hardlink.rs @@ -0,0 +1,17 @@ +// `RecordHardlink` is POSIX-exclusive, because whilst Windows does have `MetadataExt::number_of_links`, it requires Nightly. 
+#[cfg(unix)] +pub mod aware; +#[cfg(unix)] +pub use aware::HardlinkAware; + +pub mod deduplicate; +pub mod hardlink_list; +pub mod ignorant; +pub mod link_path_list; +pub mod record; + +pub use deduplicate::DeduplicateSharedSize; +pub use hardlink_list::{HardlinkList, HardlinkListReflection, SharedLinkSummary}; +pub use ignorant::HardlinkIgnorant; +pub use link_path_list::{LinkPathList, LinkPathListReflection}; +pub use record::{RecordHardlinks, RecordHardlinksArgument}; diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs new file mode 100644 index 00000000..36dedbc0 --- /dev/null +++ b/src/hardlink/aware.rs @@ -0,0 +1,114 @@ +use super::{ + hardlink_list, DeduplicateSharedSize, HardlinkList, LinkPathList, RecordHardlinks, + RecordHardlinksArgument, +}; +use crate::{ + data_tree::DataTree, + inode::InodeNumber, + os_string_display::OsStringDisplay, + reporter::{event::HardlinkDetection, Event, Reporter}, + size, +}; +use derive_more::{AsMut, AsRef, Display, Error, From, Into}; +use pipe_trait::Pipe; +use smart_default::SmartDefault; +use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path}; + +/// Be aware of hardlinks. Treat them as links that share space. +/// Detect files with more than 1 links and record them. +/// Deduplicate them (remove duplicated size) from total size to +/// accurately reflect the real size of their containers. +#[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)] +pub struct Aware { + /// Map an inode number to its size and detected paths. + record: HardlinkList, +} + +pub use Aware as HardlinkAware; + +impl Aware { + /// Create new hardlinks handler. + pub fn new() -> Self { + HardlinkList::default().pipe(Aware::from) + } + + /// Create a detector/recorder of hardlinks. + pub fn from_record(record: HardlinkList) -> Self { + Aware::from(record) + } +} + +/// Error that occurs when [`Aware::record_hardlinks`] fails. 
+#[derive(Debug, Display, Error)] +#[non_exhaustive] +pub enum ReportHardlinksError { + /// Fail to add an entry to the record. + #[display("Fail to add an entry to record: {_0}")] + AddToRecord(hardlink_list::AddError), +} + +impl RecordHardlinks for Aware +where + Size: size::Size + Eq + Debug, + Report: Reporter + ?Sized, +{ + type Error = ReportHardlinksError; + + fn record_hardlinks( + &self, + argument: RecordHardlinksArgument, + ) -> Result<(), Self::Error> { + let RecordHardlinksArgument { + path, + stats, + size, + reporter, + } = argument; + + if stats.is_dir() { + return Ok(()); + } + + let links = stats.nlink(); + if links <= 1 { + return Ok(()); + } + + reporter.report(Event::DetectHardlink(HardlinkDetection { + path, + stats, + size, + links, + })); + + let ino = InodeNumber::get(stats); + self.record + .add(ino, size, links, path) + .map_err(ReportHardlinksError::AddToRecord) + } +} + +impl DeduplicateSharedSize for Aware +where + DataTree: Send, + Size: size::Size + Sync, +{ + type Report = HardlinkList; + type Error = Infallible; + fn deduplicate( + self, + data_tree: &mut DataTree, + ) -> Result { + let record: Self::Report = self.into(); + let hardlink_info: Box<[(Size, LinkPathList)]> = record + .iter() + .map(|values| (*values.size(), values.paths().clone())) + .collect(); + let hardlink_info: Box<[(Size, Vec<&Path>)]> = hardlink_info + .iter() + .map(|(size, paths)| (*size, paths.iter().map(AsRef::as_ref).collect())) + .collect(); + data_tree.par_deduplicate_hardlinks(&hardlink_info); + Ok(record) + } +} diff --git a/src/hardlink/deduplicate.rs b/src/hardlink/deduplicate.rs new file mode 100644 index 00000000..737ae74c --- /dev/null +++ b/src/hardlink/deduplicate.rs @@ -0,0 +1,22 @@ +use crate::{data_tree::DataTree, os_string_display::OsStringDisplay, size}; + +/// Ability to correct the sizes in a [`DataTree`] by reducing the size of recorded shared links. +/// +/// The input tree is assumed to be not yet deduplicated. 
+pub trait DeduplicateSharedSize: Sized { + /// Report returned when [`DeduplicateSharedSize::deduplicate`] succeeds. + type Report; + /// Error returned when [`DeduplicateSharedSize::deduplicate`] fails. + type Error; + /// Correct the sizes in a [`DataTree`] by reducing the size of recorded shared links. + fn deduplicate( + self, + data_tree: &mut DataTree, + ) -> Result; +} + +/// Do deduplicate the sizes of hardlinks. +#[cfg(unix)] +pub type Do = super::HardlinkAware; +/// Do not deduplicate the sizes of hardlinks. +pub type DoNot = super::HardlinkIgnorant; diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs new file mode 100644 index 00000000..c955de8e --- /dev/null +++ b/src/hardlink/hardlink_list.rs @@ -0,0 +1,154 @@ +pub mod iter; +pub mod reflection; +pub mod summary; + +pub use iter::Iter; +pub use reflection::Reflection; +pub use summary::Summary; + +pub use Reflection as HardlinkListReflection; +pub use Summary as SharedLinkSummary; + +use crate::{hardlink::LinkPathList, inode::InodeNumber, size}; +use dashmap::DashMap; +use derive_more::{Display, Error}; +use smart_default::SmartDefault; +use std::fmt::Debug; + +#[cfg(any(unix, test))] +use pipe_trait::Pipe; +#[cfg(any(unix, test))] +use std::path::Path; + +/// Map value in [`HardlinkList`]. +#[derive(Debug, Clone)] +struct Value { + /// The size of the file. + size: Size, + /// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted. + links: u64, + /// Paths to the detected links of the file. + paths: LinkPathList, +} + +/// Storage to be used by [`crate::hardlink::RecordHardlinks`]. +/// +/// **Reflection:** `HardlinkList` does not implement `PartialEq`, `Eq`, +/// `Deserialize`, and `Serialize` directly. Instead, it can be converted into a +/// [`Reflection`] which implement these traits. +#[derive(Debug, SmartDefault, Clone)] +pub struct HardlinkList( + /// Map an inode number to its size, number of links, and detected paths. 
+ DashMap>, +); + +impl HardlinkList { + /// Create a new record. + pub fn new() -> Self { + HardlinkList::default() + } + + /// Get the number of entries in the list. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Check whether the list is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Create reflection. + pub fn into_reflection(self) -> Reflection { + self.into() + } +} + +/// Error that occurs when a different size was detected for the same [`ino`][ino]. +/// +/// +/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +#[derive(Debug, Display, Error)] +#[cfg_attr(test, derive(PartialEq, Eq))] +#[display(bound(Size: Debug))] +#[display("Size for inode {ino} changed from {recorded:?} to {detected:?}")] +pub struct SizeConflictError { + pub ino: InodeNumber, + pub recorded: Size, + pub detected: Size, +} + +/// Error that occurs when a different [`nlink`][nlink] was detected for the same [`ino`][ino]. +/// +/// +/// [nlink]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink +/// +/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +#[derive(Debug, Display, Error)] +#[cfg_attr(test, derive(PartialEq, Eq))] +#[display("Number of links of inode {ino} changed from {recorded:?} to {detected:?}")] +pub struct NumberOfLinksConflictError { + pub ino: InodeNumber, + pub recorded: u64, + pub detected: u64, +} + +/// Error that occurs when it fails to add an item to [`HardlinkList`]. +#[derive(Debug, Display, Error)] +#[cfg_attr(test, derive(PartialEq, Eq))] +#[display(bound(Size: Debug))] +#[non_exhaustive] +pub enum AddError { + SizeConflict(SizeConflictError), + NumberOfLinksConflict(NumberOfLinksConflictError), +} + +impl HardlinkList +where + Size: size::Size, +{ + /// Add an entry to the record. 
+ #[cfg(any(unix, test))] // this function isn't used on non-POSIX except in tests + pub(crate) fn add( + &self, + ino: InodeNumber, + size: Size, + links: u64, + path: &Path, + ) -> Result<(), AddError> { + let mut assertions = Ok(()); + self.0 + .entry(ino) + .and_modify(|recorded| { + if size != recorded.size { + assertions = Err(AddError::SizeConflict(SizeConflictError { + ino, + recorded: recorded.size, + detected: size, + })); + return; + } + + if links != recorded.links { + assertions = Err(AddError::NumberOfLinksConflict( + NumberOfLinksConflictError { + ino, + recorded: recorded.links, + detected: links, + }, + )); + return; + } + + recorded.paths.add(path.to_path_buf()); + }) + .or_insert_with(|| { + let paths = path.to_path_buf().pipe(LinkPathList::single); + Value { size, links, paths } + }); + assertions + } +} + +#[cfg(test)] +mod test; diff --git a/src/hardlink/hardlink_list/iter.rs b/src/hardlink/hardlink_list/iter.rs new file mode 100644 index 00000000..d0dfdc53 --- /dev/null +++ b/src/hardlink/hardlink_list/iter.rs @@ -0,0 +1,56 @@ +use super::{HardlinkList, Value}; +use crate::{hardlink::LinkPathList, inode::InodeNumber}; +use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti}; +use pipe_trait::Pipe; + +/// Iterator over entries in [`HardlinkList`]. +#[derive(derive_more::Debug)] +#[debug(bound())] +#[debug("Iter(..)")] +pub struct Iter<'a, Size>(DashIter<'a, InodeNumber, Value>); + +impl HardlinkList { + /// Iterate over the recorded entries. + pub fn iter(&self) -> Iter { + self.0.iter().pipe(Iter) + } +} + +/// [Item](Iterator::Item) of [`Iter`]. +#[derive(derive_more::Debug)] +#[debug(bound())] +#[debug("Item(..)")] +pub struct Item<'a, Size>(RefMulti<'a, InodeNumber, Value>); + +impl<'a, Size> Iterator for Iter<'a, Size> { + type Item = Item<'a, Size>; + fn next(&mut self) -> Option { + self.0.next().map(Item) + } +} + +impl<'a, Size> Item<'a, Size> { + /// The inode number of the file. 
+ #[inline] + pub fn ino(&self) -> InodeNumber { + *self.0.key() + } + + /// Size of the file. + #[inline] + pub fn size(&self) -> &Size { + &self.0.value().size + } + + /// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted. + #[inline] + pub fn links(&self) -> u64 { + self.0.value().links + } + + /// Paths to the detected links of the file. + #[inline] + pub fn paths(&self) -> &LinkPathList { + &self.0.value().paths + } +} diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs new file mode 100644 index 00000000..c190041b --- /dev/null +++ b/src/hardlink/hardlink_list/reflection.rs @@ -0,0 +1,129 @@ +use super::{HardlinkList, Value}; +use crate::{hardlink::LinkPathListReflection, inode::InodeNumber}; +use dashmap::DashMap; +use derive_more::{Display, Error, Into, IntoIterator}; +use into_sorted::IntoSortedUnstable; +use pipe_trait::Pipe; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Intermediate format used for construction and inspection of [`HardlinkList`]'s +/// internal content. +/// +/// **Guarantees:** +/// * Every inode number is unique. +/// * The internal list is always sorted by inode numbers. +/// +/// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. +/// +/// **Serialization and deserialization:** _(feature: `json`)_ `Reflection` implements +/// `Serialize` and `Deserialize` traits, this allows functions in `serde_json` to convert +/// a `Reflection` into/from JSON. +#[derive(Debug, Clone, PartialEq, Eq, Into, IntoIterator)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct Reflection(Vec>); + +impl Reflection { + /// Get the number of entries inside the reflection. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Check whether the reflection has any entry. + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Iterate over the entries. 
+ #[inline] + pub fn iter(&self) -> impl Iterator> + Clone { + self.0.iter() + } +} + +/// An entry in [`Reflection`]. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct ReflectionEntry { + /// The inode number of the file. + pub ino: InodeNumber, + /// Size of the file. + pub size: Size, + /// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted. + pub links: u64, + /// Paths to the detected links of the file. + pub paths: LinkPathListReflection, +} + +impl ReflectionEntry { + /// Create a new entry. + #[inline] + fn new(ino: InodeNumber, Value { size, links, paths }: Value) -> Self { + let paths = paths.into(); + ReflectionEntry { + ino, + size, + links, + paths, + } + } + + /// Dissolve [`ReflectionEntry`] into a pair of [`InodeNumber`] and [`Value`]. + #[inline] + fn dissolve(self) -> (InodeNumber, Value) { + let ReflectionEntry { + ino, + size, + links, + paths, + } = self; + let paths = paths.into(); + (ino, Value { size, links, paths }) + } +} + +impl From>> for Reflection { + /// Sort the list by inode numbers, then create the reflection. + fn from(list: Vec>) -> Self { + list.into_sorted_unstable_by_key(|entry| u64::from(entry.ino)) + .pipe(Reflection) + } +} + +impl From> for Reflection { + fn from(HardlinkList(list): HardlinkList) -> Self { + list.into_iter() + .map(|(ino, value)| ReflectionEntry::new(ino, value)) + .collect::>() + .pipe(Reflection::from) + } +} + +/// Error that occurs when an attempt to convert a [`Reflection`] into a +/// [`HardlinkList`] fails. +#[derive(Debug, Display, Error, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum ConversionError { + /// When the source has duplicated inode numbers. 
+ #[display("Inode number {_0} is duplicated")] + DuplicatedInode(#[error(not(source))] InodeNumber), +} + +impl TryFrom> for HardlinkList { + type Error = ConversionError; + fn try_from(Reflection(entries): Reflection) -> Result { + let map = DashMap::with_capacity(entries.len()); + + for entry in entries { + let (ino, value) = entry.dissolve(); + if map.insert(ino, value).is_some() { + return ino.pipe(ConversionError::DuplicatedInode).pipe(Err); + } + } + + map.pipe(HardlinkList).pipe(Ok) + } +} diff --git a/src/hardlink/hardlink_list/summary.rs b/src/hardlink/hardlink_list/summary.rs new file mode 100644 index 00000000..ca5faa0a --- /dev/null +++ b/src/hardlink/hardlink_list/summary.rs @@ -0,0 +1,228 @@ +use super::{iter::Item as IterItem, reflection::ReflectionEntry, HardlinkList, Reflection}; +use crate::size; +use derive_more::{Add, AddAssign, Sum}; +use derive_setters::Setters; +use std::{ + cmp::Ordering, + fmt::{self, Display}, +}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Summary from [`HardlinkList`] or [`Reflection`]. +#[derive(Debug, Default, Setters, Clone, Copy, PartialEq, Eq, Add, AddAssign, Sum)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +#[setters(prefix = "with_")] +#[non_exhaustive] +pub struct Summary { + /// Number of shared inodes, each with more than 1 links (i.e. `nlink > 1`). + pub inodes: usize, + + /// Number of [shared inodes](Self::inodes) that don't have links outside the measured tree. + /// + /// This number is expected to be less than or equal to [`Self::inodes`]. + pub exclusive_inodes: usize, + + /// Totality of the numbers of links of all [shared inodes](Self::inodes). + pub all_links: u64, + + /// Total number of links of [shared inodes](Self::inodes) that were detected within the measured tree. + /// + /// This number is expected to be less than or equal to [`Self::all_links`]. 
+ pub detected_links: usize, + + /// Total number of links of [shared inodes](Self::inodes) that don't have links outside the measured tree. + /// + /// This number is expected to be less than or equal to [`Self::detected_links`]. + pub exclusive_links: usize, + + /// Totality of the sizes of all [shared inodes](Self::inodes). + pub shared_size: Size, + + /// Totality of the sizes of all [shared inodes](Self::inodes) that don't have links outside the measured tree. + /// + /// This number is expected to be less than or equal to [`Self::all_links`]. + pub exclusive_shared_size: Size, +} + +/// Ability to summarize into a [`Summary`]. +pub trait SummarizeHardlinks: Sized { + /// The result of [`SummarizeHardlinks::summarize_hardlinks`]. + type Summary; + /// Summarize into a summary of shared links and size. + fn summarize_hardlinks(self) -> Self::Summary; +} + +/// Summary of a single unique file. +#[derive(Debug, Clone, Copy)] +pub struct SingleInodeSummary { + /// Total number of all links to the file. + links: u64, + /// Number of detected links to the file. + paths: usize, + /// Size of the file. + size: Size, +} + +impl SummarizeHardlinks for Iter +where + Size: size::Size, + Iter: IntoIterator, + Iter::Item: SummarizeHardlinks, + >::Summary: Into>, +{ + type Summary = Summary; + fn summarize_hardlinks(self) -> Self::Summary { + let mut summary = Summary::default(); + for item in self { + let SingleInodeSummary { links, paths, size } = item.summarize_hardlinks().into(); + summary.inodes += 1; + summary.all_links += links; + summary.detected_links += paths; + summary.shared_size += size; + match links.cmp(&(paths as u64)) { + Ordering::Greater => {} + Ordering::Equal => { + summary.exclusive_inodes += 1; + summary.exclusive_links += paths; // `links` and `paths` are both fine, but `paths` doesn't require type cast + summary.exclusive_shared_size += size; + } + Ordering::Less => { + panic!("Impossible! 
Total of nlink ({links}) is less than detected paths ({paths}). Something must have gone wrong!"); + } + } + } + summary + } +} + +impl HardlinkList { + /// Create summary for the shared links and size. + pub fn summarize(&self) -> Summary { + self.iter().summarize_hardlinks() + } +} + +impl SummarizeHardlinks for &HardlinkList { + type Summary = Summary; + fn summarize_hardlinks(self) -> Self::Summary { + self.summarize() + } +} + +impl Reflection { + /// Create summary for the shared links and size. + pub fn summarize(&self) -> Summary { + self.iter().summarize_hardlinks() + } +} + +impl SummarizeHardlinks for &Reflection { + type Summary = Summary; + fn summarize_hardlinks(self) -> Self::Summary { + self.summarize() + } +} + +/// Return type of [`Summary::display`] which implements [`Display`]. +#[derive(Debug, Clone, Copy)] +pub struct SummaryDisplay<'a, Size: size::Size> { + format: Size::DisplayFormat, + summary: &'a Summary, +} + +impl Display for SummaryDisplay<'_, Size> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let SummaryDisplay { format, summary } = self; + let Summary { + inodes, + exclusive_inodes, + all_links, + detected_links, + exclusive_links, + shared_size, + exclusive_shared_size, + } = summary; + + let shared_size = shared_size.display(*format); + let exclusive_shared_size = exclusive_shared_size.display(*format); + + macro_rules! ln { + ($($args:tt)*) => { + writeln!(f, $($args)*) + }; + } + + if inodes == &0 { + return ln!("There are no hardlinks."); + } + + write!(f, "Hardlinks detected! 
")?; + if exclusive_inodes == inodes { + ln!("No files have links outside this tree")?; + ln!("* Number of shared inodes: {inodes}")?; + ln!("* Total number of links: {all_links}")?; + ln!("* Total shared size: {shared_size}")?; + } else if exclusive_inodes == &0 { + ln!("All hardlinks within this tree have links without")?; + ln!("* Number of shared inodes: {inodes}")?; + ln!("* Total number of links: {all_links} total, {detected_links} detected")?; + ln!("* Total shared size: {shared_size}")?; + } else { + ln!("Some files have links outside this tree")?; + ln!("* Number of shared inodes: {inodes} total, {exclusive_inodes} exclusive")?; + ln!("* Total number of links: {all_links} total, {detected_links} detected, {exclusive_links} exclusive")?; + ln!("* Total shared size: {shared_size} total, {exclusive_shared_size} exclusive")?; + } + + Ok(()) + } +} + +impl Summary { + /// Turns this [`Summary`] into something [displayable](Display). + #[inline] + pub fn display(&self, format: Size::DisplayFormat) -> SummaryDisplay { + SummaryDisplay { + format, + summary: self, + } + } +} + +impl SummarizeHardlinks for ReflectionEntry { + type Summary = SingleInodeSummary; + fn summarize_hardlinks(self) -> Self::Summary { + (&self).summarize_hardlinks() + } +} + +impl SummarizeHardlinks for &ReflectionEntry { + type Summary = SingleInodeSummary; + fn summarize_hardlinks(self) -> Self::Summary { + SingleInodeSummary { + links: self.links, + paths: self.paths.len(), + size: self.size, + } + } +} + +impl<'a, Size: Copy> SummarizeHardlinks for IterItem<'a, Size> { + type Summary = SingleInodeSummary; + fn summarize_hardlinks(self) -> Self::Summary { + (&self).summarize_hardlinks() + } +} + +impl<'a, Size: Copy> SummarizeHardlinks for &IterItem<'a, Size> { + type Summary = SingleInodeSummary; + fn summarize_hardlinks(self) -> Self::Summary { + SingleInodeSummary { + links: self.links(), + paths: self.paths().len(), + size: *self.size(), + } + } +} diff --git 
a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs new file mode 100644 index 00000000..8e6878d2 --- /dev/null +++ b/src/hardlink/hardlink_list/test.rs @@ -0,0 +1,149 @@ +use super::{AddError, HardlinkList, NumberOfLinksConflictError, SizeConflictError}; +use crate::size::Bytes; +use pipe_trait::Pipe; +use pretty_assertions::{assert_eq, assert_ne}; + +const TABLE: &[(u64, u64, u64, &str)] = &[ + (241, 3652, 1, "a"), + (569, 2210, 1, "b"), + (110, 2350, 3, "c"), + (110, 2350, 3, "c1"), + (778, 1110, 1, "d"), + (274, 6060, 2, "e"), + (274, 6060, 2, "e1"), + (883, 4530, 1, "f"), +]; + +fn add(list: HardlinkList) -> HardlinkList { + let values = TABLE[ROW]; + let (ino, size, links, path) = values; + if let Err(error) = list.add(ino.into(), size.into(), links, path.as_ref()) { + panic!("Failed to add {values:?} (index: {ROW}) to the list: {error}"); + } + list +} + +#[test] +fn insertion_order_is_irrelevant_to_equality() { + let a = HardlinkList::new() + .pipe(add::<3>) + .pipe(add::<1>) + .pipe(add::<4>) + .pipe(add::<6>) + .pipe(add::<5>) + .pipe(add::<0>) + .pipe(add::<7>) + .pipe(add::<2>) + .into_reflection(); + + let b = HardlinkList::new() + .pipe(add::<5>) + .pipe(add::<6>) + .pipe(add::<2>) + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<3>) + .pipe(add::<7>) + .pipe(add::<4>) + .into_reflection(); + + let c = HardlinkList::new() + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<2>) + .pipe(add::<3>) + .pipe(add::<4>) + .pipe(add::<5>) + .pipe(add::<6>) + .pipe(add::<7>) + .into_reflection(); + + assert_eq!(a, b); + assert_eq!(b, c); + assert_eq!(a, c); +} + +#[test] +fn omitting_insertion_cause_inequality() { + let a = HardlinkList::new() + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<2>) + .pipe(add::<3>) + .pipe(add::<4>) + .pipe(add::<5>) + .pipe(add::<6>) + .pipe(add::<7>) + .into_reflection(); + + let b = HardlinkList::new() + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<2>) + .pipe(add::<3>) + .pipe(add::<4>) + 
.pipe(add::<5>) + .pipe(add::<7>) + .into_reflection(); + + assert_ne!(a, b); + assert_ne!(b, a); +} + +#[test] +fn insertion_difference_cause_inequality() { + let a = HardlinkList::new() + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<2>) + .pipe(add::<3>) + .pipe(add::<4>) + .pipe(add::<5>) + .pipe(add::<6>) + .into_reflection(); + + let b = HardlinkList::new() + .pipe(add::<0>) + .pipe(add::<1>) + .pipe(add::<2>) + .pipe(add::<3>) + .pipe(add::<4>) + .pipe(add::<5>) + .pipe(add::<7>) + .into_reflection(); + + assert_ne!(a, b); + assert_ne!(b, a); +} + +#[test] +fn detect_size_change() { + let list = HardlinkList::::new(); + list.add(123.into(), 100.into(), 1, "a".as_ref()) + .expect("add the first path"); + let actual = list + .add(123.into(), 110.into(), 1, "b".as_ref()) + .expect_err("add the second path"); + let expected = AddError::SizeConflict(SizeConflictError { + ino: 123.into(), + recorded: 100.into(), + detected: 110.into(), + }); + assert_eq!(actual, expected); +} + +#[test] +fn detect_number_of_links_change() { + let list = HardlinkList::::new(); + list.add(123.into(), 100.into(), 1, "a".as_ref()) + .expect("add the first path"); + let actual = list + .add(123.into(), 100.into(), 2, "b".as_ref()) + .expect_err("add the second path"); + let expected = AddError::NumberOfLinksConflict(NumberOfLinksConflictError { + ino: 123.into(), + recorded: 1, + detected: 2, + }); + assert_eq!(actual, expected); +} diff --git a/src/hardlink/ignorant.rs b/src/hardlink/ignorant.rs new file mode 100644 index 00000000..bf9acaeb --- /dev/null +++ b/src/hardlink/ignorant.rs @@ -0,0 +1,46 @@ +use super::{DeduplicateSharedSize, RecordHardlinks, RecordHardlinksArgument}; +use crate::{data_tree::DataTree, os_string_display::OsStringDisplay, size}; +use std::convert::Infallible; + +/// Be ignorant of hardlinks. Treat them as real files. +/// Do not detect it. Do not deduplicate it. +/// Essentially no-op. 
+#[derive(Debug, Default, Clone, Copy)] +pub struct Ignorant; + +pub use Ignorant as HardlinkIgnorant; + +/// Do nothing to detect nor record any hardlink. +impl RecordHardlinks for Ignorant { + /// Doing nothing cannot fail. + type Error = Infallible; + + /// Do nothing. + #[inline] + fn record_hardlinks( + &self, + _: RecordHardlinksArgument, + ) -> Result<(), Self::Error> { + Ok(()) + } +} + +/// Do nothing to deduplicate the sizes of hardlinks. +impl DeduplicateSharedSize for HardlinkIgnorant +where + Size: size::Size + Sync, +{ + /// Return nothing. + type Report = (); + /// Doing nothing cannot fail. + type Error = Infallible; + + /// Do nothing. + #[inline] + fn deduplicate( + self, + _: &mut DataTree, + ) -> Result { + Ok(()) + } +} diff --git a/src/hardlink/link_path_list.rs b/src/hardlink/link_path_list.rs new file mode 100644 index 00000000..e13b1d44 --- /dev/null +++ b/src/hardlink/link_path_list.rs @@ -0,0 +1,63 @@ +mod iter; +mod reflection; + +pub use iter::Iter; +pub use reflection::Reflection; + +pub use Reflection as LinkPathListReflection; + +use std::path::PathBuf; + +/// List of different hardlinks to the same file. +/// +/// **Reflection:** `LinkPathList` does not implement `PartialEq`, `Eq`, +/// `Deserialize`, and `Serialize` directly. Instead, it can be converted into a +/// [`Reflection`] which implement these traits. Do note that the time complexity +/// of such conversion is O(n) as it has to convert a `Vec` into a `HashSet`. +#[derive(Debug, Clone)] +pub struct LinkPathList(Vec); + +impl LinkPathList { + /// Create a list of a single path. + #[cfg(any(unix, test))] + #[inline] + pub(crate) fn single(path: PathBuf) -> Self { + LinkPathList(vec![path]) + } + + /// Create a list of many paths. 
+ #[cfg(test)] + pub(crate) fn many(paths: impl IntoIterator>) -> Self { + let paths: Vec<_> = paths.into_iter().map(Into::into).collect(); + assert!(!paths.is_empty(), "paths must not be empty"); + LinkPathList(paths) + } + + /// Add a path to the list. + #[cfg(any(unix, test))] + #[inline] + pub(crate) fn add(&mut self, path: PathBuf) { + self.0.push(path) + } + + /// Get the number of paths inside the list. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Check whether the list is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Create reflection. + #[inline] + pub fn into_reflection(self) -> Reflection { + self.into() + } +} + +#[cfg(test)] +mod test; diff --git a/src/hardlink/link_path_list/iter.rs b/src/hardlink/link_path_list/iter.rs new file mode 100644 index 00000000..1e9b5f10 --- /dev/null +++ b/src/hardlink/link_path_list/iter.rs @@ -0,0 +1,64 @@ +use super::LinkPathList; +use pipe_trait::Pipe; +use std::{iter::FusedIterator, path::PathBuf, slice}; + +/// [Iterator] over the paths inside a [`LinkPathList`]. +#[derive(Debug, Clone)] +pub struct Iter<'a>(slice::Iter<'a, PathBuf>); + +impl LinkPathList { + /// Iterate over the paths inside the list. 
+ pub fn iter(&self) -> Iter { + self.0.iter().pipe(Iter) + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = &'a PathBuf; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + self.0.nth(n) + } + + #[inline] + fn last(self) -> Option { + self.0.last() + } +} + +impl<'a> DoubleEndedIterator for Iter<'a> { + #[inline] + fn next_back(&mut self) -> Option { + self.0.next_back() + } + + #[inline] + fn nth_back(&mut self, n: usize) -> Option { + self.0.nth_back(n) + } +} + +impl<'a> ExactSizeIterator for Iter<'a> { + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + +impl FusedIterator for Iter<'_> {} diff --git a/src/hardlink/link_path_list/reflection.rs b/src/hardlink/link_path_list/reflection.rs new file mode 100644 index 00000000..f9ac42ed --- /dev/null +++ b/src/hardlink/link_path_list/reflection.rs @@ -0,0 +1,51 @@ +use super::LinkPathList; +use derive_more::{From, Into, IntoIterator}; +use pipe_trait::Pipe; +use std::{collections::HashSet, path::PathBuf}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Intermediate format used for construction and inspection of [`LinkPathList`]'s +/// internal content. +/// +/// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. +/// +/// **Serialization and deserialization:** _(feature: `json`)_ `Reflection` implements +/// `Serialize` and `Deserialize` traits, this allows functions in `serde_json` to convert +/// a `Reflection` into/from JSON. +#[derive(Debug, Default, Clone, PartialEq, Eq, From, Into, IntoIterator)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct Reflection(pub HashSet); + +impl Reflection { + /// Create an empty reflection. 
+ #[inline] + pub fn new() -> Self { + Reflection::default() + } + + /// Get the number of paths in the reflection. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Check whether the reflection has any path. + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + +impl From for Reflection { + fn from(value: LinkPathList) -> Self { + value.0.into_iter().collect::>().pipe(Reflection) + } +} + +impl From for LinkPathList { + fn from(value: Reflection) -> Self { + value.0.into_iter().collect::>().pipe(LinkPathList) + } +} diff --git a/src/hardlink/link_path_list/test.rs b/src/hardlink/link_path_list/test.rs new file mode 100644 index 00000000..5bf35431 --- /dev/null +++ b/src/hardlink/link_path_list/test.rs @@ -0,0 +1,43 @@ +use super::LinkPathList; +use pipe_trait::Pipe; +use pretty_assertions::{assert_eq, assert_ne}; + +#[test] +fn item_order_is_irrelevant_to_equality() { + let a = ["3", "4", "0", "2", "1"] + .pipe(LinkPathList::many) + .into_reflection(); + let b = ["4", "0", "3", "2", "1"] + .pipe(LinkPathList::many) + .into_reflection(); + let c = ["0", "1", "2", "3", "4"] + .pipe(LinkPathList::many) + .into_reflection(); + assert_eq!(a, b); + assert_eq!(b, c); + assert_eq!(a, c); +} + +#[test] +fn item_absent_cause_inequality() { + let a = ["0", "1", "2", "3"] + .pipe(LinkPathList::many) + .into_reflection(); + let b = ["0", "1", "2", "3", "4"] + .pipe(LinkPathList::many) + .into_reflection(); + assert_ne!(a, b); + assert_ne!(b, a); +} + +#[test] +fn item_difference_cause_inequality() { + let a = ["0", "1", "2", "3", "5"] + .pipe(LinkPathList::many) + .into_reflection(); + let b = ["0", "1", "2", "3", "4"] + .pipe(LinkPathList::many) + .into_reflection(); + assert_ne!(a, b); + assert_ne!(b, a); +} diff --git a/src/hardlink/record.rs b/src/hardlink/record.rs new file mode 100644 index 00000000..074c7834 --- /dev/null +++ b/src/hardlink/record.rs @@ -0,0 +1,43 @@ +use std::{fs::Metadata, path::Path}; + +/// Argument to pass 
to [`RecordHardlinks::record_hardlinks`]. +#[derive(Debug, Clone, Copy)] +pub struct Argument<'a, Size, Report: ?Sized> { + pub path: &'a Path, + pub stats: &'a Metadata, + pub size: Size, + pub reporter: &'a Report, +} + +pub use Argument as RecordHardlinksArgument; + +impl<'a, Size, Report: ?Sized> Argument<'a, Size, Report> { + #[inline] + pub(crate) fn new( + path: &'a Path, + stats: &'a Metadata, + size: Size, + reporter: &'a Report, + ) -> Self { + Argument { + path, + stats, + size, + reporter, + } + } +} + +/// Ability to detect and record hardlinks. +pub trait RecordHardlinks { + /// Error when [`RecordHardlinks::record_hardlinks`] fails. + type Error; + /// Perform hardlinks detection and recording. + fn record_hardlinks(&self, argument: Argument) -> Result<(), Self::Error>; +} + +/// Do detect and record hardlinks. +#[cfg(unix)] +pub type Do = super::HardlinkAware; +/// Do not detect nor record hardlinks. +pub type DoNot = super::HardlinkIgnorant; diff --git a/src/inode.rs b/src/inode.rs new file mode 100644 index 00000000..2305dfb8 --- /dev/null +++ b/src/inode.rs @@ -0,0 +1,23 @@ +use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// The inode number of a file or directory. +#[derive( + Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into, +)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct InodeNumber(u64); + +/// POSIX-exclusive functions. +#[cfg(unix)] +impl InodeNumber { + /// Get inode number of a [`std::fs::Metadata`]. 
+ #[inline] + pub fn get(stats: &std::fs::Metadata) -> Self { + use pipe_trait::Pipe; + use std::os::unix::fs::MetadataExt; + stats.ino().pipe(InodeNumber) + } +} diff --git a/src/json_data.rs b/src/json_data.rs index b5296973..eed2b094 100644 --- a/src/json_data.rs +++ b/src/json_data.rs @@ -5,24 +5,84 @@ pub use binary_version::BinaryVersion; pub use schema_version::SchemaVersion; use crate::{ - data_tree::Reflection, - size::{Blocks, Bytes}, + data_tree::DataTreeReflection, + hardlink::{HardlinkListReflection, SharedLinkSummary}, + size::{self, Blocks, Bytes}, }; -use derive_more::{From, TryInto}; +use derive_more::{Deref, DerefMut, From, TryInto}; +use smart_default::SmartDefault; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -/// The `"unit"` field and the `"tree"` field of [`JsonData`]. +/// The `"shared"` field of [`JsonData`]. +#[derive(Debug, SmartDefault, Clone)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +#[cfg_attr(feature = "json", serde(rename_all = "kebab-case"))] +pub struct JsonShared { + /// Detailed list of all detected hardlinks. + #[cfg_attr( + feature = "json", + serde(skip_serializing_if = "JsonShared::skip_details") + )] + pub details: Option>, + /// Summary of all detected hardlinks. + #[cfg_attr( + feature = "json", + serde(skip_serializing_if = "JsonShared::skip_summary") + )] + pub summary: Option>, +} + +#[cfg(feature = "json")] +impl JsonShared { + /// Decide whether to skip serializing [`JsonShared::details`]. + fn skip_details(details: &Option>) -> bool { + details + .as_ref() + .is_none_or(|reflection| reflection.is_empty()) + } + + /// Decide whether to skip serializing [`JsonShared::summary`]. + fn skip_summary(summary: &Option>) -> bool { + summary + .as_ref() + .is_none_or(|summary| summary == &SharedLinkSummary::default()) + } + + /// Decide whether to skip serializing. 
+ fn skip(&self) -> bool { + JsonShared::skip_details(&self.details) && JsonShared::skip_summary(&self.summary) + } +} + +/// The `"tree"` field and the `"shared"` field of [`JsonData`]. +#[derive(Debug, Clone, Deref, DerefMut)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +#[cfg_attr(feature = "json", serde(rename_all = "kebab-case"))] +pub struct JsonTree { + /// The main data of the tree. + #[deref] + #[deref_mut] + pub tree: DataTreeReflection, + /// Optional list of shared inodes, their sizes, and their many links. + #[cfg_attr( + feature = "json", + serde(default, skip_serializing_if = "JsonShared::skip") + )] + pub shared: JsonShared, +} + +/// The `"unit"` field, the `"tree"` field, and the `"shared-inodes"` field of [`JsonData`]. #[derive(Debug, Clone, From, TryInto)] #[cfg_attr(feature = "json", derive(Deserialize, Serialize))] -#[cfg_attr(feature = "json", serde(tag = "unit", content = "tree"))] +#[cfg_attr(feature = "json", serde(tag = "unit"))] #[cfg_attr(feature = "json", serde(rename_all = "kebab-case"))] -pub enum UnitAndTree { +pub enum JsonDataBody { /// Tree where size is [bytes](Bytes). - Bytes(Reflection), + Bytes(JsonTree), /// Tree where size is [blocks](Blocks). - Blocks(Reflection), + Blocks(JsonTree), } /// Output of the program with `--json-output` flag as well as @@ -36,7 +96,7 @@ pub struct JsonData { /// The `"pdu"` field. #[cfg_attr(feature = "json", serde(rename = "pdu"))] pub binary_version: Option, - /// The `"unit"` field and the `"tree"` field. + /// The `"unit"` field, the `"tree"` field, and the `"shared"` field. 
#[cfg_attr(feature = "json", serde(flatten))] - pub unit_and_tree: UnitAndTree, + pub body: JsonDataBody, } diff --git a/src/json_data/binary_version.rs b/src/json_data/binary_version.rs index 997f8394..6b520dea 100644 --- a/src/json_data/binary_version.rs +++ b/src/json_data/binary_version.rs @@ -13,6 +13,7 @@ pub struct BinaryVersion(String); impl BinaryVersion { /// Get version of the current `pdu` program as a `BinaryVersion`. + #[inline] pub fn current() -> Self { CURRENT_VERSION.to_string().into() } diff --git a/src/json_data/schema_version.rs b/src/json_data/schema_version.rs index ca7db95d..a392f1d6 100644 --- a/src/json_data/schema_version.rs +++ b/src/json_data/schema_version.rs @@ -35,7 +35,7 @@ impl TryFrom for SchemaVersion { } } -impl<'a> From for &'a str { +impl From for &str { fn from(_: SchemaVersion) -> Self { SCHEMA_VERSION } diff --git a/src/lib.rs b/src/lib.rs index 75ddf647..f032fa4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,8 @@ +//! The library crate for `pdu`. +//! +//! The things you are most interested in are likely [`fs_tree_builder::FsTreeBuilder`], +//! [`tree_builder::TreeBuilder`], [`data_tree::DataTree`], or [`visualizer::Visualizer`]. 
+ #![deny(warnings)] #[cfg(feature = "json")] @@ -17,7 +22,7 @@ pub mod runtime_error; pub fn main() -> std::process::ExitCode { if let Err(error) = app::App::from_env().run() { eprintln!("[error] {error}"); - return std::process::ExitCode::FAILURE; + return error.code(); } std::process::ExitCode::SUCCESS } @@ -33,6 +38,8 @@ pub mod bytes_format; pub mod data_tree; pub mod fs_tree_builder; pub mod get_size; +pub mod hardlink; +pub mod inode; pub mod json_data; pub mod os_string_display; pub mod reporter; diff --git a/src/reporter.rs b/src/reporter.rs index 2ff37337..b1663fe4 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -29,7 +29,7 @@ pub trait ParallelReporter: Reporter { impl Reporter for &Target where Size: size::Size, - Target: Reporter, + Target: Reporter + ?Sized, { fn report(&self, event: Event) { Target::report(*self, event) diff --git a/src/reporter/error_only_reporter.rs b/src/reporter/error_only_reporter.rs index 0bd8e7d3..78273c39 100644 --- a/src/reporter/error_only_reporter.rs +++ b/src/reporter/error_only_reporter.rs @@ -1,5 +1,6 @@ use super::{ErrorReport, Event, ParallelReporter, Reporter}; use crate::size; +use std::convert::Infallible; /// Only report errors. #[derive(Debug)] @@ -33,7 +34,7 @@ where Size: size::Size, ReportError: Fn(ErrorReport), { - type DestructionError = (); // TODO: change this to `!` once it is stable. + type DestructionError = Infallible; // TODO: change this to `!` once it is stable. fn destroy(self) -> Result<(), Self::DestructionError> { Ok(()) } diff --git a/src/reporter/event.rs b/src/reporter/event.rs index 379b2211..8643ab81 100644 --- a/src/reporter/event.rs +++ b/src/reporter/event.rs @@ -1,9 +1,25 @@ use super::ErrorReport; use crate::size; +use std::{fs::Metadata, path::Path}; /// Report trigger event. 
#[derive(Debug)] +#[non_exhaustive] pub enum Event<'a, Size: size::Size> { ReceiveData(Size), EncounterError(ErrorReport<'a>), + DetectHardlink(HardlinkDetection<'a, Size>), +} + +/// Data of [`Event::DetectHardlink`]. +#[derive(Debug, Clone, Copy)] +pub struct HardlinkDetection<'a, Size: size::Size> { + /// Path of the detected hardlink. + pub path: &'a Path, + /// Stats of the detected hardlink. + pub stats: &'a Metadata, + /// Size of the file. + pub size: Size, + /// Number of links, including this one. + pub links: u64, } diff --git a/src/reporter/progress_and_error_reporter.rs b/src/reporter/progress_and_error_reporter.rs index 7271bb5a..4741b64d 100644 --- a/src/reporter/progress_and_error_reporter.rs +++ b/src/reporter/progress_and_error_reporter.rs @@ -98,6 +98,10 @@ where report_error(error_report); bump!(errors += 1); } + DetectHardlink(info) => { + bump!(linked += info.links); + bump!(shared += info.size.into()); + } } } } diff --git a/src/reporter/progress_and_error_reporter/progress_report_state.rs b/src/reporter/progress_and_error_reporter/progress_report_state.rs index 2c4b9659..e832421d 100644 --- a/src/reporter/progress_and_error_reporter/progress_report_state.rs +++ b/src/reporter/progress_and_error_reporter/progress_report_state.rs @@ -15,6 +15,10 @@ pub struct ProgressReportState { pub total: AtomicU64, /// Number of occurred errors. pub errors: AtomicU64, + /// Total number of detected hardlinks. + pub linked: AtomicU64, + /// Total size of detected hardlinks. 
+ pub shared: AtomicU64, } impl ProgressReportState { @@ -38,10 +42,14 @@ impl ProgressReportState { let items = load!(items); let total = load!(total).into(); let errors = load!(errors); + let linked = load!(linked); + let shared = load!(shared).into(); ControlFlow::Continue(ProgressReport { items, total, errors, + linked, + shared, }) } } diff --git a/src/reporter/progress_report.rs b/src/reporter/progress_report.rs index e0466f8f..6aa690b4 100644 --- a/src/reporter/progress_report.rs +++ b/src/reporter/progress_report.rs @@ -1,8 +1,10 @@ use crate::{size, status_board::GLOBAL_STATUS_BOARD}; +use derive_setters::Setters; use std::fmt::Write; /// Scan progress. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Default, Setters, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[setters(prefix = "with_")] pub struct ProgressReport { /// Number of scanned items. pub items: u64, @@ -10,28 +12,64 @@ pub struct ProgressReport { pub total: Size, /// Number of occurred errors. pub errors: u64, + /// Total number of detected hardlinks. + pub linked: u64, + /// Total size of detected hardlinks. + pub shared: Size, } impl> ProgressReport { - /// Print progress to stderr. - pub const TEXT: fn(Self) = |report| { + /// Maximum length by which the progress text may extend. + /// + /// This constant is used as capacity in [`Self::TEXT`] to prevent + /// performance penalty from string resizing. + /// + /// The value of this constant is made correct by a unit test. + const TEXT_MAX_LEN: usize = 145; + + /// Create a text to be used in [`Self::TEXT`]. 
+ fn text(self) -> String { let ProgressReport { items, total, errors, - } = report; - let mut text = String::new(); - write!( - text, - "\r(scanned {items}, total {total}", - items = items, - total = total.into(), - ) - .unwrap(); + linked, + shared, + } = self; + let mut text = String::with_capacity(Self::TEXT_MAX_LEN); + let total: u64 = total.into(); + write!(text, "\r(scanned {items}, total {total}").unwrap(); + if linked != 0 { + write!(text, ", linked {linked}").unwrap(); + } + let shared: u64 = shared.into(); + if shared != 0 { + write!(text, ", shared {shared}").unwrap(); + } if errors != 0 { write!(text, ", erred {errors}").unwrap(); } - write!(text, ")").unwrap(); - GLOBAL_STATUS_BOARD.temporary_message(&text); + text.push(')'); + text + } + + /// Print progress to stderr. + pub const TEXT: fn(Self) = |report| { + GLOBAL_STATUS_BOARD.temporary_message(&report.text()); }; } + +#[test] +fn text_max_len() { + use crate::size::Bytes; + let correct_value = ProgressReport:: { + items: u64::MAX, + total: u64::MAX.into(), + errors: u64::MAX, + linked: u64::MAX, + shared: u64::MAX.into(), + } + .text() + .len(); + assert_eq!(ProgressReport::::TEXT_MAX_LEN, correct_value); +} diff --git a/src/runtime_error.rs b/src/runtime_error.rs index e01f6a75..dba94c00 100644 --- a/src/runtime_error.rs +++ b/src/runtime_error.rs @@ -1,4 +1,5 @@ use derive_more::{Display, Error}; +use std::{convert::Infallible, process::ExitCode}; /// Error caused by the CLI program. #[derive(Debug, Display, Error)] @@ -12,10 +13,42 @@ pub enum RuntimeError { /// [DataTreeReflection](crate::data_tree::Reflection) from stdin. #[display("DeserializationFailure: {_0}")] DeserializationFailure(serde_json::Error), - /// When both `--json-input` and file names are both specified. + /// When `--json-input` and file names are both specified. #[display("JsonInputArgConflict: Arguments exist alongside --json-input")] JsonInputArgConflict, /// When input JSON data is not a valid tree. 
#[display("InvalidInputReflection: {_0}")] InvalidInputReflection(#[error(not(source))] String), + /// When the user attempts to use unavailable platform-specific features. + #[display("UnsupportedFeature: {_0}")] + UnsupportedFeature(UnsupportedFeature), +} + +/// Error caused by the user attempting to use unavailable platform-specific features. +#[derive(Debug, Display, Error)] +#[non_exhaustive] +pub enum UnsupportedFeature { + /// Using `--deduplicate-hardlinks` on non-POSIX. + #[cfg(not(unix))] + #[display("Feature --deduplicate-hardlinks is not available on this platform")] + DeduplicateHardlink, +} + +impl From for RuntimeError { + fn from(value: Infallible) -> Self { + match value {} + } +} + +impl RuntimeError { + /// Convert error into exit code. + pub fn code(&self) -> ExitCode { + ExitCode::from(match self { + RuntimeError::SerializationFailure(_) => 2, + RuntimeError::DeserializationFailure(_) => 3, + RuntimeError::JsonInputArgConflict => 4, + RuntimeError::InvalidInputReflection(_) => 5, + RuntimeError::UnsupportedFeature(_) => 6, + }) + } } diff --git a/src/size.rs b/src/size.rs index d23a9504..34224b8c 100644 --- a/src/size.rs +++ b/src/size.rs @@ -1,13 +1,21 @@ use super::bytes_format::{self, BytesFormat}; -use derive_more::{Add, AddAssign, From, Into, Sum}; +use derive_more::{Add, AddAssign, From, Into, Sub, SubAssign, Sum}; use std::{ fmt::{Debug, Display}, - ops::{Mul, MulAssign}, + iter::Sum, + ops::{Add, AddAssign, Mul, MulAssign, Sub, SubAssign}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +mod mul_traits { + use std::ops::{Mul, MulAssign}; + pub trait MulAssignEx: Mul + MulAssign + Sized {} + impl + MulAssign, Rhs> MulAssignEx for Lhs {} +} +use mul_traits::MulAssignEx; + /// Types whose values can be used as disk usage statistic. 
pub trait Size: Debug @@ -20,7 +28,14 @@ pub trait Size: + Ord + Add + AddAssign + + Sub + + SubAssign + Sum + + MulAssignEx + + MulAssignEx + + MulAssignEx + + MulAssignEx + + MulAssignEx { /// Underlying type type Inner: From + Into + Mul; @@ -32,6 +47,36 @@ pub trait Size: fn display(self, input: Self::DisplayFormat) -> Self::DisplayOutput; } +macro_rules! impl_mul { + ($name:ident: $inner:ident *= $($num_type:ident)+) => { + $( + impl Mul<$num_type> for $name { + type Output = Self; + fn mul(self, rhs: $num_type) -> Self::Output { + self.0.mul(rhs as $inner).into() + } + } + + impl Mul<$name> for $num_type { + type Output = $name; + fn mul(self, rhs: $name) -> Self::Output { + rhs * self + } + } + + impl MulAssign<$num_type> for $name { + fn mul_assign(&mut self, rhs: $num_type) { + self.0 *= rhs as $inner; + } + } + )+ + }; + + ($name:ident: u64) => { + impl_mul!($name: u64 *= usize u8 u16 u32 u64); + }; +} + macro_rules! newtype { ( $(#[$attribute:meta])* @@ -39,7 +84,7 @@ macro_rules! newtype { display: ($display_format:ty) -> $display_output:ty = $display_impl:expr; ) => { #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] - #[derive(From, Into, Add, AddAssign, Sum)] + #[derive(From, Into, Add, AddAssign, Sub, SubAssign, Sum)] #[cfg_attr(feature = "json", derive(Deserialize, Serialize))] $(#[$attribute])* pub struct $name($inner); @@ -58,31 +103,14 @@ macro_rules! 
newtype { type Inner = $inner; type DisplayFormat = $display_format; type DisplayOutput = $display_output; + #[inline] fn display(self, format: Self::DisplayFormat) -> Self::DisplayOutput { let display: fn(Self, Self::DisplayFormat) -> Self::DisplayOutput = $display_impl; display(self, format) } } - impl Mul<$inner> for $name { - type Output = Self; - fn mul(self, rhs: $inner) -> Self::Output { - self.0.mul(rhs).into() - } - } - - impl Mul<$name> for $inner { - type Output = $name; - fn mul(self, rhs: $name) -> Self::Output { - rhs * self - } - } - - impl MulAssign<$inner> for $name { - fn mul_assign(&mut self, rhs: $inner) { - self.0 *= rhs; - } - } + impl_mul!($name: u64); }; } diff --git a/src/status_board.rs b/src/status_board.rs index 110a578e..a46657e3 100644 --- a/src/status_board.rs +++ b/src/status_board.rs @@ -13,6 +13,7 @@ pub struct StatusBoard { impl StatusBoard { /// Create a new [`StatusBoard`]. + #[inline] const fn new() -> Self { StatusBoard { line_width: AtomicUsize::new(0), @@ -20,11 +21,13 @@ impl StatusBoard { } /// Get the number of characters of the current line. + #[inline] fn get_line_width(&self) -> usize { self.line_width.load(Ordering::Relaxed) } /// Set the number of characters of the current line. + #[inline] fn set_line_width(&self, value: usize) { self.line_width.store(value, Ordering::Relaxed); } diff --git a/src/tree_builder.rs b/src/tree_builder.rs index 0f94a974..c6e1494d 100644 --- a/src/tree_builder.rs +++ b/src/tree_builder.rs @@ -23,6 +23,8 @@ where pub get_info: GetInfo, /// Function to join parent's `path` with a child's name to make the child's `name`. pub join_path: JoinPath, + /// Deepest level of descendent to store as arrays. The sizes beyond the max depth still count toward total. 
+ pub max_depth: u64, } impl From> @@ -41,21 +43,28 @@ where name, get_info, join_path, + max_depth, } = builder; let Info { size, children } = get_info(&path); + let max_depth = max_depth.saturating_sub(1); - let children: Vec<_> = children + let children = children .into_par_iter() .map(|name| TreeBuilder { path: join_path(&path, &name), name, get_info, join_path, + max_depth, }) - .map(Self::from) - .collect(); + .map(Self::from); - DataTree::dir(name, size, children) + if max_depth > 0 { + DataTree::dir(name, size, children.collect()) + } else { + let size = size + children.map(|child| child.size()).sum(); + DataTree::dir(name, size, Vec::new()) + } } } diff --git a/src/visualizer.rs b/src/visualizer.rs index 9db33950..71effaaf 100644 --- a/src/visualizer.rs +++ b/src/visualizer.rs @@ -15,7 +15,7 @@ pub use proportion_bar::{ProportionBar, ProportionBarBlock}; pub use tree::{TreeHorizontalSlice, TreeSkeletalComponent}; use super::{data_tree::DataTree, size}; -use std::{fmt::Display, num::NonZeroUsize}; +use std::fmt::Display; /// Visualize a [`DataTree`]. /// @@ -38,7 +38,6 @@ use std::{fmt::Display, num::NonZeroUsize}; /// direction: Direction::BottomUp, /// bar_alignment: BarAlignment::Right, /// column_width_distribution: ColumnWidthDistribution::total(100), -/// max_depth: std::num::NonZeroUsize::new(10).unwrap(), /// }; /// println!("{visualizer}"); /// # } @@ -59,8 +58,6 @@ where pub bar_alignment: BarAlignment, /// Distribution and total number of characters/blocks can be placed in a line. pub column_width_distribution: ColumnWidthDistribution, - /// Maximum number of levels that should be visualized. 
- pub max_depth: NonZeroUsize, } mod copy; diff --git a/src/visualizer/direction.rs b/src/visualizer/direction.rs index 2dd3dc11..03215d1a 100644 --- a/src/visualizer/direction.rs +++ b/src/visualizer/direction.rs @@ -9,6 +9,7 @@ pub enum Direction { impl Direction { #[cfg(feature = "cli")] + #[inline] pub(crate) const fn from_top_down(top_down: bool) -> Self { if top_down { Direction::TopDown diff --git a/src/visualizer/methods/initial_table.rs b/src/visualizer/methods/initial_table.rs index 0a70482b..b02e0920 100644 --- a/src/visualizer/methods/initial_table.rs +++ b/src/visualizer/methods/initial_table.rs @@ -46,7 +46,6 @@ where { #[derive(Clone)] struct Param { - remaining_depth: usize, index_as_child: usize, ancestors: Vec>, preceding_sibling: Option>, @@ -69,11 +68,7 @@ where Size: size::Size, Act: FnMut(&'a DataTree, Param<&'a Name, Size>) -> ActResult<&'a Name, Size>, { - if param.remaining_depth == 0 { - return None; - } let ActResult { node_info } = act(tree, param.clone()); - let remaining_depth = param.remaining_depth - 1; let mut preceding_sibling = None; for (index_as_child, child) in tree.children().iter().enumerate() { let mut ancestors = Vec::with_capacity(param.ancestors.len() + 1); @@ -83,7 +78,6 @@ where child, act, Param { - remaining_depth, index_as_child, ancestors, preceding_sibling, @@ -103,18 +97,12 @@ where let Param { index_as_child, ancestors, - remaining_depth, preceding_sibling, } = param; let name = node.name(); let node_data = node.size(); let row_index = initial_table.len(); - debug_assert_op!(remaining_depth > 0); - let children_count = if remaining_depth != 1 { - node.children().len() - } else { - 0 - }; + let children_count = node.children().len(); let fs_size = node.size().into(); let percentage = if total_fs_size == 0 { "0%".to_string() @@ -134,7 +122,6 @@ where sibling_count, index_as_child, children_count, - remaining_depth, }; initial_table.column_width.size_column_width = @@ -151,9 +138,8 @@ where ActResult { node_info } 
}, Param { - remaining_depth: visualizer.max_depth.get(), index_as_child: 0, - ancestors: Vec::with_capacity(0), + ancestors: Vec::new(), preceding_sibling: None, }, ); diff --git a/src/visualizer/methods/node_info.rs b/src/visualizer/methods/node_info.rs index 060bc77e..fe67cdd5 100644 --- a/src/visualizer/methods/node_info.rs +++ b/src/visualizer/methods/node_info.rs @@ -8,5 +8,4 @@ pub struct NodeInfo { pub sibling_count: NonZeroUsize, pub index_as_child: usize, pub children_count: usize, - pub remaining_depth: usize, } diff --git a/src/visualizer/methods/tree_table.rs b/src/visualizer/methods/tree_table.rs index 99a6ea8f..6419ee51 100644 --- a/src/visualizer/methods/tree_table.rs +++ b/src/visualizer/methods/tree_table.rs @@ -65,11 +65,7 @@ where .map(|initial_row| { let child_position = ChildPosition::from_index(initial_row.index_as_child, initial_row.sibling_count); - let parenthood = if initial_row.remaining_depth == 0 { - Parenthood::Childless - } else { - Parenthood::from_children_count(initial_row.children_count) - }; + let parenthood = Parenthood::from_children_count(initial_row.children_count); let skeletal_component = TreeSkeletalComponent { child_position, parenthood, @@ -121,7 +117,7 @@ where for excluded_row_index in excluded_row_indices.iter().copied() { let is_child = |row: &&TreeRow<&Name, Size>| { row.parent() - .map_or(false, |node_info| node_info.row_index == excluded_row_index) + .is_some_and(|node_info| node_info.row_index == excluded_row_index) }; intermediate_table .index(excluded_row_index..) 
@@ -167,7 +163,7 @@ where { let is_sibling = |row: &&TreeRow<&Name, Size>| { row.parent() - .map_or(false, |parent| parent.row_index == parent_row_index) + .is_some_and(|parent| parent.row_index == parent_row_index) }; let is_excluded = |row: &TreeRow<&Name, Size>| excluded_row_indices.contains(&row.row_index); diff --git a/src/visualizer/parenthood.rs b/src/visualizer/parenthood.rs index 6a1b5e67..6eea4db4 100644 --- a/src/visualizer/parenthood.rs +++ b/src/visualizer/parenthood.rs @@ -9,6 +9,7 @@ pub enum Parenthood { impl Parenthood { /// Deduce parenthood from the number of children. + #[inline] pub const fn from_children_count(children_count: usize) -> Self { if children_count == 0 { Parenthood::Childless diff --git a/src/visualizer/proportion_bar.rs b/src/visualizer/proportion_bar.rs index 5074a1c8..9e9cd19a 100644 --- a/src/visualizer/proportion_bar.rs +++ b/src/visualizer/proportion_bar.rs @@ -1,7 +1,7 @@ use super::BarAlignment; use derive_more::{AsRef, Deref, Display, From, Into}; use fmt_iter::repeat; -use std::fmt::{Error, Formatter}; +use std::fmt::{Display, Error, Formatter}; /// Block of proportion bar. #[derive(Debug, Clone, Copy, PartialEq, Eq, AsRef, Deref, Display, Into)] @@ -30,27 +30,33 @@ pub struct ProportionBar { } impl ProportionBar { + #[inline] fn display_level0(self) -> impl Display { repeat(LEVEL0_BLOCK, self.level0) } + #[inline] fn display_level1(self) -> impl Display { repeat(LEVEL1_BLOCK, self.level1) } + #[inline] fn display_level2(self) -> impl Display { repeat(LEVEL2_BLOCK, self.level2) } + #[inline] fn display_level3(self) -> impl Display { repeat(LEVEL3_BLOCK, self.level3) } + #[inline] fn display_level4(self) -> impl Display { repeat(LEVEL4_BLOCK, self.level4) } /// Create a [displayable](Display) value. 
+ #[inline] pub fn display(self, align: BarAlignment) -> ProportionBarDisplay { ProportionBarDisplay { bar: self, align } } diff --git a/src/visualizer/tree.rs b/src/visualizer/tree.rs index 5d4289ce..133b58c1 100644 --- a/src/visualizer/tree.rs +++ b/src/visualizer/tree.rs @@ -2,7 +2,7 @@ use super::{ChildPosition, Direction, Parenthood}; use derive_more::{AsRef, Deref, Display, Into}; use fmt_iter::FmtIter; use pipe_trait::Pipe; -use std::fmt::{Error, Formatter}; +use std::fmt::{Display, Error, Formatter}; use zero_copy_pads::Width; /// Determine 3 characters to use as skeletal component that connect a node @@ -49,6 +49,7 @@ impl Display for TreeSkeletalComponent { } impl Width for TreeSkeletalComponent { + #[inline] fn width(&self) -> usize { self.visualize().width() } @@ -103,6 +104,7 @@ impl Display for TreeHorizontalSlice { } impl Width for TreeHorizontalSlice { + #[inline] fn width(&self) -> usize { self.required_width() + self.name.width() } diff --git a/tests/_utils.rs b/tests/_utils.rs index 19bf6d66..3887feb9 100644 --- a/tests/_utils.rs +++ b/tests/_utils.rs @@ -1,21 +1,24 @@ use build_fs_tree::{dir, file, Build, MergeableFileSystemTree}; use command_extra::CommandExtra; use derive_more::{AsRef, Deref}; +use into_sorted::IntoSorted; use parallel_disk_usage::{ data_tree::{DataTree, DataTreeReflection}, fs_tree_builder::FsTreeBuilder, get_size::{self, GetSize}, + hardlink::HardlinkIgnorant, os_string_display::OsStringDisplay, reporter::ErrorOnlyReporter, size, }; use pipe_trait::Pipe; use pretty_assertions::assert_eq; -use rand::{distributions::Alphanumeric, thread_rng, Rng}; +use rand::{distr::Alphanumeric, rng, Rng}; use rayon::prelude::*; use std::{ + cmp::Ordering, env::temp_dir, - fs::{create_dir, metadata, remove_dir_all}, + fs::{create_dir, metadata, remove_dir_all, symlink_metadata}, io::Error, path::{Path, PathBuf}, process::{Command, Output}, @@ -39,7 +42,7 @@ pub struct Temp(PathBuf); impl Temp { /// Create a temporary directory. 
pub fn new_dir() -> Result { - let path = thread_rng() + let path = rng() .sample_iter(&Alphanumeric) .take(15) .map(char::from) @@ -77,13 +80,13 @@ impl Default for SampleWorkspace { MergeableFileSystemTree::<&str, String>::from(dir! { "flat" => dir! { "0" => file!("") - "1" => file!("a".repeat(1000)) - "2" => file!("a".repeat(2000)) - "3" => file!("a".repeat(3000)) + "1" => file!("a".repeat(100_000)) + "2" => file!("a".repeat(200_000)) + "3" => file!("a".repeat(300_000)) } "nested" => dir! { "0" => dir! { - "1" => file!("a".repeat(5000)) + "1" => file!("a".repeat(500_000)) } } "empty-dir" => dir! {} @@ -95,6 +98,222 @@ impl Default for SampleWorkspace { } } +/// POSIX-exclusive functions +#[cfg(unix)] +impl SampleWorkspace { + /// Set up a temporary directory for tests. + /// + /// This directory would have a couple of normal files and a couple of hardlinks. + pub fn simple_tree_with_some_hardlinks(sizes: [usize; 5]) -> Self { + use std::fs::hard_link; + let temp = Temp::new_dir().expect("create working directory for sample workspace"); + + MergeableFileSystemTree::<&str, String>::from(dir! { + "main" => dir! { + "sources" => dir! { + "no-hardlinks.txt" => file!("a".repeat(sizes[0])), + "one-internal-hardlink.txt" => file!("a".repeat(sizes[1])), + "two-internal-hardlinks.txt" => file!("a".repeat(sizes[2])), + "one-external-hardlink.txt" => file!("a".repeat(sizes[3])), + "one-internal-one-external-hardlinks.txt" => file!("a".repeat(sizes[4])), + } + "internal-hardlinks" => dir! {} + } + "external-hardlinks" => dir! {} + }) + .build(&temp) + .expect("build the filesystem tree for the sample workspace"); + + macro_rules! 
link { + ($original:literal -> $link:literal) => {{ + let original = $original; + let link = $link; + if let Err(error) = hard_link(temp.join(original), temp.join(link)) { + panic!("Failed to link {original} to {link}: {error}"); + } + }}; + } + + link!("main/sources/one-internal-hardlink.txt" -> "main/internal-hardlinks/link-0.txt"); + link!("main/sources/two-internal-hardlinks.txt" -> "main/internal-hardlinks/link-1a.txt"); + link!("main/sources/two-internal-hardlinks.txt" -> "main/internal-hardlinks/link-1b.txt"); + link!("main/sources/one-external-hardlink.txt" -> "external-hardlinks/link-2.txt"); + link!("main/sources/one-internal-one-external-hardlinks.txt" -> "main/internal-hardlinks/link-3a.txt"); + link!("main/sources/one-internal-one-external-hardlinks.txt" -> "external-hardlinks/link-3b.txt"); + + SampleWorkspace(temp) + } + + pub fn simple_tree_with_some_symlinks_and_hardlinks(sizes: [usize; 5]) -> Self { + use std::os::unix::fs::symlink; + let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes); + + macro_rules! symlink { + ($link_name:literal -> $target:literal) => { + let link_name = $link_name; + let target = $target; + if let Err(error) = symlink(target, workspace.join(link_name)) { + panic!("Failed create symbolic link {link_name} pointing to {target}: {error}"); + } + }; + } + + symlink!("workspace-itself" -> "."); + symlink!("main/main-itself" -> "."); + symlink!("main/parent-of-main" -> ".."); + symlink!("main-mirror" -> "./main"); + symlink!("sources-mirror" -> "./main/sources"); + + workspace + } + + /// Set up a temporary directory for tests. + /// + /// This directory would have a single file being hard-linked multiple times. 
+ pub fn multiple_hardlinks_to_a_single_file(bytes: usize, links: u64) -> Self { + use std::fs::{hard_link, write as write_file}; + let temp = Temp::new_dir().expect("create working directory for sample workspace"); + + let file_path = temp.join("file.txt"); + write_file(&file_path, "a".repeat(bytes)).expect("create file.txt"); + + for num in 0..links { + hard_link(&file_path, temp.join(format!("link.{num}"))) + .unwrap_or_else(|error| panic!("Failed to create 'link.{num}': {error}")); + } + + SampleWorkspace(temp) + } + + /// Set up a temporary directory for tests. + /// + /// The tree in this tests have a diverse types of files, both shared (hardlinks) + /// and unique (non-hardlinks). + pub fn complex_tree_with_shared_and_unique_files( + files_per_branch: usize, + bytes_per_file: usize, + ) -> Self { + use std::fs::{create_dir_all, hard_link, write as write_file}; + + let whole = files_per_branch; + let half = files_per_branch / 2; + let quarter = files_per_branch / 4; + let half_quarter = files_per_branch / 8; + let temp = Temp::new_dir().expect("create working directory for sample workspace"); + + temp.join("no-hardlinks") + .pipe(create_dir_all) + .expect("create no-hardlinks"); + temp.join("some-hardlinks") + .pipe(create_dir_all) + .expect("create some-hardlinks"); + temp.join("only-hardlinks/exclusive") + .pipe(create_dir_all) + .expect("create only-hardlinks/exclusive"); + temp.join("only-hardlinks/mixed") + .pipe(create_dir_all) + .expect("create only-hardlinks/mixed"); + temp.join("only-hardlinks/external") + .pipe(create_dir_all) + .expect("create only-hardlinks/external"); + + // Create files in no-hardlinks. + // There will be no files with nlink > 1. 
+ (0..files_per_branch).par_bridge().for_each(|index| { + let file_name = format!("file-{index}.txt"); + let file_path = temp.join("no-hardlinks").join(file_name); + if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) { + panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}"); + } + }); + + // Create files in some-hardlinks. + // Let's divide the files into 8 equal groups. + // Each file in the first group will have 2 exclusive links. + // Each file in the second group will have 1 exclusive link. + // Each file in the third and fourth groups will have no links. + // Each file in the remaining groups is PLANNED to have 1 external link from only-hardlinks/mixed. + (0..whole).par_bridge().for_each(|file_index| { + let file_name = format!("file-{file_index}.txt"); + let file_path = temp.join("some-hardlinks").join(file_name); + if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) { + panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}"); + } + + let link_count = + ((file_index < quarter) as usize) + ((file_index < half_quarter) as usize); + + for link_index in 0..link_count { + let link_name = format!("link{link_index}-file{file_index}.txt"); + let link_path = temp.join("some-hardlinks").join(link_name); + if let Err(error) = hard_link(&file_path, &link_path) { + panic!("Failed to link {file_path:?} to {link_path:?}: {error}"); + } + } + }); + + // Create files in only-hardlinks/exclusive. + // Each file in this directory will have 1 exclusive link. 
+ (0..whole).par_bridge().for_each(|index| { + let file_name = format!("file-{index}.txt"); + let file_path = temp.join("only-hardlinks/exclusive").join(file_name); + if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) { + panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}"); + } + let link_name = format!("link-{index}.txt"); + let link_path = temp.join("only-hardlinks/exclusive").join(link_name); + if let Err(error) = hard_link(&file_path, &link_path) { + panic!("Failed to link {file_path:?} to {link_path:?}: {error}"); + } + }); + + // Create links in only-hardlinks/mixed. + // Let's divide the PLANNED links into 2 equal groups. + // Each link in the first group is PLANNED to share with only-hardlinks/external. + // Each link in the second group is exclusive. + (half..whole).par_bridge().for_each(|index| { + let file_name = format!("link0-{index}.txt"); + let file_path = temp.join("only-hardlinks/mixed").join(file_name); + if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) { + panic!("Failed to write {bytes_per_file} bytes to {file_path:?}: {error}"); + } + + let link_name = format!("link1-{index}.txt"); + let link_path = temp.join("only-hardlinks/mixed").join(link_name); + if let Err(error) = hard_link(&file_path, &link_path) { + panic!("Failed to link {file_path:?} to {link_path:?}: {error}"); + } + }); + + // Create links in only-hardlinks/external + // Let's divide the links into 2 equal groups. + // The first group will share with only-hardlinks/mixed. + // The second group will share with some-hardlinks. 
+ (0..whole).par_bridge().for_each(|index| { + let link_name = format!("linkX-{index}.txt"); + let link_path = temp.join("only-hardlinks/external").join(link_name); + + let file_path = if index < half { + let file_name = format!("link0-{index}.txt"); // file name from only-hardlinks/mixed + let file_path = temp.join("only-hardlinks/mixed").join(file_name); + if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) { + panic!("Failed to write {bytes_per_file} bytes to {file_path:?}: {error}"); + } + file_path + } else { + let file_name = format!("file-{index}.txt"); // file name from some-hardlinks + temp.join("some-hardlinks").join(file_name) + }; + + if let Err(error) = hard_link(&file_path, &link_path) { + panic!("Failed to link {file_path:?} to {link_path:?}: {error}"); + } + }); + + SampleWorkspace(temp) + } +} + /// Make the snapshot of a [`TreeReflection`] testable. /// /// The real filesystem is often messy, causing `children` to mess up its order. @@ -110,10 +329,10 @@ where let DataTreeReflection { name, size, - mut children, + children, } = tree_reflection; - children.sort_by(|left, right| left.name.cmp(&right.name)); let children = children + .into_sorted_by(|left, right| left.name.cmp(&right.name)) .into_par_iter() .map(sanitize_tree_reflection) .collect(); @@ -149,10 +368,12 @@ where let measure = |suffix: &str| { FsTreeBuilder { size_getter, - reporter: ErrorOnlyReporter::new(|error| { - panic!("Unexpected call to report_error: {:?}", error) + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(|error| { + panic!("Unexpected call to report_error: {error:?}") }), root: root.join(suffix), + max_depth: 10, } .pipe(DataTree::::from) .into_par_sorted(|left, right| left.name().cmp(right.name())) @@ -293,9 +514,9 @@ impl<'a> CommandList<'a> { /// Make sure a flag name has valid syntax. 
fn assert_flag(name: &str) { match name.len() { - 0 | 1 => panic!("{:?} is not a valid flag", name), - 2 => assert!(name.starts_with('-'), "{:?} is not a valid flag", name), - _ => assert!(name.starts_with("--"), "{:?} is not a valid flag", name), + 0 | 1 => panic!("{name:?} is not a valid flag"), + 2 => assert!(name.starts_with('-'), "{name:?} is not a valid flag"), + _ => assert!(name.starts_with("--"), "{name:?} is not a valid flag"), } } } @@ -312,8 +533,7 @@ pub fn stdout_text( inspect_stderr(&stderr); assert!( status.success(), - "progress exits with non-zero status: {:?}", - status + "progress exits with non-zero status: {status:?}", ); stdout .pipe(String::from_utf8) @@ -330,3 +550,34 @@ pub fn inspect_stderr(stderr: &[u8]) { eprintln!("STDERR:\n{text}\n"); } } + +/// Recursively sort a [`DataTreeReflection`]. +pub fn sort_reflection_by( + reflection: &mut DataTreeReflection, + order: Order, +) where + Size: size::Size, + Order: + FnMut(&DataTreeReflection, &DataTreeReflection) -> Ordering + Copy, +{ + reflection.children.sort_by(order); + for child in &mut reflection.children { + sort_reflection_by(child, order); + } +} + +/// Read [apparent size](std::fs::Metadata::len) of a path. +pub fn read_apparent_size(path: &Path) -> u64 { + path.pipe(symlink_metadata) + .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) + .len() +} + +/// Read [ino](std::os::unix::fs::MetadataExt::ino) of a path. 
+#[cfg(unix)] +pub fn read_inode_number(path: &Path) -> u64 { + use std::os::unix::fs::MetadataExt; + path.pipe(symlink_metadata) + .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) + .ino() +} diff --git a/tests/cli_errors.rs b/tests/cli_errors.rs index 35fd25ab..c33f0d9a 100644 --- a/tests/cli_errors.rs +++ b/tests/cli_errors.rs @@ -4,25 +4,26 @@ pub mod _utils; pub use _utils::*; use command_extra::CommandExtra; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::process::{Command, Output, Stdio}; +use text_block_macros::text_block; + +#[cfg(unix)] use maplit::btreeset; +#[cfg(unix)] use parallel_disk_usage::{ bytes_format::BytesFormat, data_tree::DataTree, fs_tree_builder::FsTreeBuilder, get_size::GetApparentSize, + hardlink::HardlinkIgnorant, os_string_display::OsStringDisplay, reporter::{ErrorOnlyReporter, ErrorReport}, visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, }; -use pipe_trait::Pipe; -use pretty_assertions::assert_eq; -use std::{ - collections::BTreeSet, - convert::TryInto, - path::Path, - process::{Command, Output, Stdio}, -}; -use text_block_macros::text_block; +#[cfg(unix)] +use std::{collections::BTreeSet, path::Path}; fn stdio(command: Command) -> Command { command @@ -43,7 +44,7 @@ fn fs_permission(path: impl AsRef, permission: &'static str, recursive: bo .output() .expect("run chmod command"); inspect_stderr(&stderr); - assert!(status.success(), "chmod fails {:?}", status); + assert!(status.success(), "chmod fails {status:?}"); } #[test] @@ -96,7 +97,7 @@ fn max_depth_0() { assert_eq!( stderr, text_block! { - "error: invalid value '0' for '--max-depth ': number would be zero for non-zero type" + r#"error: invalid value '0' for '--max-depth ': Value is neither "inf" nor a positive integer: number would be zero for non-zero type"# "" "For more information, try '--help'." 
} @@ -132,7 +133,9 @@ fn fs_errors() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse()); @@ -143,7 +146,6 @@ fn fs_errors() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected_stdout = format!("{visualizer}"); eprintln!("EXPECTED STDOUT:\n{}\n", &expected_stdout); diff --git a/tests/data_tree_reflection.rs b/tests/data_tree_reflection.rs index 29e17f1f..12f1ba57 100644 --- a/tests/data_tree_reflection.rs +++ b/tests/data_tree_reflection.rs @@ -121,23 +121,11 @@ fn invalid_conversion_excessive_children() { let expected = ConversionError::ExcessiveChildren { path: vec!["root", "b", "0"].into_iter().collect(), size: Bytes::new(321), - children: vec![ - Reflection { - name: "abc", - size: Bytes::new(123), - children: vec![Reflection { - name: "xyz", - size: Bytes::new(4321), - children: Vec::new(), - }], - }, - Reflection { - name: "def", - size: Bytes::new(456), - children: Vec::new(), - }, - ], - children_sum: Bytes::new(123 + 456), + child: Reflection { + name: "def", + size: Bytes::new(456), + children: Vec::new(), + }, }; assert_eq!(actual, expected); } @@ -148,7 +136,15 @@ fn display_excessive_children() { .par_try_into_tree() .expect_err("create error") .to_string(); - let expected = r#"ExcessiveChildren: "root/b/0": Bytes(321) is less than Bytes(579)"#; + let expected = if cfg!(unix) { + r#"ExcessiveChildren: "root/b/0" (Bytes(321)) is less than a child named "def" (Bytes(456))"# + } else if cfg!(windows) { + // TODO: stop using debug format + r#"ExcessiveChildren: "root\\b\\0" (Bytes(321)) is 
less than a child named "def" (Bytes(456))"# + } else { + eprintln!("ACTUAL: {actual}"); + panic!("This platform isn't supported!"); + }; assert_eq!(actual, expected); } diff --git a/tests/fs_tree_builder.rs b/tests/fs_tree_builder.rs index df360eb2..52789662 100644 --- a/tests/fs_tree_builder.rs +++ b/tests/fs_tree_builder.rs @@ -1,13 +1,13 @@ pub mod _utils; pub use _utils::*; -use parallel_disk_usage::{ - get_size::{GetApparentSize, GetBlockCount, GetBlockSize}, - size::Bytes, -}; +use parallel_disk_usage::{get_size::GetApparentSize, size::Bytes}; #[cfg(unix)] -use parallel_disk_usage::size::Blocks; +use parallel_disk_usage::{ + get_size::{GetBlockCount, GetBlockSize}, + size::Blocks, +}; #[test] fn len_as_bytes() { diff --git a/tests/hardlinks_deduplication.rs b/tests/hardlinks_deduplication.rs new file mode 100644 index 00000000..73274458 --- /dev/null +++ b/tests/hardlinks_deduplication.rs @@ -0,0 +1,1280 @@ +#![cfg(unix)] // This feature is not available in Windows +#![cfg(feature = "cli")] + +pub mod _utils; +pub use _utils::*; + +use command_extra::CommandExtra; +use into_sorted::IntoSorted; +use itertools::Itertools; +use normalize_path::NormalizePath; +use parallel_disk_usage::{ + bytes_format::BytesFormat, + data_tree::Reflection, + hardlink::{ + hardlink_list::{reflection::ReflectionEntry, Summary}, + LinkPathListReflection, + }, + inode::InodeNumber, + json_data::{JsonData, JsonTree}, + size::Bytes, +}; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use rayon::prelude::*; +use std::{ + collections::HashSet, + iter, + ops::Add, + path::PathBuf, + process::{Command, Stdio}, +}; + +fn stdio(command: Command) -> Command { + command + .with_stdin(Stdio::null()) + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) +} + +#[test] +fn simple_tree_with_some_hardlinks() { + #![expect(clippy::identity_op)] + + let sizes = [200_000, 220_000, 310_000, 110_000, 210_000]; + let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes); 
+ + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_arg("main") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + let tree = tree; + + let file_size = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |suffices: &[&str]| { + suffices + .iter() + .map(|suffix| PathBuf::from("main").join(suffix)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = Bytes::new(0) + + inode_size("main") + + inode_size("main/sources") + + inode_size("main/internal-hardlinks") + + file_size("no-hardlinks.txt") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"); + assert_eq!(actual_size, expected_size); + + let actual_tree = &tree.tree; + let mut expected_tree = Reflection { + name: "main".to_string(), + size: expected_size, + children: vec![ + Reflection { + name: "sources".to_string(), + size: inode_size("main/sources") + + file_size("no-hardlinks.txt") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + children: vec![ + Reflection { + 
name: "no-hardlinks.txt".to_string(), + size: file_size("no-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-internal-hardlink.txt".to_string(), + size: file_size("one-internal-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "two-internal-hardlinks.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-external-hardlink.txt".to_string(), + size: file_size("one-external-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-internal-one-external-hardlinks.txt".to_string(), + size: file_size("one-internal-one-external-hardlinks.txt"), + children: Vec::new(), + }, + ], + }, + Reflection { + name: "internal-hardlinks".to_string(), + size: inode_size("main/internal-hardlinks") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + children: vec![ + Reflection { + name: "link-0.txt".to_string(), + size: file_size("one-internal-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-1a.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-1b.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-3a.txt".to_string(), + size: file_size("one-internal-one-external-hardlinks.txt"), + children: Vec::new(), + }, + ], + }, + ], + }; + sort_reflection_by(&mut expected_tree, |a, b| a.name.cmp(&b.name)); + assert_eq!(actual_tree, &expected_tree); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .cloned() + .collect(); + let expected_shared_details = [ + ReflectionEntry { + ino: file_inode("one-internal-hardlink.txt"), + size: file_size("one-internal-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&[ + 
"sources/one-internal-hardlink.txt", + "internal-hardlinks/link-0.txt", + ]), + }, + ReflectionEntry { + ino: file_inode("two-internal-hardlinks.txt"), + size: file_size("two-internal-hardlinks.txt"), + links: 1 + 2, + paths: shared_paths(&[ + "sources/two-internal-hardlinks.txt", + "internal-hardlinks/link-1a.txt", + "internal-hardlinks/link-1b.txt", + ]), + }, + ReflectionEntry { + ino: file_inode("one-external-hardlink.txt"), + size: file_size("one-external-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&["sources/one-external-hardlink.txt"]), + }, + ReflectionEntry { + ino: file_inode("one-internal-one-external-hardlinks.txt"), + size: file_size("one-internal-one-external-hardlinks.txt"), + links: 1 + 1 + 1, + paths: shared_paths(&[ + "sources/one-internal-one-external-hardlinks.txt", + "internal-hardlinks/link-3a.txt", + ]), + }, + ] + .into_sorted_by_key(|item| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(0 + 1 + 1 + 1 + 1) + .with_exclusive_inodes(0 + 1 + 1 + 0 + 0) + .with_all_links(0 + 2 + 3 + 2 + 3) + .with_detected_links(0 + 2 + 3 + 1 + 2) + .with_exclusive_links(0 + 2 + 3 + 0 + 0) + .with_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + ) + .with_exclusive_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt"), + ) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("main") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + 
eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! Some files have links outside this tree", + ) + .unwrap(); + writeln!( + summary, + "* Number of shared inodes: {total} total, {exclusive} exclusive", + total = expected_shared_summary.unwrap().inodes, + exclusive = expected_shared_summary.unwrap().exclusive_inodes, + ) + .unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + exclusive = expected_shared_summary.unwrap().exclusive_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {total} total, {exclusive} exclusive", + total = expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + exclusive = expected_shared_summary + .unwrap() + .exclusive_shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn multiple_hardlinks_to_a_single_file() { + let links = 10; + let workspace = SampleWorkspace::multiple_hardlinks_to_a_single_file(100_000, links); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("file.txt") + 
.pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let file_inode = workspace + .join("file.txt") + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from); + + let actual_size = tree.size; + let expected_size = workspace + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + .add(file_size); + assert_eq!(actual_size, expected_size); + + let actual_children = tree + .children + .clone() + .into_sorted_by(|a, b| a.name.cmp(&b.name)); + let expected_children: Vec<_> = { + let links = (0..links).map(|num| format!("link.{num}")); + let node = |name| Reflection { + name, + size: file_size, + children: Vec::new(), + }; + "file.txt" + .to_string() + .pipe(iter::once) + .chain(links) + .map(node) + .collect() + }; + assert_eq!(actual_children, expected_children); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .cloned() + .collect(); + let expected_shared_details = [ReflectionEntry { + ino: file_inode, + size: file_size, + links: 1 + links, + paths: (0..links) + .map(|num| format!("./link.{num}")) + .chain("./file.txt".to_string().pipe(iter::once)) + .map(PathBuf::from) + .collect::>() + .pipe(LinkPathListReflection), + }]; + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(1) + .with_exclusive_inodes(1) + .with_all_links(1 + links) + .with_detected_links(1 + links as usize) + .with_exclusive_links(1 + links as usize) + .with_shared_size(file_size) + .with_exclusive_shared_size(file_size) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = 
visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! No files have links outside this tree", + ) + .unwrap(); + writeln!(summary, "* Number of shared inodes: 1").unwrap(); + writeln!(summary, "* Total number of links: 11").unwrap(); + writeln!( + summary, + "* Total shared size: {}", + file_size.display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn complex_tree_with_shared_and_unique_files() { + let files_per_branch = 2 * 3 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("no-hardlinks/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + + // The following formula treat the first file as "real" and + // the non-first file with the same inode as "fake" for ease + // of reasoning. + // It should still produce the same result as the proper + // deduplication formula however. 
+ #[expect(clippy::erasing_op)] + let expected_size: Bytes = [ + inode_size("."), + inode_size("no-hardlinks"), + inode_size("some-hardlinks"), + inode_size("only-hardlinks"), + inode_size("only-hardlinks/exclusive"), + inode_size("only-hardlinks/mixed"), + inode_size("only-hardlinks/external"), + file_size * files_per_branch, // no-hardlinks/* + file_size * files_per_branch, // some-hardlinks/* + file_size * files_per_branch, // only-hardlinks/exclusive/* + file_size * files_per_branch, // only-hardlinks/mixed/* + file_size * 0usize, // only-hardlinks/external/* + ] + .into_iter() + .sum(); + + assert_eq!(actual_size, expected_size); + + fn starts_with_path(item: &ReflectionEntry, prefix: &str) -> bool { + item.paths + .0 + .iter() + .any(|path| path.normalize().starts_with(prefix)) + } + + // Files with nlink <= 1 shouldn't appear + { + let actual = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .find(|item| item.links <= 1) + .cloned(); + assert_eq!(actual, None); + } + + // All entries are sorted by their inodes and have unique inodes + { + let actual: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .map(|item| item.ino) + .map(u64::from) + .collect(); + let expected = actual + .clone() + .into_iter() + .collect::>() + .into_iter() + .collect::>() + .into_sorted(); + assert_eq!(actual, expected); + } + + // No files from no-hardlinks should appear + { + let actual = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .find(|item| starts_with_path(item, "no-hardlinks")) + .cloned(); + assert_eq!(actual, None); + } + + // This file in some-hardlinks should have 2 links created for it + { + let actual = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .find(|item| starts_with_path(item, "some-hardlinks/file-0.txt")) + .cloned(); + let expected = Some(ReflectionEntry { + ino: workspace + .join("some-hardlinks/file-0.txt") + 
.pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from), + size: workspace + .join("some-hardlinks/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new), + links: 3, + paths: ["file-0.txt", "link0-file0.txt", "link1-file0.txt"] + .map(|name| PathBuf::from(".").join("some-hardlinks").join(name)) + .pipe(HashSet::from) + .pipe(LinkPathListReflection), + }); + assert_eq!(actual, expected); + } + + // This file in some-hardlinks should have 1 link created for it + { + let file_index = files_per_branch / 8; + let actual = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .find(|item| starts_with_path(item, &format!("some-hardlinks/file-{file_index}.txt"))) + .cloned(); + let expected = Some(ReflectionEntry { + ino: workspace + .join(format!("some-hardlinks/file-{file_index}.txt")) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from), + size: workspace + .join(format!("some-hardlinks/file-{file_index}.txt")) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new), + links: 2, + paths: [ + format!("file-{file_index}.txt"), + format!("link0-file{file_index}.txt"), + ] + .map(|name| PathBuf::from(".").join("some-hardlinks").join(name)) + .pipe(HashSet::from) + .pipe(LinkPathListReflection), + }); + assert_eq!(actual, expected); + } + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = { + // The following formula treat the first file as "real" and + // the non-first file with the same inode as "fake" for ease + // of reasoning. + // It should still produce the same result as the proper + // deduplication formula however. 
+ let inodes = [ + 0, // no-hardlinks/* + 2 * files_per_branch / 8 + files_per_branch / 2, // some-hardlinks/* + files_per_branch, // only-hardlinks/exclusive/* + files_per_branch, // only-hardlinks/mixed/* + 0, // only-hardlinks/external/* + ] + .into_iter() + .sum(); + let all_links = [ + 0, // no-hardlinks/* + 3 * files_per_branch / 8 + 2 * files_per_branch / 8 + 2 * files_per_branch / 2, // some-hardlinks/* + 2 * files_per_branch, // only-hardlinks/exclusive/* + 2 * files_per_branch / 2 + 2 * files_per_branch / 2, // only-hardlinks/mixed/* + 0, // only-hardlinks/external/* + ] + .into_iter() + .sum::() as u64; + let shared_size = file_size * inodes; + Summary::default() + .with_inodes(inodes) + .with_exclusive_inodes(inodes) + .with_all_links(all_links) + .with_detected_links(all_links as usize) + .with_exclusive_links(all_links as usize) + .with_shared_size(shared_size) + .with_exclusive_shared_size(shared_size) + .pipe(Some) + }; + assert_eq!(actual_shared_summary, expected_shared_summary); +} + +#[test] +fn hardlinks_and_non_hardlinks() { + #![expect(clippy::identity_op)] + + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("--deduplicate-hardlinks") + .with_arg("some-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("some-hardlinks/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + 
.join("some-hardlinks") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |file_names: &[&str]| { + file_names + .iter() + .map(|file_name| PathBuf::from("some-hardlinks").join(file_name)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = inode_size("some-hardlinks") + file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .unwrap() + .iter() + .cloned() + .collect(); + let expected_shared_details = [ + ReflectionEntry { + ino: file_inode("file-0.txt"), + size: file_size, + links: 3, + paths: shared_paths(&["file-0.txt", "link0-file0.txt", "link1-file0.txt"]), + }, + ReflectionEntry { + ino: file_inode("file-1.txt"), + size: file_size, + links: 2, + paths: shared_paths(&["file-1.txt", "link0-file1.txt"]), + }, + // ... file-2.txt and file-3.txt don't have hardlinks so they shouldn't appear here ... 
+ ReflectionEntry { + ino: file_inode("file-4.txt"), + size: file_size, + links: 2, + paths: shared_paths(&["file-4.txt"]), + }, + ReflectionEntry { + ino: file_inode("file-5.txt"), + size: file_size, + links: 2, + paths: shared_paths(&["file-5.txt"]), + }, + ReflectionEntry { + ino: file_inode("file-6.txt"), + size: file_size, + links: 2, + paths: shared_paths(&["file-6.txt"]), + }, + ReflectionEntry { + ino: file_inode("file-7.txt"), + size: file_size, + links: 2, + paths: shared_paths(&["file-7.txt"]), + }, + ] + .into_sorted_by_key(|item| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = { + let inodes = expected_shared_details.len(); + let exclusive_inodes = 2; + Summary::default() + .with_inodes(inodes) + .with_exclusive_inodes(exclusive_inodes) + .with_all_links(3 + 2 + 4 * 2) + .with_detected_links(3 + 2 + 4 * 1) + .with_exclusive_links(3 + 2) + .with_shared_size(inodes * file_size) + .with_exclusive_shared_size(exclusive_inodes * file_size) + .pipe(Some) + }; + assert_eq!(actual_shared_summary, expected_shared_summary); + assert_eq!(actual_shared_summary.unwrap().inodes, files_per_branch - 2); + assert_eq!( + actual_shared_summary.unwrap().all_links, + actual_shared_details + .iter() + .map(|item| item.links) + .sum::(), + ); + assert_eq!( + actual_shared_summary.unwrap().detected_links, + actual_shared_details + .iter() + .map(|item| item.paths.len()) + .sum::(), + ); + assert_eq!( + actual_shared_summary.unwrap().exclusive_links, + actual_shared_details + .iter() + .filter(|item| item.links == item.paths.len() as u64) + .map(|item| item.links as usize) + .sum::(), + ); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("some-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + 
eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! Some files have links outside this tree", + ) + .unwrap(); + writeln!( + summary, + "* Number of shared inodes: {total} total, {exclusive} exclusive", + total = expected_shared_summary.unwrap().inodes, + exclusive = expected_shared_summary.unwrap().exclusive_inodes, + ) + .unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + exclusive = expected_shared_summary.unwrap().exclusive_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {total} total, {exclusive} exclusive", + total = expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + exclusive = expected_shared_summary + .unwrap() + .exclusive_shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn exclusive_hardlinks_only() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("--deduplicate-hardlinks") + .with_arg("only-hardlinks/exclusive") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + 
.expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/exclusive/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("only-hardlinks/exclusive") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |file_names: &[&str]| { + file_names + .iter() + .map(|file_name| PathBuf::from("only-hardlinks/exclusive").join(file_name)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = inode_size("only-hardlinks/exclusive") + file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .unwrap() + .iter() + .cloned() + .collect(); + let expected_shared_details = (0..files_per_branch) + .par_bridge() + .map(|index| ReflectionEntry { + ino: file_inode(&format!("file-{index}.txt")), + size: file_size, + links: 2, + paths: shared_paths(&[&format!("file-{index}.txt"), &format!("link-{index}.txt")]), + }) + .collect::>() + .into_sorted_by_key(|item: &ReflectionEntry| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(files_per_branch) + .with_exclusive_inodes(files_per_branch) + .with_all_links(2 * files_per_branch as u64) + .with_detected_links(2 * files_per_branch) + .with_exclusive_links(2 * files_per_branch) + .with_shared_size(files_per_branch * file_size) + .with_exclusive_shared_size(files_per_branch * file_size) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + 
.with_arg("--deduplicate-hardlinks") + .with_arg("only-hardlinks/exclusive") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! No files have links outside this tree", + ) + .unwrap(); + writeln!(summary, "* Number of shared inodes: {files_per_branch}").unwrap(); + writeln!(summary, "* Total number of links: {}", 2 * files_per_branch).unwrap(); + writeln!( + summary, + "* Total shared size: {}", + (file_size * files_per_branch).display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn exclusive_only_and_external_only_hardlinks() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("--deduplicate-hardlinks") + .with_arg("only-hardlinks/mixed") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/mixed/link0-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("only-hardlinks/mixed") + .join(name) + 
.pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |file_names: &[&str]| { + file_names + .iter() + .map(|file_name| PathBuf::from("only-hardlinks/mixed").join(file_name)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = inode_size("only-hardlinks/mixed") + file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .unwrap() + .iter() + .cloned() + .collect(); + let expected_shared_details = iter::empty() + .par_bridge() + .chain( + (0..(files_per_branch / 2)) + .par_bridge() + .map(|index| ReflectionEntry { + ino: file_inode(&format!("link0-{index}.txt")), + size: file_size, + links: 2, + paths: shared_paths(&[&format!("link0-{index}.txt")]), + }), + ) + .chain( + ((files_per_branch / 2)..files_per_branch) + .par_bridge() + .map(|index| ReflectionEntry { + ino: file_inode(&format!("link0-{index}.txt")), + size: file_size, + links: 2, + paths: shared_paths(&[ + &format!("link0-{index}.txt"), + &format!("link1-{index}.txt"), + ]), + }), + ) + .collect::>() + .into_sorted_by_key(|item: &ReflectionEntry| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(files_per_branch) + .with_exclusive_inodes(files_per_branch / 2) + .with_all_links(2 * files_per_branch as u64) + .with_detected_links(files_per_branch + files_per_branch / 2) + .with_exclusive_links(files_per_branch * 2 / 2) + .with_shared_size(files_per_branch * file_size) + .with_exclusive_shared_size((files_per_branch / 2) * file_size) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + 
.with_arg("only-hardlinks/mixed") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! Some files have links outside this tree", + ) + .unwrap(); + writeln!( + summary, + "* Number of shared inodes: {total} total, {exclusive} exclusive", + total = expected_shared_summary.unwrap().inodes, + exclusive = expected_shared_summary.unwrap().exclusive_inodes, + ) + .unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + exclusive = expected_shared_summary.unwrap().exclusive_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {total} total, {exclusive} exclusive", + total = expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + exclusive = expected_shared_summary + .unwrap() + .exclusive_shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn external_hardlinks_only() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("--deduplicate-hardlinks") + .with_arg("only-hardlinks/external") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + 
.pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/external/linkX-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("only-hardlinks/external") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |file_names: &[&str]| { + file_names + .iter() + .map(|file_name| PathBuf::from("only-hardlinks/external").join(file_name)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = inode_size("only-hardlinks/external") + file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .unwrap() + .iter() + .cloned() + .collect(); + let expected_shared_details = (0..files_per_branch) + .par_bridge() + .map(|index| ReflectionEntry { + ino: file_inode(&format!("linkX-{index}.txt")), + size: file_size, + links: 2, + paths: shared_paths(&[&format!("linkX-{index}.txt")]), + }) + .collect::>() + .into_sorted_by_key(|item: &ReflectionEntry| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(files_per_branch) + .with_exclusive_inodes(0) + .with_all_links(2 * files_per_branch as u64) + .with_detected_links(files_per_branch) + .with_exclusive_links(0) + .with_shared_size(files_per_branch * file_size) + .with_exclusive_shared_size(Bytes::new(0)) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + 
.with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("only-hardlinks/external") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! All hardlinks within this tree have links without", + ) + .unwrap(); + writeln!(summary, "* Number of shared inodes: {files_per_branch}").unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {}", + expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} diff --git a/tests/hardlinks_deduplication_multi_args.rs b/tests/hardlinks_deduplication_multi_args.rs new file mode 100644 index 00000000..4851aff1 --- /dev/null +++ b/tests/hardlinks_deduplication_multi_args.rs @@ -0,0 +1,621 @@ +#![cfg(unix)] // This feature is not available in Windows +#![cfg(feature = "cli")] + +pub mod _utils; +pub use _utils::*; + +use command_extra::CommandExtra; +use into_sorted::IntoSorted; +use itertools::Itertools; +use parallel_disk_usage::{ + bytes_format::BytesFormat, + data_tree::Reflection, + hardlink::{ + hardlink_list::{reflection::ReflectionEntry, Summary}, + LinkPathListReflection, + }, + inode::InodeNumber, + json_data::{JsonData, JsonTree}, + size::Bytes, +}; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::{ + collections::HashSet, + 
path::PathBuf, + process::{Command, Stdio}, +}; + +fn stdio(command: Command) -> Command { + command + .with_stdin(Stdio::null()) + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) +} + +#[test] +fn simple_tree_with_some_hardlinks() { + #![expect(clippy::identity_op)] + + let sizes = [200_000, 220_000, 310_000, 110_000, 210_000]; + let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes); + + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_arg("main/sources") + .with_arg("main/internal-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + let tree = tree; + + let file_size = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) + }; + + let shared_paths = |suffices: &[&str]| { + suffices + .iter() + .map(|suffix| PathBuf::from("main").join(suffix)) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = Bytes::new(0) + + inode_size("main/sources") + + inode_size("main/internal-hardlinks") + + file_size("no-hardlinks.txt") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"); + assert_eq!(actual_size, expected_size); + + let actual_tree = &tree.tree; + let 
expected_tree = { + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_arg("main") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes") + .tree; + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + tree.name = "(total)".to_string(); + tree.size = expected_size; + for child in &mut tree.children { + let name = match child.name.as_str() { + "sources" => "main/sources", + "internal-hardlinks" => "main/internal-hardlinks", + name => panic!("Unexpected name: {name:?}"), + }; + child.name = name.to_string(); + } + tree + }; + assert_eq!(actual_tree, &expected_tree); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .cloned() + .collect(); + let expected_shared_details = [ + ReflectionEntry { + ino: file_inode("one-internal-hardlink.txt"), + size: file_size("one-internal-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&[ + "sources/one-internal-hardlink.txt", + "internal-hardlinks/link-0.txt", + ]), + }, + ReflectionEntry { + ino: file_inode("two-internal-hardlinks.txt"), + size: file_size("two-internal-hardlinks.txt"), + links: 1 + 2, + paths: shared_paths(&[ + "sources/two-internal-hardlinks.txt", + "internal-hardlinks/link-1a.txt", + "internal-hardlinks/link-1b.txt", + ]), + }, + ReflectionEntry { + ino: file_inode("one-external-hardlink.txt"), + size: file_size("one-external-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&["sources/one-external-hardlink.txt"]), + }, + ReflectionEntry { + ino: file_inode("one-internal-one-external-hardlinks.txt"), + size: file_size("one-internal-one-external-hardlinks.txt"), + links: 1 + 1 + 1, + paths: shared_paths(&[ + 
"sources/one-internal-one-external-hardlinks.txt", + "internal-hardlinks/link-3a.txt", + ]), + }, + ] + .into_sorted_by_key(|item| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(0 + 1 + 1 + 1 + 1) + .with_exclusive_inodes(0 + 1 + 1 + 0 + 0) + .with_all_links(0 + 2 + 3 + 2 + 3) + .with_detected_links(0 + 2 + 3 + 1 + 2) + .with_exclusive_links(0 + 2 + 3 + 0 + 0) + .with_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + ) + .with_exclusive_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt"), + ) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("main/sources") + .with_arg("main/internal-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! 
Some files have links outside this tree", + ) + .unwrap(); + writeln!( + summary, + "* Number of shared inodes: {total} total, {exclusive} exclusive", + total = expected_shared_summary.unwrap().inodes, + exclusive = expected_shared_summary.unwrap().exclusive_inodes, + ) + .unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + exclusive = expected_shared_summary.unwrap().exclusive_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {total} total, {exclusive} exclusive", + total = expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + exclusive = expected_shared_summary + .unwrap() + .exclusive_shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn multiple_hardlinks_to_a_single_file() { + let links = 10; + let args = ["file.txt", "link.3", "link.5"]; + let workspace = SampleWorkspace::multiple_hardlinks_to_a_single_file(100_000, links); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_args(args) + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("file.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let file_inode = workspace + .join("file.txt") + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from); + + let actual_size = tree.size; + let expected_size = file_size; + assert_eq!(actual_size, expected_size); + + let actual_children = 
tree + .children + .clone() + .into_sorted_by(|a, b| a.name.cmp(&b.name)); + let expected_children = args.map(|name| Reflection { + name: name.to_string(), + size: file_size, + children: Vec::new(), + }); + assert_eq!(actual_children, expected_children); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .cloned() + .collect(); + let expected_shared_details = [ReflectionEntry { + ino: file_inode, + size: file_size, + links: 1 + links, + paths: args + .map(PathBuf::from) + .pipe(HashSet::from) + .pipe(LinkPathListReflection), + }]; + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(1) + .with_exclusive_inodes(0) + .with_all_links(1 + links) + .with_detected_links(args.len()) + .with_exclusive_links(0) + .with_shared_size(file_size) + .with_exclusive_shared_size(Bytes::new(0)) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_args(args) + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! 
All hardlinks within this tree have links without", + ) + .unwrap(); + writeln!(summary, "* Number of shared inodes: 1").unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {}", + file_size.display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} + +#[test] +fn multiple_duplicated_arguments() { + #![expect(clippy::identity_op)] + + let sizes = [200_000, 220_000, 310_000, 110_000, 210_000]; + let workspace = SampleWorkspace::simple_tree_with_some_symlinks_and_hardlinks(sizes); + + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_arg("main/sources") // expected to be kept + .with_arg("main/main-itself/sources") // expected to be removed + .with_arg("workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks") // expected to be kept + .with_arg("main/internal-hardlinks") // expected to be removed + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + let tree = tree; + + let file_size = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let file_inode = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_inode_number) + .pipe(InodeNumber::from) 
+ }; + + let shared_paths = |suffices: &[&str]| { + suffices + .iter() + .map(PathBuf::from) + .collect::>() + .pipe(LinkPathListReflection) + }; + + let actual_size = tree.size; + let expected_size = Bytes::new(0) + + inode_size("main/sources") + + inode_size("main/internal-hardlinks") + + file_size("no-hardlinks.txt") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"); + assert_eq!(actual_size, expected_size); + + let actual_tree = &tree.tree; + let expected_tree = { + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("--json-output") + .with_arg("main") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes") + .tree; + tree.name = "(total)".to_string(); + tree.size = expected_size; + for child in &mut tree.children { + let name = match child.name.as_str() { + "sources" => "main/sources", + "internal-hardlinks" => { + "workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks" + } + name => panic!("Unexpected name: {name:?}"), + }; + child.name = name.to_string(); + } + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + tree + }; + assert_eq!(actual_tree, &expected_tree); + + let actual_shared_details: Vec<_> = tree + .shared + .details + .as_ref() + .expect("get details") + .iter() + .cloned() + .collect(); + let expected_shared_details = [ + ReflectionEntry { + ino: file_inode("one-internal-hardlink.txt"), + size: file_size("one-internal-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&[ + "main/sources/one-internal-hardlink.txt", + "workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks/link-0.txt", + ]), + }, + 
ReflectionEntry { + ino: file_inode("two-internal-hardlinks.txt"), + size: file_size("two-internal-hardlinks.txt"), + links: 1 + 2, + paths: shared_paths(&[ + "main/sources/two-internal-hardlinks.txt", + "workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks/link-1a.txt", + "workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks/link-1b.txt", + ]), + }, + ReflectionEntry { + ino: file_inode("one-external-hardlink.txt"), + size: file_size("one-external-hardlink.txt"), + links: 1 + 1, + paths: shared_paths(&["main/sources/one-external-hardlink.txt"]), + }, + ReflectionEntry { + ino: file_inode("one-internal-one-external-hardlinks.txt"), + size: file_size("one-internal-one-external-hardlinks.txt"), + links: 1 + 1 + 1, + paths: shared_paths(&[ + "main/sources/one-internal-one-external-hardlinks.txt", + "workspace-itself/main/parent-of-main/main-mirror/internal-hardlinks/link-3a.txt", + ]), + }, + ] + .into_sorted_by_key(|item| u64::from(item.ino)); + assert_eq!(actual_shared_details, expected_shared_details); + + let actual_shared_summary = tree.shared.summary; + let expected_shared_summary = Summary::default() + .with_inodes(0 + 1 + 1 + 1 + 1) + .with_exclusive_inodes(0 + 1 + 1 + 0 + 0) + .with_all_links(0 + 2 + 3 + 2 + 3) + .with_detected_links(0 + 2 + 3 + 1 + 2) + .with_exclusive_links(0 + 2 + 3 + 0 + 0) + .with_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + ) + .with_exclusive_shared_size( + Bytes::new(0) + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt"), + ) + .pipe(Some); + assert_eq!(actual_shared_summary, expected_shared_summary); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--deduplicate-hardlinks") + .with_arg("main/sources") + 
.with_arg("main/internal-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + let actual_hardlinks_summary = visualization + .lines() + .skip_while(|line| !line.starts_with("Hardlinks detected!")) + .join("\n"); + let expected_hardlinks_summary = { + use parallel_disk_usage::size::Size; + use std::fmt::Write; + let mut summary = String::new(); + writeln!( + summary, + "Hardlinks detected! Some files have links outside this tree", + ) + .unwrap(); + writeln!( + summary, + "* Number of shared inodes: {total} total, {exclusive} exclusive", + total = expected_shared_summary.unwrap().inodes, + exclusive = expected_shared_summary.unwrap().exclusive_inodes, + ) + .unwrap(); + writeln!( + summary, + "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive", + total = expected_shared_summary.unwrap().all_links, + detected = expected_shared_summary.unwrap().detected_links, + exclusive = expected_shared_summary.unwrap().exclusive_links, + ) + .unwrap(); + writeln!( + summary, + "* Total shared size: {total} total, {exclusive} exclusive", + total = expected_shared_summary + .unwrap() + .shared_size + .display(BytesFormat::MetricUnits), + exclusive = expected_shared_summary + .unwrap() + .exclusive_shared_size + .display(BytesFormat::MetricUnits), + ) + .unwrap(); + summary + }; + assert_eq!( + actual_hardlinks_summary.trim_end(), + expected_hardlinks_summary.trim_end(), + ); +} diff --git a/tests/hardlinks_without_deduplication.rs b/tests/hardlinks_without_deduplication.rs new file mode 100644 index 00000000..27cfc2ce --- /dev/null +++ b/tests/hardlinks_without_deduplication.rs @@ -0,0 +1,491 @@ +#![cfg(unix)] // This feature is not available in Windows +#![cfg(feature = "cli")] + +pub mod _utils; +pub use _utils::*; + +use command_extra::CommandExtra; +use parallel_disk_usage::{ + data_tree::Reflection, + json_data::{JsonData, JsonTree}, + size::Bytes, +}; +use 
pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::{ + ops::{Add, Mul}, + process::{Command, Stdio}, +}; + +fn stdio(command: Command) -> Command { + command + .with_stdin(Stdio::null()) + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) +} + +#[test] +fn simple_tree_with_some_hardlinks() { + let sizes = [200_000, 220_000, 310_000, 110_000, 210_000]; + let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes); + + let mut tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("main") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + sort_reflection_by(&mut tree, |a, b| a.name.cmp(&b.name)); + let tree = tree; + + let file_size = |name: &str| { + workspace + .join("main/sources") + .join(name) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size = Bytes::new(0) + + inode_size("main") + + inode_size("main/sources") + + inode_size("main/internal-hardlinks") + + file_size("no-hardlinks.txt") + + 2usize * file_size("one-internal-hardlink.txt") + + 3usize * file_size("two-internal-hardlinks.txt") + + file_size("one-external-hardlink.txt") + + 2usize * file_size("one-internal-one-external-hardlinks.txt"); + assert_eq!(actual_size, expected_size); + + let actual_tree = &tree.tree; + let mut expected_tree = Reflection { + name: "main".to_string(), + size: expected_size, + children: vec![ + Reflection { + name: "sources".to_string(), + size: inode_size("main/sources") + + file_size("no-hardlinks.txt") + + file_size("one-internal-hardlink.txt") + + file_size("two-internal-hardlinks.txt") + + 
file_size("one-external-hardlink.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + children: vec![ + Reflection { + name: "no-hardlinks.txt".to_string(), + size: file_size("no-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-internal-hardlink.txt".to_string(), + size: file_size("one-internal-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "two-internal-hardlinks.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-external-hardlink.txt".to_string(), + size: file_size("one-external-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "one-internal-one-external-hardlinks.txt".to_string(), + size: file_size("one-internal-one-external-hardlinks.txt"), + children: Vec::new(), + }, + ], + }, + Reflection { + name: "internal-hardlinks".to_string(), + size: inode_size("main/internal-hardlinks") + + file_size("one-internal-hardlink.txt") + + 2usize * file_size("two-internal-hardlinks.txt") + + file_size("one-internal-one-external-hardlinks.txt"), + children: vec![ + Reflection { + name: "link-0.txt".to_string(), + size: file_size("one-internal-hardlink.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-1a.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-1b.txt".to_string(), + size: file_size("two-internal-hardlinks.txt"), + children: Vec::new(), + }, + Reflection { + name: "link-3a.txt".to_string(), + size: file_size("one-internal-one-external-hardlinks.txt"), + children: Vec::new(), + }, + ], + }, + ], + }; + sort_reflection_by(&mut expected_tree, |a, b| a.name.cmp(&b.name)); + assert_eq!(actual_tree, &expected_tree); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("main") + 
.pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} + +#[test] +fn multiple_hardlinks_to_a_single_file() { + let links = 10; + let workspace = SampleWorkspace::multiple_hardlinks_to_a_single_file(100_000, links); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let actual_size = tree.size; + let expected_size = workspace + .join("file.txt") + .pipe_as_ref(read_apparent_size) + .mul(links + 1) + .add(read_apparent_size(&workspace)) + .pipe(Bytes::new); + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} + +#[test] +fn complex_tree_with_shared_and_unique_files() { + let files_per_branch = 2 * 3 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("no-hardlinks/file-0.txt") + .pipe_as_ref(read_apparent_size) 
+ .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size: Bytes = [ + inode_size("."), + inode_size("no-hardlinks"), + inode_size("some-hardlinks"), + inode_size("only-hardlinks"), + inode_size("only-hardlinks/exclusive"), + inode_size("only-hardlinks/mixed"), + inode_size("only-hardlinks/external"), + file_size * files_per_branch, // no-hardlinks/* + file_size + * [ + 3 * files_per_branch / 8, + 2 * files_per_branch / 8, + files_per_branch / 8, + files_per_branch / 8, + files_per_branch * 4 / 8, + ] + .into_iter() + .sum::(), // some-hardlinks/* + file_size * (2 * files_per_branch), // only-hardlinks/exclusive/* + file_size * (files_per_branch / 2 + 2 * files_per_branch / 2), // only-hardlinks/mixed/* + file_size * files_per_branch, // only-hardlinks/external/* + ] + .into_iter() + .sum(); + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); +} + +#[test] +fn hardlinks_and_non_hardlinks() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("some-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("some-hardlinks/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size = [ + 
inode_size("some-hardlinks"), + file_size * files_per_branch, // file-{index}.txt + file_size * (2usize + 1usize), // link0-file0.txt, link1-file0.txt, link0-file1.txt + ] + .into_iter() + .sum(); + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("some-hardlinks") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} + +#[test] +fn exclusive_hardlinks_only() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("only-hardlinks/exclusive") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/exclusive/file-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size = + inode_size("only-hardlinks/exclusive") + 2usize * file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("only-hardlinks/exclusive") + .pipe(stdio) + .output() + .expect("spawn command") + 
.pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} + +#[test] +fn exclusive_only_and_external_only_hardlinks() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("only-hardlinks/mixed") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/mixed/link0-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size = + inode_size("only-hardlinks/mixed") + file_size * (files_per_branch + files_per_branch / 2); + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("only-hardlinks/mixed") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} + +#[test] +fn external_hardlinks_only() { + let files_per_branch = 2 * 4; + let workspace = + SampleWorkspace::complex_tree_with_shared_and_unique_files(files_per_branch, 100_000); + + let tree = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--min-ratio=0") + .with_arg("--quantity=apparent-size") + .with_arg("--json-output") + .with_arg("only-hardlinks/external") + .pipe(stdio) 
+ .output() + .expect("spawn command") + .pipe(stdout_text) + .pipe_as_ref(serde_json::from_str::) + .expect("parse stdout as JsonData") + .body + .pipe(JsonTree::::try_from) + .expect("get tree of bytes"); + + let file_size = workspace + .join("only-hardlinks/external/linkX-0.txt") + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new); + + let inode_size = |path: &str| { + workspace + .join(path) + .pipe_as_ref(read_apparent_size) + .pipe(Bytes::new) + }; + + let actual_size = tree.size; + let expected_size = inode_size("only-hardlinks/external") + file_size * files_per_branch; + assert_eq!(actual_size, expected_size); + + assert_eq!(tree.shared.details, None); + assert_eq!(tree.shared.summary, None); + + let visualization = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("only-hardlinks/external") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("STDOUT:\n{visualization}"); + assert!(!visualization.contains("Hardlinks detected!")); +} diff --git a/tests/json.rs b/tests/json.rs index 35a0a5e8..95aee0b9 100644 --- a/tests/json.rs +++ b/tests/json.rs @@ -7,10 +7,11 @@ pub use _utils::*; use command_extra::CommandExtra; use parallel_disk_usage::{ bytes_format::BytesFormat, - data_tree::{DataTree, Reflection}, + data_tree::DataTree, fs_tree_builder::FsTreeBuilder, get_size::GetApparentSize, - json_data::{JsonData, SchemaVersion}, + hardlink::HardlinkIgnorant, + json_data::{JsonData, JsonTree, SchemaVersion}, reporter::{ErrorOnlyReporter, ErrorReport}, size::Bytes, visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, @@ -24,9 +25,9 @@ use std::{ }; type SampleName = String; -type SampleData = Bytes; -type SampleReflection = Reflection; -type SampleTree = DataTree; +type SampleSize = Bytes; +type SampleJsonTree = JsonTree; +type SampleTree = DataTree; fn sample_tree() -> SampleTree { let dir = |name: &'static str, children: Vec| { @@ -73,15 +74,18 @@ fn 
json_output() { .pipe(stdout_text) .pipe_as_ref(serde_json::from_str::) .expect("parse stdout as JsonData") - .unit_and_tree - .pipe(TryInto::::try_into) + .body + .pipe(TryInto::::try_into) .expect("extract reflection") + .tree .pipe(sanitize_tree_reflection); dbg!(&actual); let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let expected = builder .pipe(DataTree::<_, Bytes>::from) @@ -95,10 +99,14 @@ fn json_output() { #[test] fn json_input() { + let json_tree = JsonTree { + tree: sample_tree().into_reflection(), + shared: Default::default(), + }; let json_data = JsonData { schema_version: SchemaVersion, binary_version: None, - unit_and_tree: sample_tree().into_reflection().into(), + body: json_tree.into(), }; let json = serde_json::to_string_pretty(&json_data).expect("convert sample tree to JSON"); eprintln!("JSON: {json}\n"); @@ -133,7 +141,6 @@ fn json_input() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -146,7 +153,7 @@ fn json_input() { fn json_output_json_input() { let workspace = SampleWorkspace::default(); - let json_output = Command::new(PDU) + let mut json_output = Command::new(PDU) .with_current_dir(&workspace) .with_arg("--json-output") .with_arg("--quantity=apparent-size") @@ -165,6 +172,7 @@ fn json_output_json_input() { .with_stdin( json_output .stdout + .take() .expect("get stdout of command with --json-output") .into(), ) @@ -191,4 +199,9 @@ fn json_output_json_input() { eprintln!("EXPECTED:\n{expected}\n"); assert_eq!(actual, expected); + + let json_output_status = json_output + .wait() + .expect("wait for the command with 
--json-output to terminate"); + assert!(json_output_status.success()); } diff --git a/tests/tree_builder.rs b/tests/tree_builder.rs index ae7d3bfe..cd1951c2 100644 --- a/tests/tree_builder.rs +++ b/tests/tree_builder.rs @@ -58,6 +58,7 @@ impl SampleTree { } }, join_path: |prefix, name| format!("{prefix}{SAMPLE_SEPARATOR}{name}"), + max_depth: 10, } .pipe(DataTree::from) .into_par_sorted(|left, right| left.name().as_str().cmp(right.name().as_str())) diff --git a/tests/usual_cli.rs b/tests/usual_cli.rs index 4cd6a26a..cbacb9f2 100644 --- a/tests/usual_cli.rs +++ b/tests/usual_cli.rs @@ -8,17 +8,18 @@ use parallel_disk_usage::{ bytes_format::BytesFormat, data_tree::DataTree, fs_tree_builder::FsTreeBuilder, - get_size::{GetApparentSize, GetBlockCount, GetBlockSize}, + get_size::GetApparentSize, + hardlink::HardlinkIgnorant, os_string_display::OsStringDisplay, reporter::{ErrorOnlyReporter, ErrorReport}, visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, }; use pipe_trait::Pipe; use pretty_assertions::assert_eq; -use std::{ - convert::TryInto, - process::{Command, Stdio}, -}; +use std::process::{Command, Stdio}; + +#[cfg(unix)] +use parallel_disk_usage::get_size::{GetBlockCount, GetBlockSize}; fn stdio(command: Command) -> Command { command @@ -42,7 +43,9 @@ fn total_width() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: DEFAULT_GET_SIZE, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -54,7 +57,6 @@ fn total_width() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -80,7 +82,9 @@ fn column_width() { 
let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: DEFAULT_GET_SIZE, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -92,7 +96,6 @@ fn column_width() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::components(10, 90), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -118,7 +121,9 @@ fn min_ratio_0() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse()); @@ -129,7 +134,6 @@ fn min_ratio_0() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -155,7 +159,9 @@ fn min_ratio() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.1); @@ -167,7 +173,6 @@ fn min_ratio() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = 
format!("{visualizer}"); let expected = expected.trim_end(); @@ -193,7 +198,9 @@ fn max_depth_2() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 2, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -205,7 +212,6 @@ fn max_depth_2() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 2.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -231,7 +237,9 @@ fn max_depth_1() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 1, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -243,7 +251,6 @@ fn max_depth_1() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 1.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -268,7 +275,9 @@ fn top_down() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: DEFAULT_GET_SIZE, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -280,7 +289,6 @@ fn top_down() { direction: Direction::TopDown, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 
10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -305,7 +313,9 @@ fn align_right() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: DEFAULT_GET_SIZE, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -317,7 +327,6 @@ fn align_right() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Right, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -342,7 +351,9 @@ fn quantity_apparent_size() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -354,7 +365,6 @@ fn quantity_apparent_size() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -380,7 +390,9 @@ fn quantity_block_size() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetBlockSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -392,7 +404,6 @@ fn quantity_block_size() { direction: Direction::BottomUp, bar_alignment: 
BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -418,7 +429,9 @@ fn quantity_block_count() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: GetBlockCount, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -430,7 +443,6 @@ fn quantity_block_count() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -439,14 +451,13 @@ fn quantity_block_count() { assert_eq!(actual, expected); } -#[cfg(unix)] #[test] fn bytes_format_plain() { let workspace = SampleWorkspace::default(); let actual = Command::new(PDU) .with_current_dir(&workspace) .with_arg("--total-width=100") - .with_arg("--quantity=block-size") + .with_arg("--quantity=apparent-size") .with_arg("--bytes-format=plain") .pipe(stdio) .output() @@ -456,8 +467,10 @@ fn bytes_format_plain() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), - size_getter: GetBlockSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -469,7 +482,6 @@ fn bytes_format_plain() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let 
expected = expected.trim_end(); @@ -478,14 +490,13 @@ fn bytes_format_plain() { assert_eq!(actual, expected); } -#[cfg(unix)] #[test] fn bytes_format_metric() { let workspace = SampleWorkspace::default(); let actual = Command::new(PDU) .with_current_dir(&workspace) .with_arg("--total-width=100") - .with_arg("--quantity=block-size") + .with_arg("--quantity=apparent-size") .with_arg("--bytes-format=metric") .pipe(stdio) .output() @@ -495,8 +506,10 @@ fn bytes_format_metric() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), - size_getter: GetBlockSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -508,7 +521,6 @@ fn bytes_format_metric() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -517,14 +529,13 @@ fn bytes_format_metric() { assert_eq!(actual, expected); } -#[cfg(unix)] #[test] fn bytes_format_binary() { let workspace = SampleWorkspace::default(); let actual = Command::new(PDU) .with_current_dir(&workspace) .with_arg("--total-width=100") - .with_arg("--quantity=block-size") + .with_arg("--quantity=apparent-size") .with_arg("--bytes-format=binary") .pipe(stdio) .output() @@ -534,8 +545,10 @@ fn bytes_format_binary() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), - size_getter: GetBlockSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); 
data_tree.par_cull_insignificant_data(0.01); @@ -547,7 +560,6 @@ fn bytes_format_binary() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -572,7 +584,9 @@ fn path_to_workspace() { let builder = FsTreeBuilder { root: workspace.to_path_buf(), size_getter: DEFAULT_GET_SIZE, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); data_tree.par_cull_insignificant_data(0.01); @@ -583,7 +597,6 @@ fn path_to_workspace() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); @@ -614,7 +627,9 @@ fn multiple_names() { let builder = FsTreeBuilder { root: workspace.to_path_buf().join(name), size_getter: GetApparentSize, - reporter: ErrorOnlyReporter::new(ErrorReport::SILENT), + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, }; let mut data_tree: DataTree = builder.into(); *data_tree.name_mut() = OsStringDisplay::os_string_from(name); @@ -635,11 +650,140 @@ fn multiple_names() { direction: Direction::BottomUp, bar_alignment: BarAlignment::Left, column_width_distribution: ColumnWidthDistribution::total(100), - max_depth: 10.try_into().unwrap(), }; let expected = format!("{visualizer}"); let expected = expected.trim_end(); eprintln!("EXPECTED:\n{expected}\n"); assert_eq!(actual, expected); + + let mut lines = actual.lines(); + assert!(lines.next().unwrap().contains("┌──1")); + assert!(lines.next().unwrap().contains("┌─┴0")); + 
assert!(lines.next().unwrap().contains("┌─┴nested")); + assert!(lines.next().unwrap().contains("│ ┌──1")); + assert!(lines.next().unwrap().contains("│ ├──2")); + assert!(lines.next().unwrap().contains("│ ├──3")); + assert!(lines.next().unwrap().contains("├─┴flat")); + assert!(lines.next().unwrap().contains("┌─┴(total)")); + assert_eq!(lines.next(), None); +} + +#[test] +fn multiple_names_max_depth_2() { + let workspace = SampleWorkspace::default(); + let actual = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--total-width=100") + .with_arg("--max-depth=2") + .with_arg("nested") + .with_arg("flat") + .with_arg("empty-dir") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("ACTUAL:\n{actual}\n"); + + let mut data_tree = ["nested", "flat", "empty-dir"] + .iter() + .map(|name| { + let builder = FsTreeBuilder { + root: workspace.to_path_buf().join(name), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 1, + }; + let mut data_tree: DataTree = builder.into(); + *data_tree.name_mut() = OsStringDisplay::os_string_from(name); + data_tree + }) + .pipe(|children| { + DataTree::dir( + OsStringDisplay::os_string_from("(total)"), + 0.into(), + children.collect(), + ) + }) + .into_par_sorted(|left, right| left.size().cmp(&right.size()).reverse()); + data_tree.par_cull_insignificant_data(0.01); + let visualizer = Visualizer:: { + data_tree: &data_tree, + bytes_format: BytesFormat::MetricUnits, + direction: Direction::BottomUp, + bar_alignment: BarAlignment::Left, + column_width_distribution: ColumnWidthDistribution::total(100), + }; + let expected = format!("{visualizer}"); + let expected = expected.trim_end(); + eprintln!("EXPECTED:\n{expected}\n"); + + assert_eq!(actual, expected); + + let mut lines = actual.lines(); + assert!(lines.next().unwrap().contains("┌──nested")); + 
assert!(lines.next().unwrap().contains("├──flat")); + assert!(lines.next().unwrap().contains("┌─┴(total)")); + assert_eq!(lines.next(), None); +} + +#[test] +fn multiple_names_max_depth_1() { + let workspace = SampleWorkspace::default(); + let actual = Command::new(PDU) + .with_current_dir(&workspace) + .with_arg("--quantity=apparent-size") + .with_arg("--total-width=100") + .with_arg("--max-depth=1") + .with_arg("nested") + .with_arg("flat") + .with_arg("empty-dir") + .pipe(stdio) + .output() + .expect("spawn command") + .pipe(stdout_text); + eprintln!("ACTUAL:\n{actual}\n"); + + let mut data_tree = ["nested", "flat", "empty-dir"] + .iter() + .map(|name| { + let builder = FsTreeBuilder { + root: workspace.to_path_buf().join(name), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + max_depth: 10, + }; + let mut data_tree: DataTree = builder.into(); + *data_tree.name_mut() = OsStringDisplay::os_string_from(name); + data_tree + }) + .pipe(|children| { + DataTree::dir( + OsStringDisplay::os_string_from("(total)"), + 0.into(), + children.collect(), + ) + }) + .into_par_retained(|_, _| false) + .into_par_sorted(|left, right| left.size().cmp(&right.size()).reverse()); + data_tree.par_cull_insignificant_data(0.01); + let visualizer = Visualizer:: { + data_tree: &data_tree, + bytes_format: BytesFormat::MetricUnits, + direction: Direction::BottomUp, + bar_alignment: BarAlignment::Left, + column_width_distribution: ColumnWidthDistribution::total(100), + }; + let expected = format!("{visualizer}"); + let expected = expected.trim_end(); + eprintln!("EXPECTED:\n{expected}\n"); + + assert_eq!(actual, expected); + + let mut lines = actual.lines(); + assert!(lines.next().unwrap().contains("┌──(total)")); + assert_eq!(lines.next(), None); } diff --git a/tests/visualizer.rs b/tests/visualizer.rs index e693f28b..8c839f26 100644 --- a/tests/visualizer.rs +++ b/tests/visualizer.rs @@ -1,5 +1,3 @@ 
-#![recursion_limit = "256"] - use parallel_disk_usage::{ bytes_format::BytesFormat::*, data_tree::DataTree, @@ -7,7 +5,7 @@ use parallel_disk_usage::{ visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, }; use pretty_assertions::assert_eq; -use std::{cmp::Ordering, num::NonZeroUsize}; +use std::cmp::Ordering; use text_block_macros::text_block_fnl; use zero_copy_pads::Width; @@ -33,12 +31,11 @@ macro_rules! test_case { $(#[$attributes])* #[test] fn $name() { - let tree = $tree; + let mut tree = $tree; let column_width_distribution = ColumnWidthDistribution::$column_width_function($($column_width_arguments),+); - let max_depth = NonZeroUsize::new($max_depth).expect("non-zero max_depth"); + tree.par_retain(|_, depth| depth + 1 < $max_depth); let actual = Visualizer { - max_depth, column_width_distribution, data_tree: &tree, bytes_format: $bytes_format,