diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f0e5ca..8343add 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,9 @@ jobs: - uses: dtolnay/rust-toolchain@stable - name: install GNU patch on MacOS if: runner.os == 'macOS' - run: brew install gpatch + run: | + brew install gpatch + echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH" - name: set up PATH on Windows # Needed to use GNU's patch.exe instead of Strawberry Perl patch if: runner.os == 'Windows' @@ -102,8 +104,6 @@ jobs: if [ -n "${{ matrix.job.toolchain }}" ]; then TOOLCHAIN="${{ matrix.job.toolchain }}" ; fi outputs TOOLCHAIN # target-specific options - # * CARGO_FEATURES_OPTION - CARGO_FEATURES_OPTION='--all -- --check' ; ## default to '--all-features' for code coverage # * CODECOV_FLAGS CODECOV_FLAGS=$( echo "${{ matrix.job.os }}" | sed 's/[^[:alnum:]]/_/g' ) outputs CODECOV_FLAGS @@ -113,13 +113,15 @@ jobs: - run: rustup component add llvm-tools-preview - name: install GNU patch on MacOS if: runner.os == 'macOS' - run: brew install gpatch + run: | + brew install gpatch + echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH" - name: set up PATH on Windows # Needed to use GNU's patch.exe instead of Strawberry Perl patch if: runner.os == 'Windows' run: echo "C:\Program Files\Git\usr\bin" >> $env:GITHUB_PATH - name: Test - run: cargo test ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-fail-fast + run: cargo test --all-features --no-fail-fast env: CARGO_INCREMENTAL: "0" RUSTC_WRAPPER: "" @@ -158,10 +160,10 @@ jobs: grcov . 
--output-type lcov --output-path "${COVERAGE_REPORT_FILE}" --binary-path "${COVERAGE_REPORT_DIR}" --branch echo "report=${COVERAGE_REPORT_FILE}" >> $GITHUB_OUTPUT - name: Upload coverage results (to Codecov.io) - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ${{ steps.coverage.outputs.report }} + files: ${{ steps.coverage.outputs.report }} ## flags: IntegrationTests, UnitTests, ${{ steps.vars.outputs.CODECOV_FLAGS }} flags: ${{ steps.vars.outputs.CODECOV_FLAGS }} name: codecov-umbrella diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 589b952..8346e49 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -41,9 +41,12 @@ jobs: strategy: matrix: test-target: + - { name: fuzz_cmp, should_pass: true } + - { name: fuzz_cmp_args, should_pass: true } - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } + - { name: fuzz_side, should_pass: true } steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly diff --git a/Cargo.lock b/Cargo.lock index 974fba9..e437e99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -34,9 +34,9 @@ checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "assert_cmd" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" +checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" dependencies = [ "anstyle", "bstr", @@ -91,16 +91,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets", + "windows-link", ] [[package]] @@ -128,6 +128,7 @@ dependencies = [ "assert_cmd", "chrono", "diff", + "itoa", "predicates", "pretty_assertions", "regex", @@ -144,29 +145,41 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] name = "fastrand" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "float-cmp" -version = "0.9.0" +version = "0.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" dependencies = [ "num-traits", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi", + "windows-targets", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -190,6 +203,12 @@ dependencies = [ "cc", ] +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "js-sys" version = "0.3.69" @@ -201,15 +220,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.153" +version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" [[package]] name = "log" @@ -246,9 +265,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "predicates" -version = "3.1.2" +version = "3.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" dependencies = [ "anstyle", "difflib", @@ -276,9 +295,9 @@ dependencies = [ [[package]] name 
= "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -304,9 +323,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -316,9 +335,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.5" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -327,21 +346,21 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "0.38.31" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "17f8dcd64f141950290e45c99f7710ede1b600297c91818bb30b3667c0f45dc0" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -386,15 +405,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.12.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", + "getrandom", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -411,9 +430,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" [[package]] name = "wait-timeout" @@ -424,6 +443,15 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.92" @@ -519,13 +547,10 @@ dependencies = [ ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "windows-link" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] +checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" [[package]] name = "windows-sys" @@ -600,8 +625,17 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] + [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/Cargo.toml b/Cargo.toml index 761e703..6fa1a3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,9 +17,10 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" diff = "0.1.13" +itoa = "1.0.11" regex = "1.10.4" same-file = "1.0.6" -unicode-width = "0.1.12" +unicode-width = "0.2.0" [dev-dependencies] pretty_assertions = "1.4.0" diff --git a/README.md b/README.md index 552df09..fae06d6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![CodeCov](https://codecov.io/gh/uutils/diffutils/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/diffutils) -The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) in Rust. +The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) (diff, cmp, diff3, sdiff) in Rust. Based on the incomplete diff generator in https://github.com/rust-lang/rust/blob/master/src/tools/compiletest/src/runtest.rs, and made to be compatible with GNU's diff and patch tools. 
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 5debf47..39efd70 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,18 @@ diffutils = { path = "../" } [workspace] members = ["."] +[[bin]] +name = "fuzz_cmp" +path = "fuzz_targets/fuzz_cmp.rs" +test = false +doc = false + +[[bin]] +name = "fuzz_cmp_args" +path = "fuzz_targets/fuzz_cmp_args.rs" +test = false +doc = false + [[bin]] name = "fuzz_patch" path = "fuzz_targets/fuzz_patch.rs" @@ -35,4 +47,8 @@ path = "fuzz_targets/fuzz_ed.rs" test = false doc = false - +[[bin]] +name = "fuzz_side" +path = "fuzz_targets/fuzz_side.rs" +test = false +doc = false \ No newline at end of file diff --git a/fuzz/dictionaries/cmp.txt b/fuzz/dictionaries/cmp.txt new file mode 100644 index 0000000..0365fef --- /dev/null +++ b/fuzz/dictionaries/cmp.txt @@ -0,0 +1,36 @@ +"-l" +"--verbose" +"-b" +"--print-bytes" +"-lb" +"-bl" +"-n" +"--bytes" +"--bytes=" +"--bytes=1024" +"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-i" +"--ignore-initial" +"--ignore-initial=" +"--ignore-initial=1024" +"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-s" +"-q" +"--quiet" +"--silent" +"-" +"--" +"1kB" +"1G" +"1GB" +"1T" +"1TB" +"1P" +"1PB" +"1Z" +"1ZB" +"1Y" +"1YB" +"1Y" +"0" +"1:2" diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs new file mode 100644 index 0000000..e9d0e4c --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp::{self, Cmp}; + +use std::ffi::OsString; +use std::fs::File; +use std::io::Write; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: (Vec, Vec)| { + let args = vec!["cmp", "-l", "-b", 
"target/fuzz.cmp.a", "target/fuzz.cmp.b"] + .into_iter() + .map(|s| os(s)) + .peekable(); + + let (from, to) = x; + + File::create("target/fuzz.cmp.a") + .unwrap() + .write_all(&from) + .unwrap(); + + File::create("target/fuzz.cmp.b") + .unwrap() + .write_all(&to) + .unwrap(); + + let params = + cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = cmp::cmp(¶ms); + if from == to && !matches!(ret, Ok(Cmp::Equal)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", + ret + ); + } else if from != to && !matches!(ret, Ok(Cmp::Different)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.", + ret + ); + } else if ret.is_err() { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).", + ret + ); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs new file mode 100644 index 0000000..579cf34 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -0,0 +1,23 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp; + +use libfuzzer_sys::Corpus; +use std::ffi::OsString; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: Vec| -> Corpus { + if x.len() > 6 { + // Make sure we try to parse an option when we get longer args. x[0] will be + // the executable name. 
+ if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]) { + return Corpus::Reject; + } + } + let _ = cmp::parse_params(x.into_iter().peekable()); + Corpus::Keep +}); diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs new file mode 100644 index 0000000..8a69c07 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -0,0 +1,42 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; + +use diffutilslib::side_diff; + +use std::fs::File; +use std::io::Write; +use diffutilslib::params::Params; + +fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { + let (original, new, /* width, tabsize, */ expand) = x; + + // if width == 0 || tabsize == 0 { + // return; + // } + + let params = Params { + // width, + // tabsize, + expand_tabs: expand, + ..Default::default() + }; + let mut output_buf = vec![]; + side_diff::diff(&original, &new, &mut output_buf, ¶ms); + File::create("target/fuzz.file.original") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.file.new") + .unwrap() + .write_all(&new) + .unwrap(); + File::create("target/fuzz.file") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.diff") + .unwrap() + .write_all(&output_buf) + .unwrap(); +}); \ No newline at end of file diff --git a/src/cmp.rs b/src/cmp.rs new file mode 100644 index 0000000..876c3ca --- /dev/null +++ b/src/cmp.rs @@ -0,0 +1,1175 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. 
+ +use crate::utils::format_failure_to_read_input_file; +use std::env::{self, ArgsOs}; +use std::ffi::OsString; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::iter::Peekable; +use std::process::ExitCode; +use std::{cmp, fs, io}; + +#[cfg(not(target_os = "windows"))] +use std::os::fd::{AsRawFd, FromRawFd}; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::MetadataExt; + +#[cfg(target_os = "windows")] +use std::os::windows::fs::MetadataExt; + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Params { + executable: OsString, + from: OsString, + to: OsString, + print_bytes: bool, + skip_a: Option, + skip_b: Option, + max_bytes: Option, + verbose: bool, + quiet: bool, +} + +#[inline] +fn usage_string(executable: &str) -> String { + format!("Usage: {executable} ") +} + +#[cfg(not(target_os = "windows"))] +fn is_stdout_dev_null() -> bool { + let Ok(dev_null) = fs::metadata("/dev/null") else { + return false; + }; + + let stdout_fd = io::stdout().lock().as_raw_fd(); + + // SAFETY: we have exclusive access to stdout right now. + let stdout_file = unsafe { fs::File::from_raw_fd(stdout_fd) }; + let Ok(stdout) = stdout_file.metadata() else { + return false; + }; + + let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); + + // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). 
+ std::mem::forget(stdout_file); + + is_dev_null +} + +pub fn parse_params>(mut opts: Peekable) -> Result { + let Some(executable) = opts.next() else { + return Err("Usage: ".to_string()); + }; + let executable_str = executable.to_string_lossy().to_string(); + + let parse_skip = |param: &str, skip_desc: &str| -> Result { + let suffix_start = param + .find(|b: char| !b.is_ascii_digit()) + .unwrap_or(param.len()); + let mut num = match param[..suffix_start].parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" + )) + } + }; + + if suffix_start != param.len() { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let multiplier: usize = match ¶m[suffix_start..] { + "kB" => 1_000, + "K" => 1_024, + "MB" => 1_000_000, + "M" => 1_048_576, + "GB" => 1_000_000_000, + "G" => 1_073_741_824, + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, + "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, + "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, + "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, + _ => { + return Err(format!( + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" + )); + } + }; + + num = match num.overflowing_mul(multiplier) { + (n, false) => n, + _ => usize::MAX, + } + } + + Ok(num) + }; + + let mut params = Params { + executable, + ..Default::default() + }; + let mut from = None; + let mut to = None; + let mut skip_pos1 = None; + let mut skip_pos2 = None; + while let Some(param) = opts.next() { + if param == "--" { + break; + } + if param == "-" { + if from.is_none() { + from = Some(param); + } else if to.is_none() { 
+ to = Some(param); + } else { + return Err(usage_string(&executable_str)); + } + continue; + } + if param == "-b" || param == "--print-bytes" { + params.print_bytes = true; + continue; + } + if param == "-l" || param == "--verbose" { + params.verbose = true; + continue; + } + if param == "-lb" || param == "-bl" { + params.print_bytes = true; + params.verbose = true; + continue; + } + + let param_str = param.to_string_lossy().to_string(); + if param == "-n" || param_str.starts_with("--bytes=") { + let max_bytes = if param == "-n" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let max_bytes = match max_bytes.parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{executable_str}: invalid --bytes value '{max_bytes}'" + )) + } + }; + params.max_bytes = Some(max_bytes); + continue; + } + if param == "-i" || param_str.starts_with("--ignore-initial=") { + let skip_desc = if param == "-i" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? 
+ .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let (skip_a, skip_b) = if let Some((skip_a, skip_b)) = skip_desc.split_once(':') { + ( + parse_skip(skip_a, &skip_desc)?, + parse_skip(skip_b, &skip_desc)?, + ) + } else { + let skip = parse_skip(&skip_desc, &skip_desc)?; + (skip, skip) + }; + params.skip_a = Some(skip_a); + params.skip_b = Some(skip_b); + continue; + } + if param == "-s" || param == "--quiet" || param == "--silent" { + params.quiet = true; + continue; + } + if param == "--help" { + println!("{}", usage_string(&executable_str)); + std::process::exit(0); + } + if param_str.starts_with('-') { + return Err(format!("Unknown option: {param:?}")); + } + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else if skip_pos1.is_none() { + skip_pos1 = Some(parse_skip(¶m_str, ¶m_str)?); + } else if skip_pos2.is_none() { + skip_pos2 = Some(parse_skip(¶m_str, ¶m_str)?); + } else { + return Err(usage_string(&executable_str)); + } + } + + // Do as GNU cmp, and completely disable printing if we are + // outputing to /dev/null. + #[cfg(not(target_os = "windows"))] + if is_stdout_dev_null() { + params.quiet = true; + params.verbose = false; + params.print_bytes = false; + } + + if params.quiet && params.verbose { + return Err(format!( + "{executable_str}: options -l and -s are incompatible" + )); + } + + params.from = if let Some(from) = from { + from + } else if let Some(param) = opts.next() { + param + } else { + return Err(usage_string(&executable_str)); + }; + params.to = if let Some(to) = to { + to + } else if let Some(param) = opts.next() { + param + } else { + OsString::from("-") + }; + + // GNU cmp ignores positional skip arguments if -i is provided. 
+ if params.skip_a.is_none() { + if skip_pos1.is_some() { + params.skip_a = skip_pos1; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_a = Some(parse_skip(¶m_str, ¶m_str)?); + } + }; + if params.skip_b.is_none() { + if skip_pos2.is_some() { + params.skip_b = skip_pos2; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_b = Some(parse_skip(¶m_str, ¶m_str)?); + } + } + + Ok(params) +} + +fn prepare_reader( + path: &OsString, + skip: &Option, + params: &Params, +) -> Result, String> { + let mut reader: Box = if path == "-" { + Box::new(BufReader::new(io::stdin())) + } else { + match fs::File::open(path) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + }; + + if let Some(skip) = skip { + if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + + Ok(reader) +} + +#[derive(Debug)] +pub enum Cmp { + Equal, + Different, +} + +pub fn cmp(params: &Params) -> Result { + let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; + let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; + + let mut offset_width = params.max_bytes.unwrap_or(usize::MAX); + + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { + #[cfg(not(target_os = "windows"))] + let (a_size, b_size) = (a_meta.size(), b_meta.size()); + + #[cfg(target_os = "windows")] + let (a_size, b_size) = (a_meta.file_size(), b_meta.file_size()); + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early. 
+ if params.quiet && a_size != b_size { + return Ok(Cmp::Different); + } + + let smaller = cmp::min(a_size, b_size) as usize; + offset_width = cmp::min(smaller, offset_width); + } + + let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize; + + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces + let mut output = Vec::::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4); + + let mut at_byte = 1; + let mut at_line = 1; + let mut start_of_line = true; + let mut stdout = BufWriter::new(io::stdout().lock()); + let mut compare = Cmp::Equal; + loop { + // Fill up our buffers. + let from_buf = match from.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.from, + &e, + )); + } + }; + + let to_buf = match to.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.to, + &e, + )); + } + }; + + // Check for EOF conditions. + if from_buf.is_empty() && to_buf.is_empty() { + break; + } + + if from_buf.is_empty() || to_buf.is_empty() { + let eof_on = if from_buf.is_empty() { + ¶ms.from.to_string_lossy() + } else { + ¶ms.to.to_string_lossy() + }; + + report_eof(at_byte, at_line, start_of_line, eof_on, params); + return Ok(Cmp::Different); + } + + // Fast path - for long files in which almost all bytes are the same we + // can do a direct comparison to let the compiler optimize. 
+ let consumed = std::cmp::min(from_buf.len(), to_buf.len()); + if from_buf[..consumed] == to_buf[..consumed] { + let last = from_buf[..consumed].last().unwrap(); + + at_byte += consumed; + at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); + + start_of_line = *last == b'\n'; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + + from.consume(consumed); + to.consume(consumed); + + continue; + } + + // Iterate over the buffers, the zip iterator will stop us as soon as the + // first one runs out. + for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { + if from_byte != to_byte { + compare = Cmp::Different; + + if params.verbose { + format_verbose_difference( + from_byte, + to_byte, + at_byte, + offset_width, + &mut output, + params, + )?; + stdout.write_all(output.as_slice()).map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + output.clear(); + } else { + report_difference(from_byte, to_byte, at_byte, at_line, params); + return Ok(Cmp::Different); + } + } + + start_of_line = from_byte == b'\n'; + if start_of_line { + at_line += 1; + } + + at_byte += 1; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + } + + // Notify our readers about the bytes we went over. + from.consume(consumed); + to.consume(consumed); + } + + Ok(compare) +} + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-cmp.html +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. 
+pub fn main(opts: Peekable) -> ExitCode { + let params = match parse_params(opts) { + Ok(param) => param, + Err(e) => { + eprintln!("{e}"); + return ExitCode::from(2); + } + }; + + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + return ExitCode::SUCCESS; + } + + match cmp(¶ms) { + Ok(Cmp::Equal) => ExitCode::SUCCESS, + Ok(Cmp::Different) => ExitCode::from(1), + Err(e) => { + if !params.quiet { + eprintln!("{e}"); + } + ExitCode::from(2) + } + } +} + +#[inline] +fn is_ascii_printable(byte: u8) -> bool { + let c = byte as char; + c.is_ascii() && !c.is_ascii_control() +} + +#[inline] +fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { + *buf = [b' ', b' ', b'0']; + + let mut num = byte; + let mut idx = 2; // Start at the last position in the buffer + + // Generate octal digits + while num > 0 { + buf[idx] = b'0' + num % 8; + num /= 8; + idx = idx.saturating_sub(1); + } + + // SAFETY: the operations we do above always land within ascii range. + unsafe { std::str::from_utf8_unchecked(&buf[..]) } +} + +#[inline] +fn format_byte(byte: u8) -> String { + let mut byte = byte; + let mut quoted = vec![]; + + if !is_ascii_printable(byte) { + if byte >= 128 { + quoted.push(b'M'); + quoted.push(b'-'); + byte -= 128; + } + + if byte < 32 { + quoted.push(b'^'); + byte += 64; + } else if byte == 127 { + quoted.push(b'^'); + byte = b'?'; + } + assert!((byte as char).is_ascii()); + } + + quoted.push(byte); + + // SAFETY: the checks and shifts we do above match what cat and GNU + // cmp do to ensure characters fall inside the ascii range. + unsafe { String::from_utf8_unchecked(quoted) } +} + +// This function has been optimized to not use the Rust fmt system, which +// leads to a massive speed up when processing large files: cuts the time +// for comparing 2 ~36MB completely different files in half on an M1 Max. 
+#[inline] +fn format_verbose_difference( + from_byte: u8, + to_byte: u8, + at_byte: usize, + offset_width: usize, + output: &mut Vec, + params: &Params, +) -> Result<(), String> { + assert!(!params.quiet); + + let mut at_byte_buf = itoa::Buffer::new(); + let mut from_oct = [0u8; 3]; // for octal conversions + let mut to_oct = [0u8; 3]; + + if params.print_bytes { + // "{:>width$} {:>3o} {:4} {:>3o} {}", + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width.saturating_sub(at_byte_str.len()); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + let from_byte_str = format_byte(from_byte); + let from_byte_padding = 4 - from_byte_str.len(); + + output.extend_from_slice(from_byte_str.as_bytes()); + + for _ in 0..from_byte_padding { + output.push(b' ') + } + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_byte(to_byte).as_bytes()); + + output.push(b'\n'); + } else { + // "{:>width$} {:>3o} {:>3o}" + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b'\n'); + } + + Ok(()) +} + +#[inline] +fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { + if params.quiet { + return; + } + + if at_byte == 1 { + eprintln!( + "{}: EOF on '{}' which is empty", + params.executable.to_string_lossy(), + eof_on + ); + } else if params.verbose 
{ + eprintln!( + "{}: EOF on '{}' after byte {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + ); + } else if start_of_line { + eprintln!( + "{}: EOF on '{}' after byte {}, line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line - 1 + ); + } else { + eprintln!( + "{}: EOF on '{}' after byte {}, in line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line + ); + } +} + +fn is_posix_locale() -> bool { + let locale = if let Ok(locale) = env::var("LC_ALL") { + locale + } else if let Ok(locale) = env::var("LC_MESSAGES") { + locale + } else if let Ok(locale) = env::var("LANG") { + locale + } else { + "C".to_string() + }; + + locale == "C" || locale == "POSIX" +} + +#[inline] +fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { + if params.quiet { + return; + } + + let term = if is_posix_locale() && !params.print_bytes { + "char" + } else { + "byte" + }; + print!( + "{} {} differ: {term} {}, line {}", + ¶ms.from.to_string_lossy(), + ¶ms.to.to_string_lossy(), + at_byte, + at_line + ); + if params.print_bytes { + let char_width = if to_byte >= 0x7F { 2 } else { 1 }; + print!( + " is {:>3o} {:char_width$} {:>3o} {:char_width$}", + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte) + ); + } + println!(); +} + +#[cfg(test)] +mod tests { + use super::*; + fn os(s: &str) -> OsString { + OsString::from(s) + } + + #[test] + fn positional() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo"), os("bar")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("-"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("--help"), + ..Default::default() + 
}), + parse_params( + [os("cmp"), os("foo"), os("--"), os("--help")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: None, + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2Y")] + .iter() + .cloned() + .peekable() + ) + ); + + // Bad positional arguments. + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2"), os("3")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params([os("cmp")].iter().cloned().peekable()) + ); + } + + #[test] + fn execution_modes() { + let print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + ..Default::default() + }; + assert_eq!( + Ok(print_bytes.clone()), + parse_params( + [os("cmp"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(print_bytes), + parse_params( + [os("cmp"), os("--print-bytes"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose.clone()), + parse_params( + [os("cmp"), os("-l"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose), + parse_params( + [os("cmp"), os("--verbose"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose_and_print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + 
verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-l"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-lb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes), + parse_params( + [os("cmp"), os("-bl"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + quiet: true, + ..Default::default() + }), + parse_params( + [os("cmp"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + // Some options do not mix. + assert_eq!( + Err("cmp: options -l and -s are incompatible".to_string()), + parse_params( + [os("cmp"), os("-l"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn max_bytes() { + let max_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(max_bytes.clone()), + parse_params( + [os("cmp"), os("-n"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(max_bytes), + parse_params( + [os("cmp"), os("--bytes=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--bytes=99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure case + assert_eq!( + Err("cmp: invalid --bytes value '1K'".to_string()), + parse_params( + [os("cmp"), os("--bytes=1K"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) 
+ ); + } + + #[test] + fn skips() { + let skips = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(skips.clone()), + parse_params( + [os("cmp"), os("-i"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(skips), + parse_params( + [os("cmp"), os("--ignore-initial=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(usize::MAX), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [os("cmp"), os("--ignore-initial=1:2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1_000_000_000), + skip_b: Some(1_152_921_504_606_846_976 * 2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1GB:2E"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // All special suffixes. 
+ for (i, suffixes) in [ + ["kB", "K"], + ["MB", "M"], + ["GB", "G"], + ["TB", "T"], + ["PB", "P"], + ["EB", "E"], + ["ZB", "Z"], + ["YB", "Y"], + ] + .iter() + .enumerate() + { + let values = [ + 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + ]; + for (j, v) in values.iter().enumerate() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(*v), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os(&format!("1{}:2", suffixes[j])), + os("foo"), + os("bar"), + ] + .iter() + .cloned() + .peekable() + ) + ); + } + } + + // Ignores positional arguments when -i is provided. + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("1:2"), + os("foo"), + os("bar"), + os("3"), + os("4") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure cases + assert_eq!( + Err("cmp: invalid --ignore-initial value '1mb'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=1mb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '1:2:3'".to_string()), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1:2:3"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '-1'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } +} diff --git a/src/diff.rs b/src/diff.rs new file mode 100644 index 0000000..f4c0614 --- /dev/null +++ b/src/diff.rs @@ -0,0 +1,102 @@ +// This file is part of the uutils diffutils package. 
+// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::params::{parse_params, Format}; +use crate::utils::report_failure_to_read_input_file; +use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; +use std::env::ArgsOs; +use std::ffi::OsString; +use std::fs; +use std::io::{self, stdout, Read, Write}; +use std::iter::Peekable; +use std::process::{exit, ExitCode}; + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +pub fn main(opts: Peekable) -> ExitCode { + let params = parse_params(opts).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }); + // if from and to are the same file, no need to perform any comparison + let maybe_report_identical_files = || { + if params.report_identical_files { + println!( + "Files {} and {} are identical", + params.from.to_string_lossy(), + params.to.to_string_lossy(), + ); + } + }; + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + maybe_report_identical_files(); + return ExitCode::SUCCESS; + } + + // read files + fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } + } + let mut io_error = false; + let from_content = match read_file_contents(¶ms.from) { + Ok(from_content) => from_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); + io_error = true; + vec![] + } + }; + let to_content = match read_file_contents(¶ms.to) { + Ok(to_content) => to_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); + io_error = true; + vec![] + } + }; + if 
io_error { + return ExitCode::from(2); + } + + // run diff + let result: Vec = match params.format { + Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), + Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), + Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), + Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }), + Format::SideBySide => { + let mut output = stdout().lock(); + side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + } + }; + if params.brief && !result.is_empty() { + println!( + "Files {} and {} differ", + params.from.to_string_lossy(), + params.to.to_string_lossy() + ); + } else { + io::stdout().write_all(&result).unwrap(); + } + if result.is_empty() { + maybe_report_identical_files(); + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } +} diff --git a/src/lib.rs b/src/lib.rs index 0bb911b..342b01c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,10 @@ +pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod side_diff; pub mod unified_diff; pub mod utils; @@ -10,4 +12,5 @@ pub mod utils; pub use context_diff::diff as context_diff; pub use ed_diff::diff as ed_diff; pub use normal_diff::diff as normal_diff; +pub use side_diff::diff as side_by_side_diff; pub use unified_diff::diff as unified_diff; diff --git a/src/main.rs b/src/main.rs index 7e221ea..b7c2712 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,119 +3,79 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. 
-use crate::params::{parse_params, Format}; -use regex::Regex; -use std::env; -use std::ffi::OsString; -use std::fs; -use std::io::{self, Read, Write}; -use std::process::{exit, ExitCode}; +use std::{ + env::ArgsOs, + ffi::{OsStr, OsString}, + iter::Peekable, + path::{Path, PathBuf}, + process::ExitCode, +}; +mod cmp; mod context_diff; +mod diff; mod ed_diff; mod macros; mod normal_diff; mod params; +mod side_diff; mod unified_diff; mod utils; -fn report_failure_to_read_input_file( - executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) { - // std::io::Error's display trait outputs "{detail} (os error {code})" - // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - eprintln!( - "{}: {}: {}", - executable.to_string_lossy(), - filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), - ); +/// # Panics +/// Panics if the binary path cannot be determined +fn binary_path(args: &mut Peekable) -> PathBuf { + match args.peek() { + Some(ref s) if !s.is_empty() => PathBuf::from(s), + _ => std::env::current_exe().unwrap(), + } +} + +/// #Panics +/// Panics if path has no UTF-8 valid name +fn name(binary_path: &Path) -> &OsStr { + binary_path.file_stem().unwrap() +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn usage(name: &str) { + println!("{name} {VERSION} (multi-call binary)\n"); + println!("Usage: {name} [function [arguments...]]\n"); + println!("Currently defined functions:\n"); + println!(" cmp, diff\n"); +} + +fn second_arg_error(name: &OsStr) -> ! { + eprintln!("Expected utility name as second argument, got nothing."); + usage(&name.to_string_lossy()); + std::process::exit(0); } -// Exit codes are documented at -// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. -// An exit status of 0 means no differences were found, -// 1 means some differences were found, -// and 2 means trouble. 
fn main() -> ExitCode { - let opts = env::args_os(); - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }); - // if from and to are the same file, no need to perform any comparison - let maybe_report_identical_files = || { - if params.report_identical_files { - println!( - "Files {} and {} are identical", - params.from.to_string_lossy(), - params.to.to_string_lossy(), - ); - } - }; - if params.from == "-" && params.to == "-" - || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) - { - maybe_report_identical_files(); - return ExitCode::SUCCESS; - } + let mut args = std::env::args_os().peekable(); - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] - } - }; - if io_error { - return ExitCode::from(2); - } + let exe_path = binary_path(&mut args); + let exe_name = name(&exe_path); - // run diff - let result: Vec = match params.format { - Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), - Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), - Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), - Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }), - }; - if params.brief && !result.is_empty() { - println!( - "Files {} and {} differ", - params.from.to_string_lossy(), - 
params.to.to_string_lossy() - ); - } else { - io::stdout().write_all(&result).unwrap(); - } - if result.is_empty() { - maybe_report_identical_files(); - ExitCode::SUCCESS + let util_name = if exe_name == "diffutils" { + // Discard the item we peeked. + let _ = args.next(); + + args.peek() + .cloned() + .unwrap_or_else(|| second_arg_error(exe_name)) } else { - ExitCode::from(1) + OsString::from(exe_name) + }; + + match util_name.to_str() { + Some("diff") => diff::main(args), + Some("cmp") => cmp::main(args), + Some(name) => { + eprintln!("{name}: utility not supported"); + ExitCode::from(2) + } + None => second_arg_error(exe_name), } } diff --git a/src/params.rs b/src/params.rs index c671180..b8483b7 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,4 +1,5 @@ use std::ffi::OsString; +use std::iter::Peekable; use std::path::PathBuf; use regex::Regex; @@ -10,6 +11,7 @@ pub enum Format { Unified, Context, Ed, + SideBySide, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -23,6 +25,7 @@ pub struct Params { pub brief: bool, pub expand_tabs: bool, pub tabsize: usize, + pub width: usize, } impl Default for Params { @@ -37,12 +40,12 @@ impl Default for Params { brief: false, expand_tabs: false, tabsize: 8, + width: 130, } } } -pub fn parse_params>(opts: I) -> Result { - let mut opts = opts.into_iter().peekable(); +pub fn parse_params>(mut opts: Peekable) -> Result { // parse CLI let Some(executable) = opts.next() else { @@ -57,6 +60,7 @@ pub fn parse_params>(opts: I) -> Result\d+)$").unwrap(); + let width_re = Regex::new(r"--width=(?P\d+)$").unwrap(); while let Some(param) = opts.next() { let next_param = opts.peek(); if param == "--" { @@ -101,6 +105,34 @@ pub fn parse_params>(opts: I) -> Result() { + Ok(num) => { + if num == 0 { + return Err("invalid width «0»".to_string()); + } + + num + } + Err(_) => return Err(format!("invalid width «{width_str}»")), + }; + continue; + } if tabsize_re.is_match(param.to_string_lossy().as_ref()) { // Because param matches the regular 
expression, // it is safe to assume it is valid UTF-8. @@ -112,9 +144,16 @@ pub fn parse_params>(opts: I) -> Result() { - Ok(num) => num, + Ok(num) => { + if num == 0 { + return Err("invalid tabsize «0»".to_string()); + } + + num + } Err(_) => return Err(format!("invalid tabsize «{tabsize_str}»")), }; + continue; } match match_context_diff_params(¶m, next_param, format) { @@ -156,7 +195,7 @@ pub fn parse_params>(opts: I) -> Result return Err(error), } if param.to_string_lossy().starts_with('-') { - return Err(format!("Unknown option: {:?}", param)); + return Err(format!("Unknown option: {param:?}")); } if from.is_none() { from = Some(param); @@ -323,7 +362,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -336,6 +380,7 @@ mod tests { [os("diff"), os("--normal"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -350,7 +395,12 @@ mod tests { format: Format::Ed, ..Default::default() }), - parse_params([os("diff"), os(arg), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os(arg), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); } } @@ -368,7 +418,7 @@ mod tests { format: Format::Context, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -390,7 +440,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -410,7 +460,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -427,7 +477,7 @@ mod tests { format: 
Format::Unified, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -449,7 +499,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -469,7 +519,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -487,6 +537,7 @@ mod tests { [os("diff"), os("-u54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -502,6 +553,7 @@ mod tests { [os("diff"), os("-U54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -517,6 +569,7 @@ mod tests { [os("diff"), os("-U"), os("54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -532,6 +585,7 @@ mod tests { [os("diff"), os("-c54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -544,7 +598,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -554,7 +613,12 @@ mod tests { report_identical_files: true, ..Default::default() }), - parse_params([os("diff"), os("-s"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -573,6 +637,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) ); } @@ -585,7 +650,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( 
Ok(Params { @@ -595,7 +665,12 @@ mod tests { brief: true, ..Default::default() }), - parse_params([os("diff"), os("-q"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-q"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -609,6 +684,7 @@ mod tests { [os("diff"), os("--brief"), os("foo"), os("bar"),] .iter() .cloned() + .peekable() ) ); } @@ -621,7 +697,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); for option in ["-t", "--expand-tabs"] { assert_eq!( @@ -636,6 +717,7 @@ mod tests { [os("diff"), os(option), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -649,20 +731,26 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { executable: os("diff"), from: os("foo"), to: os("bar"), - tabsize: 0, + tabsize: 1, ..Default::default() }), parse_params( - [os("diff"), os("--tabsize=0"), os("foo"), os("bar")] + [os("diff"), os("--tabsize=1"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -677,36 +765,42 @@ mod tests { [os("diff"), os("--tabsize=42"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert!(parse_params( [os("diff"), os("--tabsize"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize="), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( 
[os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( @@ -718,6 +812,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) .is_err()); } @@ -730,7 +825,12 @@ mod tests { to: os("-h"), ..Default::default() }), - parse_params([os("diff"), os("--"), os("-g"), os("-h")].iter().cloned()) + parse_params( + [os("diff"), os("--"), os("-g"), os("-h")] + .iter() + .cloned() + .peekable() + ) ); } #[test] @@ -742,7 +842,7 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("-")].iter().cloned()) + parse_params([os("diff"), os("foo"), os("-")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -751,7 +851,7 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("bar")].iter().cloned()) + parse_params([os("diff"), os("-"), os("bar")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -760,27 +860,45 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("-")].iter().cloned()) + parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!(parse_params([os("diff"), os("foo"), os("bar"), os("-")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("-"), os("-"), os("-")].iter().cloned()).is_err()); + assert!(parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); } #[test] fn missing_arguments() { - assert!(parse_params([os("diff")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("foo")].iter().cloned()).is_err()); + assert!(parse_params([os("diff")].iter().cloned().peekable()).is_err()); + assert!(parse_params([os("diff"), os("foo")].iter().cloned().peekable()).is_err()); } #[test] fn unknown_argument() { + assert!(parse_params( + [os("diff"), 
os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); assert!( - parse_params([os("diff"), os("-g"), os("foo"), os("bar")].iter().cloned()).is_err() + parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); - assert!(parse_params([os("diff"), os("-g"), os("bar")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("-g")].iter().cloned()).is_err()); + assert!(parse_params([os("diff"), os("-g")].iter().cloned().peekable()).is_err()); } #[test] fn empty() { - assert!(parse_params([].iter().cloned()).is_err()); + assert!(parse_params([].iter().cloned().peekable()).is_err()); } #[test] fn conflicting_output_styles() { @@ -797,6 +915,7 @@ mod tests { [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); } diff --git a/src/side_diff.rs b/src/side_diff.rs new file mode 100644 index 0000000..56953d2 --- /dev/null +++ b/src/side_diff.rs @@ -0,0 +1,1263 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use core::cmp::{max, min}; +use diff::Result; +use std::{io::Write, vec}; +use unicode_width::UnicodeWidthStr; + +use crate::params::Params; + +const GUTTER_WIDTH_MIN: usize = 3; + +struct CharIter<'a> { + current: &'a [u8], +} + +struct Config { + sdiff_half_width: usize, + sdiff_column_two_offset: usize, + tab_size: usize, + expanded: bool, + separator_pos: usize, +} + +impl<'a> From<&'a [u8]> for CharIter<'a> { + fn from(value: &'a [u8]) -> Self { + CharIter { current: value } + } +} + +impl<'a> Iterator for CharIter<'a> { + // (bytes for the next char, visible width) + type Item = (&'a [u8], usize); + + fn next(&mut self) -> Option { + let max = self.current.len().min(4); + + // We reached the end. 
+ if max == 0 { + return None; + } + + // Try to find the next utf-8 character, if present in the next 4 bytes. + let mut index = 1; + let mut view = &self.current[..index]; + let mut char = str::from_utf8(view); + while char.is_err() { + index += 1; + if index > max { + break; + } + view = &self.current[..index]; + char = str::from_utf8(view) + } + + match char { + Ok(c) => { + self.current = self + .current + .get(view.len()..) + .unwrap_or(&self.current[0..0]); + Some((view, UnicodeWidthStr::width(c))) + } + Err(_) => { + // We did not find an utf-8 char within the next 4 bytes, return the single byte. + self.current = &self.current[1..]; + Some((&view[..1], 1)) + } + } + } +} + +impl Config { + pub fn new(full_width: usize, tab_size: usize, expanded: bool) -> Self { + // diff uses this calculation to calculate the size of a half line + // based on the options passed (like -w, -t, etc.). It's actually + // pretty useless, because we (actually) don't have any size modifiers + // that can change this, however I just want to leave the calculate + // here, since it's not very clear and may cause some confusion + + let w = full_width as isize; + let t = tab_size as isize; + let t_plus_g = t + GUTTER_WIDTH_MIN as isize; + let unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1); + let off = unaligned_off - unaligned_off % t; + let hw = max(0, min(off - GUTTER_WIDTH_MIN as isize, w - off)) as usize; + let c2o = if hw != 0 { off as usize } else { w as usize }; + + Self { + expanded, + sdiff_column_two_offset: c2o, + tab_size, + sdiff_half_width: hw, + separator_pos: ((hw + c2o - 1) >> 1), + } + } +} + +fn format_tabs_and_spaces( + from: usize, + to: usize, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + let expanded = config.expanded; + let tab_size = config.tab_size; + let mut current = from; + + if current > to { + return Ok(()); + } + + if expanded { + while current < to { + buf.write_all(b" ")?; + current += 1; + } + return Ok(()); + } + + 
while current + (tab_size - current % tab_size) <= to { + let next_tab = current + (tab_size - current % tab_size); + buf.write_all(b"\t")?; + current = next_tab; + } + + while current < to { + buf.write_all(b" ")?; + current += 1; + } + + Ok(()) +} + +fn process_half_line( + s: &[u8], + max_width: usize, + is_right: bool, + white_space_gutter: bool, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + if s.is_empty() { + if !is_right { + format_tabs_and_spaces( + 0, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + return Ok(()); + } + + if max_width > config.sdiff_half_width { + return Ok(()); + } + + if max_width > config.sdiff_column_two_offset && !is_right { + return Ok(()); + } + + let expanded = config.expanded; + let tab_size = config.tab_size; + let sdiff_column_two_offset = config.sdiff_column_two_offset; + let mut current_width = 0; + let iter = CharIter::from(s); + + // the encoding will probably be compatible with utf8, so we can take advantage + // of that to get the size of the columns and iterate without breaking the encoding of anything. + // It seems like a good trade, since there is still a fallback in case it is not utf8. + // But I think it would be better if we used some lib that would allow us to handle this + // in the best way possible, in order to avoid overhead (currently 2 for loops are needed). + // There is a library called mcel (mcel.h) that is used in GNU diff, but the documentation + // about it is very scarce, nor is its use documented on the internet. In fact, from my + // research I didn't even find any information about it in the GNU lib's own documentation. 
+ + for c in iter { + let (char, c_width) = c; + + if current_width + c_width > max_width { + break; + } + + match char { + b"\t" => { + if expanded && (current_width + tab_size - (current_width % tab_size)) <= max_width + { + let mut spaces = tab_size - (current_width % tab_size); + while spaces > 0 { + buf.write_all(b" ")?; + current_width += 1; + spaces -= 1; + } + } else if current_width + tab_size - (current_width % tab_size) <= max_width { + buf.write_all(b"\t")?; + current_width += tab_size - (current_width % tab_size); + } + } + b"\n" => { + break; + } + b"\r" => { + buf.write_all(b"\r")?; + format_tabs_and_spaces(0, sdiff_column_two_offset, config, buf)?; + current_width = 0; + } + b"\0" | b"\x07" | b"\x0C" | b"\x0B" => { + buf.write_all(char)?; + } + _ => { + buf.write_all(char)?; + current_width += c_width; + } + } + } + + // gnu sdiff do not tabulate the hole empty right line, instead, just keep the line empty + if !is_right { + // we always sum + 1 or + GUTTER_WIDTH_MIN cause we want to expand + // up to the third column of the gutter column if the gutter is gutter white space, + // otherwise we can expand to only the first column of the gutter middle column, cause + // the next is the sep char + format_tabs_and_spaces( + current_width, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + Ok(()) +} + +fn push_output( + left_ln: &[u8], + right_ln: &[u8], + symbol: u8, + output: &mut T, + config: &Config, +) -> std::io::Result<()> { + if left_ln.is_empty() && right_ln.is_empty() { + writeln!(output)?; + return Ok(()); + } + + let white_space_gutter = symbol == b' '; + let half_width = config.sdiff_half_width; + let column_two_offset = config.sdiff_column_two_offset; + let separator_pos = config.separator_pos; + let put_new_line = true; // should be false when | is allowed + + // this involves a lot of the '|' mark, however, as it is not active, + // it is better to deactivate it as it introduces 
visual bug if + // the line is empty. + // if !left_ln.is_empty() { + // put_new_line = put_new_line || (left_ln.last() == Some(&b'\n')); + // } + // if !right_ln.is_empty() { + // put_new_line = put_new_line || (right_ln.last() == Some(&b'\n')); + // } + + process_half_line( + left_ln, + half_width, + false, + white_space_gutter, + config, + output, + )?; + if symbol != b' ' { + // the diff always want to put all tabs possible in the usable are, + // even in the middle space between the gutters if possible. + + output.write_all(&[symbol])?; + if !right_ln.is_empty() { + format_tabs_and_spaces(separator_pos + 1, column_two_offset, config, output)?; + } + } + process_half_line( + right_ln, + half_width, + true, + white_space_gutter, + config, + output, + )?; + + if put_new_line { + writeln!(output)?; + } + + Ok(()) +} + +pub fn diff( + from_file: &[u8], + to_file: &[u8], + output: &mut T, + params: &Params, +) -> Vec { + // ^ The left file ^ The right file + + let mut left_lines: Vec<&[u8]> = from_file.split_inclusive(|&c| c == b'\n').collect(); + let mut right_lines: Vec<&[u8]> = to_file.split_inclusive(|&c| c == b'\n').collect(); + let config = Config::new(params.width, params.tabsize, params.expand_tabs); + + if left_lines.last() == Some(&&b""[..]) { + left_lines.pop(); + } + + if right_lines.last() == Some(&&b""[..]) { + right_lines.pop(); + } + + /* + DISCLAIMER: + Currently the diff engine does not produce results like the diff engine used in GNU diff, + so some results may be inaccurate. For example, the line difference marker "|", according + to the GNU documentation, appears when the same lines (only the actual line, although the + relative line may change the result, so occasionally '|' markers appear with the same lines) + are different but exist in both files. 
In the current solution the same result cannot be + obtained because the diff engine does not return Both if both exist but are different, + but instead returns a Left and a Right for each one, implying that two lines were added + and deleted. Furthermore, the GNU diff program apparently stores some internal state + (this internal state is just a note about how the diff engine works) about the lines. + For example, an added or removed line directly counts in the line query of the original + lines to be printed in the output. Because of this imbalance caused by additions and + deletions, the characters ( and ) are introduced. They basically represent lines without + context, which have lost their pair in the other file due to additions or deletions. Anyway, + my goal with this disclaimer is to warn that for some reason, whether it's the diff engine's + inability to determine and predict/precalculate the result of GNU's sdiff, with this software it's + not possible to reproduce results that are 100% faithful to GNU's, however, the basic premise + of side diff of showing added and removed lines and creating edit scripts is totally possible. 
+ More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of + the goals of this project : ) + */ + for result in diff::slice(&left_lines, &right_lines) { + match result { + Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), + Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), + Result::Both(left_ln, right_ln) => { + push_output(left_ln, right_ln, b' ', output, &config).unwrap() + } + } + } + + vec![] +} + +#[cfg(test)] +mod tests { + const DEF_TAB_SIZE: usize = 4; + + use super::*; + + mod format_tabs_and_spaces { + use super::*; + + const CONFIG_E_T: Config = Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: true, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + const CONFIG_E_F: Config = Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: false, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + #[test] + fn test_format_tabs_and_spaces_expanded_false() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_format_tabs_and_spaces_expanded_true() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 5]); + } + + #[test] + fn test_format_tabs_and_spaces_from_greater_than_to() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 5, &CONFIG_E_F, &mut buf).unwrap(); + assert!(buf.is_empty()); + } + + #[test] + fn test_format_from_non_zero_position() { + let mut buf = vec![]; + format_tabs_and_spaces(2, 7, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_multiple_full_tabs_needed() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 12, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b'\t']); + } + + #[test] + fn test_uneven_tab_boundary_with_spaces() { + let mut buf = 
vec![]; + format_tabs_and_spaces(3, 10, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ']); + } + + #[test] + fn test_expanded_true_with_offset() { + let mut buf = vec![]; + format_tabs_and_spaces(3, 9, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 6]); + } + + #[test] + fn test_exact_tab_boundary_from_midpoint() { + let mut buf = vec![]; + format_tabs_and_spaces(4, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + + #[test] + fn test_mixed_tabs_and_spaces_edge_case() { + let mut buf = vec![]; + format_tabs_and_spaces(5, 9, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_minimal_gap_with_tab() { + let mut buf = vec![]; + format_tabs_and_spaces(7, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + + #[test] + fn test_expanded_false_with_tab_at_end() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + } + + mod process_half_line { + use super::*; + + fn create_test_config(expanded: bool, tab_size: usize) -> Config { + Config { + sdiff_half_width: 30, + sdiff_column_two_offset: 60, + tab_size, + expanded, + separator_pos: 15, + } + } + + #[test] + fn test_empty_line_left_expanded_false() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"", 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_tabs_unexpanded() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\tabc", 8, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'a', b'b', b'c', b'\t', b' ']); + } + + #[test] + fn test_utf8_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "😉😉😉".as_bytes(); + 
process_half_line(s, 3, false, false, &config, &mut buf).unwrap(); + let mut r = vec![]; + r.write_all("😉\t".as_bytes()).unwrap(); + assert_eq!(buf, r) + } + + #[test] + fn test_newline_handling() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"abc\ndef", 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'a', b'b', b'c', b'\t', b' ', b' ']); + } + + #[test] + fn test_carriage_return() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\rxyz", 5, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'x', b'y', b'z']); + assert_eq!(buf, r); + } + + #[test] + fn test_exact_width_fit() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"abcd", 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, b"abcd ".to_vec()); + } + + #[test] + fn test_non_utf8_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + // ISO-8859-1 + process_half_line( + &[0x63, 0x61, 0x66, 0xE9], + 5, + false, + false, + &config, + &mut buf, + ) + .unwrap(); + assert_eq!(&buf, &[0x63, 0x61, 0x66, 0xE9, b' ', b' ']); + assert!(String::from_utf8(buf).is_err()); + } + + #[test] + fn test_non_utf8_bytes_ignore_padding_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + + let utf32le_bytes = [ + 0x63, 0x00, 0x00, 0x00, // 'c' + 0x61, 0x00, 0x00, 0x00, // 'a' + 0x66, 0x00, 0x00, 0x00, // 'f' + 0xE9, 0x00, 0x00, 0x00, // 'é' + ]; + // utf8 little endiand 32 bits (or 4 bytes per char) + process_half_line(&utf32le_bytes, 6, false, false, &config, &mut buf).unwrap(); + let mut r = utf32le_bytes.to_vec(); + r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_non_utf8_non_preserve_ascii_bytes_cut() { + let config = create_test_config(false, 
DEF_TAB_SIZE); + let mut buf = vec![]; + + let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding + // ^ é char, start multi byte + process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter + } + + #[test] + fn test_right_line_padding() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"xyz", 5, true, true, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 3); + } + + #[test] + fn test_mixed_tabs_spaces() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\t \t", 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_overflow_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "日本語".as_bytes(); + process_half_line(s, 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "日本 ".as_bytes()); + } + + #[test] + fn test_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 3, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t "); + } + + #[test] + fn test_expanded_true() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_expanded_true_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_width0_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\0\x0B\x07\x0C"; + process_half_line(s, 4, false, false, &config, 
&mut buf).unwrap(); + assert_eq!(buf, b"abc\0\x0B\x07\x0C\t ") + } + + #[test] + fn test_left_empty_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b""; + process_half_line(s, 9, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\t\t\t"); + } + + #[test] + fn test_s_size_eq_max_width_p1() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abcdefghij"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abcdefghij "); + } + + #[test] + fn test_mixed_tabs_and_spaces_inversion() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" \t \t "); + } + + #[test] + fn test_expanded_with_tabs() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_expanded_with_tabs_and_space_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_zero_width_unicode_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "\u{200B}".as_bytes(); + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "\u{200B}\t\t ".as_bytes()); + } + + #[test] + fn test_multiple_carriage_returns() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'\t'; 2]); + 
r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_multiple_carriage_returns_is_right_true() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + assert_eq!(buf, r); + } + + #[test] + fn test_mixed_invalid_utf8_with_valid() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\xFF\xFEdef"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert!(String::from_utf8(s.to_vec()).is_err()); + assert_eq!(buf, b"abc\xFF\xFEdef "); + } + + #[test] + fn test_max_width_zero() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"foo bar"; + process_half_line(s, 0, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b' ']); + } + + #[test] + fn test_line_only_with_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']) + } + + #[test] + fn test_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 12, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" ".repeat(13)); + } + + #[test] + fn test_mixed_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a\tb\tc "); + } + + #[test] + fn test_mixed_tabs_with_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + 
assert_eq!(buf, b"a\tb\tc\t "); + } + + #[test] + fn test_mixed_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_mixed_tabs_expanded_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_break_if_invalid_max_width() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 61, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b""); + assert_eq!(buf.len(), 0); + } + + #[test] + fn test_new_line() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t\t "); + } + } + + mod push_output { + // almost all behavior of the push_output was tested with tests on process_half_line + + use super::*; + + impl Default for Config { + fn default() -> Self { + Config::new(130, 8, false) + } + } + + fn create_test_config_def() -> Config { + Config::default() + } + + #[test] + fn test_left_empty_right_not_added() { + let config = create_test_config_def(); + let left_ln = b""; + let right_ln = b"bar"; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\t\t\t\t\t\t\t >\tbar\n"); + } + + #[test] + fn test_right_empty_left_not_del() { + let config = create_test_config_def(); + let left_ln = b"bar"; + let right_ln = b""; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"bar\t\t\t\t\t\t\t >\n"); + } + + 
#[test] + fn test_both_empty() { + let config = create_test_config_def(); + let left_ln = b""; + let right_ln = b""; + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\n"); + } + + #[test] + fn test_output_cut_with_maximization() { + let config = create_test_config_def(); + let left_ln = b"a".repeat(62); + let right_ln = b"a".repeat(62); + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf.len(), 61 * 2 + 2); + assert_eq!(&buf[0..61], vec![b'a'; 61]); + assert_eq!(&buf[61..62], b"\t"); + let mut end = b"a".repeat(61); + end.push(b'\n'); + assert_eq!(&buf[62..], end); + } + + #[test] + fn test_both_lines_non_empty_with_space_symbol_max_tabs() { + let config = create_test_config_def(); + let left_ln = b"left"; + let right_ln = b"right"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "left\t\t\t\t\t\t\t\t"; + let expected_right = "right"; + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); + } + + #[test] + fn test_non_space_symbol_with_padding() { + let config = create_test_config_def(); + let left_ln = b"data"; + let right_ln = b""; + let symbol = b'<'; // impossible case, just to use different symbol + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, "data\t\t\t\t\t\t\t <\n".as_bytes()); + } + + #[test] + fn test_lines_exceeding_half_width() { + let config = create_test_config_def(); + let left_ln = vec![b'a'; 100]; + let left_ln = left_ln.as_slice(); + let right_ln = vec![b'b'; 100]; + let right_ln = right_ln.as_slice(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "a".repeat(61); + let expected_right = "b".repeat(61); + 
assert_eq!(buf.len(), 61 + 1 + 61 + 1); + assert_eq!(&buf[0..61], expected_left.as_bytes()); + assert_eq!(buf[61], b'\t'); + assert_eq!(&buf[62..123], expected_right.as_bytes()); + assert_eq!(&buf[123..], b"\n"); + } + + #[test] + fn test_tabs_in_lines_expanded() { + let mut config = create_test_config_def(); + config.expanded = true; + let left_ln = b"\tleft"; + let right_ln = b"\tright"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = " left".to_string() + &" ".repeat(61 - 12); + let expected_right = " right"; + assert_eq!( + buf, + format!("{}{}{}\n", expected_left, " ", expected_right).as_bytes() + ); + } + + #[test] + fn test_unicode_characters() { + let config = create_test_config_def(); + let left_ln = "áéíóú".as_bytes(); + let right_ln = "😀😃😄".as_bytes(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "áéíóú\t\t\t\t\t\t\t\t"; + let expected_right = "😀😃😄"; + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); + } + } + + mod diff { + /* + Probably this hole section should be refactored when complete sdiff + arrives. I would say that these tests are more to document the + behavior of the engine than to actually test whether it is right, + because it is right, but right up to its limitations. 
+ */ + + use super::*; + + fn generate_params() -> Params { + Params { + tabsize: 8, + expand_tabs: false, + width: 130, + ..Default::default() + } + } + + fn contains_string(vec: &[u8], s: &str) -> usize { + let pattern = s.as_bytes(); + vec.windows(pattern.len()).filter(|s| s == &pattern).count() + } + + fn calc_lines(input: &Vec) -> usize { + let mut lines_counter = 0; + + for c in input { + if c == &b'\n' { + lines_counter += 1; + } + } + + lines_counter + } + + #[test] + fn test_equal_lines() { + let params = generate_params(); + let from_file = b"equal"; + let to_file = b"equal"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + assert_eq!(calc_lines(&output), 1); + assert!(!output.contains(&b'<')); + assert!(!output.contains(&b'>')); + assert_eq!(contains_string(&output, "equal"), 2) + } + + #[test] + fn test_different_lines() { + let params = generate_params(); + let from_file = b"eq"; + let to_file = b"ne"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + assert_eq!(calc_lines(&output), 2); + assert!(output.contains(&b'>')); + assert!(output.contains(&b'<')); + assert_eq!(contains_string(&output, "eq"), 1); + assert_eq!(contains_string(&output, "ne"), 1); + } + + #[test] + fn test_added_line() { + let params = generate_params(); + let from_file = b""; + let to_file = b"new line"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, ">"), 1); + assert_eq!(contains_string(&output, "new line"), 1); + } + + #[test] + fn test_removed_line() { + let params = generate_params(); + let from_file = b"old line"; + let to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, "old line"), 1); + } + + #[test] + fn test_multiple_changes() { + let params = 
generate_params(); + let from_file = b"line1\nline2\nline3"; + let to_file = b"line1\nmodified\nline4"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 5); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn test_unicode_and_special_chars() { + let params = generate_params(); + let from_file = "á\t€".as_bytes(); + let to_file = "€\t😊".as_bytes(); + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert!(String::from_utf8_lossy(&output).contains("á")); + assert!(String::from_utf8_lossy(&output).contains("€")); + assert!(String::from_utf8_lossy(&output).contains("😊")); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_mixed_whitespace() { + let params = generate_params(); + let from_file = b" \tspaces"; + let to_file = b"\t\t tabs"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert!(output.contains(&b'<')); + assert!(output.contains(&b'>')); + assert!(String::from_utf8_lossy(&output).contains("spaces")); + assert!(String::from_utf8_lossy(&output).contains("tabs")); + } + + #[test] + fn test_empty_files() { + let params = generate_params(); + let from_file = b""; + let to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(output, vec![]); + } + + #[test] + fn test_partially_matching_lines() { + let params = generate_params(); + let from_file = b"match\nchange"; + let to_file = b"match\nupdated"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 3); + assert_eq!(contains_string(&output, "match"), 2); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_interleaved_add_remove() { + let params = generate_params(); + let from_file = 
b"A\nB\nC\nD"; + let to_file = b"B\nX\nD\nY"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "A"), 1); + assert_eq!(contains_string(&output, "X"), 1); + assert_eq!(contains_string(&output, "Y"), 1); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_swapped_lines() { + let params = generate_params(); + let from_file = b"1\n2\n3\n4"; + let to_file = b"4\n3\n2\n1"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_gap_between_changes() { + let params = generate_params(); + let from_file = b"Start\nKeep1\nRemove\nKeep2\nEnd"; + let to_file = b"Start\nNew1\nKeep1\nKeep2\nNew2\nEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "Remove"), 1); + assert_eq!(contains_string(&output, "New1"), 1); + assert_eq!(contains_string(&output, "New2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn test_mixed_operations_complex() { + let params = generate_params(); + let from_file = b"Same\nOld1\nSameMid\nOld2\nSameEnd"; + let to_file = b"Same\nNew1\nSameMid\nNew2\nNew3\nSameEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_insert_remove_middle() { + let params = generate_params(); + let from_file = b"Header\nContent1\nFooter"; + let to_file = b"Header\nContent2\nFooter"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + 
assert_eq!(calc_lines(&output), 4); + assert_eq!(contains_string(&output, "Content1"), 1); + assert_eq!(contains_string(&output, "Content2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_multiple_adjacent_changes() { + let params = generate_params(); + let from_file = b"A\nB\nC\nD\nE"; + let to_file = b"A\nX\nY\nD\nZ"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + } + + mod config { + use super::*; + + fn create_config(full_width: usize, tab_size: usize, expanded: bool) -> Config { + Config::new(full_width, tab_size, expanded) + } + + #[test] + fn test_full_width_80_tab_4() { + let config = create_config(80, 4, false); + assert_eq!(config.sdiff_half_width, 37); + assert_eq!(config.sdiff_column_two_offset, 40); + assert_eq!(config.separator_pos, 38); + } + + #[test] + fn test_full_width_40_tab_8() { + let config = create_config(40, 8, true); + assert_eq!(config.sdiff_half_width, 16); + assert_eq!(config.sdiff_column_two_offset, 24); + assert_eq!(config.separator_pos, 19); // (16 +24 -1) /2 = 19.5 + } + + #[test] + fn test_full_width_30_tab_2() { + let config = create_config(30, 2, false); + assert_eq!(config.sdiff_half_width, 13); + assert_eq!(config.sdiff_column_two_offset, 16); + assert_eq!(config.separator_pos, 14); + } + + #[test] + fn test_small_width_10_tab_4() { + let config = create_config(10, 4, false); + assert_eq!(config.sdiff_half_width, 2); + assert_eq!(config.sdiff_column_two_offset, 8); + assert_eq!(config.separator_pos, 4); + } + + #[test] + fn test_minimal_width_3_tab_4() { + let config = create_config(3, 4, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 3); + assert_eq!(config.separator_pos, 1); + } + + #[test] + fn test_odd_width_7_tab_3() { + let config 
= create_config(7, 3, false); + assert_eq!(config.sdiff_half_width, 1); + assert_eq!(config.sdiff_column_two_offset, 6); + assert_eq!(config.separator_pos, 3); + } + + #[test] + fn test_tab_size_larger_than_width() { + let config = create_config(5, 10, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 5); + assert_eq!(config.separator_pos, 2); + } + } +} diff --git a/src/utils.rs b/src/utils.rs index 561f2b9..daca18d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,8 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::io::Write; - +use regex::Regex; +use std::{ffi::OsString, io::Write}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -71,6 +71,33 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +pub fn format_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) -> String { + // std::io::Error's display trait outputs "{detail} (os error {code})" + // but we want only the {detail} (error string) part + let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); + format!( + "{}: {}: {}", + executable.to_string_lossy(), + filepath.to_string_lossy(), + error_code_re.replace(error.to_string().as_str(), ""), + ) +} + +pub fn report_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) { + eprintln!( + "{}", + format_failure_to_read_input_file(executable, filepath, error) + ); +} + #[cfg(test)] mod tests { use super::*; @@ -101,10 +128,11 @@ mod tests { // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining // the Woman emoji (👩) and the Microscope emoji (🔬). 
On supported platforms - // it is displayed as a single emoji and should have a print size of 2 columns, - // but terminal emulators tend to not support this, and display the two emojis - // side by side, thus accounting for a print size of 4 columns. - assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); + // it is displayed as a single emoji and has a print size of 2 columns. + // Terminal emulators tend to not support this, and display the two emojis + // side by side, thus accounting for a print size of 4 columns, but the + // unicode_width crate reports a correct size of 2. + assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); } #[test] diff --git a/tests/integration.rs b/tests/integration.rs index f8ad515..cfbf529 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -4,298 +4,871 @@ // files that was distributed with this source code. use assert_cmd::cmd::Command; -use diffutilslib::assert_diff_eq; use predicates::prelude::*; use std::fs::File; +#[cfg(not(windows))] +use std::fs::OpenOptions; use std::io::Write; use tempfile::{tempdir, NamedTempFile}; // Integration tests for the diffutils command +mod common { + use super::*; -#[test] -fn unknown_param() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("--foobar"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); - Ok(()) -} + #[test] + fn unknown_param() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("patch"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::eq("patch: utility not supported\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::starts_with( + "Expected utility name as second argument, got nothing.\n", + )); + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + 
cmd.arg("--foobar"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); + } + Ok(()) + } -#[test] -fn cannot_read_files() -> Result<(), Box> { - let file = NamedTempFile::new()?; + #[test] + fn cannot_read_files() -> Result<(), Box> { + let file = NamedTempFile::new()?; + + let nofile = NamedTempFile::new()?; + let nopath = nofile.into_temp_path(); + std::fs::remove_file(&nopath)?; + + #[cfg(not(windows))] + let error_message = "No such file or directory"; + #[cfg(windows)] + let error_message = "The system cannot find the file specified."; + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(&nopath).arg(file.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(file.path()).arg(&nopath); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + } - let nofile = NamedTempFile::new()?; - let nopath = nofile.into_temp_path(); - std::fs::remove_file(&nopath)?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg(&nopath).arg(&nopath); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + )) + .count(2), + ); - #[cfg(not(windows))] - let error_message = "No such file or directory"; - #[cfg(windows)] - let error_message = "The system cannot find the file specified."; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(&nopath).arg(file.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: 
{error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(file.path()).arg(&nopath); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); - - Ok(()) + Ok(()) + } } -#[test] -fn no_differences() -> Result<(), Box> { - let file = NamedTempFile::new()?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); +mod diff { + use diffutilslib::assert_diff_eq; + + use super::*; + + #[test] + fn no_differences() -> Result<(), Box> { + let file = NamedTempFile::new()?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file.path()).arg(file.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + } + Ok(()) + } + + #[test] + fn no_differences_report_identical_files() -> Result<(), Box> { + // same file + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file1.path().to_string_lossy(), + ))); + } + // two files with the same content + let mut file2 = NamedTempFile::new()?; + 
file2.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy(), + ))); + } + Ok(()) + } + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::is_empty().not()); + } + Ok(()) + } + + #[test] + fn differences_brief() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(format!( + "Files {} and {} differ\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy() + ))); } - cmd.arg(file.path()).arg(file.path()); + Ok(()) + } + + #[test] + fn missing_newline() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar".as_bytes())?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + 
cmd.arg("-e").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("No newline at end of file")); + Ok(()) + } + + #[test] + fn read_from_stdin() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("-") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg("-") + .arg(file2.path()) + .write_stdin("foo\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file2.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg("-").arg("-"); cmd.assert() .code(predicate::eq(0)) .success() .stdout(predicate::str::is_empty()); + + #[cfg(unix)] + { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("/dev/stdin") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + } + + Ok(()) + } + + #[test] + fn compare_file_to_directory() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let directory = tmp_dir.path().join("d"); + let _ = 
std::fs::create_dir(&directory); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let da_path = directory.join("a"); + let mut da = File::create(&da_path).unwrap(); + da.write_all(b"da\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&directory).arg(&a_path); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", + da_path.display(), + a_path.display() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&a_path).arg(&directory); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", + a_path.display(), + da_path.display() + ) + ); + + Ok(()) } - Ok(()) } -#[test] -fn no_differences_report_identical_files() -> Result<(), Box> { - // same file - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { +mod cmp { + use super::*; + + #[test] + fn cmp_incompatible_params() -> Result<(), Box> { let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-s"); + cmd.arg("/etc/passwd").arg("/etc/group"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with( + ": options -l and -s are incompatible\n", + )); + + Ok(()) + } + + #[test] + fn cmp_stdin() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + 
cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("a\n"); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file1.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("b\n"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); + + Ok(()) } - // two files with the same content - let mut file2 = NamedTempFile::new()?; - file2.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + + #[test] + fn cmp_equal_files() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"a\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) } - Ok(()) -} -#[test] -fn differences() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_one_file_empty() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = 
tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let _ = File::create(&b_path).unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::str::is_empty().not()); + .stderr(predicate::str::contains(" EOF on ")) + .stderr(predicate::str::ends_with(" which is empty\n")); + + Ok(()) } - Ok(()) -} -#[test] -fn differences_brief() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_immediate_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"bcd\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::eq(format!( - "Files {} and {} differ\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy() - ))); + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 1, line 1 is 141 a 142 
b\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq( + "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", + )); + + Ok(()) } - Ok(()) -} -#[test] -fn missing_newline() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar".as_bytes())?; - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-e").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("No newline at end of file")); - Ok(()) -} + #[test] + fn cmp_newline_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\ndefg").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abc\ndef\ng").unwrap(); -#[test] -fn read_from_stdin() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg(file1.path()) - .arg("-") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 
@@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg("-") - .arg(file2.path()) - .write_stdin("foo\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file2.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg("-").arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - - #[cfg(unix)] - { let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg(file1.path()) - .arg("/dev/stdin") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 8, line 2 is 147 g 12 ^J\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 12\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + 
cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + Ok(()) } - Ok(()) -} + #[test] + fn cmp_max_bytes() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("13"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n")); + Ok(()) + } + + #[test] + fn cmp_skip_args_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"---abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"###abc\n").unwrap(); + + let mut cmd = 
Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); -#[test] -fn compare_file_to_directory() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let directory = tmp_dir.path().join("d"); - let _ = std::fs::create_dir(&directory); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let da_path = directory.join("a"); - let mut da = File::create(&da_path).unwrap(); - da.write_all(b"da\n").unwrap(); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg(&directory).arg(&a_path); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", - da_path.display(), - a_path.display() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg(&a_path).arg(&directory); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", - a_path.display(), - da_path.display() - ) - ); - - Ok(()) + // Positional skips should be ignored + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("1").arg("1"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Single positional argument should only affect first file. 
+ let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip_suffix_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + writeln!(a, "{}c", "a".repeat(1024)).unwrap(); + a.flush().unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + writeln!(b, "{}c", "b".repeat(1024)).unwrap(); + b.flush().unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("--ignore-initial=1K"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("8"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + 
cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 4, line 1 is 40 150 h\n", + )); + + Ok(()) + } + + #[test] + fn cmp_binary() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + + bytes.reverse(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(concat!( + " 1 0 ^@ 377 M-^?\n", + " 2 17 ^O 376 M-~\n", + " 3 37 ^_ 310 M-H\n", + " 4 40 201 M-^A\n", + " 5 41 ! 200 M-^@\n", + " 6 50 ( 177 ^?\n", + " 7 100 @ 176 ~\n", + " 8 176 ~ 100 @\n", + " 9 177 ^? 50 (\n", + "10 200 M-^@ 41 !\n", + "11 201 M-^A 40 \n", + "12 310 M-H 37 ^_\n", + "13 376 M-~ 17 ^O\n", + "14 377 M-^? 0 ^@\n" + ))); + + Ok(()) + } + + #[test] + #[cfg(not(windows))] + fn cmp_fast_paths() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + // This test mimics one found in the GNU cmp test suite. It is used for + // validating the /dev/null optimization. 
+ let a_path = tmp_dir.path().join("a"); + let a = File::create(&a_path).unwrap(); + a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let b_path = tmp_dir.path().join("b"); + let b = File::create(&b_path).unwrap(); + b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); + + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils")) + .arg("cmp") + .arg(&a_path) + .arg(&b_path) + .stdout(dev_null) + .spawn() + .unwrap(); + + std::thread::sleep(std::time::Duration::from_millis(100)); + + assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1)); + + // Two stdins should be equal + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-"); + cmd.arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Files with longer than block size equal segments should still report + // the correct line number for the difference. Assumes 8KB block size (see + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), + // create a 24KB equality. 
+ let mut bytes = " ".repeat(4095); + bytes.push('\n'); + bytes.push_str(&" ".repeat(4096)); + + let bytes = bytes.repeat(3); + let bytes = bytes.as_bytes(); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(bytes).unwrap(); + a.write_all(b"A").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(bytes).unwrap(); + b.write_all(b"B").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); + + Ok(()) + } } diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cb59834..2593eb2 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -21,7 +21,7 @@ # (e.g. 'dev' or 'test'). # Unless overridden by the $TESTS environment variable, all tests in the test # suite will be run. Tests targeting a command that is not yet implemented -# (e.g. cmp, diff3 or sdiff) are skipped. +# (e.g. diff3 or sdiff) are skipped. 
scriptpath=$(dirname "$(readlink -f "$0")") rev=$(git rev-parse HEAD) @@ -57,8 +57,13 @@ upstreamrev=$(git rev-parse HEAD) mkdir src cd src ln -s "$binary" diff +ln -s "$binary" cmp cd ../tests +# Fetch tests/init.sh from the gnulib repository (needed since +# https://git.savannah.gnu.org/cgit/diffutils.git/commit/tests?id=1d2456f539) +curl -s "$gitserver/gitweb/?p=gnulib.git;a=blob_plain;f=tests/init.sh;hb=HEAD" -o init.sh + if [[ -n "$TESTS" ]] then tests="$TESTS" @@ -71,7 +76,6 @@ total=$(echo "$tests" | wc -w) echo "Running $total tests" export LC_ALL=C export KEEP=yes -exitcode=0 timestamp=$(date -Iseconds) urlroot="$gitserver/cgit/diffutils.git/tree/tests/" passed=0 @@ -82,35 +86,43 @@ for test in $tests do result="FAIL" url="$urlroot$test?id=$upstreamrev" - # Run only the tests that invoke `diff`, + # Run only the tests that invoke `diff` or `cmp`, # because other binaries aren't implemented yet - if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test" + if ! grep -E -s -q "(diff3|sdiff)" "$test" then - sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1 - json+="{\"test\":\"$test\",\"result\":\"$result\"," - json+="\"url\":\"$url\"," - json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\"," - json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\"," - json+="\"files\":{" - cd gt-$test.* - # Note: this doesn't include the contents of subdirectories, - # but there isn't much value added in doing so - for file in * - do - [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\"," - done - json="${json%,}}}," - cd - > /dev/null - [[ "$result" = "PASS" ]] && (( passed++ )) - [[ "$result" = "FAIL" ]] && (( failed++ )) + sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" + if [[ $? 
= 77 ]] + then + result="SKIP" + else + json+="{\"test\":\"$test\",\"result\":\"$result\"," + json+="\"url\":\"$url\"," + json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\"," + json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\"," + json+="\"files\":{" + cd gt-$test.* + # Note: this doesn't include the contents of subdirectories, + # but there isn't much value added in doing so + for file in * + do + [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\"," + done + json="${json%,}}}," + cd - > /dev/null + [[ "$result" = "PASS" ]] && (( passed++ )) + [[ "$result" = "FAIL" ]] && (( failed++ )) + fi else result="SKIP" - (( skipped++ )) - json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"}," fi color=2 # green [[ "$result" = "FAIL" ]] && color=1 # red - [[ "$result" = "SKIP" ]] && color=3 # yellow + if [[ $result = "SKIP" ]] + then + (( skipped++ )) + json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"}," + color=3 # yellow + fi printf " %-40s $(tput setaf $color)$result$(tput sgr0)\n" "$test" done echo "" @@ -138,4 +150,5 @@ resultsfile="test-results.json" echo "$json" | jq > "$resultsfile" echo "Results written to $scriptpath/$resultsfile" -exit $exitcode +(( failed > 0 )) && exit 1 +exit 0