From d5bce65a29fbf36e12c0ed36d65c2c7225f2ff76 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:41:54 +0000 Subject: [PATCH 01/42] Update Rust crate pretty_assertions to v1.4.1 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 974fba9..7fdb99d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -602,6 +602,6 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" From 7c9c2a1ab280d0cde2de6f37837bbb8fd88a1f49 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 19 Sep 2024 20:09:56 +0000 Subject: [PATCH 02/42] Update Rust crate unicode-width to v0.1.14 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7fdb99d..f5589ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -411,9 +411,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" 
[[package]] name = "wait-timeout" From d8b91fd60eabc584ebdbad86d6ace6bbfea9355a Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Thu, 19 Sep 2024 22:33:33 +0200 Subject: [PATCH 03/42] Update unit test expectation --- src/utils.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 561f2b9..df1390d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -101,10 +101,11 @@ mod tests { // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms - // it is displayed as a single emoji and should have a print size of 2 columns, - // but terminal emulators tend to not support this, and display the two emojis - // side by side, thus accounting for a print size of 4 columns. - assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); + // it is displayed as a single emoji and has a print size of 2 columns. + // Terminal emulators tend to not support this, and display the two emojis + // side by side, thus accounting for a print size of 4 columns, but the + // unicode_width crate reports a correct size of 2. 
+ assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); } #[test] From 7574243de14cc33ba2222ebdafba5dcc3192ec36 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 20 Sep 2024 05:14:22 +0000 Subject: [PATCH 04/42] Update Rust crate unicode-width to 0.2.0 --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5589ee..24fc712 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -411,9 +411,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "wait-timeout" diff --git a/Cargo.toml b/Cargo.toml index 761e703..477467c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ chrono = "0.4.38" diff = "0.1.13" regex = "1.10.4" same-file = "1.0.6" -unicode-width = "0.1.12" +unicode-width = "0.2.0" [dev-dependencies] pretty_assertions = "1.4.0" From c1b66e4a47ab398e441e9414dfb89ada5017114d Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Thu, 26 Sep 2024 22:44:56 +0200 Subject: [PATCH 05/42] When running the upstream test suite, fetch missing tests/init.sh (fixes #90) --- tests/run-upstream-testsuite.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cb59834..cfc20a9 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -59,6 +59,10 @@ cd src ln -s "$binary" diff cd ../tests +# Fetch tests/init.sh from the gnulib repository (needed since +# https://git.savannah.gnu.org/cgit/diffutils.git/commit/tests?id=1d2456f539) +curl -s "$gitserver/gitweb/?p=gnulib.git;a=blob_plain;f=tests/init.sh;hb=HEAD" 
-o init.sh + if [[ -n "$TESTS" ]] then tests="$TESTS" From 72c7802f0694e44b19832db7f87d9da0c32db5c6 Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Sun, 22 Sep 2024 22:48:36 -0300 Subject: [PATCH 06/42] Take utility name as first parameter on diffutils This is in preparation for adding the other diffutils commands, cmp, diff3, sdiff. We use a similar strategy to uutils/coreutils, with the single binary acting as one of the supported tools if called through a symlink with the appropriate name. When using the multi-tool binary directly, the utility needds to be the first parameter. --- src/diff.rs | 98 +++++++++++++++++++++++++++ src/main.rs | 154 ++++++++++++++++--------------------------- src/params.rs | 136 ++++++++++++++++++++++++++++++-------- src/utils.rs | 19 +++++- tests/integration.rs | 16 +++++ 5 files changed, 296 insertions(+), 127 deletions(-) create mode 100644 src/diff.rs diff --git a/src/diff.rs b/src/diff.rs new file mode 100644 index 0000000..6998e2b --- /dev/null +++ b/src/diff.rs @@ -0,0 +1,98 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::params::{parse_params, Format}; +use crate::utils::report_failure_to_read_input_file; +use crate::{context_diff, ed_diff, normal_diff, unified_diff}; +use std::env::ArgsOs; +use std::ffi::OsString; +use std::fs; +use std::io::{self, Read, Write}; +use std::iter::Peekable; +use std::process::{exit, ExitCode}; + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. 
+pub(crate) fn main(opts: Peekable) -> ExitCode { + let params = parse_params(opts).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }); + // if from and to are the same file, no need to perform any comparison + let maybe_report_identical_files = || { + if params.report_identical_files { + println!( + "Files {} and {} are identical", + params.from.to_string_lossy(), + params.to.to_string_lossy(), + ); + } + }; + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + maybe_report_identical_files(); + return ExitCode::SUCCESS; + } + + // read files + fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } + } + let mut io_error = false; + let from_content = match read_file_contents(¶ms.from) { + Ok(from_content) => from_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); + io_error = true; + vec![] + } + }; + let to_content = match read_file_contents(¶ms.to) { + Ok(to_content) => to_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); + io_error = true; + vec![] + } + }; + if io_error { + return ExitCode::from(2); + } + + // run diff + let result: Vec = match params.format { + Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), + Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), + Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), + Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }), + }; + if params.brief && !result.is_empty() { + println!( + "Files {} and {} differ", + params.from.to_string_lossy(), + params.to.to_string_lossy() + ); + } else { + io::stdout().write_all(&result).unwrap(); + } + if result.is_empty() { + 
maybe_report_identical_files(); + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } +} diff --git a/src/main.rs b/src/main.rs index 7e221ea..824b45c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,15 +3,16 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use crate::params::{parse_params, Format}; -use regex::Regex; -use std::env; -use std::ffi::OsString; -use std::fs; -use std::io::{self, Read, Write}; -use std::process::{exit, ExitCode}; +use std::{ + env::ArgsOs, + ffi::OsString, + iter::Peekable, + path::{Path, PathBuf}, + process::ExitCode, +}; mod context_diff; +mod diff; mod ed_diff; mod macros; mod normal_diff; @@ -19,103 +20,60 @@ mod params; mod unified_diff; mod utils; -fn report_failure_to_read_input_file( - executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) { - // std::io::Error's display trait outputs "{detail} (os error {code})" - // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - eprintln!( - "{}: {}: {}", - executable.to_string_lossy(), - filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), - ); +/// # Panics +/// Panics if the binary path cannot be determined +fn binary_path(args: &mut Peekable) -> PathBuf { + match args.peek() { + Some(ref s) if !s.is_empty() => PathBuf::from(s), + _ => std::env::current_exe().unwrap(), + } +} + +fn name(binary_path: &Path) -> Option<&str> { + binary_path.file_stem()?.to_str() +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn usage(name: &str) { + println!("{name} {VERSION} (multi-call binary)\n"); + println!("Usage: {name} [function [arguments...]]\n"); + println!("Currently defined functions:\n"); + println!(" diff\n"); +} + +fn second_arg_error(name: &str) -> ! 
{ + println!("Expected utility name as second argument, got nothing."); + usage(name); + std::process::exit(0); } -// Exit codes are documented at -// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. -// An exit status of 0 means no differences were found, -// 1 means some differences were found, -// and 2 means trouble. fn main() -> ExitCode { - let opts = env::args_os(); - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); + let mut args = std::env::args_os().peekable(); + + let exe_path = binary_path(&mut args); + let exe_name = name(&exe_path).unwrap_or_else(|| { + usage(""); + std::process::exit(1); }); - // if from and to are the same file, no need to perform any comparison - let maybe_report_identical_files = || { - if params.report_identical_files { - println!( - "Files {} and {} are identical", - params.from.to_string_lossy(), - params.to.to_string_lossy(), - ); - } - }; - if params.from == "-" && params.to == "-" - || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) - { - maybe_report_identical_files(); - return ExitCode::SUCCESS; - } - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] - } - }; - if io_error { - return ExitCode::from(2); - } + let util_name = if exe_name == "diffutils" { + // Discard the item we peeked. 
+ let _ = args.next(); - // run diff - let result: Vec = match params.format { - Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), - Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), - Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), - Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }), - }; - if params.brief && !result.is_empty() { - println!( - "Files {} and {} differ", - params.from.to_string_lossy(), - params.to.to_string_lossy() - ); - } else { - io::stdout().write_all(&result).unwrap(); - } - if result.is_empty() { - maybe_report_identical_files(); - ExitCode::SUCCESS + args.peek() + .cloned() + .unwrap_or_else(|| second_arg_error(exe_name)) } else { - ExitCode::from(1) + OsString::from(exe_name) + }; + + match util_name.to_str() { + Some("diff") => diff::main(args), + Some(name) => { + usage(&format!("{}: utility not supported", name)); + ExitCode::from(1) + } + None => second_arg_error(exe_name), } } diff --git a/src/params.rs b/src/params.rs index c671180..9b3abc4 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,4 +1,5 @@ use std::ffi::OsString; +use std::iter::Peekable; use std::path::PathBuf; use regex::Regex; @@ -41,8 +42,7 @@ impl Default for Params { } } -pub fn parse_params>(opts: I) -> Result { - let mut opts = opts.into_iter().peekable(); +pub fn parse_params>(mut opts: Peekable) -> Result { // parse CLI let Some(executable) = opts.next() else { @@ -323,7 +323,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -336,6 +341,7 @@ mod tests { [os("diff"), os("--normal"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -350,7 +356,12 @@ mod tests { format: Format::Ed, ..Default::default() }), - 
parse_params([os("diff"), os(arg), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os(arg), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); } } @@ -368,7 +379,7 @@ mod tests { format: Format::Context, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -390,7 +401,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -410,7 +421,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -427,7 +438,7 @@ mod tests { format: Format::Unified, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -449,7 +460,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -469,7 +480,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -487,6 +498,7 @@ mod tests { [os("diff"), os("-u54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -502,6 +514,7 @@ mod tests { [os("diff"), os("-U54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -517,6 +530,7 @@ mod tests { [os("diff"), os("-U"), os("54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -532,6 +546,7 @@ mod tests { [os("diff"), os("-c54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ 
-544,7 +559,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -554,7 +574,12 @@ mod tests { report_identical_files: true, ..Default::default() }), - parse_params([os("diff"), os("-s"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -573,6 +598,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) ); } @@ -585,7 +611,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -595,7 +626,12 @@ mod tests { brief: true, ..Default::default() }), - parse_params([os("diff"), os("-q"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-q"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -609,6 +645,7 @@ mod tests { [os("diff"), os("--brief"), os("foo"), os("bar"),] .iter() .cloned() + .peekable() ) ); } @@ -621,7 +658,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); for option in ["-t", "--expand-tabs"] { assert_eq!( @@ -636,6 +678,7 @@ mod tests { [os("diff"), os(option), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -649,7 +692,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -663,6 +711,7 @@ mod tests { [os("diff"), 
os("--tabsize=0"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -677,36 +726,42 @@ mod tests { [os("diff"), os("--tabsize=42"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert!(parse_params( [os("diff"), os("--tabsize"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize="), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( @@ -718,6 +773,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) .is_err()); } @@ -730,7 +786,12 @@ mod tests { to: os("-h"), ..Default::default() }), - parse_params([os("diff"), os("--"), os("-g"), os("-h")].iter().cloned()) + parse_params( + [os("diff"), os("--"), os("-g"), os("-h")] + .iter() + .cloned() + .peekable() + ) ); } #[test] @@ -742,7 +803,7 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("-")].iter().cloned()) + parse_params([os("diff"), os("foo"), os("-")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -751,7 +812,7 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("bar")].iter().cloned()) + parse_params([os("diff"), os("-"), os("bar")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -760,27 +821,45 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("-")].iter().cloned()) + parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!(parse_params([os("diff"), os("foo"), os("bar"), os("-")].iter().cloned()).is_err()); - 
assert!(parse_params([os("diff"), os("-"), os("-"), os("-")].iter().cloned()).is_err()); + assert!(parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); } #[test] fn missing_arguments() { - assert!(parse_params([os("diff")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("foo")].iter().cloned()).is_err()); + assert!(parse_params([os("diff")].iter().cloned().peekable()).is_err()); + assert!(parse_params([os("diff"), os("foo")].iter().cloned().peekable()).is_err()); } #[test] fn unknown_argument() { + assert!(parse_params( + [os("diff"), os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); assert!( - parse_params([os("diff"), os("-g"), os("foo"), os("bar")].iter().cloned()).is_err() + parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); - assert!(parse_params([os("diff"), os("-g"), os("bar")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("-g")].iter().cloned()).is_err()); + assert!(parse_params([os("diff"), os("-g")].iter().cloned().peekable()).is_err()); } #[test] fn empty() { - assert!(parse_params([].iter().cloned()).is_err()); + assert!(parse_params([].iter().cloned().peekable()).is_err()); } #[test] fn conflicting_output_styles() { @@ -797,6 +876,7 @@ mod tests { [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); } diff --git a/src/utils.rs b/src/utils.rs index df1390d..a216784 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,8 +3,9 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::io::Write; +use std::{ffi::OsString, io::Write}; +use regex::Regex; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. 
@@ -71,6 +72,22 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +pub fn report_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) { + // std::io::Error's display trait outputs "{detail} (os error {code})" + // but we want only the {detail} (error string) part + let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); + eprintln!( + "{}: {}: {}", + executable.to_string_lossy(), + filepath.to_string_lossy(), + error_code_re.replace(error.to_string().as_str(), ""), + ); +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/integration.rs b/tests/integration.rs index f8ad515..2b3fd4f 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -15,6 +15,7 @@ use tempfile::{tempdir, NamedTempFile}; #[test] fn unknown_param() -> Result<(), Box> { let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("--foobar"); cmd.assert() .code(predicate::eq(2)) @@ -37,6 +38,7 @@ fn cannot_read_files() -> Result<(), Box> { let error_message = "The system cannot find the file specified."; let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg(&nopath).arg(file.path()); cmd.assert() .code(predicate::eq(2)) @@ -47,6 +49,7 @@ fn cannot_read_files() -> Result<(), Box> { ))); let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg(file.path()).arg(&nopath); cmd.assert() .code(predicate::eq(2)) @@ -57,6 +60,7 @@ fn cannot_read_files() -> Result<(), Box> { ))); let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg(&nopath).arg(&nopath); cmd.assert().code(predicate::eq(2)).failure().stderr( predicate::str::contains(format!( @@ -74,6 +78,7 @@ fn no_differences() -> Result<(), Box> { let file = NamedTempFile::new()?; for option in ["", "-u", "-c", "-e"] { let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); } @@ -93,6 +98,7 @@ fn 
no_differences_report_identical_files() -> Result<(), Box Result<(), Box Result<(), Box> { file2.write_all("bar\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); } @@ -155,6 +163,7 @@ fn differences_brief() -> Result<(), Box> { file2.write_all("bar\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); } @@ -178,6 +187,7 @@ fn missing_newline() -> Result<(), Box> { let mut file2 = NamedTempFile::new()?; file2.write_all("bar".as_bytes())?; let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-e").arg(file1.path()).arg(file2.path()); cmd.assert() .code(predicate::eq(2)) @@ -194,6 +204,7 @@ fn read_from_stdin() -> Result<(), Box> { file2.write_all("bar\n".as_bytes())?; let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-u") .arg(file1.path()) .arg("-") @@ -210,6 +221,7 @@ fn read_from_stdin() -> Result<(), Box> { ); let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-u") .arg("-") .arg(file2.path()) @@ -226,6 +238,7 @@ fn read_from_stdin() -> Result<(), Box> { ); let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-u").arg("-").arg("-"); cmd.assert() .code(predicate::eq(0)) @@ -235,6 +248,7 @@ fn read_from_stdin() -> Result<(), Box> { #[cfg(unix)] { let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-u") .arg(file1.path()) .arg("/dev/stdin") @@ -270,6 +284,7 @@ fn compare_file_to_directory() -> Result<(), Box> { da.write_all(b"da\n").unwrap(); let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); cmd.arg("-u").arg(&directory).arg(&a_path); cmd.assert().code(predicate::eq(1)).failure(); @@ -284,6 +299,7 @@ fn compare_file_to_directory() -> Result<(), Box> { ); let mut cmd = Command::cargo_bin("diffutils")?; + 
cmd.arg("diff"); cmd.arg("-u").arg(&a_path).arg(&directory); cmd.assert().code(predicate::eq(1)).failure(); From f75c1879711f8ec05922f5be00be7447e1f735af Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Fri, 27 Sep 2024 19:45:34 +0200 Subject: [PATCH 07/42] Upstream test suite: correctly handle tests that are skipped (fixes #92) --- tests/run-upstream-testsuite.sh | 52 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cfc20a9..44c56f3 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -75,7 +75,6 @@ total=$(echo "$tests" | wc -w) echo "Running $total tests" export LC_ALL=C export KEEP=yes -exitcode=0 timestamp=$(date -Iseconds) urlroot="$gitserver/cgit/diffutils.git/tree/tests/" passed=0 @@ -90,31 +89,39 @@ do # because other binaries aren't implemented yet if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test" then - sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1 - json+="{\"test\":\"$test\",\"result\":\"$result\"," - json+="\"url\":\"$url\"," - json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\"," - json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\"," - json+="\"files\":{" - cd gt-$test.* - # Note: this doesn't include the contents of subdirectories, - # but there isn't much value added in doing so - for file in * - do - [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\"," - done - json="${json%,}}}," - cd - > /dev/null - [[ "$result" = "PASS" ]] && (( passed++ )) - [[ "$result" = "FAIL" ]] && (( failed++ )) + sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" + if [[ $? 
= 77 ]] + then + result="SKIP" + else + json+="{\"test\":\"$test\",\"result\":\"$result\"," + json+="\"url\":\"$url\"," + json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\"," + json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\"," + json+="\"files\":{" + cd gt-$test.* + # Note: this doesn't include the contents of subdirectories, + # but there isn't much value added in doing so + for file in * + do + [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\"," + done + json="${json%,}}}," + cd - > /dev/null + [[ "$result" = "PASS" ]] && (( passed++ )) + [[ "$result" = "FAIL" ]] && (( failed++ )) + fi else result="SKIP" - (( skipped++ )) - json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"}," fi color=2 # green [[ "$result" = "FAIL" ]] && color=1 # red - [[ "$result" = "SKIP" ]] && color=3 # yellow + if [[ $result = "SKIP" ]] + then + (( skipped++ )) + json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"}," + color=3 # yellow + fi printf " %-40s $(tput setaf $color)$result$(tput sgr0)\n" "$test" done echo "" @@ -142,4 +149,5 @@ resultsfile="test-results.json" echo "$json" | jq > "$resultsfile" echo "Results written to $scriptpath/$resultsfile" -exit $exitcode +(( failed > 0 )) && exit 1 +exit 0 From bfdbf6d7b227207e9fc0ff2db16b1f8a0112b3f7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:37:34 +0000 Subject: [PATCH 08/42] Update Rust crate tempfile to v3.13.0 --- Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24fc712..5afcb6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = 
"e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "float-cmp" @@ -201,15 +201,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.153" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "log" @@ -333,9 +333,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ "bitflags", "errno", @@ -386,9 +386,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.12.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", From 26bcc102c0c71b747e1102b7de7fef1adae5ed19 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 29 Sep 2024 15:37:34 +0000 Subject: [PATCH 09/42] Update Rust crate regex to v1.11.0 --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5afcb6d..3330d21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -304,9 +304,9 @@ 
dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", @@ -316,9 +316,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.5" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -327,9 +327,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" From 50057412bdf43159abb2cd0c412969f5a505be1b Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Mon, 23 Sep 2024 16:27:40 -0300 Subject: [PATCH 10/42] Add cmp utility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utility should support all the arguments supported by GNU cmp and perform slightly better. On a "bad" scenario, ~36M files which are completely different, our version runs in ~72% of the time of the original on my M1 Max: > hyperfine --warmup 1 -i --output=pipe \ 'cmp -l huge huge.3' Benchmark 1: cmp -l huge huge.3 Time (mean ± σ): 3.237 s ± 0.014 s [User: 2.891 s, System: 0.341 s] Range (min … max): 3.221 s … 3.271 s 10 runs Warning: Ignoring non-zero exit code. 
> hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 2.392 s ± 0.009 s [User: 1.978 s, System: 0.406 s] Range (min … max): 2.378 s … 2.406 s 10 runs Warning: Ignoring non-zero exit code. Our cmp runs in ~116% of the time when comparing libxul.so to the chromium-browser binary with -l and -b. In a best case scenario of comparing 2 files which are the same except for the last byte, our tool is slightly faster. --- .github/workflows/fuzzing.yml | 2 + fuzz/Cargo.toml | 12 + fuzz/dictionaries/cmp.txt | 36 + fuzz/fuzz_targets/fuzz_cmp.rs | 51 ++ fuzz/fuzz_targets/fuzz_cmp_args.rs | 23 + src/cmp.rs | 1115 ++++++++++++++++++++++++++++ src/diff.rs | 2 +- src/lib.rs | 1 + src/main.rs | 27 +- src/utils.rs | 17 +- tests/integration.rs | 1077 ++++++++++++++++++++------- tests/run-upstream-testsuite.sh | 7 +- 12 files changed, 2089 insertions(+), 281 deletions(-) create mode 100644 fuzz/dictionaries/cmp.txt create mode 100644 fuzz/fuzz_targets/fuzz_cmp.rs create mode 100644 fuzz/fuzz_targets/fuzz_cmp_args.rs create mode 100644 src/cmp.rs diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 589b952..9ad1c17 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -41,6 +41,8 @@ jobs: strategy: matrix: test-target: + - { name: fuzz_cmp, should_pass: true } + - { name: fuzz_cmp_args, should_pass: true } - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 5debf47..8b0b521 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,18 @@ diffutils = { path = "../" } [workspace] members = ["."] +[[bin]] +name = "fuzz_cmp" +path = "fuzz_targets/fuzz_cmp.rs" +test = false +doc = false + +[[bin]] +name = "fuzz_cmp_args" +path = "fuzz_targets/fuzz_cmp_args.rs" +test = false +doc = false + [[bin]] 
name = "fuzz_patch" path = "fuzz_targets/fuzz_patch.rs" diff --git a/fuzz/dictionaries/cmp.txt b/fuzz/dictionaries/cmp.txt new file mode 100644 index 0000000..0365fef --- /dev/null +++ b/fuzz/dictionaries/cmp.txt @@ -0,0 +1,36 @@ +"-l" +"--verbose" +"-b" +"--print-bytes" +"-lb" +"-bl" +"-n" +"--bytes" +"--bytes=" +"--bytes=1024" +"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-i" +"--ignore-initial" +"--ignore-initial=" +"--ignore-initial=1024" +"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-s" +"-q" +"--quiet" +"--silent" +"-" +"--" +"1kB" +"1G" +"1GB" +"1T" +"1TB" +"1P" +"1PB" +"1Z" +"1ZB" +"1Y" +"1YB" +"1Y" +"0" +"1:2" diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs new file mode 100644 index 0000000..e9d0e4c --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp::{self, Cmp}; + +use std::ffi::OsString; +use std::fs::File; +use std::io::Write; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: (Vec, Vec)| { + let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"] + .into_iter() + .map(|s| os(s)) + .peekable(); + + let (from, to) = x; + + File::create("target/fuzz.cmp.a") + .unwrap() + .write_all(&from) + .unwrap(); + + File::create("target/fuzz.cmp.b") + .unwrap() + .write_all(&to) + .unwrap(); + + let params = + cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = cmp::cmp(¶ms); + if from == to && !matches!(ret, Ok(Cmp::Equal)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", + ret + ); + } else if from != to && !matches!(ret, 
Ok(Cmp::Different)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.", + ret + ); + } else if ret.is_err() { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).", + ret + ); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs new file mode 100644 index 0000000..579cf34 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -0,0 +1,23 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp; + +use libfuzzer_sys::Corpus; +use std::ffi::OsString; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: Vec| -> Corpus { + if x.len() > 6 { + // Make sure we try to parse an option when we get longer args. x[0] will be + // the executable name. + if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]) { + return Corpus::Reject; + } + } + let _ = cmp::parse_params(x.into_iter().peekable()); + Corpus::Keep +}); diff --git a/src/cmp.rs b/src/cmp.rs new file mode 100644 index 0000000..29b8775 --- /dev/null +++ b/src/cmp.rs @@ -0,0 +1,1115 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. 
+ +use crate::utils::format_failure_to_read_input_file; +use std::env::{self, ArgsOs}; +use std::ffi::OsString; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::iter::Peekable; +use std::process::ExitCode; +use std::{fs, io}; + +#[cfg(not(target_os = "windows"))] +use std::os::fd::{AsRawFd, FromRawFd}; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::MetadataExt; + +#[cfg(target_os = "windows")] +use std::os::windows::fs::MetadataExt; + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Params { + executable: OsString, + from: OsString, + to: OsString, + print_bytes: bool, + skip_a: Option, + skip_b: Option, + max_bytes: Option, + verbose: bool, + quiet: bool, +} + +#[inline] +fn usage_string(executable: &str) -> String { + format!("Usage: {} ", executable) +} + +#[cfg(not(target_os = "windows"))] +fn is_stdout_dev_null() -> bool { + let Ok(dev_null) = fs::metadata("/dev/null") else { + return false; + }; + + let stdout_fd = io::stdout().lock().as_raw_fd(); + + // SAFETY: we have exclusive access to stdout right now. + let stdout_file = unsafe { fs::File::from_raw_fd(stdout_fd) }; + let Ok(stdout) = stdout_file.metadata() else { + return false; + }; + + let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); + + // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). 
+ std::mem::forget(stdout_file); + + is_dev_null +} + +pub fn parse_params>(mut opts: Peekable) -> Result { + let Some(executable) = opts.next() else { + return Err("Usage: ".to_string()); + }; + let executable_str = executable.to_string_lossy().to_string(); + + let parse_skip = |param: &str, skip_desc: &str| -> Result { + let suffix_start = param + .find(|b: char| !b.is_ascii_digit()) + .unwrap_or(param.len()); + let mut num = match param[..suffix_start].parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )) + } + }; + + if suffix_start != param.len() { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let multiplier: usize = match ¶m[suffix_start..] { + "kB" => 1_000, + "K" => 1_024, + "MB" => 1_000_000, + "M" => 1_048_576, + "GB" => 1_000_000_000, + "G" => 1_073_741_824, + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, + "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, + "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, + "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, + _ => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )); + } + }; + + num = match num.overflowing_mul(multiplier) { + (n, false) => n, + _ => usize::MAX, + } + } + + Ok(num) + }; + + let mut params = Params { + executable, + ..Default::default() + }; + let mut from = None; + let mut to = None; + let mut skip_pos1 = None; + let mut skip_pos2 = None; + while let Some(param) = opts.next() { + if param == "--" { + break; + } + if param == "-" { + if from.is_none() { + from = Some(param); + } else if 
to.is_none() { + to = Some(param); + } else { + return Err(usage_string(&executable_str)); + } + continue; + } + if param == "-b" || param == "--print-bytes" { + params.print_bytes = true; + continue; + } + if param == "-l" || param == "--verbose" { + params.verbose = true; + continue; + } + if param == "-lb" || param == "-bl" { + params.print_bytes = true; + params.verbose = true; + continue; + } + + let param_str = param.to_string_lossy().to_string(); + if param == "-n" || param_str.starts_with("--bytes=") { + let max_bytes = if param == "-n" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let max_bytes = match max_bytes.parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --bytes value '{}'", + executable_str, max_bytes + )) + } + }; + params.max_bytes = Some(max_bytes); + continue; + } + if param == "-i" || param_str.starts_with("--ignore-initial=") { + let skip_desc = if param == "-i" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? 
+ .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let (skip_a, skip_b) = if let Some((skip_a, skip_b)) = skip_desc.split_once(':') { + ( + parse_skip(skip_a, &skip_desc)?, + parse_skip(skip_b, &skip_desc)?, + ) + } else { + let skip = parse_skip(&skip_desc, &skip_desc)?; + (skip, skip) + }; + params.skip_a = Some(skip_a); + params.skip_b = Some(skip_b); + continue; + } + if param == "-s" || param == "--quiet" || param == "--silent" { + params.quiet = true; + continue; + } + if param == "--help" { + println!("{}", usage_string(&executable_str)); + std::process::exit(0); + } + if param_str.starts_with('-') { + return Err(format!("Unknown option: {:?}", param)); + } + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else if skip_pos1.is_none() { + skip_pos1 = Some(parse_skip(¶m_str, ¶m_str)?); + } else if skip_pos2.is_none() { + skip_pos2 = Some(parse_skip(¶m_str, ¶m_str)?); + } else { + return Err(usage_string(&executable_str)); + } + } + + // Do as GNU cmp, and completely disable printing if we are + // outputing to /dev/null. + #[cfg(not(target_os = "windows"))] + if is_stdout_dev_null() { + params.quiet = true; + params.verbose = false; + params.print_bytes = false; + } + + if params.quiet && params.verbose { + return Err(format!( + "{}: options -l and -s are incompatible", + executable_str + )); + } + + params.from = if let Some(from) = from { + from + } else if let Some(param) = opts.next() { + param + } else { + return Err(usage_string(&executable_str)); + }; + params.to = if let Some(to) = to { + to + } else if let Some(param) = opts.next() { + param + } else { + OsString::from("-") + }; + + // GNU cmp ignores positional skip arguments if -i is provided. 
+ if params.skip_a.is_none() { + if skip_pos1.is_some() { + params.skip_a = skip_pos1; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_a = Some(parse_skip(¶m_str, ¶m_str)?); + } + }; + if params.skip_b.is_none() { + if skip_pos2.is_some() { + params.skip_b = skip_pos2; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_b = Some(parse_skip(¶m_str, ¶m_str)?); + } + } + + Ok(params) +} + +fn prepare_reader( + path: &OsString, + skip: &Option, + params: &Params, +) -> Result, String> { + let mut reader: Box = if path == "-" { + Box::new(BufReader::new(io::stdin())) + } else { + match fs::File::open(path) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + }; + + if let Some(skip) = skip { + if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + + Ok(reader) +} + +#[derive(Debug)] +pub enum Cmp { + Equal, + Different, +} + +pub fn cmp(params: &Params) -> Result { + let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; + let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; + + let mut at_byte = 1; + let mut at_line = 1; + let mut start_of_line = true; + let mut verbose_diffs = vec![]; + loop { + // Fill up our buffers. + let from_buf = match from.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.from, + &e, + )); + } + }; + + let to_buf = match to.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.to, + &e, + )); + } + }; + + // Check for EOF conditions. 
+ if from_buf.is_empty() && to_buf.is_empty() { + break; + } + + if from_buf.is_empty() || to_buf.is_empty() { + let eof_on = if from_buf.is_empty() { + ¶ms.from.to_string_lossy() + } else { + ¶ms.to.to_string_lossy() + }; + + if params.verbose { + report_verbose_diffs(verbose_diffs, params)?; + } + + report_eof(at_byte, at_line, start_of_line, eof_on, params); + return Ok(Cmp::Different); + } + + // Fast path - for long files in which almost all bytes are the same we + // can do a direct comparison to let the compiler optimize. + let consumed = std::cmp::min(from_buf.len(), to_buf.len()); + if from_buf[..consumed] == to_buf[..consumed] { + let last = from_buf[..consumed].last().unwrap(); + + at_byte += consumed; + at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); + + start_of_line = *last == b'\n'; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + + from.consume(consumed); + to.consume(consumed); + + continue; + } + + // Iterate over the buffers, the zip iterator will stop us as soon as the + // first one runs out. + for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { + if from_byte != to_byte { + if params.verbose { + verbose_diffs.push((at_byte, from_byte, to_byte)); + } else { + report_difference(from_byte, to_byte, at_byte, at_line, params); + return Ok(Cmp::Different); + } + } + + start_of_line = from_byte == b'\n'; + if start_of_line { + at_line += 1; + } + + at_byte += 1; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + } + + // Notify our readers about the bytes we went over. 
+ from.consume(consumed); + to.consume(consumed); + } + + if params.verbose && !verbose_diffs.is_empty() { + report_verbose_diffs(verbose_diffs, params)?; + return Ok(Cmp::Different); + } + + Ok(Cmp::Equal) +} + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-cmp.html +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +pub fn main(opts: Peekable) -> ExitCode { + let params = match parse_params(opts) { + Ok(param) => param, + Err(e) => { + eprintln!("{e}"); + return ExitCode::from(2); + } + }; + + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + return ExitCode::SUCCESS; + } + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early. + if params.quiet { + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { + #[cfg(not(target_os = "windows"))] + if a_meta.size() != b_meta.size() { + return ExitCode::from(1); + } + #[cfg(target_os = "windows")] + if a_meta.file_size() != b_meta.file_size() { + return ExitCode::from(1); + } + } + } + + match cmp(¶ms) { + Ok(Cmp::Equal) => ExitCode::SUCCESS, + Ok(Cmp::Different) => ExitCode::from(1), + Err(e) => { + if !params.quiet { + eprintln!("{e}"); + } + ExitCode::from(2) + } + } +} + +#[inline] +fn is_ascii_printable(byte: u8) -> bool { + let c = byte as char; + c.is_ascii() && !c.is_ascii_control() +} + +#[inline] +fn format_byte(byte: u8) -> String { + let mut byte = byte; + let mut quoted = vec![]; + + if !is_ascii_printable(byte) { + if byte >= 128 { + quoted.push(b'M'); + quoted.push(b'-'); + byte -= 128; + } + + if byte < 32 { + quoted.push(b'^'); + byte += 64; + } else if byte == 127 { + quoted.push(b'^'); + byte = b'?'; + } + assert!((byte as char).is_ascii()); + } + + quoted.push(byte); + + // 
SAFETY: the checks and shifts we do above match what cat and GNU + // cmp do to ensure characters fall inside the ascii range. + unsafe { String::from_utf8_unchecked(quoted) } +} + +fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { + assert!(!params.quiet); + + let mut stdout = BufWriter::new(io::stdout().lock()); + if let Some((offset, _, _)) = diffs.last() { + // Obtain the width of the first column from the last byte offset. + let width = format!("{}", offset).len(); + + if params.print_bytes { + for (at_byte, from_byte, to_byte) in diffs { + writeln!( + stdout, + "{:>width$} {:>3o} {:4} {:>3o} {}", + at_byte, + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte), + ) + .map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } else { + for (at_byte, from_byte, to_byte) in diffs { + writeln!( + stdout, + "{:>width$} {:>3o} {:>3o}", + at_byte, + from_byte, + to_byte, + width = width + ) + .map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } + } + + Ok(()) +} + +#[inline] +fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { + if params.quiet { + return; + } + + if at_byte == 1 { + eprintln!( + "{}: EOF on '{}' which is empty", + params.executable.to_string_lossy(), + eof_on + ); + } else if params.verbose { + eprintln!( + "{}: EOF on '{}' after byte {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + ); + } else if start_of_line { + eprintln!( + "{}: EOF on '{}' after byte {}, line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line - 1 + ); + } else { + eprintln!( + "{}: EOF on '{}' after byte {}, in line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line + ); + } +} + +fn is_posix_locale() -> bool { + let locale = if let Ok(locale) = env::var("LC_ALL") { + 
locale + } else if let Ok(locale) = env::var("LC_MESSAGES") { + locale + } else if let Ok(locale) = env::var("LANG") { + locale + } else { + "C".to_string() + }; + + locale == "C" || locale == "POSIX" +} + +#[inline] +fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { + if params.quiet { + return; + } + + let term = if is_posix_locale() && !params.print_bytes { + "char" + } else { + "byte" + }; + print!( + "{} {} differ: {term} {}, line {}", + ¶ms.from.to_string_lossy(), + ¶ms.to.to_string_lossy(), + at_byte, + at_line + ); + if params.print_bytes { + let char_width = if to_byte >= 0x7F { 2 } else { 1 }; + print!( + " is {:>3o} {:char_width$} {:>3o} {:char_width$}", + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte) + ); + } + println!(); +} + +#[cfg(test)] +mod tests { + use super::*; + fn os(s: &str) -> OsString { + OsString::from(s) + } + + #[test] + fn positional() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo"), os("bar")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("-"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("--help"), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("--"), os("--help")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: None, + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(usize::MAX), + ..Default::default() 
+ }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2Y")] + .iter() + .cloned() + .peekable() + ) + ); + + // Bad positional arguments. + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2"), os("3")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params([os("cmp")].iter().cloned().peekable()) + ); + } + + #[test] + fn execution_modes() { + let print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + ..Default::default() + }; + assert_eq!( + Ok(print_bytes.clone()), + parse_params( + [os("cmp"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(print_bytes), + parse_params( + [os("cmp"), os("--print-bytes"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose.clone()), + parse_params( + [os("cmp"), os("-l"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose), + parse_params( + [os("cmp"), os("--verbose"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose_and_print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-l"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-lb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes), + parse_params( + [os("cmp"), os("-bl"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + 
assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + quiet: true, + ..Default::default() + }), + parse_params( + [os("cmp"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + // Some options do not mix. + assert_eq!( + Err("cmp: options -l and -s are incompatible".to_string()), + parse_params( + [os("cmp"), os("-l"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn max_bytes() { + let max_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(max_bytes.clone()), + parse_params( + [os("cmp"), os("-n"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(max_bytes), + parse_params( + [os("cmp"), os("--bytes=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--bytes=99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure case + assert_eq!( + Err("cmp: invalid --bytes value '1K'".to_string()), + parse_params( + [os("cmp"), os("--bytes=1K"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn skips() { + let skips = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(skips.clone()), + parse_params( + [os("cmp"), os("-i"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(skips), + parse_params( + [os("cmp"), os("--ignore-initial=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: 
os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(usize::MAX), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [os("cmp"), os("--ignore-initial=1:2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1_000_000_000), + skip_b: Some(1_152_921_504_606_846_976 * 2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1GB:2E"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // All special suffixes. + for (i, suffixes) in [ + ["kB", "K"], + ["MB", "M"], + ["GB", "G"], + ["TB", "T"], + ["PB", "P"], + ["EB", "E"], + ["ZB", "Z"], + ["YB", "Y"], + ] + .iter() + .enumerate() + { + let values = [ + 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + ]; + for (j, v) in values.iter().enumerate() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(*v), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os(&format!("1{}:2", suffixes[j])), + os("foo"), + os("bar"), + ] + .iter() + .cloned() + .peekable() + ) + ); + } + } + + // Ignores positional arguments when -i is provided. 
+ assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("1:2"), + os("foo"), + os("bar"), + os("3"), + os("4") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure cases + assert_eq!( + Err("cmp: invalid --ignore-initial value '1mb'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=1mb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '1:2:3'".to_string()), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1:2:3"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '-1'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } +} diff --git a/src/diff.rs b/src/diff.rs index 6998e2b..f769a29 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -18,7 +18,7 @@ use std::process::{exit, ExitCode}; // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. 
-pub(crate) fn main(opts: Peekable) -> ExitCode { +pub fn main(opts: Peekable) -> ExitCode { let params = parse_params(opts).unwrap_or_else(|error| { eprintln!("{error}"); exit(2); diff --git a/src/lib.rs b/src/lib.rs index 0bb911b..a20ac56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; diff --git a/src/main.rs b/src/main.rs index 824b45c..8194d00 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,12 +5,13 @@ use std::{ env::ArgsOs, - ffi::OsString, + ffi::{OsStr, OsString}, iter::Peekable, path::{Path, PathBuf}, process::ExitCode, }; +mod cmp; mod context_diff; mod diff; mod ed_diff; @@ -29,8 +30,10 @@ fn binary_path(args: &mut Peekable) -> PathBuf { } } -fn name(binary_path: &Path) -> Option<&str> { - binary_path.file_stem()?.to_str() +/// #Panics +/// Panics if path has no UTF-8 valid name +fn name(binary_path: &Path) -> &OsStr { + binary_path.file_stem().unwrap() } const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -39,12 +42,12 @@ fn usage(name: &str) { println!("{name} {VERSION} (multi-call binary)\n"); println!("Usage: {name} [function [arguments...]]\n"); println!("Currently defined functions:\n"); - println!(" diff\n"); + println!(" cmp, diff\n"); } -fn second_arg_error(name: &str) -> ! { - println!("Expected utility name as second argument, got nothing."); - usage(name); +fn second_arg_error(name: &OsStr) -> ! { + eprintln!("Expected utility name as second argument, got nothing."); + usage(&name.to_string_lossy()); std::process::exit(0); } @@ -52,10 +55,7 @@ fn main() -> ExitCode { let mut args = std::env::args_os().peekable(); let exe_path = binary_path(&mut args); - let exe_name = name(&exe_path).unwrap_or_else(|| { - usage(""); - std::process::exit(1); - }); + let exe_name = name(&exe_path); let util_name = if exe_name == "diffutils" { // Discard the item we peeked. 
@@ -70,9 +70,10 @@ fn main() -> ExitCode { match util_name.to_str() { Some("diff") => diff::main(args), + Some("cmp") => cmp::main(args), Some(name) => { - usage(&format!("{}: utility not supported", name)); - ExitCode::from(1) + eprintln!("{}: utility not supported", name); + ExitCode::from(2) } None => second_arg_error(exe_name), } diff --git a/src/utils.rs b/src/utils.rs index a216784..88b39ff 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -72,19 +72,30 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } -pub fn report_failure_to_read_input_file( +pub fn format_failure_to_read_input_file( executable: &OsString, filepath: &OsString, error: &std::io::Error, -) { +) -> String { // std::io::Error's display trait outputs "{detail} (os error {code})" // but we want only the {detail} (error string) part let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - eprintln!( + format!( "{}: {}: {}", executable.to_string_lossy(), filepath.to_string_lossy(), error_code_re.replace(error.to_string().as_str(), ""), + ) +} + +pub fn report_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) { + eprintln!( + "{}", + format_failure_to_read_input_file(executable, filepath, error) ); } diff --git a/tests/integration.rs b/tests/integration.rs index 2b3fd4f..4cff8ff 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -4,314 +4,869 @@ // files that was distributed with this source code. 
use assert_cmd::cmd::Command; -use diffutilslib::assert_diff_eq; use predicates::prelude::*; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io::Write; use tempfile::{tempdir, NamedTempFile}; // Integration tests for the diffutils command +mod common { + use super::*; -#[test] -fn unknown_param() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("--foobar"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); - Ok(()) -} + #[test] + fn unknown_param() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("patch"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::eq("patch: utility not supported\n")); -#[test] -fn cannot_read_files() -> Result<(), Box> { - let file = NamedTempFile::new()?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::starts_with( + "Expected utility name as second argument, got nothing.\n", + )); + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg("--foobar"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); + } + Ok(()) + } - let nofile = NamedTempFile::new()?; - let nopath = nofile.into_temp_path(); - std::fs::remove_file(&nopath)?; + #[test] + fn cannot_read_files() -> Result<(), Box> { + let file = NamedTempFile::new()?; + + let nofile = NamedTempFile::new()?; + let nopath = nofile.into_temp_path(); + std::fs::remove_file(&nopath)?; + + #[cfg(not(windows))] + let error_message = "No such file or directory"; + #[cfg(windows)] + let error_message = "The system cannot find the file specified."; + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(&nopath).arg(file.path()); + 
cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(file.path()).arg(&nopath); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + } - #[cfg(not(windows))] - let error_message = "No such file or directory"; - #[cfg(windows)] - let error_message = "The system cannot find the file specified."; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(&nopath).arg(file.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(file.path()).arg(&nopath); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); - - Ok(()) + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg(&nopath).arg(&nopath); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + )) + .count(2), + ); + + Ok(()) + } } -#[test] -fn no_differences() -> Result<(), Box> { - let file = NamedTempFile::new()?; - for option in ["", "-u", "-c", "-e"] { +mod diff { + use diffutilslib::assert_diff_eq; + + use super::*; + + #[test] + fn 
no_differences() -> Result<(), Box> { + let file = NamedTempFile::new()?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file.path()).arg(file.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + } + Ok(()) + } + + #[test] + fn no_differences_report_identical_files() -> Result<(), Box> { + // same file + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file1.path().to_string_lossy(), + ))); + } + // two files with the same content + let mut file2 = NamedTempFile::new()?; + file2.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy(), + ))); + } + Ok(()) + } + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + 
.stdout(predicate::str::is_empty().not()); + } + Ok(()) + } + + #[test] + fn differences_brief() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(format!( + "Files {} and {} differ\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy() + ))); + } + Ok(()) + } + + #[test] + fn missing_newline() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar".as_bytes())?; let mut cmd = Command::cargo_bin("diffutils")?; cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file.path()).arg(file.path()); + cmd.arg("-e").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("No newline at end of file")); + Ok(()) + } + + #[test] + fn read_from_stdin() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("-") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg("-") + .arg(file2.path()) + 
.write_stdin("foo\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file2.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg("-").arg("-"); cmd.assert() .code(predicate::eq(0)) .success() .stdout(predicate::str::is_empty()); + + #[cfg(unix)] + { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("/dev/stdin") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + } + + Ok(()) } - Ok(()) -} -#[test] -fn no_differences_report_identical_files() -> Result<(), Box> { - // same file - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn compare_file_to_directory() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let directory = tmp_dir.path().join("d"); + let _ = std::fs::create_dir(&directory); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let da_path = directory.join("a"); + let mut da = File::create(&da_path).unwrap(); + da.write_all(b"da\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.arg("-u").arg(&directory).arg(&a_path); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", + da_path.display(), + 
a_path.display() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&a_path).arg(&directory); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", + a_path.display(), + da_path.display() + ) + ); + + Ok(()) + } +} + +mod cmp { + use super::*; + + #[test] + fn cmp_incompatible_params() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-s"); + cmd.arg("/etc/passwd").arg("/etc/group"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with( + ": options -l and -s are incompatible\n", + )); + + Ok(()) + } + + #[test] + fn cmp_stdin() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("a\n"); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file1.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("b\n"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); + + Ok(()) } - // two files with the same content - let mut file2 = NamedTempFile::new()?; - file2.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + + #[test] + fn cmp_equal_files() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let 
mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"a\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) } - Ok(()) -} -#[test] -fn differences() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_one_file_empty() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let _ = File::create(&b_path).unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::str::is_empty().not()); + .stderr(predicate::str::contains(" EOF on ")) + .stderr(predicate::str::ends_with(" which is empty\n")); + + Ok(()) } - Ok(()) -} -#[test] -fn differences_brief() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_immediate_difference() -> 
Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"bcd\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::eq(format!( - "Files {} and {} differ\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy() - ))); + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 1, line 1 is 141 a 142 b\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq( + "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", + )); + + Ok(()) } - Ok(()) -} -#[test] -fn missing_newline() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar".as_bytes())?; - let 
mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-e").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("No newline at end of file")); - Ok(()) -} + #[test] + fn cmp_newline_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\ndefg").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abc\ndef\ng").unwrap(); -#[test] -fn read_from_stdin() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("-") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg("-") - .arg(file2.path()) - .write_stdin("foo\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file2.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg("-").arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - - #[cfg(unix)] - { let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("/dev/stdin") - .write_stdin("bar\n"); - 
cmd.assert().code(predicate::eq(1)).failure(); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 8, line 2 is 147 g 12 ^J\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 12\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + Ok(()) } - Ok(()) -} + #[test] + fn cmp_max_bytes() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + 
cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("13"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n8 40 150 h\n")); + Ok(()) + } + + #[test] + fn cmp_skip_args_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"---abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"###abc\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Positional skips should be ignored + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("1").arg("1"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Single positional argument should only affect first file. 
+ let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip_suffix_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + write!(a, "{}c\n", "a".repeat(1024)).unwrap(); + a.flush().unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + write!(b, "{}c\n", "b".repeat(1024)).unwrap(); + b.flush().unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("--ignore-initial=1K"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("8"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + 
cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 4, line 1 is 40 150 h\n", + )); + + Ok(()) + } + + #[test] + fn cmp_binary() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + + bytes.reverse(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); -#[test] -fn compare_file_to_directory() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let directory = tmp_dir.path().join("d"); - let _ = std::fs::create_dir(&directory); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let da_path = directory.join("a"); - let mut da = File::create(&da_path).unwrap(); - da.write_all(b"da\n").unwrap(); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg(&directory).arg(&a_path); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", - da_path.display(), - a_path.display() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg(&a_path).arg(&directory); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", - a_path.display(), - da_path.display() - ) - ); - - Ok(()) + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + 
cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(concat!( + " 1 0 ^@ 377 M-^?\n", + " 2 17 ^O 376 M-~\n", + " 3 37 ^_ 310 M-H\n", + " 4 40 201 M-^A\n", + " 5 41 ! 200 M-^@\n", + " 6 50 ( 177 ^?\n", + " 7 100 @ 176 ~\n", + " 8 176 ~ 100 @\n", + " 9 177 ^? 50 (\n", + "10 200 M-^@ 41 !\n", + "11 201 M-^A 40 \n", + "12 310 M-H 37 ^_\n", + "13 376 M-~ 17 ^O\n", + "14 377 M-^? 0 ^@\n" + ))); + + Ok(()) + } + + #[test] + #[cfg(not(windows))] + fn cmp_fast_paths() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + // This test mimics one found in the GNU cmp test suite. It is used for + // validating the /dev/null optimization. + let a_path = tmp_dir.path().join("a"); + let a = File::create(&a_path).unwrap(); + a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let b_path = tmp_dir.path().join("b"); + let b = File::create(&b_path).unwrap(); + b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); + + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils")) + .arg("cmp") + .arg(&a_path) + .arg(&b_path) + .stdout(dev_null) + .spawn() + .unwrap(); + + std::thread::sleep(std::time::Duration::from_millis(100)); + + assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1)); + + // Two stdins should be equal + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-"); + cmd.arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Files with longer than block size equal segments should still report + // the correct line number for the difference. Assumes 8KB block size (see + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), + // create a 24KB equality. 
+ let mut bytes = " ".repeat(4095); + bytes.push('\n'); + bytes.push_str(&" ".repeat(4096)); + + let bytes = bytes.repeat(3); + let bytes = bytes.as_bytes(); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + a.write_all(b"A").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + b.write_all(b"B").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); + + Ok(()) + } } diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cb59834..f75b0b3 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -21,7 +21,7 @@ # (e.g. 'dev' or 'test'). # Unless overridden by the $TESTS environment variable, all tests in the test # suite will be run. Tests targeting a command that is not yet implemented -# (e.g. cmp, diff3 or sdiff) are skipped. +# (e.g. diff3 or sdiff) are skipped. scriptpath=$(dirname "$(readlink -f "$0")") rev=$(git rev-parse HEAD) @@ -57,6 +57,7 @@ upstreamrev=$(git rev-parse HEAD) mkdir src cd src ln -s "$binary" diff +ln -s "$binary" cmp cd ../tests if [[ -n "$TESTS" ]] @@ -82,9 +83,9 @@ for test in $tests do result="FAIL" url="$urlroot$test?id=$upstreamrev" - # Run only the tests that invoke `diff`, + # Run only the tests that invoke `diff` or `cmp`, # because other binaries aren't implemented yet - if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test" + if ! 
grep -E -s -q "(diff3|sdiff)" "$test" then sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1 json+="{\"test\":\"$test\",\"result\":\"$result\"," From 2e681301b4c652fd8eab304570eaedc3f7ffd663 Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Tue, 24 Sep 2024 22:15:12 -0300 Subject: [PATCH 11/42] cmp: avoid using advanced rust formatting for -l Octal conversion and simple integer to string both show up in profiling. This change improves comparing ~36M completely different files with both -l and -b by ~11-13%. --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/cmp.rs | 40 ++++++++++++++++++++++++++++++++-------- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24fc712..3291450 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,6 +128,7 @@ dependencies = [ "assert_cmd", "chrono", "diff", + "itoa", "predicates", "pretty_assertions", "regex", @@ -190,6 +191,12 @@ dependencies = [ "cc", ] +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + [[package]] name = "js-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 477467c..6fa1a3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" diff = "0.1.13" +itoa = "1.0.11" regex = "1.10.4" same-file = "1.0.6" unicode-width = "0.2.0" diff --git a/src/cmp.rs b/src/cmp.rs index 29b8775..a337c12 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -483,6 +483,24 @@ fn is_ascii_printable(byte: u8) -> bool { c.is_ascii() && !c.is_ascii_control() } +#[inline] +fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { + *buf = [b' ', b' ', b'0']; + + let mut num = byte; + let mut idx = 2; // Start at the last position in the buffer + + // Generate octal digits + while num > 0 { + buf[idx] = b'0' + num % 8; + num /= 8; + idx = idx.saturating_sub(1); + } + + // SAFETY: the 
operations we do above always land within ascii range. + unsafe { std::str::from_utf8_unchecked(&buf[..]) } +} + #[inline] fn format_byte(byte: u8) -> String { let mut byte = byte; @@ -520,15 +538,20 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< // Obtain the width of the first column from the last byte offset. let width = format!("{}", offset).len(); + let mut at_byte_buf = itoa::Buffer::new(); + let mut from_oct = [0u8; 3]; // for octal conversions + let mut to_oct = [0u8; 3]; + if params.print_bytes { for (at_byte, from_byte, to_byte) in diffs { + let at_byte_str = at_byte_buf.format(at_byte); writeln!( stdout, - "{:>width$} {:>3o} {:4} {:>3o} {}", - at_byte, - from_byte, + "{:>width$} {} {:4} {} {}", + at_byte_str, + format_octal(from_byte, &mut from_oct), format_byte(from_byte), - to_byte, + format_octal(to_byte, &mut to_oct), format_byte(to_byte), ) .map_err(|e| { @@ -540,12 +563,13 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< } } else { for (at_byte, from_byte, to_byte) in diffs { + let at_byte_str = at_byte_buf.format(at_byte); writeln!( stdout, - "{:>width$} {:>3o} {:>3o}", - at_byte, - from_byte, - to_byte, + "{:>width$} {} {}", + at_byte_str, + format_octal(from_byte, &mut from_oct), + format_octal(to_byte, &mut to_oct), width = width ) .map_err(|e| { From fac8dab1823e8b7c7cfe5c468528dc0842a5daab Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Tue, 24 Sep 2024 22:31:35 -0300 Subject: [PATCH 12/42] cmp: completely avoid Rust fmt in verbose mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the code less readable, but gets us a massive improvement to performance. Comparing ~36M completely different files now takes ~40% of the time. Compared to GNU cmp, we now run the same comparison in ~26% of the time. This also improves comparing binary files. A comparison of chromium and libxul now takes ~60% of the time. 
We also beat GNU cmp by about the same margin. Before: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 2.000 s ± 0.016 s [User: 1.603 s, System: 0.392 s] Range (min … max): 1.989 s … 2.043 s 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 24.704 s ± 0.162 s [User: 21.948 s, System: 2.700 s] Range (min … max): 24.359 s … 24.889 s 10 runs Warning: Ignoring non-zero exit code. After: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 849.5 ms ± 6.2 ms [User: 538.3 ms, System: 306.8 ms] Range (min … max): 839.4 ms … 857.7 ms 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 14.646 s ± 0.040 s [User: 12.328 s, System: 2.286 s] Range (min … max): 14.585 s … 14.702 s 10 runs Warning: Ignoring non-zero exit code.
--- src/cmp.rs | 84 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/src/cmp.rs b/src/cmp.rs index a337c12..1d9ca9e 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -530,6 +530,9 @@ fn format_byte(byte: u8) -> String { unsafe { String::from_utf8_unchecked(quoted) } } +// This function has been optimized to not use the Rust fmt system, which +// leads to a massive speed up when processing large files: cuts the time +// for comparing 2 ~36MB completely different files in half on an M1 Max. fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { assert!(!params.quiet); @@ -542,19 +545,49 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< let mut from_oct = [0u8; 3]; // for octal conversions let mut to_oct = [0u8; 3]; + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces + let mut output = Vec::<u8>::with_capacity(width + 3 * 2 + 4 + 2 + 4); + if params.print_bytes { for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:4} {:>3o} {}", let at_byte_str = at_byte_buf.format(at_byte); - writeln!( - stdout, - "{:>width$} {} {:4} {} {}", - at_byte_str, - format_octal(from_byte, &mut from_oct), - format_byte(from_byte), - format_octal(to_byte, &mut to_oct), - format_byte(to_byte), - ) - .map_err(|e| { + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + let from_byte_str = format_byte(from_byte); + let from_byte_padding = 4 - from_byte_str.len(); + + output.extend_from_slice(from_byte_str.as_bytes()); + + for _ in 0..from_byte_padding { + output.push(b' ') + } + + output.push(b' '); + +
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_byte(to_byte).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { format!( "{}: error printing output: {e}", params.executable.to_string_lossy() @@ -563,16 +596,29 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< } } else { for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:>3o}" let at_byte_str = at_byte_buf.format(at_byte); - writeln!( - stdout, - "{:>width$} {} {}", - at_byte_str, - format_octal(from_byte, &mut from_oct), - format_octal(to_byte, &mut to_oct), - width = width - ) - .map_err(|e| { + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { format!( "{}: error printing output: {e}", params.executable.to_string_lossy() From 0bf04b439536931c4052cc6a48c1cf39e7abcc01 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 2 Oct 2024 14:00:43 +0200 Subject: [PATCH 13/42] README.md: be explicit with the list of tools (#99) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 552df09..fae06d6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![CodeCov](https://codecov.io/gh/uutils/diffutils/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/diffutils) -The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) in Rust. 
+The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) (diff, cmp, diff3, sdiff) in Rust. Based on the incomplete diff generator in https://github.com/rust-lang/rust/blob/master/src/tools/compiletest/src/runtest.rs, and made to be compatible with GNU's diff and patch tools. From a31626260389160fd3d0e87906af8d1ab2358e31 Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Sat, 5 Oct 2024 08:09:27 -0300 Subject: [PATCH 14/42] cmp: print verbose diffs as we find them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change, we would first find all changes so we could obtain the largest offset we will report and use that to set up the padding. Now we use the file sizes to estimate the largest possible offset. Not only does this allow us to print earlier, reduces memory usage, as we do not store diffs to report later, but it also fixes a case in which our output was different to GNU cmp's - because it also seems to estimate based on size. 
Memory usage drops by a factor of 1000(!), without losing performance while comparing 2 binaries of hundreds of MBs: Before: Maximum resident set size (kbytes): 2489260 Benchmark 1: ../target/release/diffutils \ cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 14.466 s ± 0.166 s [User: 12.367 s, System: 2.012 s] Range (min … max): 14.350 s … 14.914 s 10 runs After: Maximum resident set size (kbytes): 2636 Benchmark 1: ../target/release/diffutils \ cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 13.724 s ± 0.038 s [User: 12.263 s, System: 1.372 s] Range (min … max): 13.667 s … 13.793 s 10 runs --- src/cmp.rs | 190 +++++++++++++++++++++---------------------- tests/integration.rs | 2 +- 2 files changed, 93 insertions(+), 99 deletions(-) diff --git a/src/cmp.rs b/src/cmp.rs index 1d9ca9e..c0fc397 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -9,7 +9,7 @@ use std::ffi::OsString; use std::io::{BufRead, BufReader, BufWriter, Read, Write}; use std::iter::Peekable; use std::process::ExitCode; -use std::{fs, io}; +use std::{cmp, fs, io}; #[cfg(not(target_os = "windows"))] use std::os::fd::{AsRawFd, FromRawFd}; @@ -320,10 +320,35 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> { let mut from = prepare_reader(&params.from, &params.skip_a, params)?; let mut to = prepare_reader(&params.to, &params.skip_b, params)?; + let mut offset_width = params.max_bytes.unwrap_or(usize::MAX); + + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) { + #[cfg(not(target_os = "windows"))] + let (a_size, b_size) = (a_meta.size(), b_meta.size()); + + #[cfg(target_os = "windows")] + let (a_size, b_size) = (a_meta.file_size(), b_meta.file_size()); + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early.
+ if params.quiet && a_size != b_size { + return Ok(Cmp::Different); + } + + let smaller = cmp::min(a_size, b_size) as usize; + offset_width = cmp::min(smaller, offset_width); + } + + let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize; + + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces + let mut output = Vec::<u8>::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4); + let mut at_byte = 1; let mut at_line = 1; let mut start_of_line = true; - let mut verbose_diffs = vec![]; + let mut stdout = BufWriter::new(io::stdout().lock()); + let mut compare = Cmp::Equal; loop { // Fill up our buffers. let from_buf = match from.fill_buf() { @@ -360,10 +385,6 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> { &params.to.to_string_lossy() }; - if params.verbose { - report_verbose_diffs(verbose_diffs, params)?; - } - report_eof(at_byte, at_line, start_of_line, eof_on, params); return Ok(Cmp::Different); } @@ -395,8 +416,24 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> { // first one runs out.
for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { if from_byte != to_byte { + compare = Cmp::Different; + if params.verbose { - verbose_diffs.push((at_byte, from_byte, to_byte)); + format_verbose_difference( + from_byte, + to_byte, + at_byte, + offset_width, + &mut output, + params, + )?; + stdout.write_all(output.as_slice()).map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + output.clear(); } else { report_difference(from_byte, to_byte, at_byte, at_line, params); return Ok(Cmp::Different); @@ -422,12 +459,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> { to.consume(consumed); } - if params.verbose && !verbose_diffs.is_empty() { - report_verbose_diffs(verbose_diffs, params)?; - return Ok(Cmp::Different); - } - - Ok(Cmp::Equal) + Ok(compare) } // Exit codes are documented at @@ -450,21 +482,6 @@ pub fn main(opts: Peekable) -> ExitCode { return ExitCode::SUCCESS; } - // If the files have different sizes, we already know they are not identical. If we have not - // been asked to show even the first difference, we can quit early. - if params.quiet { - if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) { - #[cfg(not(target_os = "windows"))] - if a_meta.size() != b_meta.size() { - return ExitCode::from(1); - } - #[cfg(target_os = "windows")] - if a_meta.file_size() != b_meta.file_size() { - return ExitCode::from(1); - } - } - } - match cmp(&params) { Ok(Cmp::Equal) => ExitCode::SUCCESS, Ok(Cmp::Different) => ExitCode::from(1), @@ -533,99 +550,76 @@ fn format_byte(byte: u8) -> String { // This function has been optimized to not use the Rust fmt system, which // leads to a massive speed up when processing large files: cuts the time // for comparing 2 ~36MB completely different files in half on an M1 Max.
-fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { +#[inline] +fn format_verbose_difference( + from_byte: u8, + to_byte: u8, + at_byte: usize, + offset_width: usize, + output: &mut Vec<u8>, + params: &Params, +) -> Result<(), String> { assert!(!params.quiet); - let mut stdout = BufWriter::new(io::stdout().lock()); - if let Some((offset, _, _)) = diffs.last() { - // Obtain the width of the first column from the last byte offset. - let width = format!("{}", offset).len(); - - let mut at_byte_buf = itoa::Buffer::new(); - let mut from_oct = [0u8; 3]; // for octal conversions - let mut to_oct = [0u8; 3]; - - // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces - let mut output = Vec::<u8>::with_capacity(width + 3 * 2 + 4 + 2 + 4); - - if params.print_bytes { - for (at_byte, from_byte, to_byte) in diffs { - output.clear(); - - // "{:>width$} {:>3o} {:4} {:>3o} {}", - let at_byte_str = at_byte_buf.format(at_byte); - let at_byte_padding = width - at_byte_str.len(); + let mut at_byte_buf = itoa::Buffer::new(); + let mut from_oct = [0u8; 3]; // for octal conversions + let mut to_oct = [0u8; 3]; - for _ in 0..at_byte_padding { - output.push(b' ') - } - - output.extend_from_slice(at_byte_str.as_bytes()); - - output.push(b' '); + if params.print_bytes { + // "{:>width$} {:>3o} {:4} {:>3o} {}", + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width.saturating_sub(at_byte_str.len()); - output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + for _ in 0..at_byte_padding { + output.push(b' ') + } - output.push(b' '); + output.extend_from_slice(at_byte_str.as_bytes()); - let from_byte_str = format_byte(from_byte); - let from_byte_padding = 4 - from_byte_str.len(); + output.push(b' '); - output.extend_from_slice(from_byte_str.as_bytes()); + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); - for _ in
0..from_byte_padding { - output.push(b' ') - } + output.push(b' '); - output.push(b' '); + let from_byte_str = format_byte(from_byte); + let from_byte_padding = 4 - from_byte_str.len(); - output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + output.extend_from_slice(from_byte_str.as_bytes()); - output.push(b' '); + for _ in 0..from_byte_padding { + output.push(b' ') + } - output.extend_from_slice(format_byte(to_byte).as_bytes()); + output.push(b' '); - output.push(b'\n'); + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); - stdout.write_all(output.as_slice()).map_err(|e| { - format!( - "{}: error printing output: {e}", - params.executable.to_string_lossy() - ) - })?; - } - } else { - for (at_byte, from_byte, to_byte) in diffs { - output.clear(); + output.push(b' '); - // "{:>width$} {:>3o} {:>3o}" - let at_byte_str = at_byte_buf.format(at_byte); - let at_byte_padding = width - at_byte_str.len(); + output.extend_from_slice(format_byte(to_byte).as_bytes()); - for _ in 0..at_byte_padding { - output.push(b' ') - } + output.push(b'\n'); + } else { + // "{:>width$} {:>3o} {:>3o}" + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width - at_byte_str.len(); - output.extend_from_slice(at_byte_str.as_bytes()); + for _ in 0..at_byte_padding { + output.push(b' ') + } - output.push(b' '); + output.extend_from_slice(at_byte_str.as_bytes()); - output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + output.push(b' '); - output.push(b' '); + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); - output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + output.push(b' '); - output.push(b'\n'); + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); - stdout.write_all(output.as_slice()).map_err(|e| { - format!( - "{}: error printing output: {e}", - params.executable.to_string_lossy() - ) - })?; - } - } + output.push(b'\n'); } 
Ok(()) diff --git a/tests/integration.rs b/tests/integration.rs index 4cff8ff..5619b1a 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -616,7 +616,7 @@ mod cmp { .code(predicate::eq(1)) .failure() .stderr(predicate::str::is_empty()) - .stdout(predicate::eq("4 40 144 d\n8 40 150 h\n")); + .stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n")); Ok(()) } From c70cc1921c7739a406b052b9d2af8643343e7313 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 18 Oct 2024 09:08:07 +0200 Subject: [PATCH 15/42] Fix warnings from write_with_newline lint --- tests/integration.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index 5619b1a..c2ef299 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -691,12 +691,12 @@ mod cmp { let a_path = tmp_dir.path().join("a"); let mut a = File::create(&a_path).unwrap(); - write!(a, "{}c\n", "a".repeat(1024)).unwrap(); + writeln!(a, "{}c", "a".repeat(1024)).unwrap(); a.flush().unwrap(); let b_path = tmp_dir.path().join("b"); let mut b = File::create(&b_path).unwrap(); - write!(b, "{}c\n", "b".repeat(1024)).unwrap(); + writeln!(b, "{}c", "b".repeat(1024)).unwrap(); b.flush().unwrap(); let mut cmd = Command::cargo_bin("diffutils")?; From 1910cbfe5866649fdf121ae3966db82664e7a2a3 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 18 Oct 2024 09:10:03 +0200 Subject: [PATCH 16/42] Fix warnings from needless_borrow lint --- tests/integration.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index c2ef299..c11726e 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -851,12 +851,12 @@ mod cmp { let a_path = tmp_dir.path().join("a"); let mut a = File::create(&a_path).unwrap(); - a.write_all(&bytes).unwrap(); + a.write_all(bytes).unwrap(); a.write_all(b"A").unwrap(); let b_path = tmp_dir.path().join("b"); let mut b = File::create(&b_path).unwrap(); - 
b.write_all(&bytes).unwrap(); + b.write_all(bytes).unwrap(); b.write_all(b"B").unwrap(); let mut cmd = Command::cargo_bin("diffutils")?; From 3de1930bbe6fc705375bc8399855a7bd7c6bc961 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:05:12 +0000 Subject: [PATCH 17/42] fix(deps): update rust crate regex to v1.11.1 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe461de..3b9e4da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -311,9 +311,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", From 14b062251fbfbb6138a84b271ec8a293511cc92a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 8 Nov 2024 03:47:37 +0000 Subject: [PATCH 18/42] chore(deps): update rust crate tempfile to v3.14.0 --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b9e4da..bebb053 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -208,9 +208,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "linux-raw-sys" @@ -340,9 +340,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" dependencies = [ "bitflags", "errno", @@ -393,9 +393,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", From 4f2f86902118f7f74f3212f8ab1d32b85b9ff5f0 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 8 Nov 2024 09:06:33 +0100 Subject: [PATCH 19/42] Fix "unused import" warning on Windows --- tests/integration.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration.rs b/tests/integration.rs index c11726e..cfbf529 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -5,7 +5,9 @@ use assert_cmd::cmd::Command; use predicates::prelude::*; -use std::fs::{File, OpenOptions}; +use std::fs::File; +#[cfg(not(windows))] +use std::fs::OpenOptions; use std::io::Write; use tempfile::{tempdir, NamedTempFile}; From 90bed40046f6e261532763a3dd4967a1a032789e Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 8 Nov 2024 09:25:17 +0100 Subject: [PATCH 20/42] ci: remove CARGO_FEATURES_OPTION --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f0e5ca..732fa2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,8 +102,6 @@ jobs: if [ -n "${{ matrix.job.toolchain }}" ]; then TOOLCHAIN="${{ matrix.job.toolchain }}" ; fi outputs TOOLCHAIN # target-specific options - # * CARGO_FEATURES_OPTION - CARGO_FEATURES_OPTION='--all -- --check' ; ## default to '--all-features' for code coverage # * CODECOV_FLAGS CODECOV_FLAGS=$( echo "${{ matrix.job.os }}" | sed 
's/[^[:alnum:]]/_/g' ) outputs CODECOV_FLAGS @@ -119,7 +117,7 @@ jobs: if: runner.os == 'Windows' run: echo "C:\Program Files\Git\usr\bin" >> $env:GITHUB_PATH - name: Test - run: cargo test ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-fail-fast + run: cargo test --all-features --no-fail-fast env: CARGO_INCREMENTAL: "0" RUSTC_WRAPPER: "" From 39e092488b8b0e88ac867f8e73d20ac4f5404091 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 16 Nov 2024 07:49:05 +0000 Subject: [PATCH 21/42] chore(deps): update codecov/codecov-action action to v5 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 732fa2d..cb118ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -156,7 +156,7 @@ jobs: grcov . --output-type lcov --output-path "${COVERAGE_REPORT_FILE}" --binary-path "${COVERAGE_REPORT_DIR}" --branch echo "report=${COVERAGE_REPORT_FILE}" >> $GITHUB_OUTPUT - name: Upload coverage results (to Codecov.io) - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} file: ${{ steps.coverage.outputs.report }} From 4ff2d6b1829a11361945a581e644b8e68230e929 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sat, 16 Nov 2024 09:42:50 +0100 Subject: [PATCH 22/42] ci: fix deprecated codecov argument --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb118ac..97434f0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -159,7 +159,7 @@ jobs: uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ${{ steps.coverage.outputs.report }} + files: ${{ steps.coverage.outputs.report }} ## flags: IntegrationTests, UnitTests, ${{ steps.vars.outputs.CODECOV_FLAGS }} flags: ${{ steps.vars.outputs.CODECOV_FLAGS }} name: 
codecov-umbrella From 44c195c0b22f653f9f3d0ad89f789df06b35dbb5 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Tue, 4 Mar 2025 10:51:09 +0100 Subject: [PATCH 23/42] ci: make sure gpatch is actually being used for tests on MacOS --- .github/workflows/ci.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97434f0..8343add 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,9 @@ jobs: - uses: dtolnay/rust-toolchain@stable - name: install GNU patch on MacOS if: runner.os == 'macOS' - run: brew install gpatch + run: | + brew install gpatch + echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH" - name: set up PATH on Windows # Needed to use GNU's patch.exe instead of Strawberry Perl patch if: runner.os == 'Windows' @@ -111,7 +113,9 @@ jobs: - run: rustup component add llvm-tools-preview - name: install GNU patch on MacOS if: runner.os == 'macOS' - run: brew install gpatch + run: | + brew install gpatch + echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH" - name: set up PATH on Windows # Needed to use GNU's patch.exe instead of Strawberry Perl patch if: runner.os == 'Windows' From f9553984f40853418f4c13d96473553895c18620 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:09:06 +0000 Subject: [PATCH 24/42] fix(deps): update rust crate itoa to v1.0.15 --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bebb053..502f487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" From bbdfa1b765ded59f20df3b64fea83872fe97c22b Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:29:12 +0000 Subject: [PATCH 25/42] fix(deps): update rust crate chrono to v0.4.40 --- Cargo.lock | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 502f487..85b6dd6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,16 +91,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets", + "windows-link", ] [[package]] @@ -525,6 +525,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" + [[package]] name = "windows-sys" version = "0.52.0" From ba1cac3c20fda9baa86a38d2f638f742b6295001 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:40:57 +0000 Subject: [PATCH 26/42] chore(deps): update rust crate predicates to v3.1.3 --- Cargo.lock | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85b6dd6..744c848 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -161,9 +161,9 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "float-cmp" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" dependencies = [ "num-traits", ] @@ -253,9 +253,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "predicates" -version = "3.1.2" +version = "3.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" dependencies = [ "anstyle", "difflib", From d573c3ae1d5ab2d7413b9f4f581ebf8ecc45f29b Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:51:49 +0000 Subject: [PATCH 27/42] chore(deps): update rust crate tempfile to v3.17.1 --- Cargo.lock | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 744c848..23f7a4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi", + "windows-targets", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -393,12 +405,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.14.0" +version = "3.17.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" dependencies = [ "cfg-if", "fastrand", + "getrandom", "once_cell", "rustix", "windows-sys 0.59.0", @@ -431,6 +444,15 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.92" @@ -613,6 +635,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] + [[package]] name = "yansi" version = "1.0.1" From b53d4f427cf7a1e4a54d4b790298d1d5c1d9a31c Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 6 Mar 2025 23:24:15 +0000 Subject: [PATCH 28/42] chore(deps): update rust crate tempfile to v3.18.0 --- Cargo.lock | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23f7a4a..205dcf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,12 +145,12 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ 
"libc", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -220,15 +220,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.162" +version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" [[package]] name = "log" @@ -352,15 +352,15 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "0.38.39" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +checksum = "17f8dcd64f141950290e45c99f7710ede1b600297c91818bb30b3667c0f45dc0" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -405,16 +405,16 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.17.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567" dependencies = [ "cfg-if", "fastrand", "getrandom", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -553,15 +553,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.59.0" From 26ee98dfaaa852d9c49e7337cf0f511591cd4353 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 14 Mar 2025 02:47:05 +0000 Subject: [PATCH 29/42] chore(deps): update rust crate tempfile to v3.19.0 --- Cargo.lock | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 205dcf0..24ee927 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,11 +405,10 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.18.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567" +checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600" dependencies = [ - "cfg-if", "fastrand", "getrandom", "once_cell", From 0d7e4d82aee107f6a29b0b2e744acceae81138d0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 22:53:36 +0000 Subject: [PATCH 30/42] chore(deps): update rust crate tempfile to v3.19.1 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24ee927..38b7e90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600" +checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ "fastrand", "getrandom", From 87ccc8e4c2008e11caa9862c1c0708bf6187d6b0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 16 Apr 2025 19:55:52 +0000 Subject: [PATCH 31/42] 
chore(deps): update rust crate assert_cmd to v2.0.17 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 38b7e90..29a8623 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,9 +34,9 @@ checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "assert_cmd" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" +checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" dependencies = [ "anstyle", "bstr", From 199c7f169c12e3d7715f847350c7e6c429eac918 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 15:45:04 +0000 Subject: [PATCH 32/42] fix(deps): update rust crate chrono to v0.4.41 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29a8623..2c5103a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.40" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", From b31df0b5e88d733d001843d0bb25e959ddad2962 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 11 May 2025 22:12:26 +0000 Subject: [PATCH 33/42] chore(deps): update rust crate tempfile to v3.20.0 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c5103a..6ff8fb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ dependencies = [ [[package]] name 
= "tempfile" -version = "3.19.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ "fastrand", "getrandom", From 8105420bb4b665763cca72ec0b9851906fc85ad1 Mon Sep 17 00:00:00 2001 From: Sami Daniel Date: Sat, 19 Apr 2025 01:01:51 -0300 Subject: [PATCH 34/42] Create the side-by-side option (-y) feature for the diff command (Incomplete). - Create the function, in the utils package, limited_string that allows you to truncate a string based on a delimiter (May break the encoding of the character where it was cut) - Create tests for limited_string function - Add support for -y and --side-by-side flags that enables diff output for side-by-side mode - Create implementation of the diff -y (SideBySide) command, base command for sdiff, using the crate diff as engine. Currently it does not fully represent GNU diff -y, some flags (|, (, ), , /) could not be developed due to the limitation of the engine we currently use (crate diff), which did not allow perform logic around it. Only the use of '<' and '>' were enabled. 
- Create tests for SideBySide implementation --- src/diff.rs | 3 +- src/lib.rs | 2 ++ src/main.rs | 1 + src/params.rs | 8 +++++ src/side_diff.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++ src/utils.rs | 72 ++++++++++++++++++++++++++++++++++++++-- 6 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 src/side_diff.rs diff --git a/src/diff.rs b/src/diff.rs index f769a29..bbb725d 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -5,7 +5,7 @@ use crate::params::{parse_params, Format}; use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, unified_diff}; +use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; use std::env::ArgsOs; use std::ffi::OsString; use std::fs; @@ -79,6 +79,7 @@ pub fn main(opts: Peekable) -> ExitCode { eprintln!("{error}"); exit(2); }), + Format::SideBySide => side_diff::diff(&from_content, &to_content), }; if params.brief && !result.is_empty() { println!( diff --git a/src/lib.rs b/src/lib.rs index a20ac56..342b01c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod side_diff; pub mod unified_diff; pub mod utils; @@ -11,4 +12,5 @@ pub mod utils; pub use context_diff::diff as context_diff; pub use ed_diff::diff as ed_diff; pub use normal_diff::diff as normal_diff; +pub use side_diff::diff as side_by_side_diff; pub use unified_diff::diff as unified_diff; diff --git a/src/main.rs b/src/main.rs index 8194d00..badaaa0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ mod ed_diff; mod macros; mod normal_diff; mod params; +mod side_diff; mod unified_diff; mod utils; diff --git a/src/params.rs b/src/params.rs index 9b3abc4..9f5c07d 100644 --- a/src/params.rs +++ b/src/params.rs @@ -11,6 +11,7 @@ pub enum Format { Unified, Context, Ed, + SideBySide, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -101,6 +102,13 @@ pub fn parse_params>(mut opts: Peekable) -> Resu format = 
Some(Format::Ed); continue; } + if param == "-y" || param == "--side-by-side" { + if format.is_some() && format != Some(Format::SideBySide) { + return Err("Conflicting output style option".to_string()); + } + format = Some(Format::SideBySide); + continue; + } if tabsize_re.is_match(param.to_string_lossy().as_ref()) { // Because param matches the regular expression, // it is safe to assume it is valid UTF-8. diff --git a/src/side_diff.rs b/src/side_diff.rs new file mode 100644 index 0000000..71bf4b7 --- /dev/null +++ b/src/side_diff.rs @@ -0,0 +1,86 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::utils::limited_string; +use diff::Result; +use std::{ + io::{stdout, StdoutLock, Write}, + vec, +}; + +fn push_output( + output: &mut StdoutLock, + left_ln: &[u8], + right_ln: &[u8], + symbol: &[u8], + tab_size: usize, +) -> std::io::Result<()> { + // The reason why this function exists, is that we cannot + // assume a enconding for our left or right line, and the + // writeln!() macro obligattes us to do it. + + // side-by-side diff usually prints the output like: + // {left_line}{tab}{space_char}{symbol(|, < or >)}{space_char}{right_line}{EOL} + + // recalculate how many spaces are nescessary, cause we need to take into + // consideration the lenght of the word before print it. 
+ let tab_size = (tab_size as isize - left_ln.len() as isize).max(0); + let ident = vec![b' '; tab_size as usize]; + output.write_all(left_ln)?; // {left_line} + output.write_all(&ident)?; // {tab} + output.write_all(b" ")?; // {space_char} + output.write_all(symbol)?; // {symbol} + output.write_all(b" ")?; // {space_char} + output.write_all(right_ln)?; // {right_line} + + writeln!(output)?; // {EOL} + + Ok(()) +} + +pub fn diff(from_file: &[u8], to_file: &[u8]) -> Vec { + // ^ The left file ^ The right file + + let mut output = stdout().lock(); + let left_lines: Vec<&[u8]> = from_file.split(|&c| c == b'\n').collect(); + let right_lines: Vec<&[u8]> = to_file.split(|&c| c == b'\n').collect(); + let tab_size = 61; // for some reason the tab spaces are 61 not 60 + for result in diff::slice(&left_lines, &right_lines) { + match result { + Result::Left(left_ln) => { + push_output( + &mut output, + limited_string(left_ln, tab_size), + &[], + b"<", + tab_size, + ) + .unwrap(); + } + Result::Right(right_ln) => { + push_output( + &mut output, + &[], + limited_string(right_ln, tab_size), + b">", + tab_size, + ) + .unwrap(); + } + Result::Both(left_ln, right_ln) => { + push_output( + &mut output, + limited_string(left_ln, tab_size), + limited_string(right_ln, tab_size), + b" ", + tab_size, + ) + .unwrap(); + } + } + } + + vec![] +} diff --git a/src/utils.rs b/src/utils.rs index 88b39ff..b0d0232 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,9 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::{ffi::OsString, io::Write}; - use regex::Regex; +use std::{ffi::OsString, io::Write}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -99,6 +98,15 @@ pub fn report_failure_to_read_input_file( ); } +/// Limits a string at a certain limiter position. This can break the +/// encoding of a specific char where it has been cut. 
+#[must_use] +pub fn limited_string(orig: &[u8], limiter: usize) -> &[u8] { + // TODO: Verify if we broke the encoding of the char + // when we cut it. + &orig[..orig.len().min(limiter)] +} + #[cfg(test)] mod tests { use super::*; @@ -205,4 +213,64 @@ mod tests { assert!(m_time > current_time); } } + + mod limited_string { + use super::*; + use std::str; + + #[test] + fn empty_orig_returns_empty() { + let orig: &[u8] = b""; + let result = limited_string(&orig, 10); + assert!(result.is_empty()); + } + + #[test] + fn zero_limit_returns_empty() { + let orig: &[u8] = b"foo"; + let result = limited_string(&orig, 0); + assert!(result.is_empty()); + } + + #[test] + fn limit_longer_than_orig_returns_full() { + let orig: &[u8] = b"foo"; + let result = limited_string(&orig, 10); + assert_eq!(result, orig); + } + + #[test] + fn ascii_limit_in_middle() { + let orig: &[u8] = b"foobar"; + let result = limited_string(&orig, 3); + assert_eq!(result, b"foo"); + assert!(str::from_utf8(&result).is_ok()); // All are ascii chars, we do not broke the enconding + } + + #[test] + fn utf8_multibyte_cut_invalidates() { + let orig = "áéíóú".as_bytes(); + let result = limited_string(&orig, 1); + // should contain only the first byte of mult-byte char + assert_eq!(result, vec![0xC3]); + assert!(str::from_utf8(&result).is_err()); + } + + #[test] + fn utf8_limit_at_codepoint_boundary() { + let orig = "áéí".as_bytes(); + let bytes = &orig; + let result = limited_string(&orig, bytes.len()); + + assert_eq!(result, *bytes); + assert!(str::from_utf8(&result).is_ok()); + } + + #[test] + fn works_with_byte_vec_input() { + let orig_bytes = b"hello".to_vec(); + let result = limited_string(&orig_bytes, 3); + assert_eq!(result, b"hel"); + } + } } From dff98a29695ddab3ff7e010e16484c0356bf8021 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 15:45:04 +0000 Subject: [PATCH 35/42] fix(deps): update rust crate chrono to v0.4.41 --- 
Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29a8623..2c5103a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.40" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", From 1ef6923b7dac69edb3b6044d5842d21ccc78bed0 Mon Sep 17 00:00:00 2001 From: "Sami Daniel (Tsoi)" Date: Mon, 26 May 2025 08:26:10 -0300 Subject: [PATCH 36/42] Add side by side diff (partial) Create the diff -y utility, this time introducing tests and changes focused mainly on the construction of the utility and issues related to alignment and response tabulation. New parameters were introduced such as the size of the total width of the output in the parameters. A new calculation was introduced to determine the size of the output columns and the maximum total column size. The tab and spacing mechanism has the same behavior as the original diff, with tabs and spaces formatted in the same way. 
- Introducing tests for the diff 'main' function - Introducing fuzzing for side diff utility - Introducing tests for internal mechanisms - Modular functions that allow consistent changes across the entire project --- fuzz/Cargo.toml | 6 +- fuzz/fuzz_targets/fuzz_side.rs | 42 ++ src/diff.rs | 7 +- src/params.rs | 37 +- src/side_diff.rs | 1302 ++++++++++++++++++++++++++++++-- src/utils.rs | 69 -- 6 files changed, 1327 insertions(+), 136 deletions(-) create mode 100644 fuzz/fuzz_targets/fuzz_side.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8b0b521..39efd70 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -47,4 +47,8 @@ path = "fuzz_targets/fuzz_ed.rs" test = false doc = false - +[[bin]] +name = "fuzz_side" +path = "fuzz_targets/fuzz_side.rs" +test = false +doc = false \ No newline at end of file diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs new file mode 100644 index 0000000..8a69c07 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -0,0 +1,42 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; + +use diffutilslib::side_diff; + +use std::fs::File; +use std::io::Write; +use diffutilslib::params::Params; + +fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { + let (original, new, /* width, tabsize, */ expand) = x; + + // if width == 0 || tabsize == 0 { + // return; + // } + + let params = Params { + // width, + // tabsize, + expand_tabs: expand, + ..Default::default() + }; + let mut output_buf = vec![]; + side_diff::diff(&original, &new, &mut output_buf, ¶ms); + File::create("target/fuzz.file.original") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.file.new") + .unwrap() + .write_all(&new) + .unwrap(); + File::create("target/fuzz.file") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.diff") + .unwrap() + .write_all(&output_buf) + .unwrap(); +}); \ No newline at end of file diff --git a/src/diff.rs b/src/diff.rs index bbb725d..f4c0614 100644 --- 
a/src/diff.rs +++ b/src/diff.rs @@ -9,7 +9,7 @@ use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; use std::env::ArgsOs; use std::ffi::OsString; use std::fs; -use std::io::{self, Read, Write}; +use std::io::{self, stdout, Read, Write}; use std::iter::Peekable; use std::process::{exit, ExitCode}; @@ -79,7 +79,10 @@ pub fn main(opts: Peekable) -> ExitCode { eprintln!("{error}"); exit(2); }), - Format::SideBySide => side_diff::diff(&from_content, &to_content), + Format::SideBySide => { + let mut output = stdout().lock(); + side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + } }; if params.brief && !result.is_empty() { println!( diff --git a/src/params.rs b/src/params.rs index 9f5c07d..c64b3fc 100644 --- a/src/params.rs +++ b/src/params.rs @@ -25,6 +25,7 @@ pub struct Params { pub brief: bool, pub expand_tabs: bool, pub tabsize: usize, + pub width: usize, } impl Default for Params { @@ -39,6 +40,7 @@ impl Default for Params { brief: false, expand_tabs: false, tabsize: 8, + width: 130, } } } @@ -58,6 +60,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu let mut format = None; let mut context = None; let tabsize_re = Regex::new(r"^--tabsize=(?\d+)$").unwrap(); + let width_re = Regex::new(r"--width=(?P\d+)$").unwrap(); while let Some(param) = opts.next() { let next_param = opts.peek(); if param == "--" { @@ -109,6 +112,27 @@ pub fn parse_params>(mut opts: Peekable) -> Resu format = Some(Format::SideBySide); continue; } + if width_re.is_match(param.to_string_lossy().as_ref()) { + let param = param.into_string().unwrap(); + let width_str: &str = width_re + .captures(param.as_str()) + .unwrap() + .name("long") + .unwrap() + .as_str(); + + params.width = match width_str.parse::() { + Ok(num) => { + if num == 0 { + return Err("invalid width «0»".to_string()); + } + + num + } + Err(_) => return Err(format!("invalid width «{width_str}»")), + }; + continue; + } if tabsize_re.is_match(param.to_string_lossy().as_ref()) { // Because param 
matches the regular expression, // it is safe to assume it is valid UTF-8. @@ -120,9 +144,16 @@ pub fn parse_params>(mut opts: Peekable) -> Resu .unwrap() .as_str(); params.tabsize = match tabsize_str.parse::() { - Ok(num) => num, + Ok(num) => { + if num == 0 { + return Err("invalid tabsize «0»".to_string()); + } + + num + } Err(_) => return Err(format!("invalid tabsize «{tabsize_str}»")), }; + continue; } match match_context_diff_params(¶m, next_param, format) { @@ -712,11 +743,11 @@ mod tests { executable: os("diff"), from: os("foo"), to: os("bar"), - tabsize: 0, + tabsize: 1, ..Default::default() }), parse_params( - [os("diff"), os("--tabsize=0"), os("foo"), os("bar")] + [os("diff"), os("--tabsize=1"), os("foo"), os("bar")] .iter() .cloned() .peekable() diff --git a/src/side_diff.rs b/src/side_diff.rs index 71bf4b7..72673d4 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -3,84 +3,1264 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use crate::utils::limited_string; +use core::cmp::{max, min}; use diff::Result; -use std::{ - io::{stdout, StdoutLock, Write}, - vec, -}; +use std::{io::Write, vec}; +use unicode_width::UnicodeWidthStr; -fn push_output( - output: &mut StdoutLock, +use crate::params::Params; + +const GUTTER_WIDTH_MIN: usize = 3; + +struct CharIter<'a> { + current: &'a [u8], +} + +struct Config { + sdiff_half_width: usize, + sdiff_column_two_offset: usize, + tab_size: usize, + expanded: bool, + separator_pos: usize, +} + +impl<'a> From<&'a [u8]> for CharIter<'a> { + fn from(value: &'a [u8]) -> Self { + CharIter { current: value } + } +} + +impl<'a> Iterator for CharIter<'a> { + // (bytes for the next char, visible width) + type Item = (&'a [u8], usize); + + fn next(&mut self) -> Option { + let max = self.current.len().min(4); + + // We reached the end. 
+ if max == 0 { + return None; + } + + // Try to find the next utf-8 character, if present in the next 4 bytes. + let mut index = 1; + let mut view = &self.current[..index]; + let mut char = str::from_utf8(view); + while char.is_err() { + index += 1; + if index > max { + break; + } + view = &self.current[..index]; + char = str::from_utf8(view) + } + + match char { + Ok(c) => { + self.current = self + .current + .get(view.len()..) + .unwrap_or(&self.current[0..0]); + Some((view, UnicodeWidthStr::width(c))) + } + Err(_) => { + // We did not find an utf-8 char within the next 4 bytes, return the single byte. + self.current = &self.current[1..]; + Some((&view[..1], 1)) + } + } + } +} + +impl Config { + pub fn new(full_width: usize, tab_size: usize, expanded: bool) -> Self { + // diff uses this calculation to calculate the size of a half line + // based on the options passed (like -w, -t, etc.). It's actually + // pretty useless, because we (actually) don't have any size modifiers + // that can change this, however I just want to leave the calculate + // here, since it's not very clear and may cause some confusion + + let w = full_width as isize; + let t = tab_size as isize; + let t_plus_g = t + GUTTER_WIDTH_MIN as isize; + let unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1); + let off = unaligned_off - unaligned_off % t; + let hw = max(0, min(off - GUTTER_WIDTH_MIN as isize, w - off)) as usize; + let c2o = if hw != 0 { off as usize } else { w as usize }; + + Self { + expanded, + sdiff_column_two_offset: c2o, + tab_size, + sdiff_half_width: hw, + separator_pos: ((hw + c2o - 1) >> 1), + } + } +} + +fn format_tabs_and_spaces( + from: usize, + to: usize, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + let expanded = config.expanded; + let tab_size = config.tab_size; + let mut current = from; + + if current > to { + return Ok(()); + } + + if expanded { + while current < to { + buf.write_all(b" ")?; + current += 1; + } + return Ok(()); + } + + 
while current + (tab_size - current % tab_size) <= to { + let next_tab = current + (tab_size - current % tab_size); + buf.write_all(b"\t")?; + current = next_tab; + } + + while current < to { + buf.write_all(b" ")?; + current += 1; + } + + Ok(()) +} + +fn process_half_line( + s: &[u8], + max_width: usize, + is_right: bool, + white_space_gutter: bool, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + if s.is_empty() { + if !is_right { + format_tabs_and_spaces( + 0, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + return Ok(()); + } + + if max_width > config.sdiff_half_width { + return Ok(()); + } + + if max_width > config.sdiff_column_two_offset && !is_right { + return Ok(()); + } + + let expanded = config.expanded; + let tab_size = config.tab_size; + let sdiff_column_two_offset = config.sdiff_column_two_offset; + let mut current_width = 0; + let iter = CharIter::from(s); + + // the encoding will probably be compatible with utf8, so we can take advantage + // of that to get the size of the columns and iterate without breaking the encoding of anything. + // It seems like a good trade, since there is still a fallback in case it is not utf8. + // But I think it would be better if we used some lib that would allow us to handle this + // in the best way possible, in order to avoid overhead (currently 2 for loops are needed). + // There is a library called mcel (mcel.h) that is used in GNU diff, but the documentation + // about it is very scarce, nor is its use documented on the internet. In fact, from my + // research I didn't even find any information about it in the GNU lib's own documentation. 
+ + for c in iter { + let (char, c_width) = c; + + if current_width + c_width > max_width { + break; + } + + match char { + b"\t" => { + if expanded && (current_width + tab_size - (current_width % tab_size)) <= max_width + { + let mut spaces = tab_size - (current_width % tab_size); + while spaces > 0 { + buf.write_all(b" ")?; + current_width += 1; + spaces -= 1; + } + } else if current_width + tab_size - (current_width % tab_size) <= max_width { + buf.write_all(b"\t")?; + current_width += tab_size - (current_width % tab_size); + } + } + b"\n" => { + break; + } + b"\r" => { + buf.write_all(b"\r")?; + format_tabs_and_spaces(0, sdiff_column_two_offset, config, buf)?; + current_width = 0; + } + b"\0" | b"\x07" | b"\x0C" | b"\x0B" => { + buf.write_all(char)?; + } + _ => { + buf.write_all(char)?; + current_width += c_width; + } + } + } + + // gnu sdiff do not tabulate the hole empty right line, instead, just keep the line empty + if !is_right { + // we always sum + 1 or + GUTTER_WIDTH_MIN cause we want to expand + // up to the third column of the gutter column if the gutter is gutter white space, + // otherwise we can expand to only the first column of the gutter middle column, cause + // the next is the sep char + format_tabs_and_spaces( + current_width, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + Ok(()) +} + +fn push_output( left_ln: &[u8], right_ln: &[u8], - symbol: &[u8], - tab_size: usize, + symbol: u8, + output: &mut T, + config: &Config, ) -> std::io::Result<()> { - // The reason why this function exists, is that we cannot - // assume a enconding for our left or right line, and the - // writeln!() macro obligattes us to do it. - - // side-by-side diff usually prints the output like: - // {left_line}{tab}{space_char}{symbol(|, < or >)}{space_char}{right_line}{EOL} - - // recalculate how many spaces are nescessary, cause we need to take into - // consideration the lenght of the word before print it. 
- let tab_size = (tab_size as isize - left_ln.len() as isize).max(0); - let ident = vec![b' '; tab_size as usize]; - output.write_all(left_ln)?; // {left_line} - output.write_all(&ident)?; // {tab} - output.write_all(b" ")?; // {space_char} - output.write_all(symbol)?; // {symbol} - output.write_all(b" ")?; // {space_char} - output.write_all(right_ln)?; // {right_line} - - writeln!(output)?; // {EOL} + if left_ln.is_empty() && right_ln.is_empty() { + writeln!(output)?; + return Ok(()); + } + + let white_space_gutter = symbol == b' '; + let half_width = config.sdiff_half_width; + let column_two_offset = config.sdiff_column_two_offset; + let separator_pos = config.separator_pos; + let put_new_line = true; // should be false when | is allowed + + // this involves a lot of the '|' mark, however, as it is not active, + // it is better to deactivate it as it introduces visual bug if + // the line is empty. + // if !left_ln.is_empty() { + // put_new_line = put_new_line || (left_ln.last() == Some(&b'\n')); + // } + // if !right_ln.is_empty() { + // put_new_line = put_new_line || (right_ln.last() == Some(&b'\n')); + // } + + process_half_line( + left_ln, + half_width, + false, + white_space_gutter, + config, + output, + )?; + if symbol != b' ' { + // the diff always want to put all tabs possible in the usable are, + // even in the middle space between the gutters if possible. 
+ + output.write_all(&[symbol])?; + if !right_ln.is_empty() { + format_tabs_and_spaces(separator_pos + 1, column_two_offset, config, output)?; + } + } + process_half_line( + right_ln, + half_width, + true, + white_space_gutter, + config, + output, + )?; + + if put_new_line { + writeln!(output)?; + } Ok(()) } -pub fn diff(from_file: &[u8], to_file: &[u8]) -> Vec { +pub fn diff( + from_file: &[u8], + to_file: &[u8], + output: &mut T, + params: &Params, +) -> Vec { // ^ The left file ^ The right file - let mut output = stdout().lock(); - let left_lines: Vec<&[u8]> = from_file.split(|&c| c == b'\n').collect(); - let right_lines: Vec<&[u8]> = to_file.split(|&c| c == b'\n').collect(); - let tab_size = 61; // for some reason the tab spaces are 61 not 60 + let mut left_lines: Vec<&[u8]> = from_file.split_inclusive(|&c| c == b'\n').collect(); + let mut right_lines: Vec<&[u8]> = to_file.split_inclusive(|&c| c == b'\n').collect(); + let config = Config::new(params.width, params.tabsize, params.expand_tabs); + + if left_lines.last() == Some(&&b""[..]) { + left_lines.pop(); + } + + if right_lines.last() == Some(&&b""[..]) { + right_lines.pop(); + } + + /* + DISCLAIMER: + Currently the diff engine does not produce results like the diff engine used in GNU diff, + so some results may be inaccurate. For example, the line difference marker "|", according + to the GNU documentation, appears when the same lines (only the actual line, although the + relative line may change the result, so occasionally '|' markers appear with the same lines) + are different but exist in both files. In the current solution the same result cannot be + obtained because the diff engine does not return Both if both exist but are different, + but instead returns a Left and a Right for each one, implying that two lines were added + and deleted. Furthermore, the GNU diff program apparently stores some internal state + (this internal state is just a note about how the diff engine works) about the lines. 
+ For example, an added or removed line directly counts in the line query of the original + lines to be printed in the output. Because of this imbalance caused by additions and + deletions, the characters ( and ) are introduced. They basically represent lines without + context, which have lost their pair in the other file due to additions or deletions. Anyway, + my goal with this disclaimer is to warn that for some reason, whether it's the diff engine's + inability to determine and predict/precalculate the result of GNU's sdiff, with this software it's + not possible to reproduce results that are 100% faithful to GNU's, however, the basic premise + e of side diff of showing added and removed lines and creating edit scripts is totally possible. + More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of + the goals of this project : ) + */ for result in diff::slice(&left_lines, &right_lines) { match result { - Result::Left(left_ln) => { - push_output( - &mut output, - limited_string(left_ln, tab_size), - &[], - b"<", - tab_size, - ) - .unwrap(); - } - Result::Right(right_ln) => { - push_output( - &mut output, - &[], - limited_string(right_ln, tab_size), - b">", - tab_size, - ) - .unwrap(); - } + Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), + Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), Result::Both(left_ln, right_ln) => { - push_output( - &mut output, - limited_string(left_ln, tab_size), - limited_string(right_ln, tab_size), - b" ", - tab_size, - ) - .unwrap(); + push_output(left_ln, right_ln, b' ', output, &config).unwrap() } } } vec![] } + +#[cfg(test)] +mod tests { + const DEF_TAB_SIZE: usize = 4; + + use super::*; + + mod format_tabs_and_spaces { + use super::*; + + const CONFIG_E_T: Config = Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: true, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + const CONFIG_E_F: Config 
= Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: false, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + #[test] + fn test_format_tabs_and_spaces_expanded_false() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_format_tabs_and_spaces_expanded_true() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 5]); + } + + #[test] + fn test_format_tabs_and_spaces_from_greater_than_to() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 5, &CONFIG_E_F, &mut buf).unwrap(); + assert!(buf.is_empty()); + } + + #[test] + fn test_format_from_non_zero_position() { + let mut buf = vec![]; + format_tabs_and_spaces(2, 7, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_multiple_full_tabs_needed() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 12, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b'\t']); + } + + #[test] + fn test_uneven_tab_boundary_with_spaces() { + let mut buf = vec![]; + format_tabs_and_spaces(3, 10, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ']); + } + + #[test] + fn test_expanded_true_with_offset() { + let mut buf = vec![]; + format_tabs_and_spaces(3, 9, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 6]); + } + + #[test] + fn test_exact_tab_boundary_from_midpoint() { + let mut buf = vec![]; + format_tabs_and_spaces(4, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + + #[test] + fn test_mixed_tabs_and_spaces_edge_case() { + let mut buf = vec![]; + format_tabs_and_spaces(5, 9, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_minimal_gap_with_tab() { + let mut buf = vec![]; + format_tabs_and_spaces(7, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, 
vec![b'\t']); + } + + #[test] + fn test_expanded_false_with_tab_at_end() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + } + + mod process_half_line { + use super::*; + + fn create_test_config(expanded: bool, tab_size: usize) -> Config { + Config { + sdiff_half_width: 30, + sdiff_column_two_offset: 60, + tab_size, + expanded, + separator_pos: 15, + } + } + + #[test] + fn test_empty_line_left_expanded_false() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"", 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_tabs_unexpanded() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\tabc", 8, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'a', b'b', b'c', b'\t', b' ']); + } + + #[test] + fn test_utf8_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "😉😉😉".as_bytes(); + process_half_line(s, 3, false, false, &config, &mut buf).unwrap(); + let mut r = vec![]; + r.write_all("😉\t".as_bytes()).unwrap(); + assert_eq!(buf, r) + } + + #[test] + fn test_newline_handling() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"abc\ndef", 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'a', b'b', b'c', b'\t', b' ', b' ']); + } + + #[test] + fn test_carriage_return() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\rxyz", 5, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'x', b'y', b'z']); + assert_eq!(buf, r); + } + + #[test] + fn test_exact_width_fit() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut 
buf = vec![]; + process_half_line(b"abcd", 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, b"abcd ".to_vec()); + } + + #[test] + fn test_non_utf8_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + // ISO-8859-1 + process_half_line( + &[0x63, 0x61, 0x66, 0xE9], + 5, + false, + false, + &config, + &mut buf, + ) + .unwrap(); + assert_eq!(&buf, &[0x63, 0x61, 0x66, 0xE9, b' ', b' ']); + assert!(String::from_utf8(buf).is_err()); + } + + #[test] + fn test_non_utf8_bytes_ignore_padding_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + + let utf32le_bytes = [ + 0x63, 0x00, 0x00, 0x00, // 'c' + 0x61, 0x00, 0x00, 0x00, // 'a' + 0x66, 0x00, 0x00, 0x00, // 'f' + 0xE9, 0x00, 0x00, 0x00, // 'é' + ]; + // utf8 little endiand 32 bits (or 4 bytes per char) + process_half_line(&utf32le_bytes, 6, false, false, &config, &mut buf).unwrap(); + let mut r = utf32le_bytes.to_vec(); + r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_non_utf8_non_preserve_ascii_bytes_cut() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + + let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding + // ^ é char, start multi byte + process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter + } + + #[test] + fn test_right_line_padding() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"xyz", 5, true, true, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 3); + } + + #[test] + fn test_mixed_tabs_spaces() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\t \t", 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn 
test_overflow_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "日本語".as_bytes(); + process_half_line(s, 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "日本 ".as_bytes()); + } + + #[test] + fn test_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 3, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t "); + } + + #[test] + fn test_expanded_true() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_expanded_true_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_width0_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\0\x0B\x07\x0C"; + process_half_line(s, 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\0\x0B\x07\x0C\t ") + } + + #[test] + fn test_left_empty_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b""; + process_half_line(s, 9, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\t\t\t"); + } + + #[test] + fn test_s_size_eq_max_width_p1() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abcdefghij"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abcdefghij "); + } + + #[test] + fn test_mixed_tabs_and_spaces_inversion() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + 
assert_eq!(buf, b" \t \t "); + } + + #[test] + fn test_expanded_with_tabs() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_expanded_with_tabs_and_space_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_zero_width_unicode_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "\u{200B}".as_bytes(); + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "\u{200B}\t\t ".as_bytes()); + } + + #[test] + fn test_multiple_carriage_returns() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'\t'; 2]); + r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_multiple_carriage_returns_is_right_true() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + assert_eq!(buf, r); + } + + #[test] + fn test_mixed_invalid_utf8_with_valid() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\xFF\xFEdef"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert!(String::from_utf8(s.to_vec()).is_err()); + assert_eq!(buf, b"abc\xFF\xFEdef "); + } + + #[test] + fn test_max_width_zero() { + let config = 
create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"foo bar"; + process_half_line(s, 0, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b' ']); + } + + #[test] + fn test_line_only_with_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']) + } + + #[test] + fn test_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 12, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" ".repeat(13)); + } + + #[test] + fn test_mixed_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a\tb\tc "); + } + + #[test] + fn test_mixed_tabs_with_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a\tb\tc\t "); + } + + #[test] + fn test_mixed_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_mixed_tabs_expanded_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_break_if_invalid_max_width() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 61, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b""); + 
assert_eq!(buf.len(), 0); + } + + #[test] + fn test_new_line() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t\t "); + } + } + + mod push_output { + // almost all behavior of the push_output was tested with tests on process_half_line + + use super::*; + + impl Default for Config { + fn default() -> Self { + Config::new(130, 8, false) + } + } + + fn create_test_config_def() -> Config { + Config::default() + } + + #[test] + fn test_left_empty_right_not_added() { + let config = create_test_config_def(); + let left_ln = b""; + let right_ln = b"bar"; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\t\t\t\t\t\t\t >\tbar\n"); + } + + #[test] + fn test_right_empty_left_not_del() { + let config = create_test_config_def(); + let left_ln = b"bar"; + let right_ln = b""; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"bar\t\t\t\t\t\t\t >\n"); + } + + #[test] + fn test_both_empty() { + let config = create_test_config_def(); + let left_ln = b""; + let right_ln = b""; + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\n"); + } + + #[test] + fn test_output_cut_with_maximization() { + let config = create_test_config_def(); + let left_ln = b"a".repeat(62); + let right_ln = b"a".repeat(62); + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf.len(), 61 * 2 + 2); + assert_eq!(&buf[0..61], vec![b'a'; 61]); + assert_eq!(&buf[61..62], b"\t"); + let mut end = b"a".repeat(61); + end.push(b'\n'); + assert_eq!(&buf[62..], end); + } + + #[test] + fn 
test_both_lines_non_empty_with_space_symbol_max_tabs() { + let config = create_test_config_def(); + let left_ln = b"left"; + let right_ln = b"right"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "left\t\t\t\t\t\t\t\t"; + let expected_right = "right"; + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); + } + + #[test] + fn test_non_space_symbol_with_padding() { + let config = create_test_config_def(); + let left_ln = b"data"; + let right_ln = b""; + let symbol = b'<'; // impossible case, just to use different symbol + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, format!("data\t\t\t\t\t\t\t <\n").as_bytes()); + } + + #[test] + fn test_lines_exceeding_half_width() { + let config = create_test_config_def(); + let left_ln = vec![b'a'; 100]; + let left_ln = left_ln.as_slice(); + let right_ln = vec![b'b'; 100]; + let right_ln = right_ln.as_slice(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "a".repeat(61); + let expected_right = "b".repeat(61); + assert_eq!(buf.len(), 61 + 1 + 61 + 1); + assert_eq!(&buf[0..61], expected_left.as_bytes()); + assert_eq!(buf[61], b'\t'); + assert_eq!(&buf[62..123], expected_right.as_bytes()); + assert_eq!(&buf[123..], b"\n"); + } + + #[test] + fn test_tabs_in_lines_expanded() { + let mut config = create_test_config_def(); + config.expanded = true; + let left_ln = b"\tleft"; + let right_ln = b"\tright"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = " left".to_string() + &" ".repeat(61 - 12); + let expected_right = " right"; + assert_eq!( + buf, + format!("{}{}{}\n", expected_left, " ", expected_right).as_bytes() + ); + } + + #[test] + fn test_unicode_characters() { + let config = 
create_test_config_def(); + let left_ln = "áéíóú".as_bytes(); + let right_ln = "😀😃😄".as_bytes(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = format!("áéíóú\t\t\t\t\t\t\t\t"); + let expected_right = "😀😃😄"; + assert_eq!( + buf, + format!("{}{}\n", expected_left, expected_right).as_bytes() + ); + } + } + + mod diff { + /* + Probably this hole section should be refactored when complete sdiff + arrives. I would say that these tests are more to document the + behavior of the engine than to actually test whether it is right, + because it is right, but right up to its limitations. + */ + + use super::*; + + fn generate_params() -> Params { + Params { + tabsize: 8, + expand_tabs: false, + width: 130, + ..Default::default() + } + } + + fn contains_string(vec: &Vec, s: &str) -> usize { + let pattern = s.as_bytes(); + vec.windows(pattern.len()).filter(|s| s == &pattern).count() + } + + fn calc_lines(input: &Vec) -> usize { + let mut lines_counter = 0; + + for c in input { + if c == &b'\n' { + lines_counter += 1; + } + } + + lines_counter + } + + #[test] + fn test_equal_lines() { + let params = generate_params(); + let from_file = b"equal"; + let to_file = b"equal"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + assert_eq!(calc_lines(&output), 1); + assert!(!output.contains(&b'<')); + assert!(!output.contains(&b'>')); + assert_eq!(contains_string(&output, "equal"), 2) + } + + #[test] + fn test_different_lines() { + let params = generate_params(); + let from_file = b"eq"; + let to_file = b"ne"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + assert_eq!(calc_lines(&output), 2); + assert!(output.contains(&b'>')); + assert!(output.contains(&b'<')); + assert_eq!(contains_string(&output, "eq"), 1); + assert_eq!(contains_string(&output, "ne"), 1); + } + + #[test] + fn test_added_line() { + let params = generate_params(); + let from_file = 
b""; + let to_file = b"new line"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, ">"), 1); + assert_eq!(contains_string(&output, "new line"), 1); + } + + #[test] + fn test_removed_line() { + let params = generate_params(); + let from_file = b"old line"; + let to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, "old line"), 1); + } + + #[test] + fn test_multiple_changes() { + let params = generate_params(); + let from_file = b"line1\nline2\nline3"; + let to_file = b"line1\nmodified\nline4"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 5); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn test_unicode_and_special_chars() { + let params = generate_params(); + let from_file = "á\t€".as_bytes(); + let to_file = "€\t😊".as_bytes(); + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert!(String::from_utf8_lossy(&output).contains("á")); + assert!(String::from_utf8_lossy(&output).contains("€")); + assert!(String::from_utf8_lossy(&output).contains("😊")); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_mixed_whitespace() { + let params = generate_params(); + let from_file = b" \tspaces"; + let to_file = b"\t\t tabs"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert!(output.contains(&b'<')); + assert!(output.contains(&b'>')); + assert!(String::from_utf8_lossy(&output).contains("spaces")); + assert!(String::from_utf8_lossy(&output).contains("tabs")); + } + + #[test] + fn test_empty_files() { + let params = generate_params(); + let from_file = b""; + let 
to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(output, vec![]); + } + + #[test] + fn test_partially_matching_lines() { + let params = generate_params(); + let from_file = b"match\nchange"; + let to_file = b"match\nupdated"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 3); + assert_eq!(contains_string(&output, "match"), 2); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_interleaved_add_remove() { + let params = generate_params(); + let from_file = b"A\nB\nC\nD"; + let to_file = b"B\nX\nD\nY"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "A"), 1); + assert_eq!(contains_string(&output, "X"), 1); + assert_eq!(contains_string(&output, "Y"), 1); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_swapped_lines() { + let params = generate_params(); + let from_file = b"1\n2\n3\n4"; + let to_file = b"4\n3\n2\n1"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_gap_between_changes() { + let params = generate_params(); + let from_file = b"Start\nKeep1\nRemove\nKeep2\nEnd"; + let to_file = b"Start\nNew1\nKeep1\nKeep2\nNew2\nEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "Remove"), 1); + assert_eq!(contains_string(&output, "New1"), 1); + assert_eq!(contains_string(&output, "New2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn 
test_mixed_operations_complex() { + let params = generate_params(); + let from_file = b"Same\nOld1\nSameMid\nOld2\nSameEnd"; + let to_file = b"Same\nNew1\nSameMid\nNew2\nNew3\nSameEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_insert_remove_middle() { + let params = generate_params(); + let from_file = b"Header\nContent1\nFooter"; + let to_file = b"Header\nContent2\nFooter"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 4); + assert_eq!(contains_string(&output, "Content1"), 1); + assert_eq!(contains_string(&output, "Content2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_multiple_adjacent_changes() { + let params = generate_params(); + let from_file = b"A\nB\nC\nD\nE"; + let to_file = b"A\nX\nY\nD\nZ"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, ¶ms); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + } + + mod config { + use super::*; + + fn create_config(full_width: usize, tab_size: usize, expanded: bool) -> Config { + Config::new(full_width, tab_size, expanded) + } + + #[test] + fn test_full_width_80_tab_4() { + let config = create_config(80, 4, false); + assert_eq!(config.sdiff_half_width, 37); + assert_eq!(config.sdiff_column_two_offset, 40); + assert_eq!(config.separator_pos, 38); + } + + #[test] + fn test_full_width_40_tab_8() { + let config = create_config(40, 8, true); + assert_eq!(config.sdiff_half_width, 16); + assert_eq!(config.sdiff_column_two_offset, 24); + assert_eq!(config.separator_pos, 19); // (16 +24 -1) /2 = 19.5 + } + + #[test] + fn test_full_width_30_tab_2() { + let config = create_config(30, 2, 
false); + assert_eq!(config.sdiff_half_width, 13); + assert_eq!(config.sdiff_column_two_offset, 16); + assert_eq!(config.separator_pos, 14); + } + + #[test] + fn test_small_width_10_tab_4() { + let config = create_config(10, 4, false); + assert_eq!(config.sdiff_half_width, 2); + assert_eq!(config.sdiff_column_two_offset, 8); + assert_eq!(config.separator_pos, 4); + } + + #[test] + fn test_minimal_width_3_tab_4() { + let config = create_config(3, 4, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 3); + assert_eq!(config.separator_pos, 1); + } + + #[test] + fn test_odd_width_7_tab_3() { + let config = create_config(7, 3, false); + assert_eq!(config.sdiff_half_width, 1); + assert_eq!(config.sdiff_column_two_offset, 6); + assert_eq!(config.separator_pos, 3); + } + + #[test] + fn test_tab_size_larger_than_width() { + let config = create_config(5, 10, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 5); + assert_eq!(config.separator_pos, 2); + } + } +} diff --git a/src/utils.rs b/src/utils.rs index b0d0232..daca18d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -98,15 +98,6 @@ pub fn report_failure_to_read_input_file( ); } -/// Limits a string at a certain limiter position. This can break the -/// encoding of a specific char where it has been cut. -#[must_use] -pub fn limited_string(orig: &[u8], limiter: usize) -> &[u8] { - // TODO: Verify if we broke the encoding of the char - // when we cut it. 
- &orig[..orig.len().min(limiter)] -} - #[cfg(test)] mod tests { use super::*; @@ -213,64 +204,4 @@ mod tests { assert!(m_time > current_time); } } - - mod limited_string { - use super::*; - use std::str; - - #[test] - fn empty_orig_returns_empty() { - let orig: &[u8] = b""; - let result = limited_string(&orig, 10); - assert!(result.is_empty()); - } - - #[test] - fn zero_limit_returns_empty() { - let orig: &[u8] = b"foo"; - let result = limited_string(&orig, 0); - assert!(result.is_empty()); - } - - #[test] - fn limit_longer_than_orig_returns_full() { - let orig: &[u8] = b"foo"; - let result = limited_string(&orig, 10); - assert_eq!(result, orig); - } - - #[test] - fn ascii_limit_in_middle() { - let orig: &[u8] = b"foobar"; - let result = limited_string(&orig, 3); - assert_eq!(result, b"foo"); - assert!(str::from_utf8(&result).is_ok()); // All are ascii chars, we do not broke the enconding - } - - #[test] - fn utf8_multibyte_cut_invalidates() { - let orig = "áéíóú".as_bytes(); - let result = limited_string(&orig, 1); - // should contain only the first byte of mult-byte char - assert_eq!(result, vec![0xC3]); - assert!(str::from_utf8(&result).is_err()); - } - - #[test] - fn utf8_limit_at_codepoint_boundary() { - let orig = "áéí".as_bytes(); - let bytes = &orig; - let result = limited_string(&orig, bytes.len()); - - assert_eq!(result, *bytes); - assert!(str::from_utf8(&result).is_ok()); - } - - #[test] - fn works_with_byte_vec_input() { - let orig_bytes = b"hello".to_vec(); - let result = limited_string(&orig_bytes, 3); - assert_eq!(result, b"hel"); - } - } } From a3e57c950ef149e7cec1bd08e93b27f8ea013859 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 11 May 2025 22:12:26 +0000 Subject: [PATCH 37/42] chore(deps): update rust crate tempfile to v3.20.0 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c5103a..6ff8fb5 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -405,9 +405,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.19.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ "fastrand", "getrandom", From 45b3072534bbe7e48042986a2af964917edf26a2 Mon Sep 17 00:00:00 2001 From: "Sami Daniel (Tsoi)" Date: Mon, 26 May 2025 08:45:43 -0300 Subject: [PATCH 38/42] Configure CI fuzzer for fuzz_side Configuring CI to run fuzz from fuzz_side --- .github/workflows/fuzzing.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 9ad1c17..8346e49 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -46,6 +46,7 @@ jobs: - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } + - { name: fuzz_side, should_pass: true } steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly From c7d4140fa30c3c6ade716d0e6620248ce0d6dc4a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 9 Jun 2025 23:03:42 +0000 Subject: [PATCH 39/42] fix(deps): update rust crate unicode-width to v0.2.1 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6ff8fb5..e437e99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -430,9 +430,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" [[package]] name = "wait-timeout" From 
8261d790f4b7ae6298d6ff43a24395f6d5f66b92 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 27 Jun 2025 10:45:40 +0200 Subject: [PATCH 40/42] clippy: fix warnings from uninlined_format_args --- src/cmp.rs | 16 ++++++---------- src/main.rs | 2 +- src/params.rs | 2 +- src/side_diff.rs | 5 +---- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/cmp.rs b/src/cmp.rs index c0fc397..876c3ca 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -35,7 +35,7 @@ pub struct Params { #[inline] fn usage_string(executable: &str) -> String { - format!("Usage: {} ", executable) + format!("Usage: {executable} ") } #[cfg(not(target_os = "windows"))] @@ -75,8 +75,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, Err(_) => { return Err(format!( - "{}: invalid --ignore-initial value '{}'", - executable_str, skip_desc + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" )) } }; @@ -103,8 +102,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, _ => { return Err(format!( - "{}: invalid --ignore-initial value '{}'", - executable_str, skip_desc + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" )); } }; @@ -170,8 +168,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, Err(_) => { return Err(format!( - "{}: invalid --bytes value '{}'", - executable_str, max_bytes + "{executable_str}: invalid --bytes value '{max_bytes}'" )) } }; @@ -210,7 +207,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu std::process::exit(0); } if param_str.starts_with('-') { - return Err(format!("Unknown option: {:?}", param)); + return Err(format!("Unknown option: {param:?}")); } if from.is_none() { from = Some(param); @@ -236,8 +233,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu if params.quiet && params.verbose { return Err(format!( - "{}: options -l 
and -s are incompatible", - executable_str + "{executable_str}: options -l and -s are incompatible" )); } diff --git a/src/main.rs b/src/main.rs index badaaa0..b7c2712 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,7 +73,7 @@ fn main() -> ExitCode { Some("diff") => diff::main(args), Some("cmp") => cmp::main(args), Some(name) => { - eprintln!("{}: utility not supported", name); + eprintln!("{name}: utility not supported"); ExitCode::from(2) } None => second_arg_error(exe_name), diff --git a/src/params.rs b/src/params.rs index c64b3fc..b8483b7 100644 --- a/src/params.rs +++ b/src/params.rs @@ -195,7 +195,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(error) => return Err(error), } if param.to_string_lossy().starts_with('-') { - return Err(format!("Unknown option: {:?}", param)); + return Err(format!("Unknown option: {param:?}")); } if from.is_none() { from = Some(param); diff --git a/src/side_diff.rs b/src/side_diff.rs index 72673d4..bb22002 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -950,10 +950,7 @@ mod tests { push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); let expected_left = format!("áéíóú\t\t\t\t\t\t\t\t"); let expected_right = "😀😃😄"; - assert_eq!( - buf, - format!("{}{}\n", expected_left, expected_right).as_bytes() - ); + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); } } From 03fe6140873498ac24d4ddaca03813a9a2592973 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 27 Jun 2025 10:50:06 +0200 Subject: [PATCH 41/42] clippy: fix warnings from useless_format lint --- src/side_diff.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/side_diff.rs b/src/side_diff.rs index bb22002..45bd325 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -901,7 +901,7 @@ mod tests { let symbol = b'<'; // impossible case, just to use different symbol let mut buf = vec![]; push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); - assert_eq!(buf, 
format!("data\t\t\t\t\t\t\t <\n").as_bytes()); + assert_eq!(buf, "data\t\t\t\t\t\t\t <\n".as_bytes()); } #[test] @@ -948,7 +948,7 @@ mod tests { let symbol = b' '; let mut buf = vec![]; push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); - let expected_left = format!("áéíóú\t\t\t\t\t\t\t\t"); + let expected_left = "áéíóú\t\t\t\t\t\t\t\t"; let expected_right = "😀😃😄"; assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); } From 7df02399ba5af0e2b944112a224d8e2d69462006 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 27 Jun 2025 10:52:37 +0200 Subject: [PATCH 42/42] clippy: fix warning from ptr_arg lint --- src/side_diff.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/side_diff.rs b/src/side_diff.rs index 45bd325..56953d2 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -973,7 +973,7 @@ mod tests { } } - fn contains_string(vec: &Vec, s: &str) -> usize { + fn contains_string(vec: &[u8], s: &str) -> usize { let pattern = s.as_bytes(); vec.windows(pattern.len()).filter(|s| s == &pattern).count() }