From c0dad2ef161eebb8db1fca975affe104ff71b844 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 7 Jul 2023 14:37:08 +0200 Subject: [PATCH 001/429] add nushell dependency analysis script --- util/deps.nu | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 util/deps.nu diff --git a/util/deps.nu b/util/deps.nu new file mode 100644 index 00000000000..a5bd94c8f51 --- /dev/null +++ b/util/deps.nu @@ -0,0 +1,155 @@ +# This is a script to analyze the dependencies of this project. +# It is a replacement of / complement to +# +# - cargo tree (used by this script) +# - cargo deps +# - cargo deny +# +# The idea is that by calling all_dep_info, you get a table of all dependencies +# in Cargo.lock, with a few additional columns based on some other tools. +# Currently, these tools are +# +# - cargo tree +# - the crates.io API +# +# The most useful columns in the table are: +# +# - `name`: the name of the crate. +# - `version`: the version of the crate. +# - `num_versions`: the number of versions in Cargo.lock. +# - `normal_dep`: whether the crate is a normal dependency. +# - `build_dep`: whether the crate is a build dependency. +# - `dev_dep`: whether the crate is a dev dependency. +# - `organisation`: the GitHub/GitLab organisation or user of the repository of the crate. +# - `repository_name`: the name of the repository the crate is in. The format is "{owner}/{repo}". +# - `dependencies`: direct dependencies of the crate (in the format of Cargo.lock). +# +# To use this script, start nushell (tested only on version 0.82.0), import the library and +# call `all_dep_info`: +# +# ``` +# > nu +# > use util/deps.nu +# > let dep = (deps all_dep_info) +# ``` +# +# Then you can perform analysis. For example, to group the dependencies by organisation: +# +# ``` +# > $dep | group-by organisation +# ``` +# +# Or to find all crates with multiple versions (like cargo deny): +# ``` +# > $dep | where versions > 1 +# ``` +# +# Ideas to expand this: +# +# - Figure out the whole dependency graph +# - Figure out which platforms and which features enable which crates +# - Figure out which utils require which crates +# - Count the number of crates on different platforms +# - Add license information +# - Add functions to perform common analyses +# - Add info from cargo bloat +# - Add MSRV info +# - Add up-to-date info (the necessary info is there, it just needs to be derived) +# - Check the number of owners/contributors +# - Make a webpage to more easily explore the data + +# Read the packages a Cargo.lock file +def read_lockfile [name: path] { + open $name | from toml | get package +} + +# Read the names output by cargo tree +export def read_tree_names [edges: string, features: string] { + cargo tree -e $edges --features $features + | rg "[a-zA-Z0-9_-]+ v[0-9.]+" -o + | lines + | each {|x| parse_name_and_version $x } +} + +def parse_name_and_version [s: string] { + let s = ($s | split row " ") + + let name = $s.0 + let version = if ($s | length) > 1 { + $s.1 | str substring 1.. + } else { + "" + } + + {name: $name, version: $version} +} + +# Read the crates.io info for a list of crates names +def read_crates_io [names: list] { + let total = ($names | length) + $names | enumerate | par-each {|el| + let key = $el.index + let name = $el.item + print $"($key)/($total): ($name)" + http get $"https://crates.io/api/v1/crates/($name)" | get crate + } +} + +def in_table [col_name, table] { + insert $col_name {|el| + $table + | any {|table_el| + $table_el.name == $el.name and $table_el.version == $el.version } + } +} + +# Add column for a dependency type +def add_dep_type [dep_type: string, features: string] { + in_table $"($dep_type)_dep" (read_tree_names $dep_type $features) +} + +export def all_dep_info [] { + let features = unix,feat_selinux + + let lock = (read_lockfile Cargo.lock) + + $lock + # Add number of versions + | join ($lock | group-by name | transpose | update column1 { length } | rename name num_versions) name + # Add dependency types + | add_dep_type normal $features + | add_dep_type build $features + | add_dep_type dev $features + | insert used {|x| $x.normal_dep or $x.build_dep or $x.dev_dep} + # Add crates.io info + | join (read_crates_io ($lock.name | uniq)) name + # Add GH org or user info + # The organisation is an indicator that crates should be treated as one dependency. + # However, there are also unrelated projects by a single organisation, so it's not + # clear. + | insert organisation {|x| + let repository = $x.repository? + if ($repository == null) { "" } else { + $repository | url parse | get path | path split | get 1 + } + } + # Add repository (truncate everything after repo name) + # If we get a url like + # https://github.com/uutils/coreutils/tree/src/uu/ls + # we only keep + # uutils/coreutils + # The idea is that crates in the same repo definitely belong to the same project and should + # be treated as one dependency. + | insert repository_name {|x| + let repository = $x.repository? + if ($repository == null) { '' } else { + $repository + | url parse + | get path + | path split + | select 1 2 + | path join + } + } +} + From 948836fb8ff099af1fb78fbf38f315ac52001142 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 9 Jul 2023 16:43:48 +0200 Subject: [PATCH 002/429] Update util/deps.nu Co-authored-by: Daniel Hofstetter --- util/deps.nu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/deps.nu b/util/deps.nu index a5bd94c8f51..a35e2d98c96 100644 --- a/util/deps.nu +++ b/util/deps.nu @@ -41,7 +41,7 @@ # # Or to find all crates with multiple versions (like cargo deny): # ``` -# > $dep | where versions > 1 +# > $dep | where num_versions > 1 # ``` # # Ideas to expand this: From a3e68d5bbd3d063cac01f77e9870121791e24012 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 2 Aug 2023 23:57:26 +0200 Subject: [PATCH 003/429] uucore: start work on a completely new printf implementation --- src/uucore/Cargo.toml | 2 +- src/uucore/src/lib/features.rs | 6 +- src/uucore/src/lib/features/format/mod.rs | 144 +++++ src/uucore/src/lib/features/format/spec.rs | 523 ++++++++++++++++++ src/uucore/src/lib/features/memo.rs | 175 ------ src/uucore/src/lib/features/tokenize/mod.rs | 5 - .../tokenize/num_format/format_field.rs | 43 -- .../features/tokenize/num_format/formatter.rs | 59 -- .../num_format/formatters/base_conv/mod.rs | 270 --------- .../num_format/formatters/base_conv/tests.rs | 56 -- .../formatters/cninetyninehexfloatf.rs | 115 ---- .../tokenize/num_format/formatters/decf.rs | 185 ------- .../num_format/formatters/float_common.rs | 377 ------------- .../tokenize/num_format/formatters/floatf.rs | 43 -- .../tokenize/num_format/formatters/intf.rs | 282 ---------- .../tokenize/num_format/formatters/mod.rs | 9 - .../tokenize/num_format/formatters/scif.rs | 43 -- .../lib/features/tokenize/num_format/mod.rs | 4 - .../tokenize/num_format/num_format.rs | 271 --------- src/uucore/src/lib/features/tokenize/sub.rs | 452 --------------- src/uucore/src/lib/features/tokenize/token.rs | 39 -- .../lib/features/tokenize/unescaped_text.rs | 279 ---------- src/uucore/src/lib/lib.rs | 4 +- 23 files changed, 672 insertions(+), 2714 deletions(-) create mode 100644 src/uucore/src/lib/features/format/mod.rs create mode 100644 src/uucore/src/lib/features/format/spec.rs delete mode 100644 src/uucore/src/lib/features/memo.rs delete mode 100644 src/uucore/src/lib/features/tokenize/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/format_field.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatter.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/num_format.rs delete mode 100644 src/uucore/src/lib/features/tokenize/sub.rs delete mode 100644 src/uucore/src/lib/features/tokenize/token.rs delete mode 100644 src/uucore/src/lib/features/tokenize/unescaped_text.rs diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index d376e807a20..1c1d4c75443 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -76,7 +76,7 @@ entries = ["libc"] fs = ["libc", "winapi-util", "windows-sys"] fsext = ["libc", "time", "windows-sys"] lines = [] -memo = ["itertools"] +format = ["itertools"] mode = ["libc"] perms = ["libc", "walkdir"] process = ["libc"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index f8a8d2d10df..fe48399877b 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -8,14 +8,12 @@ pub mod fs; pub mod fsext; #[cfg(feature = "lines")] pub mod lines; -#[cfg(feature = "memo")] -pub mod memo; +#[cfg(feature = "format")] +pub mod format; #[cfg(feature = "ringbuffer")] pub mod ringbuffer; #[cfg(feature = "sum")] pub mod sum; -#[cfg(feature = "memo")] -mod tokenize; // * (platform-specific) feature-gated modules // ** non-windows (i.e. Unix + Fuchsia) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs new file mode 100644 index 00000000000..abd92011cc4 --- /dev/null +++ b/src/uucore/src/lib/features/format/mod.rs @@ -0,0 +1,144 @@ +//! Main entry point for our implementation of printf. +//! +//! The [`printf`] and [`sprintf`] closely match the behavior of the +//! corresponding C functions: the former renders a formatted string +//! to stdout, the latter renders to a new [`String`] object. +//! +//! In addition to the [`printf`] and [`sprintf`] functions, we expose the +//! [`Format`] struct, which represents a parsed format string. This reduces +//! the need for parsing a format string multiple times and assures that no +//! parsing errors occur during writing. +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety + +// mod num_format; +mod spec; + +use spec::Spec; +use std::io::{stdout, Write}; + +pub enum FormatError { + SpecError, + IoError(std::io::Error), + NoMoreArguments, + InvalidArgument(FormatArgument), +} + +/// A single item to format +enum FormatItem { + /// A format specifier + Spec(Spec), + /// Some plain text + Text(Vec), + /// A single character + /// + /// Added in addition to `Text` as an optimization. + Char(u8), +} + +pub enum FormatArgument { + Char(char), + String(String), + UnsignedInt(u64), + SignedInt(i64), + Float(f64), +} + +impl FormatItem { + fn write<'a>(&self, mut writer: impl Write, args: &mut impl Iterator) -> Result<(), FormatError> { + match self { + FormatItem::Spec(spec) => spec.write(writer, args), + FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), + FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError), + } + } +} + +fn parse_iter(fmt: &[u8]) -> impl Iterator> + '_ { + let mut rest = fmt; + std::iter::from_fn(move || { + if rest.is_empty() { + return None; + } + + match rest.iter().position(|c| *c == b'%') { + None => { + let final_text = rest; + rest = &[]; + Some(Ok(FormatItem::Text(final_text.into()))) + } + Some(0) => { + // Handle the spec + rest = &rest[1..]; + match rest.get(0) { + None => Some(Ok(FormatItem::Char(b'%'))), + Some(b'%') => { + rest = &rest[1..]; + Some(Ok(FormatItem::Char(b'%'))) + } + Some(_) => { + let spec = match Spec::parse(&mut rest) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) + } + } + } + Some(i) => { + // The `after` slice includes the % so it will be handled correctly + // in the next iteration. + let (before, after) = rest.split_at(i); + rest = after; + return Some(Ok(FormatItem::Text(before.into()))); + } + } + }) +} + +/// Write a formatted string to stdout. +/// +/// `format_string` contains the template and `args` contains the +/// arguments to render into the template. +/// +/// See also [`sprintf`], which creates a new formatted [`String`]. +/// +/// # Examples +/// +/// ```rust +/// use uucore::format::printf; +/// +/// printf("hello %s", &["world".to_string()]).unwrap(); +/// // prints "hello world" +/// ``` +pub fn printf(format_string: &[u8], arguments: impl IntoIterator) -> Result<(), FormatError> { + printf_writer(stdout(), format_string, arguments) +} + +fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIterator) -> Result<(), FormatError> { + let mut args = args.into_iter(); + for item in parse_iter(format_string) { + item?.write(&mut writer, &mut args)?; + } + Ok(()) +} + +/// Create a new formatted string. +/// +/// `format_string` contains the template and `args` contains the +/// arguments to render into the template. +/// +/// See also [`printf`], which prints to stdout. +/// +/// # Examples +/// +/// ```rust +/// use uucore::format::sprintf; +/// +/// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); +/// assert_eq!(s, "hello world".to_string()); +/// ``` +pub fn sprintf(format_string: &[u8], arguments: impl IntoIterator) -> Result, FormatError> { + let mut writer = Vec::new(); + printf_writer(&mut writer, format_string, arguments)?; + Ok(writer) +} diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs new file mode 100644 index 00000000000..4319e44d93f --- /dev/null +++ b/src/uucore/src/lib/features/format/spec.rs @@ -0,0 +1,523 @@ +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety + +use super::{FormatArgument, FormatError}; +use std::{fmt::Display, io::Write}; + +pub enum Spec { + Char { + width: Option>, + align_left: bool, + }, + String { + width: Option>, + align_left: bool, + }, + SignedInt { + width: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + }, + UnsignedInt { + variant: UnsignedIntVariant, + width: Option>, + alignment: NumberAlignment, + }, + Float { + variant: FloatVariant, + case: Case, + force_decimal: ForceDecimal, + width: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + precision: Option>, + }, +} + +#[derive(Clone, Copy)] +pub enum UnsignedIntVariant { + Decimal, + Octal(Prefix), + Hexadecimal(Case, Prefix), +} + +#[derive(Clone, Copy)] + +pub enum FloatVariant { + Decimal, + Scientific, + Shortest, + Hexadecimal, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Case { + Lowercase, + Uppercase, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Prefix { + No, + Yes, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ForceDecimal { + No, + Yes, +} + +#[derive(Clone, Copy)] +pub enum PositiveSign { + None, + Plus, + Space, +} + +#[derive(Clone, Copy)] +pub enum NumberAlignment { + Left, + RightSpace, + RightZero, +} + +/// Precision and width specified might use an asterisk to indicate that they are +/// determined by an argument. +#[derive(Clone, Copy)] +pub enum CanAsterisk { + Fixed(T), + Asterisk, +} + +/// Size of the expected type (ignored) +/// +/// We ignore this parameter entirely, but we do parse it. +/// It could be used in the future if the need arises. +enum Length { + /// signed/unsigned char ("hh") + Char, + /// signed/unsigned short int ("h") + Short, + /// signed/unsigned long int ("l") + Long, + /// signed/unsigned long long int ("ll") + LongLong, + /// intmax_t ("j") + IntMaxT, + /// size_t ("z") + SizeT, + /// ptrdiff_t ("t") + PtfDiffT, + /// long double ("L") + LongDouble, +} + +impl Spec { + pub fn parse(rest: &mut &[u8]) -> Option { + // Based on the C++ reference, the spec format looks like: + // + // %[flags][width][.precision][length]specifier + // + // However, we have already parsed the '%'. + + let mut minus = false; + let mut plus = false; + let mut space = false; + let mut hash = false; + let mut zero = false; + + while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) { + match x { + b'-' => minus = true, + b'+' => plus = true, + b' ' => space = true, + b'#' => hash = true, + b'0' => zero = true, + _ => unreachable!(), + } + *rest = &rest[1..] + } + + let width = eat_asterisk_or_number(rest); + + let precision = if let Some(b'.') = rest.get(0) { + Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0))) + } else { + None + }; + + let length = rest.get(0).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(1) { + *rest = &rest[1..]; + Length::Char + } else { + Length::Short + } + } + b'l' => { + if let Some(b'l') = rest.get(1) { + *rest = &rest[1..]; + Length::Long + } else { + Length::LongLong + } + } + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + + if length.is_some() { + *rest = &rest[1..]; + } + + Some(match rest.get(0)? { + b'c' => Spec::Char { + width, + align_left: minus, + }, + b's' => Spec::String { + width, + align_left: minus, + }, + b'd' | b'i' => Spec::SignedInt { + width, + alignment: match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }, + positive_sign: match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }, + }, + c @ (b'u' | b'o' | b'x' | b'X') => { + let prefix = match hash { + false => Prefix::No, + true => Prefix::Yes, + }; + let alignment = match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }; + let variant = match c { + b'u' => UnsignedIntVariant::Decimal, + b'o' => UnsignedIntVariant::Octal(prefix), + b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix), + b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), + _ => unreachable!(), + }; + Spec::UnsignedInt { + variant, + width, + alignment, + } + } + c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float { + width, + precision, + variant: match c { + b'f' | b'F' => FloatVariant::Decimal, + b'e' | b'E' => FloatVariant::Scientific, + b'g' | b'G' => FloatVariant::Shortest, + b'a' | b'A' => FloatVariant::Hexadecimal, + _ => unreachable!(), + }, + force_decimal: match hash { + false => ForceDecimal::No, + true => ForceDecimal::Yes, + }, + case: match c.is_ascii_uppercase() { + false => Case::Lowercase, + true => Case::Uppercase, + }, + alignment: match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }, + positive_sign: match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }, + }, + _ => return None, + }) + } + + pub fn write<'a>( + &self, + mut writer: impl Write, + mut args: impl Iterator, + ) -> Result<(), FormatError> { + match self { + &Spec::Char { width, align_left } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let arg = next_arg(&mut args)?; + match arg { + FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), + _ => Err(FormatError::InvalidArgument(arg)), + } + } + &Spec::String { width, align_left } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let arg = next_arg(&mut args)?; + match arg { + FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), + _ => Err(FormatError::InvalidArgument(arg)), + } + } + &Spec::SignedInt { + width, + positive_sign, + alignment, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + + let arg = next_arg(&mut args)?; + let FormatArgument::SignedInt(i) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + if i >= 0 { + match positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + } + + match alignment { + NumberAlignment::Left => write!(writer, "{i: write!(writer, "{i:>width$}"), + NumberAlignment::RightZero => write!(writer, "{i:0>width$}"), + } + .map_err(FormatError::IoError) + } + &Spec::UnsignedInt { + variant, + width, + alignment, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + + let arg = next_arg(args)?; + let FormatArgument::SignedInt(i) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + let s = match variant { + UnsignedIntVariant::Decimal => format!("{i}"), + UnsignedIntVariant::Octal(Prefix::No) => format!("{i:o}"), + UnsignedIntVariant::Octal(Prefix::Yes) => format!("{i:#o}"), + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => { + format!("{i:x}") + } + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => { + format!("{i:#x}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => { + format!("{i:X}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => { + format!("{i:#X}") + } + }; + + match alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + } + .map_err(FormatError::IoError) + } + &Spec::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); + + let arg = next_arg(args)?; + let FormatArgument::Float(f) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + match positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + + let s = match variant { + FloatVariant::Decimal => format_float_decimal(f, precision, case, force_decimal), + FloatVariant::Scientific => { + format_float_scientific(f, precision, case, force_decimal) + } + FloatVariant::Shortest => format_float_shortest(f, precision, case, force_decimal), + FloatVariant::Hexadecimal => todo!(), + }; + + match alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + } + .map_err(FormatError::IoError) + } + } + } +} + +fn format_float_decimal( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + if !f.is_finite() { + let mut s = format!("{f}"); + if case == Case::Lowercase { + s.make_ascii_uppercase(); + } + return s; + } + + if precision == 0 && force_decimal == ForceDecimal::Yes { + format!("{f:.0}.") + } else { + format!("{f:.*}", precision) + } +} + +fn format_float_scientific( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + // If the float is NaN, -Nan, Inf or -Inf, format like any other float + if !f.is_finite() { + let mut s = format!("{f}"); + if case == Case::Lowercase { + s.make_ascii_uppercase(); + } + return s; + } + + let exponent: i32 = f.log10().floor() as i32; + let normalized = f / 10.0_f64.powi(exponent); + + let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { + "." + } else { + "" + }; + + let exp_char = match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }; + + format!( + "{normalized:.*}{additional_dot}{exp_char}{exponent:+03}", + precision + ) +} + +// TODO: This could be optimized. It's not terribly important though. +fn format_float_shortest( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + let a = format_float_decimal(f, precision, case, force_decimal); + let b = format_float_scientific(f, precision, case, force_decimal); + + if a.len() > b.len() { + b + } else { + a + } +} + +fn resolve_asterisk( + option: Option>, + args: impl Iterator, +) -> Result, FormatError> { + Ok(match option { + None => None, + Some(CanAsterisk::Asterisk) => { + let arg = next_arg(args)?; + match arg { + FormatArgument::UnsignedInt(u) => match usize::try_from(u) { + Ok(u) => Some(u), + Err(_) => return Err(FormatError::InvalidArgument(arg)), + }, + _ => return Err(FormatError::InvalidArgument(arg)), + } + } + Some(CanAsterisk::Fixed(w)) => Some(w), + }) +} + +fn next_arg( + mut arguments: impl Iterator, +) -> Result { + arguments.next().ok_or(FormatError::NoMoreArguments) +} + +fn write_padded( + mut writer: impl Write, + text: impl Display, + width: usize, + pad_zero: bool, + left: bool, +) -> Result<(), FormatError> { + match (left, pad_zero) { + (false, false) => write!(writer, "{text: >width$}"), + (false, true) => write!(writer, "{text:0>width$}"), + // 0 is ignored if we pad left. + (true, _) => write!(writer, "{text: Option> { + if let Some(b'*') = rest.get(0) { + *rest = &rest[1..]; + Some(CanAsterisk::Asterisk) + } else { + eat_number(rest).map(CanAsterisk::Fixed) + } +} + +fn eat_number(rest: &mut &[u8]) -> Option { + match rest.iter().position(|b| !b.is_ascii_digit()) { + None | Some(0) => None, + Some(i) => { + // TODO: This might need to handle errors better + // For example in case of overflow. + let parsed = std::str::from_utf8(&rest[..i]).unwrap().parse().unwrap(); + *rest = &rest[i..]; + Some(parsed) + } + } +} diff --git a/src/uucore/src/lib/features/memo.rs b/src/uucore/src/lib/features/memo.rs deleted file mode 100644 index 47d04f5b861..00000000000 --- a/src/uucore/src/lib/features/memo.rs +++ /dev/null @@ -1,175 +0,0 @@ -//! Main entry point for our implementation of printf. -//! -//! The [`printf`] and [`sprintf`] closely match the behavior of the -//! corresponding C functions: the former renders a formatted string -//! to stdout, the latter renders to a new [`String`] object. -use crate::display::Quotable; -use crate::error::{UResult, USimpleError}; -use crate::features::tokenize::sub::SubParser; -use crate::features::tokenize::token::Token; -use crate::features::tokenize::unescaped_text::UnescapedText; -use crate::show_warning; -use itertools::put_back_n; -use std::io::{stdout, Cursor, Write}; -use std::iter::Peekable; -use std::slice::Iter; - -/// Memo runner of printf -/// Takes a format string and arguments -/// 1. tokenize format string into tokens, consuming -/// any subst. arguments along the way. -/// 2. feeds remaining arguments into function -/// that prints tokens. -struct Memo { - tokens: Vec, -} - -fn warn_excess_args(first_arg: &str) { - show_warning!( - "ignoring excess arguments, starting with {}", - first_arg.quote() - ); -} - -impl Memo { - fn new( - writer: &mut W, - pf_string: &str, - pf_args_it: &mut Peekable>, - ) -> UResult - where - W: Write, - { - let mut pm = Self { tokens: Vec::new() }; - let mut it = put_back_n(pf_string.chars()); - let mut has_sub = false; - loop { - if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) { - pm.tokens.push(x); - } - if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? { - if !has_sub { - has_sub = true; - } - pm.tokens.push(x); - } - if let Some(x) = it.next() { - it.put_back(x); - } else { - break; - } - } - if !has_sub { - let mut drain = false; - if let Some(first_arg) = pf_args_it.peek() { - warn_excess_args(first_arg); - drain = true; - } - if drain { - loop { - // drain remaining args; - if pf_args_it.next().is_none() { - break; - } - } - } - } - Ok(pm) - } - fn apply(&self, writer: &mut W, pf_args_it: &mut Peekable>) - where - W: Write, - { - for tkn in &self.tokens { - tkn.write(writer, pf_args_it); - } - } - fn run_all(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()> - where - W: Write, - { - let mut arg_it = pf_args.iter().peekable(); - let pm = Self::new(writer, pf_string, &mut arg_it)?; - loop { - if arg_it.peek().is_none() { - return Ok(()); - } - pm.apply(writer, &mut arg_it); - } - } -} - -/// Write a formatted string to stdout. -/// -/// `format_string` contains the template and `args` contains the -/// arguments to render into the template. -/// -/// See also [`sprintf`], which creates a new formatted [`String`]. -/// -/// # Examples -/// -/// ```rust -/// use uucore::memo::printf; -/// -/// printf("hello %s", &["world".to_string()]).unwrap(); -/// // prints "hello world" -/// ``` -pub fn printf(format_string: &str, args: &[String]) -> UResult<()> { - let mut writer = stdout(); - Memo::run_all(&mut writer, format_string, args) -} - -/// Create a new formatted string. -/// -/// `format_string` contains the template and `args` contains the -/// arguments to render into the template. -/// -/// See also [`printf`], which prints to stdout. -/// -/// # Examples -/// -/// ```rust -/// use uucore::memo::sprintf; -/// -/// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); -/// assert_eq!(s, "hello world".to_string()); -/// ``` -pub fn sprintf(format_string: &str, args: &[String]) -> UResult { - let mut writer = Cursor::new(vec![]); - Memo::run_all(&mut writer, format_string, args)?; - let buf = writer.into_inner(); - match String::from_utf8(buf) { - Ok(s) => Ok(s), - Err(e) => Err(USimpleError::new( - 1, - format!("failed to parse formatted string as UTF-8: {e}"), - )), - } -} - -#[cfg(test)] -mod tests { - - use crate::memo::sprintf; - - #[test] - fn test_sprintf_smoke() { - assert_eq!(sprintf("", &[]).unwrap(), "".to_string()); - } - - #[test] - fn test_sprintf_no_args() { - assert_eq!( - sprintf("hello world", &[]).unwrap(), - "hello world".to_string() - ); - } - - #[test] - fn test_sprintf_string() { - assert_eq!( - sprintf("hello %s", &["world".to_string()]).unwrap(), - "hello world".to_string() - ); - } -} diff --git a/src/uucore/src/lib/features/tokenize/mod.rs b/src/uucore/src/lib/features/tokenize/mod.rs deleted file mode 100644 index dfe44a0e56b..00000000000 --- a/src/uucore/src/lib/features/tokenize/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[allow(clippy::module_inception)] -mod num_format; -pub mod sub; -pub mod token; -pub mod unescaped_text; diff --git a/src/uucore/src/lib/features/tokenize/num_format/format_field.rs b/src/uucore/src/lib/features/tokenize/num_format/format_field.rs deleted file mode 100644 index 02998cde540..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/format_field.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! Primitives used by Sub Tokenizer -//! and num_format modules -#[derive(Clone)] -pub enum FieldType { - Strf, - Floatf, - CninetyNineHexFloatf, - Scif, - Decf, - Intf, - Charf, -} - -// #[allow(non_camel_case_types)] -// pub enum FChar { -// d, -// e, -// E, -// i, -// f, -// F, -// g, -// G, -// u, -// x, -// X, -// o -// } -// - -// a Sub Tokens' fields are stored -// as a single object so they can be more simply -// passed by ref to num_format in a Sub method -#[derive(Clone)] -pub struct FormatField<'a> { - pub min_width: Option, - pub second_field: Option, - pub field_char: &'a char, - pub field_type: &'a FieldType, - pub orig: &'a String, -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatter.rs b/src/uucore/src/lib/features/tokenize/num_format/formatter.rs deleted file mode 100644 index ed7d5a0f609..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatter.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Primitives used by num_format and sub_modules. -//! never dealt with above (e.g. Sub Tokenizer never uses these) - -use crate::{display::Quotable, show_error}; -use itertools::{put_back_n, PutBackN}; -use std::str::Chars; - -use super::format_field::FormatField; - -// contains the rough ingredients to final -// output for a number, organized together -// to allow for easy generalization of output manipulation -// (e.g. max number of digits after decimal) -#[derive(Default)] -pub struct FormatPrimitive { - pub prefix: Option, - pub pre_decimal: Option, - pub post_decimal: Option, - pub suffix: Option, -} - -#[derive(Clone, PartialEq, Eq)] -pub enum Base { - Ten = 10, - Hex = 16, - Octal = 8, -} - -// information from the beginning of a numeric argument -// the precedes the beginning of a numeric value -pub struct InitialPrefix { - pub radix_in: Base, - pub sign: i8, - pub offset: usize, -} - -pub trait Formatter { - // return a FormatPrimitive for - // particular field char(s), given the argument - // string and prefix information (sign, radix) - fn get_primitive( - &self, - field: &FormatField, - in_prefix: &InitialPrefix, - str_in: &str, - ) -> Option; - // return a string from a FormatPrimitive, - // given information about the field - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String; -} -pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN { - put_back_n(str_in[offset..].chars()) -} - -// TODO: put this somewhere better -pub fn warn_incomplete_conv(pf_arg: &str) { - // important: keep println here not print - show_error!("{}: value not completely converted", pf_arg.maybe_quote()); -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs deleted file mode 100644 index 3df9f7129bc..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs +++ /dev/null @@ -1,270 +0,0 @@ -// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl - -pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec { - let mut carry: u16 = 0; - let mut rem: u16; - let mut new_amount: u16; - let fact: u16 = u16::from(base_ten_int_fact); - let base: u16 = u16::from(basenum); - - let mut ret_rev: Vec = Vec::new(); - let mut it = arr_num.iter().rev(); - loop { - let i = it.next(); - match i { - Some(u) => { - new_amount = (u16::from(*u) * fact) + carry; - rem = new_amount % base; - carry = (new_amount - rem) / base; - ret_rev.push(rem as u8); - } - None => { - while carry != 0 { - rem = carry % base; - carry = (carry - rem) / base; - ret_rev.push(rem as u8); - } - break; - } - } - } - let ret: Vec = ret_rev.into_iter().rev().collect(); - ret -} - -#[allow(dead_code)] -pub struct Remainder<'a> { - pub position: usize, - pub replace: Vec, - pub arr_num: &'a Vec, -} - -#[allow(dead_code)] -pub struct DivOut<'a> { - pub quotient: u8, - pub remainder: Remainder<'a>, -} - -#[allow(dead_code)] -pub fn arrnum_int_div_step<'a>( - rem_in: &'a Remainder, - radix_in: u8, - base_ten_int_divisor: u8, - after_decimal: bool, -) -> DivOut<'a> { - let mut rem_out = Remainder { - position: rem_in.position, - replace: Vec::new(), - arr_num: rem_in.arr_num, - }; - - let mut bufferval: u16 = 0; - let base: u16 = u16::from(radix_in); - let divisor: u16 = u16::from(base_ten_int_divisor); - let mut traversed = 0; - - let mut quotient = 0; - let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..]; - let mut it_replace = rem_in.replace.iter(); - let mut it_f = refd_vals.iter(); - loop { - let u = match it_replace.next() { - Some(u_rep) => u16::from(*u_rep), - None => match it_f.next() { - Some(u_orig) => u16::from(*u_orig), - None => { - if !after_decimal { - break; - } - 0 - } - }, - }; - traversed += 1; - bufferval += u; - if bufferval > divisor { - while bufferval >= divisor { - quotient += 1; - bufferval -= divisor; - } - rem_out.replace = if bufferval == 0 { - Vec::new() - } else { - let remainder_as_arrnum = unsigned_to_arrnum(bufferval); - base_conv_vec(&remainder_as_arrnum, 10, radix_in) - }; - rem_out.position += 1 + (traversed - rem_out.replace.len()); - break; - } else { - bufferval *= base; - } - } - DivOut { - quotient, - remainder: rem_out, - } -} -pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec { - let mut carry: u16 = u16::from(base_ten_int_term); - let mut rem: u16; - let mut new_amount: u16; - let base: u16 = u16::from(basenum); - - let mut ret_rev: Vec = Vec::new(); - let mut it = arrnum.iter().rev(); - loop { - let i = it.next(); - match i { - Some(u) => { - new_amount = u16::from(*u) + carry; - rem = new_amount % base; - carry = (new_amount - rem) / base; - ret_rev.push(rem as u8); - } - None => { - while carry != 0 { - rem = carry % base; - carry = (carry - rem) / base; - ret_rev.push(rem as u8); - } - break; - } - } - } - let ret: Vec = ret_rev.into_iter().rev().collect(); - ret -} - -pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec { - let mut result = vec![0]; - for i in src { - result = arrnum_int_mult(&result, radix_dest, radix_src); - result = arrnum_int_add(&result, radix_dest, *i); - } - result -} - -#[allow(dead_code)] -pub fn unsigned_to_arrnum(src: u16) -> Vec { - let mut result: Vec = Vec::new(); - let mut src_tmp: u16 = src; - while src_tmp > 0 { - result.push((src_tmp % 10) as u8); - src_tmp /= 10; - } - result.reverse(); - result -} - -// temporary needs-improvement-function -pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 { - // it would require a lot of addl code - // to implement this for arbitrary string input. - // until then, the below operates as an outline - // of how it would work. - let mut factor: f64 = 1_f64; - let radix_src_float: f64 = f64::from(radix_src); - let mut r: f64 = 0_f64; - for (i, u) in src.iter().enumerate() { - if i > 15 { - break; - } - factor /= radix_src_float; - r += factor * f64::from(*u); - } - r -} - -pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec { - let mut intermed_in: Vec = Vec::new(); - for c in src.chars() { - #[allow(clippy::single_match)] - match radix_def_src.parse_char(c) { - Some(u) => { - intermed_in.push(u); - } - None => {} //todo err msg on incorrect - } - } - intermed_in -} - -pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String { - let mut str_out = String::new(); - for u in src.iter() { - #[allow(clippy::single_match)] - match radix_def_dest.format_u8(*u) { - Some(c) => { - str_out.push(c); - } - None => {} //todo - } - } - str_out -} - -pub fn base_conv_str( - src: &str, - radix_def_src: &dyn RadixDef, - radix_def_dest: &dyn RadixDef, -) -> String { - let intermed_in: Vec = str_to_arrnum(src, radix_def_src); - let intermed_out = base_conv_vec( - &intermed_in, - radix_def_src.get_max(), - radix_def_dest.get_max(), - ); - arrnum_to_str(&intermed_out, radix_def_dest) -} - -pub trait RadixDef { - fn get_max(&self) -> u8; - fn parse_char(&self, x: char) -> Option; - fn format_u8(&self, x: u8) -> Option; -} -pub struct RadixTen; - -const ZERO_ASC: u8 = b'0'; -const UPPER_A_ASC: u8 = b'A'; -const LOWER_A_ASC: u8 = b'a'; - -impl RadixDef for RadixTen { - fn get_max(&self) -> u8 { - 10 - } - fn parse_char(&self, c: char) -> Option { - match c { - '0'..='9' => Some(c as u8 - ZERO_ASC), - _ => None, - } - } - fn format_u8(&self, u: u8) -> Option { - match u { - 0..=9 => Some((ZERO_ASC + u) as char), - _ => None, - } - } -} -pub struct RadixHex; -impl RadixDef for RadixHex { - fn get_max(&self) -> u8 { - 16 - } - fn parse_char(&self, c: char) -> Option { - match c { - '0'..='9' => Some(c as u8 - ZERO_ASC), - 'A'..='F' => Some(c as u8 + 10 - UPPER_A_ASC), - 'a'..='f' => Some(c as u8 + 10 - LOWER_A_ASC), - _ => None, - } - } - fn format_u8(&self, u: u8) -> Option { - match u { - 0..=9 => Some((ZERO_ASC + u) as char), - 10..=15 => Some((UPPER_A_ASC + (u - 10)) as char), - _ => None, - } - } -} - -mod tests; diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs deleted file mode 100644 index 903a3faf142..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs +++ /dev/null @@ -1,56 +0,0 @@ -// spell-checker:ignore (ToDO) arrnum mult - -#[cfg(test)] -use super::*; - -#[test] -fn test_arrnum_int_mult() { - // (in base 10) 12 * 4 = 48 - let factor: Vec = vec![1, 2]; - let base_num = 10; - let base_ten_int_fact: u8 = 4; - let should_output: Vec = vec![4, 8]; - - let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); - assert!(product == should_output); -} - -#[test] -fn test_arrnum_int_non_base_10() { - // (in base 3) - // 5 * 4 = 20 - let factor: Vec = vec![1, 2]; - let base_num = 3; - let base_ten_int_fact: u8 = 4; - let should_output: Vec = vec![2, 0, 2]; - - let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); - assert!(product == should_output); -} - -#[test] -fn test_arrnum_int_div_short_circuit() { - // ( - let arrnum: Vec = vec![5, 5, 5, 5, 0]; - let base_num = 10; - let base_ten_int_divisor: u8 = 41; - let remainder_passed_in = Remainder { - position: 1, - replace: vec![1, 3], - arr_num: &arrnum, - }; - - // the "replace" should mean the number being divided - // is 1350, the first time you can get 41 to go into - // 1350, its at 135, where you can get a quotient of - // 3 and a remainder of 12; - - let quotient_should_be: u8 = 3; - let remainder_position_should_be: usize = 3; - let remainder_replace_should_be = vec![1, 2]; - - let result = arrnum_int_div_step(&remainder_passed_in, base_num, base_ten_int_divisor, false); - assert!(quotient_should_be == result.quotient); - assert!(remainder_position_should_be == result.remainder.position); - assert!(remainder_replace_should_be == result.remainder.replace); -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs deleted file mode 100644 index a5c51153efd..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs +++ /dev/null @@ -1,115 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for %a %F C99 Hex-floating-point subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::base_conv; -use super::base_conv::RadixDef; -use super::float_common::{primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct CninetyNineHexFloatf { - #[allow(dead_code)] - as_num: f64, -} -impl CninetyNineHexFloatf { - pub fn new() -> Self { - Self::default() - } -} - -impl Formatter for CninetyNineHexFloatf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize), - None, - true, - ); - let f = get_primitive_hex( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - *field.field_char == 'A', - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} - -// c99 hex has unique requirements of all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases if you don't have decimal you must have decimal, here it's the other way around) - -// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default. when that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden. -fn get_primitive_hex( - initial_prefix: &InitialPrefix, - _str_in: &str, - _analysis: &FloatAnalysis, - _last_dec_place: usize, - capitalized: bool, -) -> FormatPrimitive { - let prefix = Some(String::from(if initial_prefix.sign == -1 { - "-0x" - } else { - "0x" - })); - - // TODO actual conversion, make sure to get back mantissa. - // for hex to hex, it's really just a matter of moving the - // decimal point and calculating the mantissa by its initial - // position and its moves, with every position counting for - // the addition or subtraction of 4 (2**4, because 4 bits in a hex digit) - // to the exponent. - // decimal's going to be a little more complicated. correct simulation - // of glibc will require after-decimal division to a specified precision. - // the difficult part of this (arrnum_int_div_step) is already implemented. - - // the hex float name may be a bit misleading in terms of how to go about the - // conversion. The best way to do it is to just convert the float number - // directly to base 2 and then at the end translate back to hex. - let mantissa = 0; - let suffix = Some({ - let ind = if capitalized { "P" } else { "p" }; - if mantissa >= 0 { - format!("{ind}+{mantissa}") - } else { - format!("{ind}{mantissa}") - } - }); - FormatPrimitive { - prefix, - suffix, - ..Default::default() - } -} - -#[allow(dead_code)] -fn to_hex(src: &str, before_decimal: bool) -> String { - let radix_ten = base_conv::RadixTen; - let radix_hex = base_conv::RadixHex; - if before_decimal { - base_conv::base_conv_str(src, &radix_ten, &radix_hex) - } else { - let as_arrnum_ten = base_conv::str_to_arrnum(src, &radix_ten); - let s = format!( - "{}", - base_conv::base_conv_float(&as_arrnum_ten, radix_ten.get_max(), radix_hex.get_max()) - ); - if s.len() > 2 { - String::from(&s[2..]) - } else { - // zero - s - } - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs deleted file mode 100644 index 2ee53882e5d..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs +++ /dev/null @@ -1,185 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! formatter for %g %G decimal subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -const SIGNIFICANT_FIGURES: usize = 6; - -// Parse a numeric string as the nearest integer with a given significance. -// This is a helper function for round(). -// Examples: -// round_to_significance("456", 1) == 500 -// round_to_significance("456", 2) == 460 -// round_to_significance("456", 9) == 456 -fn round_to_significance(input: &str, significant_figures: usize) -> u32 { - if significant_figures < input.len() { - // If the input has too many digits, use a float intermediary - // to round it before converting to an integer. Otherwise, - // converting straight to integer will truncate. - // There might be a cleaner way to do this... - let digits = &input[..significant_figures + 1]; - let float_representation = digits.parse::().unwrap(); - (float_representation / 10.0).round() as u32 - } else { - input.parse::().unwrap_or(0) - } -} - -// Removing trailing zeroes, expressing the result as an integer where -// possible. This is a helper function for round(). -fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { - if let Some(ref post_dec) = format.post_decimal { - let trimmed = post_dec.trim_end_matches('0'); - - if trimmed.is_empty() { - // If there are no nonzero digits after the decimal point, - // use integer formatting by clearing post_decimal and suffix. - format.post_decimal = Some(String::new()); - if format.suffix == Some("e+00".into()) { - format.suffix = Some(String::new()); - } - } else if trimmed.len() != post_dec.len() { - // Otherwise, update the format to remove only the trailing - // zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were - // no trailing zeroes, do nothing. - format.post_decimal = Some(trimmed.to_owned()); - } - } - format -} - -// Round a format to six significant figures and remove trailing zeroes. -fn round(mut format: FormatPrimitive) -> FormatPrimitive { - let mut significant_digits_remaining = SIGNIFICANT_FIGURES; - - // First, take as many significant digits as possible from pre_decimal, - if format.pre_decimal.is_some() { - let input = format.pre_decimal.as_ref().unwrap(); - let rounded = round_to_significance(input, significant_digits_remaining); - let mut rounded_str = rounded.to_string(); - significant_digits_remaining -= rounded_str.len(); - - // If the pre_decimal has exactly enough significant digits, - // round the input to the nearest integer. If the first - // post_decimal digit is 5 or higher, round up by incrementing - // the pre_decimal number. Otherwise, use the pre_decimal as-is. - if significant_digits_remaining == 0 { - if let Some(digits) = &format.post_decimal { - if digits.chars().next().unwrap_or('0') >= '5' { - let rounded = rounded + 1; - rounded_str = rounded.to_string(); - } - } - } - format.pre_decimal = Some(rounded_str); - } - - // If no significant digits remain, or there's no post_decimal to - // round, return the rounded pre_decimal value with no post_decimal. - // Otherwise, round the post_decimal to the remaining significance. - if significant_digits_remaining == 0 { - format.post_decimal = Some(String::new()); - } else if let Some(input) = format.post_decimal { - let leading_zeroes = input.len() - input.trim_start_matches('0').len(); - let digits = &input[leading_zeroes..]; - - // In the post_decimal, leading zeroes are significant. "01.0010" - // has one significant digit in pre_decimal, and 3 from post_decimal. - let mut post_decimal_str = String::with_capacity(significant_digits_remaining); - for _ in 0..leading_zeroes { - post_decimal_str.push('0'); - } - - if leading_zeroes < significant_digits_remaining { - // After significant leading zeroes, round the remaining digits - // to any remaining significance. - let rounded = round_to_significance(digits, significant_digits_remaining); - post_decimal_str.push_str(&rounded.to_string()); - } else if leading_zeroes == significant_digits_remaining - && digits.chars().next().unwrap_or('0') >= '5' - { - // If necessary, round up the post_decimal ("1.000009" should - // round to 1.00001, instead of truncating after the last - // significant leading zero). - post_decimal_str.pop(); - post_decimal_str.push('1'); - } else { - // If the rounded post_decimal is entirely zeroes, discard - // it and use integer formatting instead. - post_decimal_str = String::new(); - } - - format.post_decimal = Some(post_decimal_str); - } - truncate(format) -} - -// Given an exponent used in scientific notation, return whether the -// number is small enough to be expressed as a decimal instead. "Small -// enough" is based only on the number's magnitude, not the length of -// any string representation. -fn should_represent_as_decimal(suffix: &Option) -> bool { - match suffix { - Some(exponent) => { - if exponent.chars().nth(1) == Some('-') { - exponent < &"e-05".into() - } else { - exponent < &"e+06".into() - } - } - None => true, - } -} - -pub struct Decf; - -impl Decf { - pub fn new() -> Self { - Self - } -} -impl Formatter for Decf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - // default to scif interpretation so as to not truncate input vals - // (that would be displayed in scif) based on relation to decimal place - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize + 1), - None, - false, - ); - let mut f_dec = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - Some(*field.field_char == 'G'), - ); - - if should_represent_as_decimal(&f_dec.suffix) { - // Use decimal formatting instead of scientific notation - // if the input's magnitude is small. - f_dec = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - None, - ); - } - - Some(round(f_dec)) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs deleted file mode 100644 index e0a29217c4a..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs +++ /dev/null @@ -1,377 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -use super::super::format_field::FormatField; -use super::super::formatter::{ - get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix, -}; -use super::base_conv; -use super::base_conv::RadixDef; - -// if the memory, copy, and comparison cost of chars -// becomes an issue, we can always operate in vec here -// rather than just at de_hex - -pub struct FloatAnalysis { - pub len_important: usize, - // none means no decimal point. - pub decimal_pos: Option, - pub follow: Option, -} -fn has_enough_digits( - hex_input: bool, - hex_output: bool, - string_position: usize, - starting_position: usize, - limit: usize, -) -> bool { - // -1s are for rounding - if hex_output { - if hex_input { - (string_position - 1) - starting_position >= limit - } else { - false //undecidable without converting - } - } else if hex_input { - (((string_position - 1) - starting_position) * 9) / 8 >= limit - } else { - (string_position - 1) - starting_position >= limit - } -} - -impl FloatAnalysis { - #[allow(clippy::cognitive_complexity)] - pub fn analyze( - str_in: &str, - initial_prefix: &InitialPrefix, - max_sd_opt: Option, - max_after_dec_opt: Option, - hex_output: bool, - ) -> Self { - // this fn assumes - // the input string - // has no leading spaces or 0s - let str_it = get_it_at(initial_prefix.offset, str_in); - let mut ret = Self { - len_important: 0, - decimal_pos: None, - follow: None, - }; - let hex_input = match initial_prefix.radix_in { - Base::Hex => true, - Base::Ten => false, - Base::Octal => { - panic!("this should never happen: floats should never receive octal input"); - } - }; - let mut i = 0; - let mut pos_before_first_nonzero_after_decimal: Option = None; - for c in str_it { - match c { - e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => { - if !hex_input { - match e { - '0'..='9' => {} - _ => { - warn_incomplete_conv(str_in); - break; - } - } - } - if ret.decimal_pos.is_some() - && pos_before_first_nonzero_after_decimal.is_none() - && e != '0' - { - pos_before_first_nonzero_after_decimal = Some(i - 1); - } - if let Some(max_sd) = max_sd_opt { - if i == max_sd { - // follow is used in cases of %g - // where the character right after the last - // sd is considered is rounded affecting - // the previous digit in 1/2 of instances - ret.follow = Some(e); - } else if ret.decimal_pos.is_some() && i > max_sd { - break; - } - } - if let Some(max_after_dec) = max_after_dec_opt { - if let Some(p) = ret.decimal_pos { - if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) { - break; - } - } - } else if let Some(max_sd) = max_sd_opt { - if let Some(p) = pos_before_first_nonzero_after_decimal { - if has_enough_digits(hex_input, hex_output, i, p, max_sd) { - break; - } - } - } - } - '.' => { - if ret.decimal_pos.is_none() { - ret.decimal_pos = Some(i); - } else { - warn_incomplete_conv(str_in); - break; - } - } - _ => { - warn_incomplete_conv(str_in); - break; - } - }; - i += 1; - } - ret.len_important = i; - ret - } -} - -fn de_hex(src: &str, before_decimal: bool) -> String { - let radix_ten = base_conv::RadixTen; - let radix_hex = base_conv::RadixHex; - if before_decimal { - base_conv::base_conv_str(src, &radix_hex, &radix_ten) - } else { - let as_arrnum_hex = base_conv::str_to_arrnum(src, &radix_hex); - let s = format!( - "{}", - base_conv::base_conv_float(&as_arrnum_hex, radix_hex.get_max(), radix_ten.get_max()) - ); - if s.len() > 2 { - String::from(&s[2..]) - } else { - // zero - s - } - } -} - -// takes a string in, -// truncates to a position, -// bumps the last digit up one, -// and if the digit was nine -// propagate to the next, etc. -// If before the decimal and the most -// significant digit is a 9, it becomes a 1 -fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) { - let mut it = in_str[0..position].chars(); - let mut rev = String::new(); - let mut i = position; - let mut finished_in_dec = false; - while let Some(c) = it.next_back() { - i -= 1; - match c { - '9' => { - // If we're before the decimal - // and on the most significant digit, - // round 9 to 1, else to 0. - if before_dec && i == 0 { - rev.push('1'); - } else { - rev.push('0'); - } - } - e => { - rev.push(((e as u8) + 1) as char); - finished_in_dec = true; - break; - } - } - } - let mut fwd = String::from(&in_str[0..i]); - for ch in rev.chars().rev() { - fwd.push(ch); - } - (fwd, finished_in_dec) -} - -fn round_terminal_digit( - before_dec: String, - after_dec: String, - position: usize, -) -> (String, String, bool) { - if position < after_dec.len() { - let digit_at_pos: char; - { - digit_at_pos = after_dec[position..=position].chars().next().expect(""); - } - if let '5'..='9' = digit_at_pos { - let (new_after_dec, finished_in_dec) = _round_str_from(&after_dec, position, false); - if finished_in_dec { - return (before_dec, new_after_dec, false); - } else { - let (new_before_dec, _) = _round_str_from(&before_dec, before_dec.len(), true); - let mut dec_place_chg = false; - let mut before_dec_chars = new_before_dec.chars(); - if before_dec_chars.next() == Some('1') && before_dec_chars.all(|c| c == '0') { - // If the first digit is a one and remaining are zeros, we have - // rounded to a new decimal place, so the decimal place must be updated. - // Only update decimal place if the before decimal != 0 - dec_place_chg = before_dec != "0"; - } - return (new_before_dec, new_after_dec, dec_place_chg); - } - // TODO - } - } - (before_dec, after_dec, false) -} - -#[allow(clippy::cognitive_complexity)] -pub fn get_primitive_dec( - initial_prefix: &InitialPrefix, - str_in: &str, - analysis: &FloatAnalysis, - last_dec_place: usize, - sci_mode: Option, -) -> FormatPrimitive { - let mut f = FormatPrimitive::default(); - - // add negative sign section - if initial_prefix.sign == -1 { - f.prefix = Some(String::from("-")); - } - - // assign the digits before and after the decimal points - // to separate slices. If no digits after decimal point, - // assign 0 - let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos { - Some(pos) => (&str_in[..pos], &str_in[pos + 1..]), - None => (str_in, "0"), - }; - if first_segment_raw.is_empty() { - first_segment_raw = "0"; - } - // convert to string, de_hexifying if input is in hex // spell-checker:disable-line - let (first_segment, second_segment) = match initial_prefix.radix_in { - Base::Hex => ( - de_hex(first_segment_raw, true), - de_hex(second_segment_raw, false), - ), - _ => ( - String::from(first_segment_raw), - String::from(second_segment_raw), - ), - }; - let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() { - if first_segment.len() > 1 { - let mut post_dec = String::from(&first_segment[1..]); - post_dec.push_str(&second_segment); - ( - String::from(&first_segment[0..1]), - post_dec, - first_segment.len() as isize - 1, - ) - } else { - match first_segment - .chars() - .next() - .expect("float_common: no chars in first segment.") - { - '0' => { - let it = second_segment.chars().enumerate(); - let mut m: isize = 0; - let mut pre = String::from("0"); - let mut post = String::from("0"); - for (i, c) in it { - match c { - '0' => {} - _ => { - m = -((i as isize) + 1); - pre = String::from(&second_segment[i..=i]); - post = String::from(&second_segment[i + 1..]); - break; - } - } - } - (pre, post, m) - } - _ => (first_segment, second_segment, 0), - } - } - } else { - (first_segment, second_segment, 0) - }; - - let (pre_dec_draft, post_dec_draft, dec_place_chg) = - round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1); - f.post_decimal = Some(post_dec_draft); - if let Some(capitalized) = sci_mode { - let si_ind = if capitalized { 'E' } else { 'e' }; - // Increase the mantissa if we're adding a decimal place - if dec_place_chg { - mantissa += 1; - } - f.suffix = Some(if mantissa >= 0 { - format!("{si_ind}+{mantissa:02}") - } else { - // negative sign is considered in format!s - // leading zeroes - format!("{si_ind}{mantissa:03}") - }); - f.pre_decimal = Some(pre_dec_draft); - } else if dec_place_chg { - // We've rounded up to a new decimal place so append 0 - f.pre_decimal = Some(pre_dec_draft + "0"); - } else { - f.pre_decimal = Some(pre_dec_draft); - } - - f -} - -pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String { - let mut final_str = String::new(); - if let Some(ref prefix) = prim.prefix { - final_str.push_str(prefix); - } - match prim.pre_decimal { - Some(ref pre_decimal) => { - final_str.push_str(pre_decimal); - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under correct \ - behavior, always have a pre_dec value." - ); - } - } - let decimal_places = field.second_field.unwrap_or(6); - match prim.post_decimal { - Some(ref post_decimal) => { - if !post_decimal.is_empty() && decimal_places > 0 { - final_str.push('.'); - let len_avail = post_decimal.len() as u32; - - if decimal_places >= len_avail { - // println!("dec {}, len avail {}", decimal_places, len_avail); - final_str.push_str(post_decimal); - - if *field.field_char != 'g' && *field.field_char != 'G' { - let diff = decimal_places - len_avail; - for _ in 0..diff { - final_str.push('0'); - } - } - } else { - // println!("printing to only {}", decimal_places); - final_str.push_str(&post_decimal[0..decimal_places as usize]); - } - } - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under correct \ - behavior, always have a pre_dec value." - ); - } - } - if let Some(ref suffix) = prim.suffix { - final_str.push_str(suffix); - } - - final_str -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs deleted file mode 100644 index cca2750dc55..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for %f %F common-notation floating-point subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct Floatf; -impl Floatf { - pub fn new() -> Self { - Self - } -} -impl Formatter for Floatf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - None, - Some(second_field as usize), - false, - ); - let f = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - None, - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs deleted file mode 100644 index 0f6e78de6f6..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs +++ /dev/null @@ -1,282 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for unsigned and signed int subs -//! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64) -//! signed int: %i %d (both base ten i64) -use super::super::format_field::FormatField; -use super::super::formatter::{ - get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix, -}; -use std::i64; -use std::u64; - -#[derive(Default)] -pub struct Intf { - _a: u32, -} - -// see the Intf::analyze() function below -struct IntAnalysis { - check_past_max: bool, - past_max: bool, - is_zero: bool, - len_digits: u8, -} - -impl Intf { - pub fn new() -> Self { - Self::default() - } - // take a ref to argument string, and basic information - // about prefix (offset, radix, sign), and analyze string - // to gain the IntAnalysis information above - // check_past_max: true if the number *may* be above max, - // but we don't know either way. One of several reasons - // we may have to parse as int. - // past_max: true if the object is past max, false if not - // in the future we should probably combine these into an - // Option - // is_zero: true if number is zero, false otherwise - // len_digits: length of digits used to create the int - // important, for example, if we run into a non-valid character - #[allow(clippy::cognitive_complexity)] - fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis { - // the maximum number of digits we could conceivably - // have before the decimal point without exceeding the - // max - let mut str_it = get_it_at(initial_prefix.offset, str_in); - let max_sd_in = if signed_out { - match initial_prefix.radix_in { - Base::Ten => 19, - Base::Octal => 21, - Base::Hex => 16, - } - } else { - match initial_prefix.radix_in { - Base::Ten => 20, - Base::Octal => 22, - Base::Hex => 16, - } - }; - let mut ret = IntAnalysis { - check_past_max: false, - past_max: false, - is_zero: false, - len_digits: 0, - }; - - // todo turn this to a while let now that we know - // no special behavior on EOI break - loop { - let c_opt = str_it.next(); - if let Some(c) = c_opt { - match c { - '0'..='9' | 'a'..='f' | 'A'..='F' => { - if ret.len_digits == 0 && c == '0' { - ret.is_zero = true; - } else if ret.is_zero { - ret.is_zero = false; - } - ret.len_digits += 1; - if ret.len_digits == max_sd_in { - if let Some(next_ch) = str_it.next() { - match next_ch { - '0'..='9' => { - ret.past_max = true; - } - _ => { - // force conversion - // to check if its above max. - // todo: spin out convert - // into fn, call it here to try - // read val, on Ok() - // save val for reuse later - // that way on same-base in and out - // we don't needlessly convert int - // to str, we can just copy it over. - ret.check_past_max = true; - str_it.put_back(next_ch); - } - } - if ret.past_max { - break; - } - } else { - ret.check_past_max = true; - } - } - } - _ => { - warn_incomplete_conv(str_in); - break; - } - } - } else { - // breaks on EOL - break; - } - } - ret - } - // get a FormatPrimitive of the maximum value for the field char - // and given sign - fn get_max(field_char: char, sign: i8) -> FormatPrimitive { - let mut fmt_primitive = FormatPrimitive::default(); - fmt_primitive.pre_decimal = Some(String::from(match field_char { - 'd' | 'i' => match sign { - 1 => "9223372036854775807", - _ => { - fmt_primitive.prefix = Some(String::from("-")); - "9223372036854775808" - } - }, - 'x' | 'X' => "ffffffffffffffff", - 'o' => "1777777777777777777777", - /* 'u' | */ _ => "18446744073709551615", - })); - fmt_primitive - } - // conv_from_segment contract: - // 1. takes - // - a string that begins with a non-zero digit, and proceeds - // with zero or more following digits until the end of the string - // - a radix to interpret those digits as - // - a char that communicates: - // whether to interpret+output the string as an i64 or u64 - // what radix to write the parsed number as. - // 2. parses it as a rust integral type - // 3. outputs FormatPrimitive with: - // - if the string falls within bounds: - // number parsed and written in the correct radix - // - if the string falls outside bounds: - // for i64 output, the int minimum or int max (depending on sign) - // for u64 output, the u64 max in the output radix - fn conv_from_segment( - segment: &str, - radix_in: Base, - field_char: char, - sign: i8, - ) -> FormatPrimitive { - match field_char { - 'i' | 'd' => match i64::from_str_radix(segment, radix_in as u32) { - Ok(i) => { - let mut fmt_prim = FormatPrimitive::default(); - if sign == -1 { - fmt_prim.prefix = Some(String::from("-")); - } - fmt_prim.pre_decimal = Some(format!("{i}")); - fmt_prim - } - Err(_) => Self::get_max(field_char, sign), - }, - _ => match u64::from_str_radix(segment, radix_in as u32) { - Ok(u) => { - let mut fmt_prim = FormatPrimitive::default(); - let u_f = if sign == -1 { u64::MAX - (u - 1) } else { u }; - fmt_prim.pre_decimal = Some(match field_char { - 'X' => format!("{u_f:X}"), - 'x' => format!("{u_f:x}"), - 'o' => format!("{u_f:o}"), - _ => format!("{u_f}"), - }); - fmt_prim - } - Err(_) => Self::get_max(field_char, sign), - }, - } - } -} -impl Formatter for Intf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let begin = initial_prefix.offset; - - // get information about the string. see Intf::Analyze - // def above. - let convert_hints = Self::analyze( - str_in, - *field.field_char == 'i' || *field.field_char == 'd', - initial_prefix, - ); - // We always will have a format primitive to return - Some(if convert_hints.len_digits == 0 || convert_hints.is_zero { - // if non-digit or end is reached before a non-zero digit - FormatPrimitive { - pre_decimal: Some(String::from("0")), - ..Default::default() - } - } else if !convert_hints.past_max { - // if the number is or may be below the bounds limit - let radix_out = match *field.field_char { - 'd' | 'i' | 'u' => Base::Ten, - 'x' | 'X' => Base::Hex, - /* 'o' | */ _ => Base::Octal, - }; - let radix_mismatch = !radix_out.eq(&initial_prefix.radix_in); - let decrease_from_max: bool = initial_prefix.sign == -1 && *field.field_char != 'i'; - let end = begin + convert_hints.len_digits as usize; - - // convert to int if any one of these is true: - // - number of digits in int indicates it may be past max - // - we're subtracting from the max - // - we're converting the base - if convert_hints.check_past_max || decrease_from_max || radix_mismatch { - // radix of in and out is the same. - let segment = String::from(&str_in[begin..end]); - Self::conv_from_segment( - &segment, - initial_prefix.radix_in.clone(), - *field.field_char, - initial_prefix.sign, - ) - } else { - // otherwise just do a straight string copy. - let mut fmt_prim = FormatPrimitive::default(); - - // this is here and not earlier because - // zero doesn't get a sign, and conv_from_segment - // creates its format primitive separately - if initial_prefix.sign == -1 && *field.field_char == 'i' { - fmt_prim.prefix = Some(String::from("-")); - } - fmt_prim.pre_decimal = Some(String::from(&str_in[begin..end])); - fmt_prim - } - } else { - Self::get_max(*field.field_char, initial_prefix.sign) - }) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - let mut final_str: String = String::new(); - if let Some(ref prefix) = prim.prefix { - final_str.push_str(prefix); - } - // integral second fields is zero-padded minimum-width - // which gets handled before general minimum-width - match prim.pre_decimal { - Some(ref pre_decimal) => { - if let Some(min) = field.second_field { - let mut i = min; - let len = pre_decimal.len() as u32; - while i > len { - final_str.push('0'); - i -= 1; - } - } - final_str.push_str(pre_decimal); - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under \ - correct behavior, always have a pre_dec value." - ); - } - } - final_str - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs deleted file mode 100644 index e232300718b..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -mod base_conv; -pub mod cninetyninehexfloatf; -pub mod decf; -mod float_common; -pub mod floatf; -pub mod intf; -pub mod scif; diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs deleted file mode 100644 index c871dc4e552..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -//! formatter for %e %E scientific notation subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct Scif; - -impl Scif { - pub fn new() -> Self { - Self - } -} -impl Formatter for Scif { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize + 1), - None, - false, - ); - let f = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - Some(*field.field_char == 'E'), - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/mod.rs deleted file mode 100644 index d40cf92deff..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod format_field; -mod formatter; -mod formatters; -pub mod num_format; diff --git a/src/uucore/src/lib/features/tokenize/num_format/num_format.rs b/src/uucore/src/lib/features/tokenize/num_format/num_format.rs deleted file mode 100644 index c9b1178b6ac..00000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/num_format.rs +++ /dev/null @@ -1,271 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -//! handles creating printed output for numeric substitutions - -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -use std::env; -use std::vec::Vec; - -use crate::display::Quotable; -use crate::{show_error, show_warning}; - -use super::format_field::{FieldType, FormatField}; -use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix}; -use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf; -use super::formatters::decf::Decf; -use super::formatters::floatf::Floatf; -use super::formatters::intf::Intf; -use super::formatters::scif::Scif; - -pub fn warn_expected_numeric(pf_arg: &str) { - // important: keep println here not print - show_error!("{}: expected a numeric value", pf_arg.maybe_quote()); -} - -// when character constant arguments have excess characters -// issue a warning when POSIXLY_CORRECT is not set -fn warn_char_constant_ign(remaining_bytes: &[u8]) { - match env::var("POSIXLY_CORRECT") { - Ok(_) => {} - Err(e) => { - if let env::VarError::NotPresent = e { - show_warning!( - "{:?}: character(s) following character \ - constant have been ignored", - remaining_bytes - ); - } - } - } -} - -// this function looks at the first few -// characters of an argument and returns a value if we can learn -// a value from that (e.g. no argument? return 0, char constant? ret value) -fn get_provided(str_in_opt: Option<&String>) -> Option { - const C_S_QUOTE: u8 = 39; - const C_D_QUOTE: u8 = 34; - match str_in_opt { - Some(str_in) => { - let mut byte_it = str_in.bytes(); - if let Some(ch) = byte_it.next() { - match ch { - C_S_QUOTE | C_D_QUOTE => { - Some(match byte_it.next() { - Some(second_byte) => { - let mut ignored: Vec = Vec::new(); - for cont in byte_it { - ignored.push(cont); - } - if !ignored.is_empty() { - warn_char_constant_ign(&ignored); - } - second_byte - } - // no byte after quote - None => { - let so_far = (ch as char).to_string(); - warn_expected_numeric(&so_far); - 0_u8 - } - }) - } - // first byte is not quote - _ => None, // no first byte - } - } else { - Some(0_u8) - } - } - None => Some(0), - } -} - -// takes a string and returns -// a sign, -// a base, -// and an offset for index after all -// initial spacing, sign, base prefix, and leading zeroes -#[allow(clippy::cognitive_complexity)] -fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix { - let mut str_it = str_in.chars(); - let mut ret = InitialPrefix { - radix_in: Base::Ten, - sign: 1, - offset: 0, - }; - let mut top_char = str_it.next(); - // skip spaces and ensure top_char is the first non-space char - // (or None if none exists) - while let Some(' ') = top_char { - ret.offset += 1; - top_char = str_it.next(); - } - // parse sign - match top_char { - Some('+') => { - ret.offset += 1; - top_char = str_it.next(); - } - Some('-') => { - ret.sign = -1; - ret.offset += 1; - top_char = str_it.next(); - } - _ => {} - } - // we want to exit with offset being - // the index of the first non-zero - // digit before the decimal point or - // if there is none, the zero before the - // decimal point, or, if there is none, - // the decimal point. - - // while we are determining the offset - // we will ensure as a convention - // the offset is always on the first character - // that we are yet unsure if it is the - // final offset. If the zero could be before - // a decimal point we don't move past the zero. - let mut is_hex = false; - if Some('0') == top_char { - if let Some(base) = str_it.next() { - // lead zeroes can only exist in - // octal and hex base - let mut do_clean_lead_zeroes = false; - match base { - 'x' | 'X' => { - is_hex = true; - ret.offset += 2; - ret.radix_in = Base::Hex; - do_clean_lead_zeroes = true; - } - e @ '0'..='9' => { - ret.offset += 1; - if let FieldType::Intf = *field_type { - ret.radix_in = Base::Octal; - } - if e == '0' { - do_clean_lead_zeroes = true; - } - } - _ => {} - } - if do_clean_lead_zeroes { - let mut first = true; - for ch_zero in str_it { - // see notes on offset above: - // this is why the offset for octal and decimal numbers - // that reach this branch is 1 even though - // they have already eaten the characters '00' - // this is also why when hex encounters its - // first zero it does not move its offset - // forward because it does not know for sure - // that it's current offset (of that zero) - // is not the final offset, - // whereas at that point octal knows its - // current offset is not the final offset. - match ch_zero { - '0' => { - if !(is_hex && first) { - ret.offset += 1; - } - } - // if decimal, keep last zero if one exists - // (it's possible for last zero to - // not exist at this branch if we're in hex input) - '.' => break, - // other digit, etc. - _ => { - if !(is_hex && first) { - ret.offset += 1; - } - break; - } - } - if first { - first = false; - } - } - } - } - } - ret -} - -// this is the function a Sub's print will delegate to -// if it is a numeric field, passing the field details -// and an iterator to the argument -pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option { - let field_char = field.field_char; - - // num format mainly operates by further delegating to one of - // several Formatter structs depending on the field - // see formatter.rs for more details - - // to do switch to static dispatch - let formatter: Box = match *field.field_type { - FieldType::Intf => Box::new(Intf::new()), - FieldType::Floatf => Box::new(Floatf::new()), - FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()), - FieldType::Scif => Box::new(Scif::new()), - FieldType::Decf => Box::new(Decf::new()), - _ => { - panic!("asked to do num format with non-num field type"); - } - }; - let prim_opt= - // if we can get an assumed value from looking at the first - // few characters, use that value to create the FormatPrimitive - if let Some(provided_num) = get_provided(in_str_opt) { - let mut tmp = FormatPrimitive::default(); - match field_char { - 'u' | 'i' | 'd' => { - tmp.pre_decimal = Some( - format!("{provided_num}")); - }, - 'x' | 'X' => { - tmp.pre_decimal = Some( - format!("{provided_num:x}")); - }, - 'o' => { - tmp.pre_decimal = Some( - format!("{provided_num:o}")); - }, - 'e' | 'E' | 'g' | 'G' => { - let as_str = format!("{provided_num}"); - let initial_prefix = get_initial_prefix( - &as_str, - field.field_type - ); - tmp=formatter.get_primitive(field, &initial_prefix, &as_str) - .expect("err during default provided num"); - }, - _ => { - tmp.pre_decimal = Some( - format!("{provided_num}")); - tmp.post_decimal = Some(String::from("0")); - } - } - Some(tmp) - } else { - // otherwise we'll interpret the argument as a number - // using the appropriate Formatter - let in_str = in_str_opt.expect( - "please send the devs this message: - \n get_provided is failing to ret as Some(0) on no str "); - // first get information about the beginning of the - // numeric argument that would be useful for - // any formatter (int or float) - let initial_prefix = get_initial_prefix( - in_str, - field.field_type - ); - // then get the FormatPrimitive from the Formatter - formatter.get_primitive(field, &initial_prefix, in_str) - }; - // if we have a formatPrimitive, print its results - // according to the field-char appropriate Formatter - prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone())) -} diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs deleted file mode 100644 index 5bdb24dc633..00000000000 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ /dev/null @@ -1,452 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! Sub is a token that represents a -//! segment of the format string that is a substitution -//! it is created by Sub's implementation of the Tokenizer trait -//! Subs which have numeric field chars make use of the num_format -//! submodule -use crate::error::{UError, UResult}; -use itertools::{put_back_n, PutBackN}; -use std::error::Error; -use std::fmt::Display; -use std::io::Write; -use std::iter::Peekable; -use std::process::exit; -use std::slice::Iter; -use std::str::Chars; -// use std::collections::HashSet; - -use super::num_format::format_field::{FieldType, FormatField}; -use super::num_format::num_format; -use super::token; -use super::unescaped_text::UnescapedText; - -const EXIT_ERR: i32 = 1; - -#[derive(Debug)] -pub enum SubError { - InvalidSpec(String), -} - -impl Display for SubError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - match self { - Self::InvalidSpec(s) => write!(f, "%{s}: invalid conversion specification"), - } - } -} - -impl Error for SubError {} - -impl UError for SubError {} - -fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize { - // this is a costly way to parse the - // args used for asterisk values into integers - // from various bases. Actually doing it correctly - // (going through the pipeline to intf, but returning - // the integer instead of writing it to string and then - // back) is on the refactoring TODO - let field_type = FieldType::Intf; - let field_char = 'i'; - let field_info = FormatField { - min_width: Some(0), - second_field: Some(0), - orig: &asterisk_arg.to_string(), - field_type: &field_type, - field_char: &field_char, - }; - num_format::num_format(&field_info, Some(&asterisk_arg.to_string())) - .unwrap() - .parse::() - .unwrap() -} - -pub enum CanAsterisk { - Fixed(T), - Asterisk, -} - -// Sub is a tokenizer which creates tokens -// for substitution segments of a format string -pub struct Sub { - min_width: CanAsterisk>, - second_field: CanAsterisk>, - field_char: char, - field_type: FieldType, - orig: String, - prefix_char: char, -} -impl Sub { - pub fn new( - min_width: CanAsterisk>, - second_field: CanAsterisk>, - field_char: char, - orig: String, - prefix_char: char, - ) -> Self { - // for more dry printing, field characters are grouped - // in initialization of token. - let field_type = match field_char { - 's' | 'b' => FieldType::Strf, - 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, - 'f' | 'F' => FieldType::Floatf, - 'a' | 'A' => FieldType::CninetyNineHexFloatf, - 'e' | 'E' => FieldType::Scif, - 'g' | 'G' => FieldType::Decf, - 'c' => FieldType::Charf, - _ => { - // should be unreachable. - println!("Invalid field type"); - exit(EXIT_ERR); - } - }; - Self { - min_width, - second_field, - field_char, - field_type, - orig, - prefix_char, - } - } -} - -#[derive(Default)] -pub(crate) struct SubParser { - min_width_tmp: Option, - min_width_is_asterisk: bool, - past_decimal: bool, - second_field_tmp: Option, - second_field_is_asterisk: bool, - specifiers_found: bool, - field_char: Option, - text_so_far: String, -} - -impl SubParser { - fn new() -> Self { - Self::default() - } - pub(crate) fn from_it( - writer: &mut W, - it: &mut PutBackN, - args: &mut Peekable>, - ) -> UResult> - where - W: Write, - { - let mut parser = Self::new(); - if parser.sub_vals_retrieved(it)? { - let t = Self::build_token(parser); - t.write(writer, args); - Ok(Some(t)) - } else { - Ok(None) - } - } - fn build_token(parser: Self) -> token::Token { - // not a self method so as to allow move of sub-parser vals. - // return new Sub struct as token - let prefix_char = match &parser.min_width_tmp { - Some(width) if width.starts_with('0') => '0', - _ => ' ', - }; - - token::Token::Sub(Sub::new( - if parser.min_width_is_asterisk { - CanAsterisk::Asterisk - } else { - CanAsterisk::Fixed( - parser - .min_width_tmp - .map(|x| x.parse::().unwrap_or(1)), - ) - }, - if parser.second_field_is_asterisk { - CanAsterisk::Asterisk - } else { - CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::().unwrap())) - }, - parser.field_char.unwrap(), - parser.text_so_far, - prefix_char, - )) - } - #[allow(clippy::cognitive_complexity)] - fn sub_vals_retrieved(&mut self, it: &mut PutBackN) -> UResult { - if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? { - return Ok(false); - } - // this fn in particular is much longer than it needs to be - // .could get a lot - // of code savings just by cleaning it up. shouldn't use a regex - // though, as we want to mimic the original behavior of printing - // the field as interpreted up until the error in the field. - - let mut legal_fields = vec![ - // 'a', 'A', //c99 hex float implementation not yet complete - 'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 's', 'u', 'x', 'X', - ]; - let mut specifiers = vec!['h', 'j', 'l', 'L', 't', 'z']; - legal_fields.sort_unstable(); - specifiers.sort_unstable(); - - // divide substitution from %([0-9]+)?(.[0-9+])?([a-zA-Z]) - // into min_width, second_field, field_char - for ch in it { - self.text_so_far.push(ch); - match ch { - '-' | '*' | '0'..='9' => { - if self.past_decimal { - // second field should never have a - // negative value - if self.second_field_is_asterisk || ch == '-' || self.specifiers_found { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - if self.second_field_tmp.is_none() { - self.second_field_tmp = Some(String::new()); - } - match self.second_field_tmp.as_mut() { - Some(x) => { - if ch == '*' && !x.is_empty() { - return Err( - SubError::InvalidSpec(self.text_so_far.clone()).into() - ); - } - if ch == '*' { - self.second_field_is_asterisk = true; - } - x.push(ch); - } - None => { - panic!("should be unreachable"); - } - } - } else { - if self.min_width_is_asterisk || self.specifiers_found { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - if self.min_width_tmp.is_none() { - self.min_width_tmp = Some(String::new()); - } - match self.min_width_tmp.as_mut() { - Some(x) => { - if (ch == '-' || ch == '*') && !x.is_empty() { - return Err( - SubError::InvalidSpec(self.text_so_far.clone()).into() - ); - } - if ch == '*' { - self.min_width_is_asterisk = true; - } - x.push(ch); - } - None => { - panic!("should be unreachable"); - } - } - } - } - '.' => { - if self.past_decimal { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } else { - self.past_decimal = true; - } - } - x if legal_fields.binary_search(&x).is_ok() => { - self.field_char = Some(ch); - self.text_so_far.push(ch); - break; - } - x if specifiers.binary_search(&x).is_ok() => { - if !self.past_decimal { - self.past_decimal = true; - } - if !self.specifiers_found { - self.specifiers_found = true; - } - } - _ => { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - } - } - if self.field_char.is_none() { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - let field_char_retrieved = self.field_char.unwrap(); - if self.past_decimal && self.second_field_tmp.is_none() { - self.second_field_tmp = Some(String::from("0")); - } - self.validate_field_params(field_char_retrieved)?; - // if the dot is provided without a second field - // printf interprets it as 0. - if let Some(x) = self.second_field_tmp.as_mut() { - if x.is_empty() { - self.min_width_tmp = Some(String::from("0")); - } - } - - Ok(true) - } - fn successfully_eat_prefix( - it: &mut PutBackN, - text_so_far: &mut String, - ) -> UResult { - // get next two chars, - // if they're '%%' we're not tokenizing it - // else put chars back - let preface = it.next(); - let n_ch = it.next(); - if preface == Some('%') && n_ch != Some('%') { - match n_ch { - Some(x) => { - it.put_back(x); - Ok(true) - } - None => { - text_so_far.push('%'); - Err(SubError::InvalidSpec(text_so_far.clone()).into()) - } - } - } else { - if let Some(x) = n_ch { - it.put_back(x); - }; - if let Some(x) = preface { - it.put_back(x); - }; - Ok(false) - } - } - fn validate_field_params(&self, field_char: char) -> UResult<()> { - // check for illegal combinations here when possible vs - // on each application so we check less per application - // to do: move these checks to Sub::new - if (field_char == 's' && self.min_width_tmp == Some(String::from("0"))) - || (field_char == 'c' - && (self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) - || (field_char == 'b' - && (self.min_width_tmp.is_some() - || self.past_decimal - || self.second_field_tmp.is_some())) - { - // invalid string substitution - // to do: include information about an invalid - // string substitution - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - Ok(()) - } -} - -impl Sub { - #[allow(clippy::cognitive_complexity)] - pub(crate) fn write(&self, writer: &mut W, pf_args_it: &mut Peekable>) - where - W: Write, - { - let field = FormatField { - min_width: match self.min_width { - CanAsterisk::Fixed(x) => x, - CanAsterisk::Asterisk => { - match pf_args_it.next() { - // temporary, use intf.rs instead - Some(x) => Some(convert_asterisk_arg_int(x)), - None => Some(0), - } - } - }, - second_field: match self.second_field { - CanAsterisk::Fixed(x) => x, - CanAsterisk::Asterisk => { - match pf_args_it.next() { - // temporary, use intf.rs instead - Some(x) => { - let result = convert_asterisk_arg_int(x); - if result < 0 { - None - } else { - Some(result as u32) - } - } - None => Some(0), - } - } - }, - field_char: &self.field_char, - field_type: &self.field_type, - orig: &self.orig, - }; - let pf_arg = pf_args_it.next(); - - // minimum width is handled independently of actual - // field char - let pre_min_width_opt: Option = match *field.field_type { - // if %s just return arg - // if %b use UnescapedText module's unescape-fn - // if %c return first char of arg - FieldType::Strf | FieldType::Charf => { - match pf_arg { - Some(arg_string) => { - match *field.field_char { - 's' => Some(match field.second_field { - Some(max) => String::from(&arg_string[..max as usize]), - None => arg_string.clone(), - }), - 'b' => { - let mut a_it = put_back_n(arg_string.chars()); - UnescapedText::from_it_core(writer, &mut a_it, true); - None - } - // for 'c': get iter of string vals, - // get opt of first val - // and map it to opt - /* 'c' | */ - _ => arg_string.chars().next().map(|x| x.to_string()), - } - } - None => None, - } - } - _ => { - // non string/char fields are delegated to num_format - num_format::num_format(&field, pf_arg) - } - }; - if let Some(pre_min_width) = pre_min_width_opt { - // if have a string, print it, ensuring minimum width is met. - write!( - writer, - "{}", - match field.min_width { - Some(min_width) => { - let diff: isize = min_width.abs() - pre_min_width.len() as isize; - if diff > 0 { - let mut final_str = String::new(); - // definitely more efficient ways - // to do this. - let pad_before = min_width > 0; - if !pad_before { - final_str.push_str(&pre_min_width); - } - for _ in 0..diff { - final_str.push(self.prefix_char); - } - if pad_before { - final_str.push_str(&pre_min_width); - } - final_str - } else { - pre_min_width - } - } - None => pre_min_width, - } - ) - .ok(); - } - } -} diff --git a/src/uucore/src/lib/features/tokenize/token.rs b/src/uucore/src/lib/features/tokenize/token.rs deleted file mode 100644 index b522c99a4e0..00000000000 --- a/src/uucore/src/lib/features/tokenize/token.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! Traits and enums dealing with Tokenization of printf Format String -use std::io::Write; -use std::iter::Peekable; -use std::slice::Iter; - -use crate::features::tokenize::sub::Sub; -use crate::features::tokenize::unescaped_text::UnescapedText; - -// A token object is an object that can print the expected output -// of a contiguous segment of the format string, and -// requires at most 1 argument -pub enum Token { - Sub(Sub), - UnescapedText(UnescapedText), -} - -impl Token { - pub(crate) fn write(&self, writer: &mut W, args: &mut Peekable>) - where - W: Write, - { - match self { - Self::Sub(sub) => sub.write(writer, args), - Self::UnescapedText(unescaped_text) => unescaped_text.write(writer), - } - } -} - -// A tokenizer object is an object that takes an iterator -// at a position in a format string, and sees whether -// it can return a token of a type it knows how to produce -// if so, return the token, move the iterator past the -// format string text the token represents, and if an -// argument is used move the argument iter forward one - -// creating token of a format string segment should also cause -// printing of that token's value. Essentially tokenizing -// a whole format string will print the format string and consume -// a number of arguments equal to the number of argument-using tokens diff --git a/src/uucore/src/lib/features/tokenize/unescaped_text.rs b/src/uucore/src/lib/features/tokenize/unescaped_text.rs deleted file mode 100644 index 29c657ed863..00000000000 --- a/src/uucore/src/lib/features/tokenize/unescaped_text.rs +++ /dev/null @@ -1,279 +0,0 @@ -//! UnescapedText is a tokenizer impl -//! for tokenizing character literals, -//! and escaped character literals (of allowed escapes), -//! into an unescaped text byte array - -// spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice - -use itertools::PutBackN; -use std::char::from_u32; -use std::io::Write; -use std::process::exit; -use std::str::Chars; - -use super::token; - -const EXIT_OK: i32 = 0; -const EXIT_ERR: i32 = 1; - -// by default stdout only flushes -// to console when a newline is passed. -macro_rules! write_and_flush { - ($writer:expr, $($args:tt)+) => ({ - write!($writer, "{}", $($args)+).ok(); - $writer.flush().ok(); - }) -} - -fn flush_bytes(writer: &mut W, bslice: &[u8]) -where - W: Write, -{ - writer.write_all(bslice).ok(); - writer.flush().ok(); -} - -#[derive(Default)] -pub struct UnescapedText(Vec); -impl UnescapedText { - fn new() -> Self { - Self::default() - } - // take an iterator to the format string - // consume between min and max chars - // and return it as a base-X number - fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN) -> u32 { - let mut retval: u32 = 0; - let mut found = 0; - while found < max_chars { - // if end of input break - let nc = it.next(); - match nc { - Some(digit) => { - // if end of hexchars break - match digit.to_digit(base) { - Some(d) => { - found += 1; - retval *= base; - retval += d; - } - None => { - it.put_back(digit); - break; - } - } - } - None => { - break; - } - } - } - if found < min_chars { - // only ever expected for hex - println!("missing hexadecimal number in escape"); //todo stderr - exit(EXIT_ERR); - } - retval - } - // validates against valid - // IEC 10646 vals - these values - // are pinned against the more popular - // printf so as to not disrupt when - // dropped-in as a replacement. - fn validate_iec(val: u32, eight_word: bool) { - let mut preface = 'u'; - let leading_zeros = if eight_word { - preface = 'U'; - 8 - } else { - 4 - }; - let err_msg = format!("invalid universal character name {preface}{val:0leading_zeros$x}"); - if (val < 159 && (val != 36 && val != 64 && val != 96)) || (val > 55296 && val < 57343) { - println!("{err_msg}"); //todo stderr - exit(EXIT_ERR); - } - } - // pass an iterator that succeeds an '/', - // and process the remaining character - // adding the unescaped bytes - // to the passed byte_vec - // in subs_mode change octal behavior - fn handle_escaped( - writer: &mut W, - byte_vec: &mut Vec, - it: &mut PutBackN, - subs_mode: bool, - ) where - W: Write, - { - let ch = it.next().unwrap_or('\\'); - match ch { - '0'..='9' | 'x' => { - let min_len = 1; - let mut max_len = 2; - let mut base = 16; - let ignore = false; - match ch { - 'x' => {} - e @ '0'..='9' => { - max_len = 3; - base = 8; - // in practice, gnu coreutils printf - // interprets octals without a - // leading zero in %b - // but it only skips leading zeros - // in %b mode. - // if we ever want to match gnu coreutil - // printf's docs instead of its behavior - // we'd set this to true. - // if subs_mode && e != '0' - // { ignore = true; } - if !subs_mode || e != '0' { - it.put_back(ch); - } - } - _ => {} - } - if ignore { - byte_vec.push(ch as u8); - } else { - let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8; - byte_vec.push(val); - let bvec = [val]; - flush_bytes(writer, &bvec); - } - } - e => { - // only for hex and octal - // is byte encoding specified. - // otherwise, why not leave the door open - // for other encodings unless it turns out - // a bottleneck. - let mut s = String::new(); - let ch = match e { - '\\' => '\\', - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - // bell - 'a' => '\x07', - // backspace - 'b' => '\x08', - // vertical tab - 'v' => '\x0B', - // form feed - 'f' => '\x0C', - // escape character - 'e' => '\x1B', - 'c' => exit(EXIT_OK), - 'u' | 'U' => { - let len = match e { - 'u' => 4, - /* 'U' | */ _ => 8, - }; - let val = Self::base_to_u32(len, len, 16, it); - Self::validate_iec(val, false); - if let Some(c) = from_u32(val) { - c - } else { - '-' - } - } - _ => { - s.push('\\'); - ch - } - }; - s.push(ch); - write_and_flush!(writer, &s); - byte_vec.extend(s.bytes()); - } - }; - } - - // take an iterator to a string, - // and return a wrapper around a Vec of unescaped bytes - // break on encounter of sub symbol ('%[^%]') unless called - // through %b subst. - #[allow(clippy::cognitive_complexity)] - pub fn from_it_core( - writer: &mut W, - it: &mut PutBackN, - subs_mode: bool, - ) -> Option - where - W: Write, - { - let mut addchar = false; - let mut new_text = Self::new(); - let mut tmp_str = String::new(); - { - let new_vec: &mut Vec = &mut (new_text.0); - while let Some(ch) = it.next() { - if !addchar { - addchar = true; - } - match ch { - x if x != '\\' && x != '%' => { - // lazy branch eval - // remember this fn could be called - // many times in a single exec through %b - write_and_flush!(writer, ch); - tmp_str.push(ch); - } - '\\' => { - // the literal may be a literal bytecode - // and not valid utf-8. Str only supports - // valid utf-8. - // if we find the unnecessary drain - // on non hex or octal escapes is costly - // then we can make it faster/more complex - // with as-necessary draining. - if !tmp_str.is_empty() { - new_vec.extend(tmp_str.bytes()); - tmp_str = String::new(); - } - Self::handle_escaped(writer, new_vec, it, subs_mode); - } - x if x == '%' && !subs_mode => { - if let Some(follow) = it.next() { - if follow == '%' { - write_and_flush!(writer, ch); - tmp_str.push(ch); - } else { - it.put_back(follow); - it.put_back(ch); - break; - } - } else { - it.put_back(ch); - break; - } - } - _ => { - write_and_flush!(writer, ch); - tmp_str.push(ch); - } - } - } - if !tmp_str.is_empty() { - new_vec.extend(tmp_str.bytes()); - } - } - if addchar { - Some(token::Token::UnescapedText(new_text)) - } else { - None - } - } -} -impl UnescapedText { - pub(crate) fn write(&self, writer: &mut W) - where - W: Write, - { - flush_bytes(writer, &self.0[..]); - } -} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index ca9a48d258a..7f5cc99db34 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -44,8 +44,8 @@ pub use crate::features::fs; pub use crate::features::fsext; #[cfg(feature = "lines")] pub use crate::features::lines; -#[cfg(feature = "memo")] -pub use crate::features::memo; +#[cfg(feature = "format")] +pub use crate::features::format; #[cfg(feature = "ringbuffer")] pub use crate::features::ringbuffer; #[cfg(feature = "sum")] From 66eb64e41f1da4c32d692da957847c09eb478e17 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 2 Aug 2023 23:57:53 +0200 Subject: [PATCH 004/429] dd, printf, seq: update to new printf --- src/uu/dd/Cargo.toml | 2 +- src/uu/dd/src/progress.rs | 2 +- src/uu/printf/Cargo.toml | 2 +- src/uu/printf/src/printf.rs | 2 +- src/uu/seq/Cargo.toml | 2 +- src/uu/seq/src/seq.rs | 19 +++++-------------- 6 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml index 0a69ae37435..aa19dc760e1 100644 --- a/src/uu/dd/Cargo.toml +++ b/src/uu/dd/Cargo.toml @@ -18,7 +18,7 @@ path = "src/dd.rs" clap = { workspace = true } gcd = { workspace = true } libc = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["format"] } [target.'cfg(any(target_os = "linux"))'.dependencies] nix = { workspace = true, features = ["fs"] } diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index a9d29ff6325..674d90984ca 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -14,7 +14,7 @@ use std::sync::mpsc; use std::time::Duration; use uucore::error::UResult; -use uucore::memo::sprintf; +use uucore::format::sprintf; use crate::numbers::{to_magnitude_and_suffix, SuffixType}; diff --git a/src/uu/printf/Cargo.toml b/src/uu/printf/Cargo.toml index eefcf33c07e..9acd2c78c9d 100644 --- a/src/uu/printf/Cargo.toml +++ b/src/uu/printf/Cargo.toml @@ -16,7 +16,7 @@ path = "src/printf.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["format"] } [[bin]] name = "printf" diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index bf79369ccab..042e0932eaf 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -4,7 +4,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use uucore::error::{UResult, UUsageError}; -use uucore::memo::printf; +use uucore::format::printf; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; diff --git a/src/uu/seq/Cargo.toml b/src/uu/seq/Cargo.toml index 2646d36095d..41b42ef83a4 100644 --- a/src/uu/seq/Cargo.toml +++ b/src/uu/seq/Cargo.toml @@ -20,7 +20,7 @@ bigdecimal = { workspace = true } clap = { workspace = true } num-bigint = { workspace = true } num-traits = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["format"] } [[bin]] name = "seq" diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 2e55efa4ada..0dd65fc3f24 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -4,15 +4,12 @@ // * file that was distributed with this source code. // spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse use std::io::{stdout, ErrorKind, Write}; -use std::process::exit; use clap::{crate_version, Arg, ArgAction, Command}; use num_traits::Zero; -use uucore::error::FromIo; use uucore::error::UResult; -use uucore::memo::printf; -use uucore::show; +use uucore::format::printf; use uucore::{format_usage, help_about, help_usage}; mod error; @@ -251,7 +248,7 @@ fn print_seq( pad: bool, padding: usize, format: Option<&str>, -) -> std::io::Result<()> { +) -> UResult<()> { let stdout = stdout(); let mut stdout = stdout.lock(); let (first, increment, last) = range; @@ -277,10 +274,7 @@ fn print_seq( match format { Some(f) => { let s = format!("{value}"); - if let Err(x) = printf(f, &[s]) { - show!(x); - exit(1); - } + printf(f, &[s])?; } None => write_value_float( &mut stdout, @@ -322,7 +316,7 @@ fn print_seq_integers( pad: bool, padding: usize, format: Option<&str>, -) -> std::io::Result<()> { +) -> UResult<()> { let stdout = stdout(); let mut stdout = stdout.lock(); let (first, increment, last) = range; @@ -342,10 +336,7 @@ fn print_seq_integers( match format { Some(f) => { let s = format!("{value}"); - if let Err(x) = printf(f, &[s]) { - show!(x); - exit(1); - } + printf(f, &[s])?; } None => write_value_int(&mut stdout, &value, padding, pad, is_first_iteration)?, } From 407bccc54f48dc54ab28fdc10da0c4ecd9c5c805 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 10 Aug 2023 23:20:20 +0200 Subject: [PATCH 005/429] some more work on printf spec --- src/uucore/src/lib/features/format/spec.rs | 78 +++++++++++++++++----- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 4319e44d93f..d1786c3d36f 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -358,20 +358,28 @@ impl Spec { return Err(FormatError::InvalidArgument(arg)); }; - match positive_sign { - PositiveSign::None => Ok(()), - PositiveSign::Plus => write!(writer, "+"), - PositiveSign::Space => write!(writer, " "), + if f.is_sign_positive() { + match positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; } - .map_err(FormatError::IoError)?; let s = match variant { - FloatVariant::Decimal => format_float_decimal(f, precision, case, force_decimal), + FloatVariant::Decimal => { + format_float_decimal(f, precision, case, force_decimal) + } FloatVariant::Scientific => { format_float_scientific(f, precision, case, force_decimal) } - FloatVariant::Shortest => format_float_shortest(f, precision, case, force_decimal), - FloatVariant::Hexadecimal => todo!(), + FloatVariant::Shortest => { + format_float_shortest(f, precision, case, force_decimal) + } + FloatVariant::Hexadecimal => { + format_float_hexadecimal(f, precision, case, force_decimal) + } }; match alignment { @@ -385,6 +393,15 @@ impl Spec { } } +fn format_float_nonfinite(f: f64, case: Case) -> String { + debug_assert!(!f.is_finite()); + let mut s = format!("{f}"); + if case == Case::Uppercase { + s.make_ascii_uppercase(); + } + return s; +} + fn format_float_decimal( f: f64, precision: usize, @@ -392,11 +409,7 @@ fn format_float_decimal( force_decimal: ForceDecimal, ) -> String { if !f.is_finite() { - let mut s = format!("{f}"); - if case == Case::Lowercase { - s.make_ascii_uppercase(); - } - return s; + return format_float_nonfinite(f, case); } if precision == 0 && force_decimal == ForceDecimal::Yes { @@ -414,11 +427,7 @@ fn format_float_scientific( ) -> String { // If the float is NaN, -Nan, Inf or -Inf, format like any other float if !f.is_finite() { - let mut s = format!("{f}"); - if case == Case::Lowercase { - s.make_ascii_uppercase(); - } - return s; + return format_float_nonfinite(f, case); } let exponent: i32 = f.log10().floor() as i32; @@ -458,6 +467,39 @@ fn format_float_shortest( } } +fn format_float_hexadecimal( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + if !f.is_finite() { + return format_float_nonfinite(f, case); + } + + let (first_digit, mantissa, exponent) = if f == 0.0 { + (0, 0, 0) + } else { + let bits = f.to_bits(); + let exponent_bits = ((bits >> 52) & 0x7fff) as i64; + let exponent = exponent_bits - 1023; + let mantissa = bits & 0xf_ffff_ffff_ffff; + (1, mantissa, exponent) + }; + + let mut s = match (precision, force_decimal) { + (0, ForceDecimal::No) => format!("0x{first_digit}p{exponent:+x}"), + (0, ForceDecimal::Yes) => format!("0x{first_digit}.p{exponent:+x}"), + _ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+x}") + }; + + if case == Case::Uppercase { + s.make_ascii_uppercase(); + } + + return s; +} + fn resolve_asterisk( option: Option>, args: impl Iterator, From df6193f84c62666d9ba79ec9bf3bb3adb3f1bc36 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 24 Sep 2023 15:40:35 +0200 Subject: [PATCH 006/429] clean up deps.nu --- util/deps.nu | 81 ++++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/util/deps.nu b/util/deps.nu index a35e2d98c96..368048fff45 100644 --- a/util/deps.nu +++ b/util/deps.nu @@ -20,11 +20,11 @@ # - `normal_dep`: whether the crate is a normal dependency. # - `build_dep`: whether the crate is a build dependency. # - `dev_dep`: whether the crate is a dev dependency. -# - `organisation`: the GitHub/GitLab organisation or user of the repository of the crate. +# - `organization`: the GitHub/GitLab organization or user of the repository of the crate. # - `repository_name`: the name of the repository the crate is in. The format is "{owner}/{repo}". # - `dependencies`: direct dependencies of the crate (in the format of Cargo.lock). # -# To use this script, start nushell (tested only on version 0.82.0), import the library and +# To use this script, start Nushell (tested only on version 0.82.0), import the library and # call `all_dep_info`: # # ``` @@ -33,19 +33,19 @@ # > let dep = (deps all_dep_info) # ``` # -# Then you can perform analysis. For example, to group the dependencies by organisation: +# Then you can perform analysis. For example, to group the dependencies by organization: # # ``` -# > $dep | group-by organisation +# > $dep | group-by organization # ``` # # Or to find all crates with multiple versions (like cargo deny): # ``` -# > $dep | where num_versions > 1 +# > $dep | where num_versions > 1 # ``` # # Ideas to expand this: -# +# # - Figure out the whole dependency graph # - Figure out which platforms and which features enable which crates # - Figure out which utils require which crates @@ -58,61 +58,37 @@ # - Check the number of owners/contributors # - Make a webpage to more easily explore the data -# Read the packages a Cargo.lock file -def read_lockfile [name: path] { - open $name | from toml | get package -} - # Read the names output by cargo tree -export def read_tree_names [edges: string, features: string] { - cargo tree -e $edges --features $features - | rg "[a-zA-Z0-9_-]+ v[0-9.]+" -o - | lines - | each {|x| parse_name_and_version $x } -} - -def parse_name_and_version [s: string] { - let s = ($s | split row " ") - - let name = $s.0 - let version = if ($s | length) > 1 { - $s.1 | str substring 1.. - } else { - "" - } - - {name: $name, version: $version} +export def read_tree_names [edge_kind: string, features: list]: any -> table<> { + cargo tree --edges $edge_kind --features ($features | str join ",") + | parse -r "(?P[a-zA-Z0-9_-]+) v(?P[0-9.]+)" } # Read the crates.io info for a list of crates names -def read_crates_io [names: list] { - let total = ($names | length) - $names | enumerate | par-each {|el| - let key = $el.index - let name = $el.item - print $"($key)/($total): ($name)" - http get $"https://crates.io/api/v1/crates/($name)" | get crate +def read_crates_io [names: list] -> any -> table<> { + let total = $names | length + $names | enumerate | par-each {|name| + print $"($name.index)/($total): ($name.item)" + http get $"https://crates.io/api/v1/crates/($name.item)" | get crate } } -def in_table [col_name, table] { - insert $col_name {|el| - $table - | any {|table_el| - $table_el.name == $el.name and $table_el.version == $el.version } - } -} - # Add column for a dependency type -def add_dep_type [dep_type: string, features: string] { - in_table $"($dep_type)_dep" (read_tree_names $dep_type $features) +def add_dep_type [dep_type: string, features: list]: table<> -> table<> { + let input_table = $in + let table = read_tree_names $dep_type $features + $input_table | insert $"($dep_type)_dep" {|outer| + $table | any {|inner| + $inner.name == $outer.name and $inner.version == $outer.version + } + } } export def all_dep_info [] { - let features = unix,feat_selinux - - let lock = (read_lockfile Cargo.lock) + let features = [unix, feat_selinux] + let lock = open Cargo.lock | from toml | get package + $lock # Add number of versions | join ($lock | group-by name | transpose | update column1 { length } | rename name num_versions) name @@ -124,10 +100,10 @@ export def all_dep_info [] { # Add crates.io info | join (read_crates_io ($lock.name | uniq)) name # Add GH org or user info - # The organisation is an indicator that crates should be treated as one dependency. - # However, there are also unrelated projects by a single organisation, so it's not + # The organization is an indicator that crates should be treated as one dependency. + # However, there are also unrelated projects by a single organization, so it's not # clear. - | insert organisation {|x| + | insert organization {|x| let repository = $x.repository? if ($repository == null) { "" } else { $repository | url parse | get path | path split | get 1 @@ -152,4 +128,3 @@ export def all_dep_info [] { } } } - From 94492c98a52bbb9b406ebe9599319796553d5300 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 20 Oct 2023 09:40:09 +0200 Subject: [PATCH 007/429] cp: --rem don't fail if dest is hardlink to source --- src/uu/cp/src/cp.rs | 13 +++++++++++-- tests/by-util/test_cp.rs | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index da9918365ff..5441e88ed39 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -33,8 +33,8 @@ use platform::copy_on_write; use uucore::display::Quotable; use uucore::error::{set_exit_code, UClapError, UError, UResult, UUsageError}; use uucore::fs::{ - canonicalize, is_symlink_loop, paths_refer_to_same_file, FileInformation, MissingHandling, - ResolveMode, + are_hardlinks_to_same_file, canonicalize, is_symlink_loop, paths_refer_to_same_file, + FileInformation, MissingHandling, ResolveMode, }; use uucore::{backup_control, update_control}; // These are exposed for projects (e.g. nushell) that want to create an `Options` value, which @@ -1657,6 +1657,15 @@ fn copy_file( } } + if are_hardlinks_to_same_file(source, dest) + && matches!( + options.overwrite, + OverwriteMode::Clobber(ClobberMode::RemoveDestination) + ) + { + fs::remove_file(dest)?; + } + if file_or_link_exists(dest) { handle_existing_dest(source, dest, options, source_in_command_line)?; } diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index b3cc3e0c12c..7d3ebfa709c 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -2827,6 +2827,24 @@ fn test_cp_mode_hardlink_no_dereference() { assert_eq!(at.read_symlink("z"), "slink"); } +#[cfg(not(any(windows, target_os = "android")))] +#[test] +fn test_remove_destination_with_destination_being_a_hardlink_to_source() { + let (at, mut ucmd) = at_and_ucmd!(); + let file = "file"; + let hardlink = "hardlink"; + + at.touch(file); + at.hard_link(file, hardlink); + + ucmd.args(&["--remove-destination", file, hardlink]) + .succeeds(); + + assert_eq!("", at.resolve_link(hardlink)); + assert!(at.file_exists(file)); + assert!(at.file_exists(hardlink)); +} + #[test] fn test_remove_destination_with_destination_being_a_symlink_to_source() { let (at, mut ucmd) = at_and_ucmd!(); From 69b7095eac172846ba62a511b096735ecb6b391e Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 28 Oct 2023 17:34:04 +0200 Subject: [PATCH 008/429] printf rewrite: fix compilation --- src/uu/dd/src/progress.rs | 6 ++- src/uu/printf/src/printf.rs | 8 ++-- src/uu/seq/src/seq.rs | 11 +++-- src/uucore/src/lib/features/format/mod.rs | 55 ++++++++++++++++++---- src/uucore/src/lib/features/format/spec.rs | 38 +++++++-------- 5 files changed, 80 insertions(+), 38 deletions(-) diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index f2472600927..1d9b7247d20 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -13,8 +13,8 @@ use std::io::Write; use std::sync::mpsc; use std::time::Duration; -use uucore::error::UResult; use uucore::format::sprintf; +use uucore::{error::UResult, format::FormatArgument}; use crate::numbers::{to_magnitude_and_suffix, SuffixType}; @@ -152,7 +152,9 @@ impl ProgUpdate { let (carriage_return, newline) = if rewrite { ("\r", "") } else { ("", "\n") }; // The duration should be formatted as in `printf %g`. - let duration_str = sprintf("%g", &[duration.to_string()])?; + // TODO: remove unwrap and make FormatError implement UError + let duration_str = sprintf("%g", &[FormatArgument::Float(duration)])?; + let duration_str = std::str::from_utf8(&duration_str).unwrap(); // If the number of bytes written is sufficiently large, then // print a more concise representation of the number, like diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 36b4c34535b..6e270ec2645 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -8,7 +8,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use uucore::error::{UResult, UUsageError}; -use uucore::format::printf; +use uucore::format::{printf, FormatArgument}; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; @@ -30,12 +30,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format_string = matches .get_one::(options::FORMATSTRING) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; - let values: Vec = match matches.get_many::(options::ARGUMENT) { - Some(s) => s.map(|s| s.to_string()).collect(), + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { + Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), None => vec![], }; - printf(format_string, &values[..])?; + printf(format_string, &values)?; Ok(()) } diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index f93ced9264d..217e9042833 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -9,7 +9,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use num_traits::Zero; use uucore::error::UResult; -use uucore::format::printf; +use uucore::format::{printf, FormatArgument}; use uucore::{format_usage, help_about, help_usage}; mod error; @@ -144,8 +144,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }; match result { Ok(_) => Ok(()), - Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), - Err(e) => Err(e.map_err_context(|| "write error".into())), + _ => todo!(), + // Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), + // Err(e) => Err(e.map_err_context(|| "write error".into())), } } @@ -270,7 +271,7 @@ fn print_seq( match format { Some(f) => { let s = format!("{value}"); - printf(f, &[s])?; + printf(f, &[FormatArgument::String(s)])?; } None => write_value_float(&mut stdout, &value, padding, largest_dec)?, } @@ -326,7 +327,7 @@ fn print_seq_integers( match format { Some(f) => { let s = format!("{value}"); - printf(f, &[s])?; + printf(f, &[FormatArgument::String(s)])?; } None => write_value_int(&mut stdout, &value, padding, pad)?, } diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index abd92011cc4..ebb1cc360eb 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -3,7 +3,7 @@ //! The [`printf`] and [`sprintf`] closely match the behavior of the //! corresponding C functions: the former renders a formatted string //! to stdout, the latter renders to a new [`String`] object. -//! +//! //! In addition to the [`printf`] and [`sprintf`] functions, we expose the //! [`Format`] struct, which represents a parsed format string. This reduces //! the need for parsing a format string multiple times and assures that no @@ -14,8 +14,15 @@ mod spec; use spec::Spec; -use std::io::{stdout, Write}; +use std::{ + error::Error, + fmt::Display, + io::{stdout, Write}, +}; + +use crate::error::UError; +#[derive(Debug)] pub enum FormatError { SpecError, IoError(std::io::Error), @@ -23,6 +30,21 @@ pub enum FormatError { InvalidArgument(FormatArgument), } +impl Error for FormatError {} +impl UError for FormatError {} + +impl Display for FormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // TODO: Be more precise about these + match self { + FormatError::SpecError => write!(f, "invalid spec"), + FormatError::IoError(_) => write!(f, "io error"), + FormatError::NoMoreArguments => write!(f, "no more arguments"), + FormatError::InvalidArgument(_) => write!(f, "invalid argument"), + } + } +} + /// A single item to format enum FormatItem { /// A format specifier @@ -30,21 +52,28 @@ enum FormatItem { /// Some plain text Text(Vec), /// A single character - /// + /// /// Added in addition to `Text` as an optimization. Char(u8), } +#[derive(Clone, Debug)] pub enum FormatArgument { Char(char), String(String), UnsignedInt(u64), SignedInt(i64), Float(f64), + // Special argument that gets coerced into the other variants + Unparsed(String), } impl FormatItem { - fn write<'a>(&self, mut writer: impl Write, args: &mut impl Iterator) -> Result<(), FormatError> { + fn write<'a>( + &self, + mut writer: impl Write, + args: &mut impl Iterator, + ) -> Result<(), FormatError> { match self { FormatItem::Spec(spec) => spec.write(writer, args), FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), @@ -110,13 +139,20 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator) -> Result<(), FormatError> { +pub fn printf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result<(), FormatError> { printf_writer(stdout(), format_string, arguments) } -fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIterator) -> Result<(), FormatError> { +fn printf_writer<'a>( + mut writer: impl Write, + format_string: impl AsRef<[u8]>, + args: impl IntoIterator, +) -> Result<(), FormatError> { let mut args = args.into_iter(); - for item in parse_iter(format_string) { + for item in parse_iter(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } Ok(()) @@ -137,7 +173,10 @@ fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIt /// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); /// assert_eq!(s, "hello world".to_string()); /// ``` -pub fn sprintf(format_string: &[u8], arguments: impl IntoIterator) -> Result, FormatError> { +pub fn sprintf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result, FormatError> { let mut writer = Vec::new(); printf_writer(&mut writer, format_string, arguments)?; Ok(writer) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index d1786c3d36f..c1eb7856db1 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -257,7 +257,7 @@ impl Spec { pub fn write<'a>( &self, mut writer: impl Write, - mut args: impl Iterator, + mut args: impl Iterator, ) -> Result<(), FormatError> { match self { &Spec::Char { width, align_left } => { @@ -265,7 +265,7 @@ impl Spec { let arg = next_arg(&mut args)?; match arg { FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg)), + _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::String { width, align_left } => { @@ -273,7 +273,7 @@ impl Spec { let arg = next_arg(&mut args)?; match arg { FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg)), + _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::SignedInt { @@ -285,10 +285,10 @@ impl Spec { let arg = next_arg(&mut args)?; let FormatArgument::SignedInt(i) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; - if i >= 0 { + if *i >= 0 { match positive_sign { PositiveSign::None => Ok(()), PositiveSign::Plus => write!(writer, "+"), @@ -313,7 +313,7 @@ impl Spec { let arg = next_arg(args)?; let FormatArgument::SignedInt(i) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; let s = match variant { @@ -355,7 +355,7 @@ impl Spec { let arg = next_arg(args)?; let FormatArgument::Float(f) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; if f.is_sign_positive() { @@ -369,16 +369,16 @@ impl Spec { let s = match variant { FloatVariant::Decimal => { - format_float_decimal(f, precision, case, force_decimal) + format_float_decimal(*f, precision, case, force_decimal) } FloatVariant::Scientific => { - format_float_scientific(f, precision, case, force_decimal) + format_float_scientific(*f, precision, case, force_decimal) } FloatVariant::Shortest => { - format_float_shortest(f, precision, case, force_decimal) + format_float_shortest(*f, precision, case, force_decimal) } FloatVariant::Hexadecimal => { - format_float_hexadecimal(f, precision, case, force_decimal) + format_float_hexadecimal(*f, precision, case, force_decimal) } }; @@ -500,29 +500,29 @@ fn format_float_hexadecimal( return s; } -fn resolve_asterisk( +fn resolve_asterisk<'a>( option: Option>, - args: impl Iterator, + args: impl Iterator, ) -> Result, FormatError> { Ok(match option { None => None, Some(CanAsterisk::Asterisk) => { let arg = next_arg(args)?; match arg { - FormatArgument::UnsignedInt(u) => match usize::try_from(u) { + FormatArgument::UnsignedInt(u) => match usize::try_from(*u) { Ok(u) => Some(u), - Err(_) => return Err(FormatError::InvalidArgument(arg)), + Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), }, - _ => return Err(FormatError::InvalidArgument(arg)), + _ => return Err(FormatError::InvalidArgument(arg.clone())), } } Some(CanAsterisk::Fixed(w)) => Some(w), }) } -fn next_arg( - mut arguments: impl Iterator, -) -> Result { +fn next_arg<'a>( + mut arguments: impl Iterator, +) -> Result<&'a FormatArgument, FormatError> { arguments.next().ok_or(FormatError::NoMoreArguments) } From f117fc1bab8aaab4c26d41bad40c111904f5f9b6 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 28 Oct 2023 17:34:04 +0200 Subject: [PATCH 009/429] printf rewrite: fix compilation --- src/uu/dd/src/progress.rs | 6 ++- src/uu/printf/src/printf.rs | 8 ++-- src/uu/seq/src/seq.rs | 11 +++-- src/uucore/src/lib/features.rs | 8 +--- src/uucore/src/lib/features/format/mod.rs | 55 ++++++++++++++++++---- src/uucore/src/lib/features/format/spec.rs | 40 ++++++++-------- src/uucore/src/lib/lib.rs | 6 +-- 7 files changed, 85 insertions(+), 49 deletions(-) diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index f2472600927..1d9b7247d20 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -13,8 +13,8 @@ use std::io::Write; use std::sync::mpsc; use std::time::Duration; -use uucore::error::UResult; use uucore::format::sprintf; +use uucore::{error::UResult, format::FormatArgument}; use crate::numbers::{to_magnitude_and_suffix, SuffixType}; @@ -152,7 +152,9 @@ impl ProgUpdate { let (carriage_return, newline) = if rewrite { ("\r", "") } else { ("", "\n") }; // The duration should be formatted as in `printf %g`. - let duration_str = sprintf("%g", &[duration.to_string()])?; + // TODO: remove unwrap and make FormatError implement UError + let duration_str = sprintf("%g", &[FormatArgument::Float(duration)])?; + let duration_str = std::str::from_utf8(&duration_str).unwrap(); // If the number of bytes written is sufficiently large, then // print a more concise representation of the number, like diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 36b4c34535b..6e270ec2645 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -8,7 +8,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use uucore::error::{UResult, UUsageError}; -use uucore::format::printf; +use uucore::format::{printf, FormatArgument}; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; @@ -30,12 +30,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format_string = matches .get_one::(options::FORMATSTRING) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; - let values: Vec = match matches.get_many::(options::ARGUMENT) { - Some(s) => s.map(|s| s.to_string()).collect(), + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { + Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), None => vec![], }; - printf(format_string, &values[..])?; + printf(format_string, &values)?; Ok(()) } diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index f93ced9264d..217e9042833 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -9,7 +9,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use num_traits::Zero; use uucore::error::UResult; -use uucore::format::printf; +use uucore::format::{printf, FormatArgument}; use uucore::{format_usage, help_about, help_usage}; mod error; @@ -144,8 +144,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }; match result { Ok(_) => Ok(()), - Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), - Err(e) => Err(e.map_err_context(|| "write error".into())), + _ => todo!(), + // Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), + // Err(e) => Err(e.map_err_context(|| "write error".into())), } } @@ -270,7 +271,7 @@ fn print_seq( match format { Some(f) => { let s = format!("{value}"); - printf(f, &[s])?; + printf(f, &[FormatArgument::String(s)])?; } None => write_value_float(&mut stdout, &value, padding, largest_dec)?, } @@ -326,7 +327,7 @@ fn print_seq_integers( match format { Some(f) => { let s = format!("{value}"); - printf(f, &[s])?; + printf(f, &[FormatArgument::String(s)])?; } None => write_value_int(&mut stdout, &value, padding, pad)?, } diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 133050954dd..1d0d437824d 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -8,16 +8,14 @@ pub mod backup_control; #[cfg(feature = "encoding")] pub mod encoding; +#[cfg(feature = "format")] +pub mod format; #[cfg(feature = "fs")] pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; #[cfg(feature = "lines")] pub mod lines; -#[cfg(feature = "format")] -pub mod format; -#[cfg(feature = "memo")] -pub mod memo; #[cfg(feature = "quoting-style")] pub mod quoting_style; #[cfg(feature = "ranges")] @@ -26,8 +24,6 @@ pub mod ranges; pub mod ringbuffer; #[cfg(feature = "sum")] pub mod sum; -#[cfg(feature = "memo")] -mod tokenize; #[cfg(feature = "update-control")] pub mod update_control; #[cfg(feature = "version-cmp")] diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index abd92011cc4..ebb1cc360eb 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -3,7 +3,7 @@ //! The [`printf`] and [`sprintf`] closely match the behavior of the //! corresponding C functions: the former renders a formatted string //! to stdout, the latter renders to a new [`String`] object. -//! +//! //! In addition to the [`printf`] and [`sprintf`] functions, we expose the //! [`Format`] struct, which represents a parsed format string. This reduces //! the need for parsing a format string multiple times and assures that no @@ -14,8 +14,15 @@ mod spec; use spec::Spec; -use std::io::{stdout, Write}; +use std::{ + error::Error, + fmt::Display, + io::{stdout, Write}, +}; + +use crate::error::UError; +#[derive(Debug)] pub enum FormatError { SpecError, IoError(std::io::Error), @@ -23,6 +30,21 @@ pub enum FormatError { InvalidArgument(FormatArgument), } +impl Error for FormatError {} +impl UError for FormatError {} + +impl Display for FormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // TODO: Be more precise about these + match self { + FormatError::SpecError => write!(f, "invalid spec"), + FormatError::IoError(_) => write!(f, "io error"), + FormatError::NoMoreArguments => write!(f, "no more arguments"), + FormatError::InvalidArgument(_) => write!(f, "invalid argument"), + } + } +} + /// A single item to format enum FormatItem { /// A format specifier @@ -30,21 +52,28 @@ enum FormatItem { /// Some plain text Text(Vec), /// A single character - /// + /// /// Added in addition to `Text` as an optimization. Char(u8), } +#[derive(Clone, Debug)] pub enum FormatArgument { Char(char), String(String), UnsignedInt(u64), SignedInt(i64), Float(f64), + // Special argument that gets coerced into the other variants + Unparsed(String), } impl FormatItem { - fn write<'a>(&self, mut writer: impl Write, args: &mut impl Iterator) -> Result<(), FormatError> { + fn write<'a>( + &self, + mut writer: impl Write, + args: &mut impl Iterator, + ) -> Result<(), FormatError> { match self { FormatItem::Spec(spec) => spec.write(writer, args), FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), @@ -110,13 +139,20 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator) -> Result<(), FormatError> { +pub fn printf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result<(), FormatError> { printf_writer(stdout(), format_string, arguments) } -fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIterator) -> Result<(), FormatError> { +fn printf_writer<'a>( + mut writer: impl Write, + format_string: impl AsRef<[u8]>, + args: impl IntoIterator, +) -> Result<(), FormatError> { let mut args = args.into_iter(); - for item in parse_iter(format_string) { + for item in parse_iter(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } Ok(()) @@ -137,7 +173,10 @@ fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIt /// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); /// assert_eq!(s, "hello world".to_string()); /// ``` -pub fn sprintf(format_string: &[u8], arguments: impl IntoIterator) -> Result, FormatError> { +pub fn sprintf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result, FormatError> { let mut writer = Vec::new(); printf_writer(&mut writer, format_string, arguments)?; Ok(writer) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index d1786c3d36f..e66cad32d65 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -257,7 +257,7 @@ impl Spec { pub fn write<'a>( &self, mut writer: impl Write, - mut args: impl Iterator, + mut args: impl Iterator, ) -> Result<(), FormatError> { match self { &Spec::Char { width, align_left } => { @@ -265,7 +265,7 @@ impl Spec { let arg = next_arg(&mut args)?; match arg { FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg)), + _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::String { width, align_left } => { @@ -273,7 +273,7 @@ impl Spec { let arg = next_arg(&mut args)?; match arg { FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg)), + _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::SignedInt { @@ -285,10 +285,10 @@ impl Spec { let arg = next_arg(&mut args)?; let FormatArgument::SignedInt(i) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; - if i >= 0 { + if *i >= 0 { match positive_sign { PositiveSign::None => Ok(()), PositiveSign::Plus => write!(writer, "+"), @@ -313,7 +313,7 @@ impl Spec { let arg = next_arg(args)?; let FormatArgument::SignedInt(i) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; let s = match variant { @@ -355,7 +355,7 @@ impl Spec { let arg = next_arg(args)?; let FormatArgument::Float(f) = arg else { - return Err(FormatError::InvalidArgument(arg)); + return Err(FormatError::InvalidArgument(arg.clone())); }; if f.is_sign_positive() { @@ -369,16 +369,16 @@ impl Spec { let s = match variant { FloatVariant::Decimal => { - format_float_decimal(f, precision, case, force_decimal) + format_float_decimal(*f, precision, case, force_decimal) } FloatVariant::Scientific => { - format_float_scientific(f, precision, case, force_decimal) + format_float_scientific(*f, precision, case, force_decimal) } FloatVariant::Shortest => { - format_float_shortest(f, precision, case, force_decimal) + format_float_shortest(*f, precision, case, force_decimal) } FloatVariant::Hexadecimal => { - format_float_hexadecimal(f, precision, case, force_decimal) + format_float_hexadecimal(*f, precision, case, force_decimal) } }; @@ -490,7 +490,7 @@ fn format_float_hexadecimal( let mut s = match (precision, force_decimal) { (0, ForceDecimal::No) => format!("0x{first_digit}p{exponent:+x}"), (0, ForceDecimal::Yes) => format!("0x{first_digit}.p{exponent:+x}"), - _ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+x}") + _ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+x}"), }; if case == Case::Uppercase { @@ -500,29 +500,29 @@ fn format_float_hexadecimal( return s; } -fn resolve_asterisk( +fn resolve_asterisk<'a>( option: Option>, - args: impl Iterator, + args: impl Iterator, ) -> Result, FormatError> { Ok(match option { None => None, Some(CanAsterisk::Asterisk) => { let arg = next_arg(args)?; match arg { - FormatArgument::UnsignedInt(u) => match usize::try_from(u) { + FormatArgument::UnsignedInt(u) => match usize::try_from(*u) { Ok(u) => Some(u), - Err(_) => return Err(FormatError::InvalidArgument(arg)), + Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), }, - _ => return Err(FormatError::InvalidArgument(arg)), + _ => return Err(FormatError::InvalidArgument(arg.clone())), } } Some(CanAsterisk::Fixed(w)) => Some(w), }) } -fn next_arg( - mut arguments: impl Iterator, -) -> Result { +fn next_arg<'a>( + mut arguments: impl Iterator, +) -> Result<&'a FormatArgument, FormatError> { arguments.next().ok_or(FormatError::NoMoreArguments) } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 0540275eee4..af8668ef02f 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -37,16 +37,14 @@ pub use crate::parser::shortcut_value_parser; pub use crate::features::backup_control; #[cfg(feature = "encoding")] pub use crate::features::encoding; +#[cfg(feature = "format")] +pub use crate::features::format; #[cfg(feature = "fs")] pub use crate::features::fs; #[cfg(feature = "fsext")] pub use crate::features::fsext; #[cfg(feature = "lines")] pub use crate::features::lines; -#[cfg(feature = "format")] -pub use crate::features::format; -#[cfg(feature = "memo")] -pub use crate::features::memo; #[cfg(feature = "quoting-style")] pub use crate::features::quoting_style; #[cfg(feature = "ranges")] From 198f7c7f26c6aa5a374d8f4def4ad324bee38535 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 9 Nov 2023 15:45:44 +0100 Subject: [PATCH 010/429] printf: move number formatting to separate module --- src/uucore/src/lib/features/format/mod.rs | 2 +- .../src/lib/features/format/num_format.rs | 232 ++++++++++++++++++ src/uucore/src/lib/features/format/spec.rs | 202 ++------------- 3 files changed, 258 insertions(+), 178 deletions(-) create mode 100644 src/uucore/src/lib/features/format/num_format.rs diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index ebb1cc360eb..0849ada15f8 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -10,8 +10,8 @@ //! parsing errors occur during writing. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// mod num_format; mod spec; +mod num_format; use spec::Spec; use std::{ diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs new file mode 100644 index 00000000000..75c18438cc8 --- /dev/null +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -0,0 +1,232 @@ +use std::io::Write; + +use super::{ + spec::{ + Case, FloatVariant, ForceDecimal, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, + }, + FormatError, +}; + +pub trait Formatter { + type Input; + fn fmt(&self, writer: impl Write, x: Self::Input) -> Result<(), FormatError>; +} + +pub struct SignedInt { + pub width: usize, + pub positive_sign: PositiveSign, + pub alignment: NumberAlignment, +} + +impl Formatter for SignedInt { + type Input = i64; + + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + if x >= 0 { + match self.positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + } + + match self.alignment { + NumberAlignment::Left => write!(writer, "{x: write!(writer, "{x:>width$}", width = self.width), + NumberAlignment::RightZero => write!(writer, "{x:0>width$}", width = self.width), + } + .map_err(FormatError::IoError) + } +} + +pub struct UnsignedInt { + pub variant: UnsignedIntVariant, + pub width: usize, + pub alignment: NumberAlignment, +} + +impl Formatter for UnsignedInt { + type Input = u64; + + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + let s = match self.variant { + UnsignedIntVariant::Decimal => format!("{x}"), + UnsignedIntVariant::Octal(Prefix::No) => format!("{x:o}"), + UnsignedIntVariant::Octal(Prefix::Yes) => format!("{x:#o}"), + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => { + format!("{x:x}") + } + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => { + format!("{x:#x}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => { + format!("{x:X}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => { + format!("{x:#X}") + } + }; + + match self.alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}", width = self.width), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), + } + .map_err(FormatError::IoError) + } +} + +pub struct Float { + pub variant: FloatVariant, + pub case: Case, + pub force_decimal: ForceDecimal, + pub width: usize, + pub positive_sign: PositiveSign, + pub alignment: NumberAlignment, + pub precision: usize, +} + +impl Formatter for Float { + type Input = f64; + + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + if x.is_sign_positive() { + match self.positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + } + + let s = match self.variant { + FloatVariant::Decimal => { + format_float_decimal(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Scientific => { + format_float_scientific(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Shortest => { + format_float_shortest(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Hexadecimal => { + format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) + } + }; + + match self.alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}", width = self.width), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), + } + .map_err(FormatError::IoError) + } +} + +fn format_float_nonfinite(f: f64, case: Case) -> String { + debug_assert!(!f.is_finite()); + let mut s = format!("{f}"); + if case == Case::Uppercase { + s.make_ascii_uppercase(); + } + return s; +} + +fn format_float_decimal( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + if !f.is_finite() { + return format_float_nonfinite(f, case); + } + + if precision == 0 && force_decimal == ForceDecimal::Yes { + format!("{f:.0}.") + } else { + format!("{f:.*}", precision) + } +} + +fn format_float_scientific( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + // If the float is NaN, -Nan, Inf or -Inf, format like any other float + if !f.is_finite() { + return format_float_nonfinite(f, case); + } + + let exponent: i32 = f.log10().floor() as i32; + let normalized = f / 10.0_f64.powi(exponent); + + let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { + "." + } else { + "" + }; + + let exp_char = match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }; + + format!( + "{normalized:.*}{additional_dot}{exp_char}{exponent:+03}", + precision + ) +} + +// TODO: This could be optimized. It's not terribly important though. +fn format_float_shortest( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + let a = format_float_decimal(f, precision, case, force_decimal); + let b = format_float_scientific(f, precision, case, force_decimal); + + if a.len() > b.len() { + b + } else { + a + } +} + +fn format_float_hexadecimal( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + if !f.is_finite() { + return format_float_nonfinite(f, case); + } + + let (first_digit, mantissa, exponent) = if f == 0.0 { + (0, 0, 0) + } else { + let bits = f.to_bits(); + let exponent_bits = ((bits >> 52) & 0x7fff) as i64; + let exponent = exponent_bits - 1023; + let mantissa = bits & 0xf_ffff_ffff_ffff; + (1, mantissa, exponent) + }; + + let mut s = match (precision, force_decimal) { + (0, ForceDecimal::No) => format!("0x{first_digit}p{exponent:+x}"), + (0, ForceDecimal::Yes) => format!("0x{first_digit}.p{exponent:+x}"), + _ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+x}"), + }; + + if case == Case::Uppercase { + s.make_ascii_uppercase(); + } + + return s; +} diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index e66cad32d65..4a533d1e10f 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -1,6 +1,9 @@ // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -use super::{FormatArgument, FormatError}; +use super::{ + num_format::{self, Formatter}, + FormatArgument, FormatError, +}; use std::{fmt::Display, io::Write}; pub enum Spec { @@ -256,7 +259,7 @@ impl Spec { pub fn write<'a>( &self, - mut writer: impl Write, + writer: impl Write, mut args: impl Iterator, ) -> Result<(), FormatError> { match self { @@ -288,21 +291,11 @@ impl Spec { return Err(FormatError::InvalidArgument(arg.clone())); }; - if *i >= 0 { - match positive_sign { - PositiveSign::None => Ok(()), - PositiveSign::Plus => write!(writer, "+"), - PositiveSign::Space => write!(writer, " "), - } - .map_err(FormatError::IoError)?; - } - - match alignment { - NumberAlignment::Left => write!(writer, "{i: write!(writer, "{i:>width$}"), - NumberAlignment::RightZero => write!(writer, "{i:0>width$}"), - } - .map_err(FormatError::IoError) + num_format::SignedInt { + width, + positive_sign, + alignment, + }.fmt(writer, *i) } &Spec::UnsignedInt { variant, @@ -312,34 +305,16 @@ impl Spec { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(args)?; - let FormatArgument::SignedInt(i) = arg else { + let FormatArgument::UnsignedInt(i) = arg else { return Err(FormatError::InvalidArgument(arg.clone())); }; - let s = match variant { - UnsignedIntVariant::Decimal => format!("{i}"), - UnsignedIntVariant::Octal(Prefix::No) => format!("{i:o}"), - UnsignedIntVariant::Octal(Prefix::Yes) => format!("{i:#o}"), - UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => { - format!("{i:x}") - } - UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => { - format!("{i:#x}") - } - UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => { - format!("{i:X}") - } - UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => { - format!("{i:#X}") - } - }; - - match alignment { - NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), - NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + num_format::UnsignedInt { + variant, + width, + alignment, } - .map_err(FormatError::IoError) + .fmt(writer, *i) } &Spec::Float { variant, @@ -358,148 +333,21 @@ impl Spec { return Err(FormatError::InvalidArgument(arg.clone())); }; - if f.is_sign_positive() { - match positive_sign { - PositiveSign::None => Ok(()), - PositiveSign::Plus => write!(writer, "+"), - PositiveSign::Space => write!(writer, " "), - } - .map_err(FormatError::IoError)?; - } - - let s = match variant { - FloatVariant::Decimal => { - format_float_decimal(*f, precision, case, force_decimal) - } - FloatVariant::Scientific => { - format_float_scientific(*f, precision, case, force_decimal) - } - FloatVariant::Shortest => { - format_float_shortest(*f, precision, case, force_decimal) - } - FloatVariant::Hexadecimal => { - format_float_hexadecimal(*f, precision, case, force_decimal) - } - }; - - match alignment { - NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), - NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + num_format::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, } - .map_err(FormatError::IoError) + .fmt(writer, *f) } } } } -fn format_float_nonfinite(f: f64, case: Case) -> String { - debug_assert!(!f.is_finite()); - let mut s = format!("{f}"); - if case == Case::Uppercase { - s.make_ascii_uppercase(); - } - return s; -} - -fn format_float_decimal( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - - if precision == 0 && force_decimal == ForceDecimal::Yes { - format!("{f:.0}.") - } else { - format!("{f:.*}", precision) - } -} - -fn format_float_scientific( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - // If the float is NaN, -Nan, Inf or -Inf, format like any other float - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - - let exponent: i32 = f.log10().floor() as i32; - let normalized = f / 10.0_f64.powi(exponent); - - let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { - "." - } else { - "" - }; - - let exp_char = match case { - Case::Lowercase => 'e', - Case::Uppercase => 'E', - }; - - format!( - "{normalized:.*}{additional_dot}{exp_char}{exponent:+03}", - precision - ) -} - -// TODO: This could be optimized. It's not terribly important though. -fn format_float_shortest( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - let a = format_float_decimal(f, precision, case, force_decimal); - let b = format_float_scientific(f, precision, case, force_decimal); - - if a.len() > b.len() { - b - } else { - a - } -} - -fn format_float_hexadecimal( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - - let (first_digit, mantissa, exponent) = if f == 0.0 { - (0, 0, 0) - } else { - let bits = f.to_bits(); - let exponent_bits = ((bits >> 52) & 0x7fff) as i64; - let exponent = exponent_bits - 1023; - let mantissa = bits & 0xf_ffff_ffff_ffff; - (1, mantissa, exponent) - }; - - let mut s = match (precision, force_decimal) { - (0, ForceDecimal::No) => format!("0x{first_digit}p{exponent:+x}"), - (0, ForceDecimal::Yes) => format!("0x{first_digit}.p{exponent:+x}"), - _ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+x}"), - }; - - if case == Case::Uppercase { - s.make_ascii_uppercase(); - } - - return s; -} - fn resolve_asterisk<'a>( option: Option>, args: impl Iterator, From 39c675847545b5653e04f9338815d882fdc7d01a Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 9 Nov 2023 16:05:11 +0100 Subject: [PATCH 011/429] uucore/format: move types for num_format --- src/uucore/src/lib/features/format/mod.rs | 2 +- .../src/lib/features/format/num_format.rs | 69 +++++++++++++++++-- src/uucore/src/lib/features/format/spec.rs | 56 ++------------- 3 files changed, 70 insertions(+), 57 deletions(-) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 0849ada15f8..d6db5e8c7cd 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -11,7 +11,7 @@ // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety mod spec; -mod num_format; +pub mod num_format; use spec::Spec; use std::{ diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 75c18438cc8..3a27ac200f8 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -1,17 +1,60 @@ use std::io::Write; -use super::{ - spec::{ - Case, FloatVariant, ForceDecimal, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, - }, - FormatError, -}; +use super::FormatError; pub trait Formatter { type Input; fn fmt(&self, writer: impl Write, x: Self::Input) -> Result<(), FormatError>; } +#[derive(Clone, Copy)] +pub enum UnsignedIntVariant { + Decimal, + Octal(Prefix), + Hexadecimal(Case, Prefix), +} + +#[derive(Clone, Copy)] + +pub enum FloatVariant { + Decimal, + Scientific, + Shortest, + Hexadecimal, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Case { + Lowercase, + Uppercase, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Prefix { + No, + Yes, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ForceDecimal { + No, + Yes, +} + +#[derive(Clone, Copy)] +pub enum PositiveSign { + None, + Plus, + Space, +} + +#[derive(Clone, Copy)] +pub enum NumberAlignment { + Left, + RightSpace, + RightZero, +} + pub struct SignedInt { pub width: usize, pub positive_sign: PositiveSign, @@ -87,6 +130,20 @@ pub struct Float { pub precision: usize, } +impl Default for Float { + fn default() -> Self { + Self { + variant: FloatVariant::Decimal, + case: Case::Lowercase, + force_decimal: ForceDecimal::No, + width: 0, + positive_sign: PositiveSign::None, + alignment: NumberAlignment::Left, + precision: 2, + } + } +} + impl Formatter for Float { type Input = f64; diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 4a533d1e10f..80896997007 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -1,7 +1,10 @@ // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety use super::{ - num_format::{self, Formatter}, + num_format::{ + self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, + UnsignedIntVariant, + }, FormatArgument, FormatError, }; use std::{fmt::Display, io::Write}; @@ -36,54 +39,6 @@ pub enum Spec { }, } -#[derive(Clone, Copy)] -pub enum UnsignedIntVariant { - Decimal, - Octal(Prefix), - Hexadecimal(Case, Prefix), -} - -#[derive(Clone, Copy)] - -pub enum FloatVariant { - Decimal, - Scientific, - Shortest, - Hexadecimal, -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum Case { - Lowercase, - Uppercase, -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum Prefix { - No, - Yes, -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum ForceDecimal { - No, - Yes, -} - -#[derive(Clone, Copy)] -pub enum PositiveSign { - None, - Plus, - Space, -} - -#[derive(Clone, Copy)] -pub enum NumberAlignment { - Left, - RightSpace, - RightZero, -} - /// Precision and width specified might use an asterisk to indicate that they are /// determined by an argument. #[derive(Clone, Copy)] @@ -295,7 +250,8 @@ impl Spec { width, positive_sign, alignment, - }.fmt(writer, *i) + } + .fmt(writer, *i) } &Spec::UnsignedInt { variant, From ee0e2c042bf93062727cf20356d4ebbfaa018291 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 9 Nov 2023 16:05:38 +0100 Subject: [PATCH 012/429] dd: use num_format::Float directly instead of printf --- src/uu/dd/src/progress.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index 1d9b7247d20..269ae5df463 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -13,8 +13,10 @@ use std::io::Write; use std::sync::mpsc; use std::time::Duration; -use uucore::format::sprintf; -use uucore::{error::UResult, format::FormatArgument}; +use uucore::{ + error::UResult, + format::num_format::{FloatVariant, Formatter}, +}; use crate::numbers::{to_magnitude_and_suffix, SuffixType}; @@ -152,8 +154,13 @@ impl ProgUpdate { let (carriage_return, newline) = if rewrite { ("\r", "") } else { ("", "\n") }; // The duration should be formatted as in `printf %g`. - // TODO: remove unwrap and make FormatError implement UError - let duration_str = sprintf("%g", &[FormatArgument::Float(duration)])?; + let mut duration_str = Vec::new(); + uucore::format::num_format::Float { + variant: FloatVariant::Shortest, + ..Default::default() + } + .fmt(&mut duration_str, duration)?; + // We assume that printf will output valid UTF-8 let duration_str = std::str::from_utf8(&duration_str).unwrap(); // If the number of bytes written is sufficiently large, then From 6481d63ea4b8cd768d064e0a6769d7cbd4a2803c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 13 Nov 2023 15:22:49 +0100 Subject: [PATCH 013/429] uucore/format: implement single specifier formats --- src/uu/seq/src/seq.rs | 58 ++++++--- src/uucore/src/lib/features/format/mod.rs | 72 +++++++++++- .../src/lib/features/format/num_format.rs | 111 ++++++++++++++++-- src/uucore/src/lib/features/format/spec.rs | 3 + 4 files changed, 216 insertions(+), 28 deletions(-) diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 217e9042833..bb4d5414ef8 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -3,13 +3,13 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse -use std::io::{stdout, ErrorKind, Write}; +use std::io::{stdout, Write}; use clap::{crate_version, Arg, ArgAction, Command}; -use num_traits::Zero; +use num_traits::{Zero, ToPrimitive}; use uucore::error::UResult; -use uucore::format::{printf, FormatArgument}; +use uucore::format::{printf, FormatArgument, Format, num_format}; use uucore::{format_usage, help_about, help_usage}; mod error; @@ -119,16 +119,31 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result = match (first.number, increment.number, last.number) { (Number::Int(first), Number::Int(increment), last) => { let last = last.round_towards(&first); + let format = match options.format { + Some(f) => { + let f = Format::::parse(f)?; + Some(f) + } + None => None, + }; print_seq_integers( (first, increment, last), &options.separator, &options.terminator, options.equal_width, padding, - options.format, + format, ) } - (first, increment, last) => print_seq( + (first, increment, last) => { + let format = match options.format { + Some(f) => { + let f = Format::::parse(f)?; + Some(f) + } + None => None, + }; + print_seq( ( first.into_extended_big_decimal(), increment.into_extended_big_decimal(), @@ -139,8 +154,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { &options.terminator, options.equal_width, padding, - options.format, - ), + format, + ) + } }; match result { Ok(_) => Ok(()), @@ -244,7 +260,7 @@ fn print_seq( terminator: &str, pad: bool, padding: usize, - format: Option<&str>, + format: Option>, ) -> UResult<()> { let stdout = stdout(); let mut stdout = stdout.lock(); @@ -268,10 +284,16 @@ fn print_seq( // it as a string and ultimately writing to `stdout`. We // shouldn't have to do so much converting back and forth via // strings. - match format { + match &format { Some(f) => { - let s = format!("{value}"); - printf(f, &[FormatArgument::String(s)])?; + let float = match &value { + ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(), + ExtendedBigDecimal::Infinity => f64::INFINITY, + ExtendedBigDecimal::MinusInfinity => f64::NEG_INFINITY, + ExtendedBigDecimal::MinusZero => -0.0, + ExtendedBigDecimal::Nan => f64::NAN, + }; + f.fmt(&mut stdout, float)?; } None => write_value_float(&mut stdout, &value, padding, largest_dec)?, } @@ -306,7 +328,7 @@ fn print_seq_integers( terminator: &str, pad: bool, padding: usize, - format: Option<&str>, + format: Option>, ) -> UResult<()> { let stdout = stdout(); let mut stdout = stdout.lock(); @@ -324,10 +346,16 @@ fn print_seq_integers( // the current value and writes the result to `stdout`. // // TODO See similar comment about formatting in `print_seq()`. - match format { + match &format { Some(f) => { - let s = format!("{value}"); - printf(f, &[FormatArgument::String(s)])?; + let int = match &value { + ExtendedBigInt::BigInt(bi) => bi.to_i64().unwrap(), + ExtendedBigInt::Infinity => todo!(), + ExtendedBigInt::MinusInfinity => todo!(), + ExtendedBigInt::MinusZero => todo!(), + ExtendedBigInt::Nan => todo!(), + }; + f.fmt(&mut stdout, int)?; } None => write_value_int(&mut stdout, &value, padding, pad)?, } diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index d6db5e8c7cd..48151be9892 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -10,8 +10,8 @@ //! parsing errors occur during writing. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -mod spec; pub mod num_format; +mod spec; use spec::Spec; use std::{ @@ -22,6 +22,8 @@ use std::{ use crate::error::UError; +use self::num_format::Formatter; + #[derive(Debug)] pub enum FormatError { SpecError, @@ -33,6 +35,12 @@ pub enum FormatError { impl Error for FormatError {} impl UError for FormatError {} +impl From for FormatError { + fn from(value: std::io::Error) -> Self { + FormatError::IoError(value) + } +} + impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // TODO: Be more precise about these @@ -181,3 +189,65 @@ pub fn sprintf<'a>( printf_writer(&mut writer, format_string, arguments)?; Ok(writer) } + +/// A parsed format for a single float value +/// +/// This is used by `seq`. It can be constructed with [`FloatFormat::parse`] +/// and can write a value with [`FloatFormat::fmt`]. +/// +/// It can only accept a single specification without any asterisk parameters. +/// If it does get more specifications, it will return an error. +pub struct Format { + prefix: Vec, + suffix: Vec, + formatter: F, +} + +impl Format { + pub fn parse(format_string: impl AsRef<[u8]>) -> Result { + let mut iter = parse_iter(format_string.as_ref()); + + let mut prefix = Vec::new(); + let mut spec = None; + for item in &mut iter { + match item? { + FormatItem::Spec(s) => { + spec = Some(s); + break; + } + FormatItem::Text(t) => prefix.extend_from_slice(&t), + FormatItem::Char(c) => prefix.push(c), + } + } + + let Some(spec) = spec else { + return Err(FormatError::SpecError); + }; + + let formatter = F::try_from_spec(spec)?; + + let mut suffix = Vec::new(); + for item in &mut iter { + match item? { + FormatItem::Spec(_) => { + return Err(FormatError::SpecError); + } + FormatItem::Text(t) => suffix.extend_from_slice(&t), + FormatItem::Char(c) => suffix.push(c), + } + } + + Ok(Self { + prefix, + suffix, + formatter, + }) + } + + pub fn fmt(&self, mut w: impl Write, f: F::Input) -> std::io::Result<()> { + w.write_all(&self.prefix)?; + self.formatter.fmt(&mut w, f)?; + w.write_all(&self.suffix)?; + Ok(()) + } +} diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 3a27ac200f8..fd010bdc029 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -1,10 +1,16 @@ use std::io::Write; -use super::FormatError; +use super::{ + spec::{CanAsterisk, Spec}, + FormatError, +}; pub trait Formatter { type Input; - fn fmt(&self, writer: impl Write, x: Self::Input) -> Result<(), FormatError>; + fn fmt(&self, writer: impl Write, x: Self::Input) -> std::io::Result<()>; + fn try_from_spec(s: Spec) -> Result + where + Self: Sized; } #[derive(Clone, Copy)] @@ -64,14 +70,13 @@ pub struct SignedInt { impl Formatter for SignedInt { type Input = i64; - fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> { if x >= 0 { match self.positive_sign { PositiveSign::None => Ok(()), PositiveSign::Plus => write!(writer, "+"), PositiveSign::Space => write!(writer, " "), - } - .map_err(FormatError::IoError)?; + }?; } match self.alignment { @@ -79,7 +84,29 @@ impl Formatter for SignedInt { NumberAlignment::RightSpace => write!(writer, "{x:>width$}", width = self.width), NumberAlignment::RightZero => write!(writer, "{x:0>width$}", width = self.width), } - .map_err(FormatError::IoError) + } + + fn try_from_spec(s: Spec) -> Result { + let Spec::SignedInt { + width, + positive_sign, + alignment, + } = s + else { + return Err(FormatError::SpecError); + }; + + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + + Ok(Self { + width, + positive_sign, + alignment, + }) } } @@ -92,7 +119,7 @@ pub struct UnsignedInt { impl Formatter for UnsignedInt { type Input = u64; - fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> { let s = match self.variant { UnsignedIntVariant::Decimal => format!("{x}"), UnsignedIntVariant::Octal(Prefix::No) => format!("{x:o}"), @@ -116,7 +143,29 @@ impl Formatter for UnsignedInt { NumberAlignment::RightSpace => write!(writer, "{s:>width$}", width = self.width), NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), } - .map_err(FormatError::IoError) + } + + fn try_from_spec(s: Spec) -> Result { + let Spec::UnsignedInt { + variant, + width, + alignment, + } = s + else { + return Err(FormatError::SpecError); + }; + + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + + Ok(Self { + width, + variant, + alignment, + }) } } @@ -147,14 +196,13 @@ impl Default for Float { impl Formatter for Float { type Input = f64; - fn fmt(&self, mut writer: impl Write, x: Self::Input) -> Result<(), FormatError> { + fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> { if x.is_sign_positive() { match self.positive_sign { PositiveSign::None => Ok(()), PositiveSign::Plus => write!(writer, "+"), PositiveSign::Space => write!(writer, " "), - } - .map_err(FormatError::IoError)?; + }?; } let s = match self.variant { @@ -177,7 +225,46 @@ impl Formatter for Float { NumberAlignment::RightSpace => write!(writer, "{s:>width$}", width = self.width), NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), } - .map_err(FormatError::IoError) + } + + fn try_from_spec(s: Spec) -> Result + where + Self: Sized, + { + let Spec::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + } = s + else { + return Err(FormatError::SpecError); + }; + + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + + Ok(Self { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + }) } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 80896997007..9c53669fa9c 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -252,6 +252,7 @@ impl Spec { alignment, } .fmt(writer, *i) + .map_err(FormatError::IoError) } &Spec::UnsignedInt { variant, @@ -271,6 +272,7 @@ impl Spec { alignment, } .fmt(writer, *i) + .map_err(FormatError::IoError) } &Spec::Float { variant, @@ -299,6 +301,7 @@ impl Spec { precision, } .fmt(writer, *f) + .map_err(FormatError::IoError) } } } From c4580df2a4cbb4f1cb4e3124779bde43d7b027f8 Mon Sep 17 00:00:00 2001 From: cswn Date: Tue, 14 Nov 2023 18:07:35 +0100 Subject: [PATCH 014/429] split: remove crash macro --- src/uu/split/src/platform/unix.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/uu/split/src/platform/unix.rs b/src/uu/split/src/platform/unix.rs index f4adb818834..c2bf7216b57 100644 --- a/src/uu/split/src/platform/unix.rs +++ b/src/uu/split/src/platform/unix.rs @@ -7,9 +7,10 @@ use std::io::Write; use std::io::{BufWriter, Error, ErrorKind, Result}; use std::path::Path; use std::process::{Child, Command, Stdio}; -use uucore::crash; +use uucore::error::USimpleError; use uucore::fs; use uucore::fs::FileInformation; +use uucore::show; /// A writer that writes to a shell_process' stdin /// @@ -101,10 +102,13 @@ impl Drop for FilterWriter { .expect("Couldn't wait for child process"); if let Some(return_code) = exit_status.code() { if return_code != 0 { - crash!(1, "Shell process returned {}", return_code); + show!(USimpleError::new( + 1, + format!("Shell process returned {}", return_code) + )); } } else { - crash!(1, "Shell process terminated by signal") + show!(USimpleError::new(1, "Shell process terminated by signal")); } } } From e2e5ec60cde2a407a2c8a467c1d0057f312e2276 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 14 Nov 2023 20:05:36 +0000 Subject: [PATCH 015/429] chore(deps): update rust crate itertools to 0.12.0 --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d83adfee600..c89c87df41c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1122,9 +1122,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" dependencies = [ "either", ] diff --git a/Cargo.toml b/Cargo.toml index f313e2b03e4..2f3af2c83d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -285,7 +285,7 @@ gcd = "2.3" glob = "0.3.1" half = "2.3" indicatif = "0.17" -itertools = "0.11.0" +itertools = "0.12.0" libc = "0.2.150" lscolors = { version = "0.15.0", default-features = false, features = [ "nu-ansi-term", From 6446ef294c1ec0dc9ab9450d659e51dc4b30526f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 14 Nov 2023 20:44:19 +0100 Subject: [PATCH 016/429] publishing: check if the current version is already there or not This can happen when a publishing step failed --- util/publish.sh | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/util/publish.sh b/util/publish.sh index 71830f1f915..7207ba7fb91 100755 --- a/util/publish.sh +++ b/util/publish.sh @@ -5,6 +5,22 @@ if test "$1" != "--do-it"; then ARG="--dry-run --allow-dirty" fi +# Function to check if the crate is already published +is_already_published() { + local crate_name=$1 + local crate_version=$2 + + # Use the crates.io API to get the latest version of the crate + local latest_published_version + latest_published_version=$(curl -s https://crates.io/api/v1/crates/$crate_name | jq -r '.crate.max_version') + + if [ "$latest_published_version" = "$crate_version" ]; then + return 0 + else + return 1 + fi +} + # Figure out any dependencies between the util via Cargo.toml # We store this as edges in a graph with each line: # [dependent] [dependency] @@ -35,12 +51,19 @@ TOTAL_ORDER=$(echo -e $PARTIAL_ORDER | tsort | tac) # Remove the ROOT node from the start TOTAL_ORDER=${TOTAL_ORDER#ROOT} +CRATE_VERSION=$(grep '^version' Cargo.toml | head -n1 | cut -d '"' -f2) + set -e for dir in src/uuhelp_parser/ src/uucore_procs/ src/uucore/ src/uu/stdbuf/src/libstdbuf/; do ( cd "$dir" + CRATE_NAME=$(grep '^name =' "Cargo.toml" | head -n1 | cut -d '"' -f2) #shellcheck disable=SC2086 - cargo publish $ARG + if ! is_already_published "$CRATE_NAME" "$CRATE_VERSION"; then + cargo publish $ARG + else + echo "Skip: $CRATE_NAME $CRATE_VERSION already published" + fi ) sleep 2s done @@ -48,8 +71,13 @@ done for p in $TOTAL_ORDER; do ( cd "src/uu/$p" + CRATE_NAME=$(grep '^name =' "Cargo.toml" | head -n1 | cut -d '"' -f2) #shellcheck disable=SC2086 - cargo publish $ARG + if ! is_already_published "$CRATE_NAME" "$CRATE_VERSION"; then + cargo publish $ARG + else + echo "Skip: $CRATE_NAME $CRATE_VERSION already published" + fi ) done From 3f86bc59de6f8c6dbb73faaa81d3ad3fa40c01e7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 15 Nov 2023 08:31:47 +0100 Subject: [PATCH 017/429] add missing features to uucore --- src/uu/dd/Cargo.toml | 2 +- src/uu/printf/Cargo.toml | 2 +- src/uu/seq/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml index 7db05b422f3..d654d829736 100644 --- a/src/uu/dd/Cargo.toml +++ b/src/uu/dd/Cargo.toml @@ -18,7 +18,7 @@ path = "src/dd.rs" clap = { workspace = true } gcd = { workspace = true } libc = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["memo", "quoting-style"] } [target.'cfg(any(target_os = "linux"))'.dependencies] nix = { workspace = true, features = ["fs"] } diff --git a/src/uu/printf/Cargo.toml b/src/uu/printf/Cargo.toml index 1cb05ec09b0..f36eff35e8c 100644 --- a/src/uu/printf/Cargo.toml +++ b/src/uu/printf/Cargo.toml @@ -16,7 +16,7 @@ path = "src/printf.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["memo", "quoting-style"] } [[bin]] name = "printf" diff --git a/src/uu/seq/Cargo.toml b/src/uu/seq/Cargo.toml index 95f761696c2..d4e7cd316c6 100644 --- a/src/uu/seq/Cargo.toml +++ b/src/uu/seq/Cargo.toml @@ -20,7 +20,7 @@ bigdecimal = { workspace = true } clap = { workspace = true } num-bigint = { workspace = true } num-traits = { workspace = true } -uucore = { workspace = true, features = ["memo"] } +uucore = { workspace = true, features = ["memo", "quoting-style"] } [[bin]] name = "seq" From 3f177ef97fc4e6f7a17f7565761732d89847fe72 Mon Sep 17 00:00:00 2001 From: "Y.D.X" <73375426+YDX-2147483647@users.noreply.github.com> Date: Wed, 15 Nov 2023 19:09:46 +0800 Subject: [PATCH 018/429] doc: Fix the markdown highlighting syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There should be a new line after `[!WARNING]`, according to [community · Discussion #16925](https://github.com/orgs/community/discussions/16925). --- CONTRIBUTING.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 255ed2c53e3..b10d3d11472 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,8 @@ check out these documents: Now follows a very important warning: -> [!WARNING] uutils is original code and cannot contain any code from GNU or +> [!WARNING] +> uutils is original code and cannot contain any code from GNU or > other implementations. This means that **we cannot accept any changes based on > the GNU source code**. To make sure that cannot happen, **you cannot link to > the GNU source code** either. From 94f6702ba52a875d251e869eba55583e61da9ad1 Mon Sep 17 00:00:00 2001 From: cswn Date: Wed, 15 Nov 2023 13:20:22 +0100 Subject: [PATCH 019/429] join: remove crash! macro --- src/uu/join/src/join.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index a48ba3657bd..3f2172da33e 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -21,7 +21,7 @@ use std::os::unix::ffi::OsStrExt; use uucore::display::Quotable; use uucore::error::{set_exit_code, UError, UResult, USimpleError}; use uucore::line_ending::LineEnding; -use uucore::{crash, crash_if_err, format_usage, help_about, help_usage}; +use uucore::{crash_if_err, format_usage, help_about, help_usage}; const ABOUT: &str = help_about!("join.md"); const USAGE: &str = help_usage!("join.md"); @@ -334,17 +334,23 @@ impl<'a> State<'a> { key: usize, line_ending: LineEnding, print_unpaired: bool, - ) -> State<'a> { + ) -> Result, JoinError> { let f = if name == "-" { Box::new(stdin.lock()) as Box } else { match File::open(name) { Ok(file) => Box::new(BufReader::new(file)) as Box, - Err(err) => crash!(1, "{}: {}", name.maybe_quote(), err), + Err(err) => { + return Err(JoinError::UnorderedInput(format!( + "{}: {}", + name.maybe_quote(), + err + ))); + } } }; - State { + Ok(State { key, file_name: name, file_num, @@ -355,7 +361,7 @@ impl<'a> State<'a> { line_num: 0, has_failed: false, has_unpaired: false, - } + }) } /// Skip the current unpaired line. @@ -847,7 +853,7 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), JoinError> { settings.key1, settings.line_ending, settings.print_unpaired1, - ); + )?; let mut state2 = State::new( FileNum::File2, @@ -856,7 +862,7 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), JoinError> { settings.key2, settings.line_ending, settings.print_unpaired2, - ); + )?; let input = Input::new( settings.separator, From 5dff5f2f736c44c576adce2dbcbe8275d52ebbd2 Mon Sep 17 00:00:00 2001 From: cswn Date: Wed, 15 Nov 2023 13:52:01 +0100 Subject: [PATCH 020/429] join: rename f variable to file_buf --- src/uu/join/src/join.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index 3f2172da33e..c8008c91c61 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -335,7 +335,7 @@ impl<'a> State<'a> { line_ending: LineEnding, print_unpaired: bool, ) -> Result, JoinError> { - let f = if name == "-" { + let file_buf = if name == "-" { Box::new(stdin.lock()) as Box } else { match File::open(name) { @@ -355,7 +355,7 @@ impl<'a> State<'a> { file_name: name, file_num, print_unpaired, - lines: f.split(line_ending as u8), + lines: file_buf.split(line_ending as u8), max_len: 1, seq: Vec::new(), line_num: 0, From b3eae16faddf03e5ce83047f244a510932970565 Mon Sep 17 00:00:00 2001 From: ALXD Date: Wed, 15 Nov 2023 17:40:54 +0100 Subject: [PATCH 021/429] printf: intf: change warning and exit code --- .../features/tokenize/num_format/formatters/intf.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs index 11070113c26..767c0c4bb67 100644 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs +++ b/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs @@ -8,10 +8,11 @@ //! formatter for unsigned and signed int subs //! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64) //! signed int: %i %d (both base ten i64) +use crate::error::set_exit_code; +use crate::features::tokenize::num_format::num_format::warn_expected_numeric; + use super::super::format_field::FormatField; -use super::super::formatter::{ - get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix, -}; +use super::super::formatter::{get_it_at, Base, FormatPrimitive, Formatter, InitialPrefix}; use std::i64; use std::u64; @@ -112,7 +113,8 @@ impl Intf { } } _ => { - warn_incomplete_conv(str_in); + warn_expected_numeric(str_in); + set_exit_code(1); break; } } From 3cdb0966ae1b86724be8f7b6db2b6f41472169fc Mon Sep 17 00:00:00 2001 From: ALXD Date: Wed, 15 Nov 2023 17:46:58 +0100 Subject: [PATCH 022/429] printf: add a test for %x with invalid value --- tests/by-util/test_printf.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index a297dbf6833..ab3505a327b 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -258,6 +258,14 @@ fn sub_num_hex_upper() { .stdout_only("thirty in hex is 1E"); } +#[test] +fn sub_num_hex_non_numerical() { + new_ucmd!() + .args(&["parameters need to be numbers %X", "%194"]) + .fails() + .code_is(1); +} + #[test] fn sub_num_float() { new_ucmd!() From 212991cd53cba8be58e877ab5b17ea86eb46f20e Mon Sep 17 00:00:00 2001 From: "Y.D.X." <73375426+YDX-2147483647@users.noreply.github.com> Date: Thu, 16 Nov 2023 12:26:40 +0800 Subject: [PATCH 023/429] doc: Fix a broken link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `/build.md` is absolute, so the link in https://uutils.github.io/coreutils/book/installation.html turns out to be https://uutils.github.io/build.html instead of https://uutils.github.io/coreutils/book/build.html. Reference: [Links · Markdown - mdBook Documentation](https://rust-lang.github.io/mdBook/format/markdown.html#links) --- docs/src/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/installation.md b/docs/src/installation.md index da124ead977..54b1e23f3f6 100644 --- a/docs/src/installation.md +++ b/docs/src/installation.md @@ -6,7 +6,7 @@ This is a list of uutils packages in various distributions and package managers. Note that these are packaged by third-parties and the packages might contain patches. -You can also [build uutils from source](/build.md). +You can also [build uutils from source](build.md). From a064c886566f810c3e58b1b1153762e772e35567 Mon Sep 17 00:00:00 2001 From: cswn Date: Thu, 16 Nov 2023 09:35:32 +0100 Subject: [PATCH 024/429] join: replace match with JoinError with map_err_context --- src/uu/join/src/join.rs | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index c8008c91c61..9661138879b 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -19,7 +19,7 @@ use std::num::IntErrorKind; #[cfg(unix)] use std::os::unix::ffi::OsStrExt; use uucore::display::Quotable; -use uucore::error::{set_exit_code, UError, UResult, USimpleError}; +use uucore::error::{set_exit_code, FromIo, UError, UResult, USimpleError}; use uucore::line_ending::LineEnding; use uucore::{crash_if_err, format_usage, help_about, help_usage}; @@ -334,20 +334,12 @@ impl<'a> State<'a> { key: usize, line_ending: LineEnding, print_unpaired: bool, - ) -> Result, JoinError> { + ) -> UResult> { let file_buf = if name == "-" { Box::new(stdin.lock()) as Box } else { - match File::open(name) { - Ok(file) => Box::new(BufReader::new(file)) as Box, - Err(err) => { - return Err(JoinError::UnorderedInput(format!( - "{}: {}", - name.maybe_quote(), - err - ))); - } - } + let file = File::open(name).map_err_context(|| format!("{}", name.maybe_quote()))?; + Box::new(BufReader::new(file)) as Box }; Ok(State { @@ -365,12 +357,7 @@ impl<'a> State<'a> { } /// Skip the current unpaired line. - fn skip_line( - &mut self, - writer: &mut impl Write, - input: &Input, - repr: &Repr, - ) -> Result<(), JoinError> { + fn skip_line(&mut self, writer: &mut impl Write, input: &Input, repr: &Repr) -> UResult<()> { if self.print_unpaired { self.print_first_line(writer, repr)?; } @@ -381,7 +368,7 @@ impl<'a> State<'a> { /// Keep reading line sequence until the key does not change, return /// the first line whose key differs. - fn extend(&mut self, input: &Input) -> Result, JoinError> { + fn extend(&mut self, input: &Input) -> UResult> { while let Some(line) = self.next_line(input)? { let diff = input.compare(self.get_current_key(), line.get_field(self.key)); @@ -490,12 +477,7 @@ impl<'a> State<'a> { 0 } - fn finalize( - &mut self, - writer: &mut impl Write, - input: &Input, - repr: &Repr, - ) -> Result<(), JoinError> { + fn finalize(&mut self, writer: &mut impl Write, input: &Input, repr: &Repr) -> UResult<()> { if self.has_line() { if self.print_unpaired { self.print_first_line(writer, repr)?; @@ -843,7 +825,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2", ) } -fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), JoinError> { +fn exec(file1: &str, file2: &str, settings: Settings) -> UResult<()> { let stdin = stdin(); let mut state1 = State::new( From 7ff4cb3f4e236724d8bdf0d3a83258cd5daeb228 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 16 Nov 2023 10:40:31 +0100 Subject: [PATCH 025/429] update of the license file to make it generic (#5545) --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 49fdbd4cf5f..21bd44404e3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) Jordi Boggiano and many others +Copyright (c) uutils developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in From 65dc70b55396c3a00b70143fc84cc7d96cc25539 Mon Sep 17 00:00:00 2001 From: cswn Date: Thu, 16 Nov 2023 12:02:39 +0100 Subject: [PATCH 026/429] join: remove match in uumain and return exec result --- src/uu/join/src/join.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index 9661138879b..423af983ec9 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -701,10 +701,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { return Err(USimpleError::new(1, "both files cannot be standard input")); } - match exec(file1, file2, settings) { - Ok(_) => Ok(()), - Err(e) => Err(USimpleError::new(1, format!("{e}"))), - } + exec(file1, file2, settings) } pub fn uu_app() -> Command { From e7d58f673ff9515af11169cd92b6b341d6e8c13f Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 13 Nov 2023 17:37:25 +0100 Subject: [PATCH 027/429] seq: simplify and use new printf implementation --- src/uu/seq/src/extendedbigdecimal.rs | 54 +---- src/uu/seq/src/extendedbigint.rs | 214 ------------------ src/uu/seq/src/number.rs | 70 +----- src/uu/seq/src/numberparse.rs | 127 ++++------- src/uu/seq/src/seq.rs | 167 +++----------- src/uucore/src/lib/features/format/mod.rs | 4 +- .../src/lib/features/format/num_format.rs | 24 +- src/uucore/src/lib/features/format/spec.rs | 13 +- 8 files changed, 104 insertions(+), 569 deletions(-) delete mode 100644 src/uu/seq/src/extendedbigint.rs diff --git a/src/uu/seq/src/extendedbigdecimal.rs b/src/uu/seq/src/extendedbigdecimal.rs index 388046ba368..ecd460ceb73 100644 --- a/src/uu/seq/src/extendedbigdecimal.rs +++ b/src/uu/seq/src/extendedbigdecimal.rs @@ -25,13 +25,8 @@ use std::fmt::Display; use std::ops::Add; use bigdecimal::BigDecimal; -use num_bigint::BigInt; -use num_bigint::ToBigInt; -use num_traits::One; use num_traits::Zero; -use crate::extendedbigint::ExtendedBigInt; - #[derive(Debug, Clone)] pub enum ExtendedBigDecimal { /// Arbitrary precision floating point number. @@ -72,53 +67,14 @@ pub enum ExtendedBigDecimal { Nan, } -/// The smallest integer greater than or equal to this number. -fn ceil(x: BigDecimal) -> BigInt { - if x.is_integer() { - // Unwrapping the Option because it always returns Some - x.to_bigint().unwrap() - } else { - (x + BigDecimal::one().half()).round(0).to_bigint().unwrap() - } -} - -/// The largest integer less than or equal to this number. -fn floor(x: BigDecimal) -> BigInt { - if x.is_integer() { - // Unwrapping the Option because it always returns Some - x.to_bigint().unwrap() - } else { - (x - BigDecimal::one().half()).round(0).to_bigint().unwrap() - } -} - impl ExtendedBigDecimal { - /// The smallest integer greater than or equal to this number. - pub fn ceil(self) -> ExtendedBigInt { - match self { - Self::BigDecimal(x) => ExtendedBigInt::BigInt(ceil(x)), - other => From::from(other), - } + #[cfg(test)] + pub fn zero() -> Self { + Self::BigDecimal(1.into()) } - /// The largest integer less than or equal to this number. - pub fn floor(self) -> ExtendedBigInt { - match self { - Self::BigDecimal(x) => ExtendedBigInt::BigInt(floor(x)), - other => From::from(other), - } - } -} - -impl From for ExtendedBigDecimal { - fn from(big_int: ExtendedBigInt) -> Self { - match big_int { - ExtendedBigInt::BigInt(n) => Self::BigDecimal(BigDecimal::from(n)), - ExtendedBigInt::Infinity => Self::Infinity, - ExtendedBigInt::MinusInfinity => Self::MinusInfinity, - ExtendedBigInt::MinusZero => Self::MinusZero, - ExtendedBigInt::Nan => Self::Nan, - } + pub fn one() -> Self { + Self::BigDecimal(1.into()) } } diff --git a/src/uu/seq/src/extendedbigint.rs b/src/uu/seq/src/extendedbigint.rs deleted file mode 100644 index 6828fba2df2..00000000000 --- a/src/uu/seq/src/extendedbigint.rs +++ /dev/null @@ -1,214 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. -// spell-checker:ignore bigint extendedbigint extendedbigdecimal -//! An arbitrary precision integer that can also represent infinity, NaN, etc. -//! -//! Usually infinity, NaN, and negative zero are only represented for -//! floating point numbers. The [`ExtendedBigInt`] enumeration provides -//! a representation of those things with the set of integers. The -//! finite values are stored as [`BigInt`] instances. -//! -//! # Examples -//! -//! Addition works for [`ExtendedBigInt`] as it does for floats. For -//! example, adding infinity to any finite value results in infinity: -//! -//! ```rust,ignore -//! let summand1 = ExtendedBigInt::BigInt(BigInt::zero()); -//! let summand2 = ExtendedBigInt::Infinity; -//! assert_eq!(summand1 + summand2, ExtendedBigInt::Infinity); -//! ``` -use std::cmp::Ordering; -use std::fmt::Display; -use std::ops::Add; - -use num_bigint::BigInt; -use num_bigint::ToBigInt; -use num_traits::One; -use num_traits::Zero; - -use crate::extendedbigdecimal::ExtendedBigDecimal; - -#[derive(Debug, Clone)] -pub enum ExtendedBigInt { - BigInt(BigInt), - Infinity, - MinusInfinity, - MinusZero, - Nan, -} - -impl ExtendedBigInt { - /// The integer number one. - pub fn one() -> Self { - // We would like to implement `num_traits::One`, but it requires - // a multiplication implementation, and we don't want to - // implement that here. - Self::BigInt(BigInt::one()) - } -} - -impl From for ExtendedBigInt { - fn from(big_decimal: ExtendedBigDecimal) -> Self { - match big_decimal { - // TODO When can this fail? - ExtendedBigDecimal::BigDecimal(x) => Self::BigInt(x.to_bigint().unwrap()), - ExtendedBigDecimal::Infinity => Self::Infinity, - ExtendedBigDecimal::MinusInfinity => Self::MinusInfinity, - ExtendedBigDecimal::MinusZero => Self::MinusZero, - ExtendedBigDecimal::Nan => Self::Nan, - } - } -} - -impl Display for ExtendedBigInt { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::BigInt(n) => n.fmt(f), - Self::Infinity => f32::INFINITY.fmt(f), - Self::MinusInfinity => f32::NEG_INFINITY.fmt(f), - Self::MinusZero => "-0".fmt(f), - Self::Nan => "nan".fmt(f), - } - } -} - -impl Zero for ExtendedBigInt { - fn zero() -> Self { - Self::BigInt(BigInt::zero()) - } - fn is_zero(&self) -> bool { - match self { - Self::BigInt(n) => n.is_zero(), - Self::MinusZero => true, - _ => false, - } - } -} - -impl Add for ExtendedBigInt { - type Output = Self; - - fn add(self, other: Self) -> Self { - match (self, other) { - (Self::BigInt(m), Self::BigInt(n)) => Self::BigInt(m.add(n)), - (Self::BigInt(_), Self::MinusInfinity) => Self::MinusInfinity, - (Self::BigInt(_), Self::Infinity) => Self::Infinity, - (Self::BigInt(_), Self::Nan) => Self::Nan, - (Self::BigInt(m), Self::MinusZero) => Self::BigInt(m), - (Self::Infinity, Self::BigInt(_)) => Self::Infinity, - (Self::Infinity, Self::Infinity) => Self::Infinity, - (Self::Infinity, Self::MinusZero) => Self::Infinity, - (Self::Infinity, Self::MinusInfinity) => Self::Nan, - (Self::Infinity, Self::Nan) => Self::Nan, - (Self::MinusInfinity, Self::BigInt(_)) => Self::MinusInfinity, - (Self::MinusInfinity, Self::MinusInfinity) => Self::MinusInfinity, - (Self::MinusInfinity, Self::MinusZero) => Self::MinusInfinity, - (Self::MinusInfinity, Self::Infinity) => Self::Nan, - (Self::MinusInfinity, Self::Nan) => Self::Nan, - (Self::Nan, _) => Self::Nan, - (Self::MinusZero, other) => other, - } - } -} - -impl PartialEq for ExtendedBigInt { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::BigInt(m), Self::BigInt(n)) => m.eq(n), - (Self::BigInt(_), Self::MinusInfinity) => false, - (Self::BigInt(_), Self::Infinity) => false, - (Self::BigInt(_), Self::Nan) => false, - (Self::BigInt(_), Self::MinusZero) => false, - (Self::Infinity, Self::BigInt(_)) => false, - (Self::Infinity, Self::Infinity) => true, - (Self::Infinity, Self::MinusZero) => false, - (Self::Infinity, Self::MinusInfinity) => false, - (Self::Infinity, Self::Nan) => false, - (Self::MinusInfinity, Self::BigInt(_)) => false, - (Self::MinusInfinity, Self::Infinity) => false, - (Self::MinusInfinity, Self::MinusZero) => false, - (Self::MinusInfinity, Self::MinusInfinity) => true, - (Self::MinusInfinity, Self::Nan) => false, - (Self::Nan, _) => false, - (Self::MinusZero, Self::BigInt(_)) => false, - (Self::MinusZero, Self::Infinity) => false, - (Self::MinusZero, Self::MinusZero) => true, - (Self::MinusZero, Self::MinusInfinity) => false, - (Self::MinusZero, Self::Nan) => false, - } - } -} - -impl PartialOrd for ExtendedBigInt { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (Self::BigInt(m), Self::BigInt(n)) => m.partial_cmp(n), - (Self::BigInt(_), Self::MinusInfinity) => Some(Ordering::Greater), - (Self::BigInt(_), Self::Infinity) => Some(Ordering::Less), - (Self::BigInt(_), Self::Nan) => None, - (Self::BigInt(m), Self::MinusZero) => m.partial_cmp(&BigInt::zero()), - (Self::Infinity, Self::BigInt(_)) => Some(Ordering::Greater), - (Self::Infinity, Self::Infinity) => Some(Ordering::Equal), - (Self::Infinity, Self::MinusZero) => Some(Ordering::Greater), - (Self::Infinity, Self::MinusInfinity) => Some(Ordering::Greater), - (Self::Infinity, Self::Nan) => None, - (Self::MinusInfinity, Self::BigInt(_)) => Some(Ordering::Less), - (Self::MinusInfinity, Self::Infinity) => Some(Ordering::Less), - (Self::MinusInfinity, Self::MinusZero) => Some(Ordering::Less), - (Self::MinusInfinity, Self::MinusInfinity) => Some(Ordering::Equal), - (Self::MinusInfinity, Self::Nan) => None, - (Self::Nan, _) => None, - (Self::MinusZero, Self::BigInt(n)) => BigInt::zero().partial_cmp(n), - (Self::MinusZero, Self::Infinity) => Some(Ordering::Less), - (Self::MinusZero, Self::MinusZero) => Some(Ordering::Equal), - (Self::MinusZero, Self::MinusInfinity) => Some(Ordering::Greater), - (Self::MinusZero, Self::Nan) => None, - } - } -} - -#[cfg(test)] -mod tests { - - use num_bigint::BigInt; - use num_traits::Zero; - - use crate::extendedbigint::ExtendedBigInt; - - #[test] - fn test_addition_infinity() { - let summand1 = ExtendedBigInt::BigInt(BigInt::zero()); - let summand2 = ExtendedBigInt::Infinity; - assert_eq!(summand1 + summand2, ExtendedBigInt::Infinity); - } - - #[test] - fn test_addition_minus_infinity() { - let summand1 = ExtendedBigInt::BigInt(BigInt::zero()); - let summand2 = ExtendedBigInt::MinusInfinity; - assert_eq!(summand1 + summand2, ExtendedBigInt::MinusInfinity); - } - - #[test] - fn test_addition_nan() { - let summand1 = ExtendedBigInt::BigInt(BigInt::zero()); - let summand2 = ExtendedBigInt::Nan; - let sum = summand1 + summand2; - match sum { - ExtendedBigInt::Nan => (), - _ => unreachable!(), - } - } - - #[test] - fn test_display() { - assert_eq!(format!("{}", ExtendedBigInt::BigInt(BigInt::zero())), "0"); - assert_eq!(format!("{}", ExtendedBigInt::MinusZero), "-0"); - assert_eq!(format!("{}", ExtendedBigInt::Infinity), "inf"); - assert_eq!(format!("{}", ExtendedBigInt::MinusInfinity), "-inf"); - assert_eq!(format!("{}", ExtendedBigInt::Nan), "nan"); - } -} diff --git a/src/uu/seq/src/number.rs b/src/uu/seq/src/number.rs index 85bc327ff46..4da1146eff6 100644 --- a/src/uu/seq/src/number.rs +++ b/src/uu/seq/src/number.rs @@ -12,70 +12,6 @@ use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; -use crate::extendedbigint::ExtendedBigInt; - -/// An integral or floating point number. -#[derive(Debug, PartialEq)] -pub enum Number { - Int(ExtendedBigInt), - Float(ExtendedBigDecimal), -} - -impl Number { - /// Decide whether this number is zero (either positive or negative). - pub fn is_zero(&self) -> bool { - // We would like to implement `num_traits::Zero`, but it - // requires an addition implementation, and we don't want to - // implement that here. - match self { - Self::Int(n) => n.is_zero(), - Self::Float(x) => x.is_zero(), - } - } - - /// Convert this number into an `ExtendedBigDecimal`. - pub fn into_extended_big_decimal(self) -> ExtendedBigDecimal { - match self { - Self::Int(n) => ExtendedBigDecimal::from(n), - Self::Float(x) => x, - } - } - - /// The integer number one. - pub fn one() -> Self { - // We would like to implement `num_traits::One`, but it requires - // a multiplication implementation, and we don't want to - // implement that here. - Self::Int(ExtendedBigInt::one()) - } - - /// Round this number towards the given other number. - /// - /// If `other` is greater, then round up. If `other` is smaller, - /// then round down. - pub fn round_towards(self, other: &ExtendedBigInt) -> ExtendedBigInt { - match self { - // If this number is already an integer, it is already - // rounded to the nearest integer in the direction of - // `other`. - Self::Int(num) => num, - // Otherwise, if this number is a float, we need to decide - // whether `other` is larger or smaller than it, and thus - // whether to round up or round down, respectively. - Self::Float(num) => { - let other: ExtendedBigDecimal = From::from(other.clone()); - if other > num { - num.ceil() - } else { - // If they are equal, then `self` is already an - // integer, so calling `floor()` does no harm and - // will just return that integer anyway. - num.floor() - } - } - } - } -} /// A number with a specified number of integer and fractional digits. /// @@ -87,13 +23,13 @@ impl Number { /// You can get an instance of this struct by calling [`str::parse`]. #[derive(Debug)] pub struct PreciseNumber { - pub number: Number, + pub number: ExtendedBigDecimal, pub num_integral_digits: usize, pub num_fractional_digits: usize, } impl PreciseNumber { - pub fn new(number: Number, num_integral_digits: usize, num_fractional_digits: usize) -> Self { + pub fn new(number: ExtendedBigDecimal, num_integral_digits: usize, num_fractional_digits: usize) -> Self { Self { number, num_integral_digits, @@ -106,7 +42,7 @@ impl PreciseNumber { // We would like to implement `num_traits::One`, but it requires // a multiplication implementation, and we don't want to // implement that here. - Self::new(Number::one(), 1, 0) + Self::new(ExtendedBigDecimal::one(), 1, 0) } /// Decide whether this number is zero (either positive or negative). diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 3f4b213955f..a82d1e88776 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -16,8 +16,6 @@ use num_traits::Num; use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; -use crate::extendedbigint::ExtendedBigInt; -use crate::number::Number; use crate::number::PreciseNumber; /// An error returned when parsing a number fails. @@ -29,8 +27,8 @@ pub enum ParseNumberError { } /// Decide whether a given string and its parsed `BigInt` is negative zero. -fn is_minus_zero_int(s: &str, n: &BigInt) -> bool { - s.starts_with('-') && n == &BigInt::zero() +fn is_minus_zero_int(s: &str, n: &BigDecimal) -> bool { + s.starts_with('-') && n == &BigDecimal::zero() } /// Decide whether a given string and its parsed `BigDecimal` is negative zero. @@ -53,19 +51,19 @@ fn is_minus_zero_float(s: &str, x: &BigDecimal) -> bool { /// assert_eq!(actual, expected); /// ``` fn parse_no_decimal_no_exponent(s: &str) -> Result { - match s.parse::() { + match s.parse::() { Ok(n) => { // If `s` is '-0', then `parse()` returns `BigInt::zero()`, // but we need to return `Number::MinusZeroInt` instead. if is_minus_zero_int(s, &n) { Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::MinusZero), + ExtendedBigDecimal::MinusZero, s.len(), 0, )) } else { Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::BigInt(n)), + ExtendedBigDecimal::BigDecimal(n), s.len(), 0, )) @@ -79,7 +77,7 @@ fn parse_no_decimal_no_exponent(s: &str) -> Result return Err(ParseNumberError::Nan), _ => return Err(ParseNumberError::Float), }; - Ok(PreciseNumber::new(Number::Float(float_val), 0, 0)) + Ok(PreciseNumber::new(float_val, 0, 0)) } } } @@ -125,13 +123,13 @@ fn parse_exponent_no_decimal(s: &str, j: usize) -> Result Result() + .parse::() .map_err(|_| ParseNumberError::Float)?; Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::BigInt(n)), + ExtendedBigDecimal::BigDecimal(n), num_integral_digits, num_fractional_digits, )) } } else if is_minus_zero_float(s, &val) { Ok(PreciseNumber::new( - Number::Float(ExtendedBigDecimal::MinusZero), + ExtendedBigDecimal::MinusZero, num_integral_digits, num_fractional_digits, )) } else { Ok(PreciseNumber::new( - Number::Float(ExtendedBigDecimal::BigDecimal(val)), + ExtendedBigDecimal::BigDecimal(val), num_integral_digits, num_fractional_digits, )) @@ -303,20 +301,17 @@ fn parse_hexadecimal(s: &str) -> Result { } let num = BigInt::from_str_radix(s, 16).map_err(|_| ParseNumberError::Hex)?; + let num = BigDecimal::from(num); - match (is_neg, num == BigInt::zero()) { - (true, true) => Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::MinusZero), - 2, - 0, - )), + match (is_neg, num == BigDecimal::zero()) { + (true, true) => Ok(PreciseNumber::new(ExtendedBigDecimal::MinusZero, 2, 0)), (true, false) => Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::BigInt(-num)), + ExtendedBigDecimal::BigDecimal(-num), 0, 0, )), (false, _) => Ok(PreciseNumber::new( - Number::Int(ExtendedBigInt::BigInt(num)), + ExtendedBigDecimal::BigDecimal(num), 0, 0, )), @@ -364,19 +359,14 @@ impl FromStr for PreciseNumber { #[cfg(test)] mod tests { - use bigdecimal::BigDecimal; - use num_bigint::BigInt; - use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; - use crate::extendedbigint::ExtendedBigInt; - use crate::number::Number; use crate::number::PreciseNumber; use crate::numberparse::ParseNumberError; /// Convenience function for parsing a [`Number`] and unwrapping. - fn parse(s: &str) -> Number { + fn parse(s: &str) -> ExtendedBigDecimal { s.parse::().unwrap().number } @@ -392,40 +382,37 @@ mod tests { #[test] fn test_parse_minus_zero_int() { - assert_eq!(parse("-0e0"), Number::Int(ExtendedBigInt::MinusZero)); - assert_eq!(parse("-0e-0"), Number::Int(ExtendedBigInt::MinusZero)); - assert_eq!(parse("-0e1"), Number::Int(ExtendedBigInt::MinusZero)); - assert_eq!(parse("-0e+1"), Number::Int(ExtendedBigInt::MinusZero)); - assert_eq!(parse("-0.0e1"), Number::Int(ExtendedBigInt::MinusZero)); - assert_eq!(parse("-0x0"), Number::Int(ExtendedBigInt::MinusZero)); + assert_eq!(parse("-0e0"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0e-0"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0e1"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0e+1"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0.0e1"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0x0"), ExtendedBigDecimal::MinusZero); } #[test] fn test_parse_minus_zero_float() { - assert_eq!(parse("-0.0"), Number::Float(ExtendedBigDecimal::MinusZero)); - assert_eq!(parse("-0e-1"), Number::Float(ExtendedBigDecimal::MinusZero)); - assert_eq!( - parse("-0.0e-1"), - Number::Float(ExtendedBigDecimal::MinusZero) - ); + assert_eq!(parse("-0.0"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0e-1"), ExtendedBigDecimal::MinusZero); + assert_eq!(parse("-0.0e-1"), ExtendedBigDecimal::MinusZero); } #[test] fn test_parse_big_int() { - assert_eq!(parse("0"), Number::Int(ExtendedBigInt::zero())); - assert_eq!(parse("0.1e1"), Number::Int(ExtendedBigInt::one())); + assert_eq!(parse("0"), ExtendedBigDecimal::zero()); + assert_eq!(parse("0.1e1"), ExtendedBigDecimal::one()); assert_eq!( parse("1.0e1"), - Number::Int(ExtendedBigInt::BigInt("10".parse::().unwrap())) + ExtendedBigDecimal::BigDecimal("10".parse::().unwrap()) ); } #[test] fn test_parse_hexadecimal_big_int() { - assert_eq!(parse("0x0"), Number::Int(ExtendedBigInt::zero())); + assert_eq!(parse("0x0"), ExtendedBigDecimal::zero()); assert_eq!( parse("0x10"), - Number::Int(ExtendedBigInt::BigInt("16".parse::().unwrap())) + ExtendedBigDecimal::BigDecimal("16".parse::().unwrap()) ); } @@ -433,56 +420,34 @@ mod tests { fn test_parse_big_decimal() { assert_eq!( parse("0.0"), - Number::Float(ExtendedBigDecimal::BigDecimal( - "0.0".parse::().unwrap() - )) + ExtendedBigDecimal::BigDecimal("0.0".parse::().unwrap()) ); assert_eq!( parse(".0"), - Number::Float(ExtendedBigDecimal::BigDecimal( - "0.0".parse::().unwrap() - )) + ExtendedBigDecimal::BigDecimal("0.0".parse::().unwrap()) ); assert_eq!( parse("1.0"), - Number::Float(ExtendedBigDecimal::BigDecimal( - "1.0".parse::().unwrap() - )) + ExtendedBigDecimal::BigDecimal("1.0".parse::().unwrap()) ); assert_eq!( parse("10e-1"), - Number::Float(ExtendedBigDecimal::BigDecimal( - "1.0".parse::().unwrap() - )) + ExtendedBigDecimal::BigDecimal("1.0".parse::().unwrap()) ); assert_eq!( parse("-1e-3"), - Number::Float(ExtendedBigDecimal::BigDecimal( - "-0.001".parse::().unwrap() - )) + ExtendedBigDecimal::BigDecimal("-0.001".parse::().unwrap()) ); } #[test] fn test_parse_inf() { - assert_eq!(parse("inf"), Number::Float(ExtendedBigDecimal::Infinity)); - assert_eq!( - parse("infinity"), - Number::Float(ExtendedBigDecimal::Infinity) - ); - assert_eq!(parse("+inf"), Number::Float(ExtendedBigDecimal::Infinity)); - assert_eq!( - parse("+infinity"), - Number::Float(ExtendedBigDecimal::Infinity) - ); - assert_eq!( - parse("-inf"), - Number::Float(ExtendedBigDecimal::MinusInfinity) - ); - assert_eq!( - parse("-infinity"), - Number::Float(ExtendedBigDecimal::MinusInfinity) - ); + assert_eq!(parse("inf"), ExtendedBigDecimal::Infinity); + assert_eq!(parse("infinity"), ExtendedBigDecimal::Infinity); + assert_eq!(parse("+inf"), ExtendedBigDecimal::Infinity); + assert_eq!(parse("+infinity"), ExtendedBigDecimal::Infinity); + assert_eq!(parse("-inf"), ExtendedBigDecimal::MinusInfinity); + assert_eq!(parse("-infinity"), ExtendedBigDecimal::MinusInfinity); } #[test] diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index bb4d5414ef8..a987405ce15 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -3,24 +3,21 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse -use std::io::{stdout, Write}; +use std::io::{stdout, ErrorKind, Write}; use clap::{crate_version, Arg, ArgAction, Command}; -use num_traits::{Zero, ToPrimitive}; +use num_traits::{ToPrimitive, Zero}; -use uucore::error::UResult; -use uucore::format::{printf, FormatArgument, Format, num_format}; +use uucore::error::{FromIo, UResult}; +use uucore::format::{num_format, Format}; use uucore::{format_usage, help_about, help_usage}; mod error; mod extendedbigdecimal; -mod extendedbigint; mod number; mod numberparse; use crate::error::SeqError; use crate::extendedbigdecimal::ExtendedBigDecimal; -use crate::extendedbigint::ExtendedBigInt; -use crate::number::Number; use crate::number::PreciseNumber; const ABOUT: &str = help_about!("seq.md"); @@ -41,11 +38,6 @@ struct SeqOptions<'a> { format: Option<&'a str>, } -/// A range of integers. -/// -/// The elements are (first, increment, last). -type RangeInt = (ExtendedBigInt, ExtendedBigInt, ExtendedBigInt); - /// A range of floats. /// /// The elements are (first, increment, last). @@ -116,53 +108,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .num_fractional_digits .max(increment.num_fractional_digits); - let result = match (first.number, increment.number, last.number) { - (Number::Int(first), Number::Int(increment), last) => { - let last = last.round_towards(&first); - let format = match options.format { - Some(f) => { - let f = Format::::parse(f)?; - Some(f) - } - None => None, - }; - print_seq_integers( - (first, increment, last), - &options.separator, - &options.terminator, - options.equal_width, - padding, - format, - ) - } - (first, increment, last) => { - let format = match options.format { - Some(f) => { - let f = Format::::parse(f)?; - Some(f) - } - None => None, - }; - print_seq( - ( - first.into_extended_big_decimal(), - increment.into_extended_big_decimal(), - last.into_extended_big_decimal(), - ), - largest_dec, - &options.separator, - &options.terminator, - options.equal_width, - padding, - format, - ) + let format = match options.format { + Some(f) => { + let f = Format::::parse(f)?; + Some(f) } + None => None, }; + let result = print_seq( + (first.number, increment.number, last.number), + largest_dec, + &options.separator, + &options.terminator, + options.equal_width, + padding, + format, + ); match result { Ok(_) => Ok(()), - _ => todo!(), - // Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), - // Err(e) => Err(e.map_err_context(|| "write error".into())), + Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), + Err(e) => Err(e.map_err_context(|| "write error".into())), } } @@ -230,28 +195,6 @@ fn write_value_float( write!(writer, "{value_as_str}") } -/// Write a big int formatted according to the given parameters. -fn write_value_int( - writer: &mut impl Write, - value: &ExtendedBigInt, - width: usize, - pad: bool, -) -> std::io::Result<()> { - let value_as_str = if pad { - if *value == ExtendedBigInt::MinusZero { - format!("{value:00width$}") - } - } else { - format!("{value}") - }; - write!(writer, "{value_as_str}") -} - -// TODO `print_seq()` and `print_seq_integers()` are nearly identical, -// they could be refactored into a single more general function. - /// Floating point based code path fn print_seq( range: RangeFloat, @@ -261,12 +204,16 @@ fn print_seq( pad: bool, padding: usize, format: Option>, -) -> UResult<()> { +) -> std::io::Result<()> { let stdout = stdout(); let mut stdout = stdout.lock(); let (first, increment, last) = range; let mut value = first; - let padding = if pad { padding + 1 + largest_dec } else { 0 }; + let padding = if pad { + padding + if largest_dec > 0 { largest_dec + 1 } else { 0 } + } else { + 0 + }; let mut is_first_iteration = true; while !done_printing(&value, &increment, &last) { if !is_first_iteration { @@ -307,65 +254,3 @@ fn print_seq( stdout.flush()?; Ok(()) } - -/// Print an integer sequence. -/// -/// This function prints a sequence of integers defined by `range`, -/// which defines the first integer, last integer, and increment of the -/// range. The `separator` is inserted between each integer and -/// `terminator` is inserted at the end. -/// -/// The `pad` parameter indicates whether to pad numbers to the width -/// given in `padding`. -/// -/// If `is_first_minus_zero` is `true`, then the `first` parameter is -/// printed as if it were negative zero, even though no such number -/// exists as an integer (negative zero only exists for floating point -/// numbers). Only set this to `true` if `first` is actually zero. -fn print_seq_integers( - range: RangeInt, - separator: &str, - terminator: &str, - pad: bool, - padding: usize, - format: Option>, -) -> UResult<()> { - let stdout = stdout(); - let mut stdout = stdout.lock(); - let (first, increment, last) = range; - let mut value = first; - let mut is_first_iteration = true; - while !done_printing(&value, &increment, &last) { - if !is_first_iteration { - write!(stdout, "{separator}")?; - } - // If there was an argument `-f FORMAT`, then use that format - // template instead of the default formatting strategy. - // - // The `printf()` function takes in the template and - // the current value and writes the result to `stdout`. - // - // TODO See similar comment about formatting in `print_seq()`. - match &format { - Some(f) => { - let int = match &value { - ExtendedBigInt::BigInt(bi) => bi.to_i64().unwrap(), - ExtendedBigInt::Infinity => todo!(), - ExtendedBigInt::MinusInfinity => todo!(), - ExtendedBigInt::MinusZero => todo!(), - ExtendedBigInt::Nan => todo!(), - }; - f.fmt(&mut stdout, int)?; - } - None => write_value_int(&mut stdout, &value, padding, pad)?, - } - // TODO Implement augmenting addition. - value = value + increment.clone(); - is_first_iteration = false; - } - - if !is_first_iteration { - write!(stdout, "{terminator}")?; - } - Ok(()) -} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 48151be9892..d6500b20c5a 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -115,7 +115,7 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator { let spec = match Spec::parse(&mut rest) { Some(spec) => spec, - None => return Some(Err(FormatError::SpecError)), + None => return Some(Err(dbg!(FormatError::SpecError))), }; Some(Ok(FormatItem::Spec(spec))) } @@ -230,7 +230,7 @@ impl Format { for item in &mut iter { match item? { FormatItem::Spec(_) => { - return Err(FormatError::SpecError); + return Err(dbg!(FormatError::SpecError)); } FormatItem::Text(t) => suffix.extend_from_slice(&t), FormatItem::Char(c) => suffix.push(c), diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index fd010bdc029..046249a13d2 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -13,14 +13,14 @@ pub trait Formatter { Self: Sized; } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum UnsignedIntVariant { Decimal, Octal(Prefix), Hexadecimal(Case, Prefix), } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum FloatVariant { Decimal, @@ -29,32 +29,32 @@ pub enum FloatVariant { Hexadecimal, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Case { Lowercase, Uppercase, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Prefix { No, Yes, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ForceDecimal { No, Yes, } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum PositiveSign { None, Plus, Space, } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum NumberAlignment { Left, RightSpace, @@ -93,7 +93,7 @@ impl Formatter for SignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(dbg!(FormatError::SpecError)); }; let width = match width { @@ -152,7 +152,7 @@ impl Formatter for UnsignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(dbg!(FormatError::SpecError)); }; let width = match width { @@ -241,19 +241,19 @@ impl Formatter for Float { precision, } = s else { - return Err(FormatError::SpecError); + return Err(dbg!(FormatError::SpecError)); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), }; Ok(Self { diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 9c53669fa9c..abc9b7a875a 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -9,6 +9,7 @@ use super::{ }; use std::{fmt::Display, io::Write}; +#[derive(Debug)] pub enum Spec { Char { width: Option>, @@ -41,7 +42,7 @@ pub enum Spec { /// Precision and width specified might use an asterisk to indicate that they are /// determined by an argument. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum CanAsterisk { Fixed(T), Asterisk, @@ -99,6 +100,7 @@ impl Spec { let width = eat_asterisk_or_number(rest); let precision = if let Some(b'.') = rest.get(0) { + *rest = &rest[1..]; Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0))) } else { None @@ -134,7 +136,9 @@ impl Spec { *rest = &rest[1..]; } - Some(match rest.get(0)? { + let type_spec = rest.get(0)?; + *rest = &rest[1..]; + Some(match type_spec { b'c' => Spec::Char { width, align_left: minus, @@ -208,7 +212,10 @@ impl Spec { (false, false) => PositiveSign::None, }, }, - _ => return None, + x => { + dbg!("{:b}", x); + return dbg!(None) + }, }) } From 2f9fcf73faad9d60db6f08c2e9ecd57fa845b0bd Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 16 Nov 2023 16:02:38 +0100 Subject: [PATCH 028/429] clippy: fix warnings introduced by Rust 1.74 --- src/uu/more/src/more.rs | 2 +- tests/by-util/test_dd.rs | 4 ++-- tests/by-util/test_ls.rs | 6 +++--- tests/by-util/test_users.rs | 6 ++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 02ed0feea20..b21b2ab1f35 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -88,7 +88,7 @@ impl Options { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let args = args.collect_lossy(); - let matches = match uu_app().try_get_matches_from(&args) { + let matches = match uu_app().try_get_matches_from(args) { Ok(m) => m, Err(e) => return Err(e.into()), }; diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index f560e35261b..d5ac8dc801c 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -1470,7 +1470,7 @@ fn test_seek_output_fifo() { .args(&["count=0", "seek=1", "of=fifo", "status=noxfer"]) .run_no_wait(); - std::fs::write(at.plus("fifo"), &vec![0; 512]).unwrap(); + std::fs::write(at.plus("fifo"), vec![0; 512]).unwrap(); child .wait() @@ -1492,7 +1492,7 @@ fn test_skip_input_fifo() { .args(&["count=0", "skip=1", "if=fifo", "status=noxfer"]) .run_no_wait(); - std::fs::write(at.plus("fifo"), &vec![0; 512]).unwrap(); + std::fs::write(at.plus("fifo"), vec![0; 512]).unwrap(); child .wait() diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index cdd0292e1f2..07ea8c9cd63 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -994,9 +994,9 @@ fn test_ls_long() { fn test_ls_long_format() { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; - at.mkdir(&at.plus_as_string("test-long-dir")); + at.mkdir(at.plus_as_string("test-long-dir")); at.touch(at.plus_as_string("test-long-dir/test-long-file")); - at.mkdir(&at.plus_as_string("test-long-dir/test-long-dir")); + at.mkdir(at.plus_as_string("test-long-dir/test-long-dir")); for arg in LONG_ARGS { // Assuming sane username do not have spaces within them. @@ -1971,7 +1971,7 @@ fn test_ls_color() { .join("nested_dir") .to_string_lossy() .to_string(); - at.mkdir(&nested_dir); + at.mkdir(nested_dir); at.mkdir("z"); let nested_file = Path::new("a") .join("nested_file") diff --git a/tests/by-util/test_users.rs b/tests/by-util/test_users.rs index 766378a9dca..3d87aa9d068 100644 --- a/tests/by-util/test_users.rs +++ b/tests/by-util/test_users.rs @@ -21,11 +21,9 @@ fn test_users_check_name() { #[cfg(target_os = "linux")] let util_name = util_name!(); #[cfg(target_vendor = "apple")] - let util_name = format!("g{}", util_name!()); + let util_name = &format!("g{}", util_name!()); - // note: clippy::needless_borrow *false positive* - #[allow(clippy::needless_borrow)] - let expected = TestScenario::new(&util_name) + let expected = TestScenario::new(util_name) .cmd(util_name) .env("LC_ALL", "C") .succeeds() From eaf500637900a47c4e00497aaccccf3d6d7dd5c8 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 16 Nov 2023 17:00:41 +0100 Subject: [PATCH 029/429] printf: parse arguments and handle escape codes --- src/uu/printf/src/printf.rs | 23 ++- .../src/lib/features/format/argument.rs | 60 ++++++ src/uucore/src/lib/features/format/escape.rs | 100 ++++++++++ src/uucore/src/lib/features/format/mod.rs | 186 +++++++++++------- .../src/lib/features/format/num_format.rs | 10 +- src/uucore/src/lib/features/format/spec.rs | 29 ++- 6 files changed, 317 insertions(+), 91 deletions(-) create mode 100644 src/uucore/src/lib/features/format/argument.rs create mode 100644 src/uucore/src/lib/features/format/escape.rs diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 6e270ec2645..00d03816e4c 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -6,9 +6,12 @@ // spell-checker:ignore (change!) each's // spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr +use std::io::stdout; +use std::ops::ControlFlow; + use clap::{crate_version, Arg, ArgAction, Command}; use uucore::error::{UResult, UUsageError}; -use uucore::format::{printf, FormatArgument}; +use uucore::format::{parse_spec_and_escape, FormatArgument}; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; @@ -30,12 +33,28 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format_string = matches .get_one::(options::FORMATSTRING) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), None => vec![], }; - printf(format_string, &values)?; + let mut args = values.iter().peekable(); + for item in parse_spec_and_escape(format_string.as_ref()) { + match item?.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => break, + }; + } + + while args.peek().is_some() { + for item in parse_spec_and_escape(format_string.as_ref()) { + match item?.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => break, + }; + } + } Ok(()) } diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs new file mode 100644 index 00000000000..007f519c2a4 --- /dev/null +++ b/src/uucore/src/lib/features/format/argument.rs @@ -0,0 +1,60 @@ +#[derive(Clone, Debug)] +pub enum FormatArgument { + Char(char), + String(String), + UnsignedInt(u64), + SignedInt(i64), + Float(f64), + /// Special argument that gets coerced into the other variants + Unparsed(String), +} + +impl FormatArgument { + pub fn get_char(&self) -> Option { + match self { + Self::Char(c) => Some(*c), + Self::Unparsed(s) => { + let mut chars = s.chars(); + let Some(c) = chars.next() else { + return None; + }; + let None = chars.next() else { + return None; + }; + Some(c) + } + _ => None, + } + } + + pub fn get_u64(&self) -> Option { + match self { + Self::UnsignedInt(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_i64(&self) -> Option { + match self { + Self::SignedInt(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_f64(&self) -> Option { + match self { + Self::Float(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_str(&self) -> Option<&str> { + match self { + Self::Unparsed(s) | Self::String(s) => Some(s), + _ => None, + } + } +} \ No newline at end of file diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs new file mode 100644 index 00000000000..b8c21741caf --- /dev/null +++ b/src/uucore/src/lib/features/format/escape.rs @@ -0,0 +1,100 @@ +#[derive(Debug)] +pub enum EscapedChar { + Char(u8), + Backslash(u8), + End, +} + +#[repr(u8)] +#[derive(Clone, Copy)] +enum Base { + Oct = 8, + Hex = 16, +} + +impl Base { + fn max_digits(&self) -> u8 { + match self { + Self::Oct => 3, + Self::Hex => 2, + } + } + + fn to_digit(&self, c: u8) -> Option { + match self { + Base::Oct => { + if matches!(c, b'0'..=b'7') { + Some(c - b'0') + } else { + None + } + } + Base::Hex => match c { + b'0'..=b'9' => Some(c - b'0'), + b'A'..=b'F' => Some(c - b'A' + 10), + b'a'..=b'f' => Some(c - b'a' + 10), + _ => None, + }, + } + } +} + +/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences +fn parse_code(input: &mut &[u8], base: Base) -> Option { + // All arithmetic on `ret` needs to be wrapping, because octal input can + // take 3 digits, which is 9 bits, and therefore more than what fits in a + // `u8`. GNU just seems to wrap these values. + // Note that if we instead make `ret` a `u32` and use `char::from_u32` will + // yield incorrect results because it will interpret values larger than + // `u8::MAX` as unicode. + let [c, rest @ ..] = input else { return None }; + let mut ret = base.to_digit(*c)?; + *input = &rest[..]; + + for _ in 1..base.max_digits() { + let [c, rest @ ..] = input else { break }; + let Some(n) = base.to_digit(*c) else { break }; + ret = ret.wrapping_mul(base as u8).wrapping_add(n); + *input = &rest[..]; + } + + Some(ret) +} + +pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { + if let [c, new_rest @ ..] = rest { + // This is for the \NNN syntax for octal sequences. + // Note that '0' is intentionally omitted because that + // would be the \0NNN syntax. + if let b'1'..=b'7' = c { + if let Some(parsed) = parse_code(rest, Base::Oct) { + return EscapedChar::Char(parsed); + } + } + + *rest = &new_rest[..]; + match c { + b'\\' => EscapedChar::Char(b'\\'), + b'a' => EscapedChar::Char(b'\x07'), + b'b' => EscapedChar::Char(b'\x08'), + b'c' => return EscapedChar::End, + b'e' => EscapedChar::Char(b'\x1b'), + b'f' => EscapedChar::Char(b'\x0c'), + b'n' => EscapedChar::Char(b'\n'), + b'r' => EscapedChar::Char(b'\r'), + b't' => EscapedChar::Char(b'\t'), + b'v' => EscapedChar::Char(b'\x0b'), + b'x' => { + if let Some(c) = parse_code(rest, Base::Hex) { + EscapedChar::Char(c) + } else { + EscapedChar::Backslash(b'x') + } + } + b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')), + c => EscapedChar::Backslash(*c), + } + } else { + EscapedChar::Char(b'\\') + } +} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index d6500b20c5a..8fa8d0717e1 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -8,8 +8,19 @@ //! [`Format`] struct, which represents a parsed format string. This reduces //! the need for parsing a format string multiple times and assures that no //! parsing errors occur during writing. +//! +//! There are three kinds of parsing that we might want to do: +//! +//! 1. Only `printf` specifiers (for e.g. `seq`, `dd`) +//! 2. Only escape sequences (for e.g. `echo`) +//! 3. Both `printf` specifiers and escape sequences (for e.g. `printf`) +//! +//! This module aims to combine all three use cases. + // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety +mod escape; +mod argument; pub mod num_format; mod spec; @@ -18,11 +29,16 @@ use std::{ error::Error, fmt::Display, io::{stdout, Write}, + ops::ControlFlow, }; +pub use argument::*; use crate::error::UError; -use self::num_format::Formatter; +use self::{ + escape::{parse_escape_code, EscapedChar}, + num_format::Formatter, +}; #[derive(Debug)] pub enum FormatError { @@ -54,80 +70,116 @@ impl Display for FormatError { } /// A single item to format -enum FormatItem { +pub enum FormatItem { /// A format specifier Spec(Spec), - /// Some plain text - Text(Vec), /// A single character - /// - /// Added in addition to `Text` as an optimization. - Char(u8), + Char(C), +} + +pub trait FormatChar { + fn write(&self, writer: impl Write) -> std::io::Result>; +} + +impl FormatChar for u8 { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + writer.write(&[*self])?; + Ok(ControlFlow::Continue(())) + } } -#[derive(Clone, Debug)] -pub enum FormatArgument { - Char(char), - String(String), - UnsignedInt(u64), - SignedInt(i64), - Float(f64), - // Special argument that gets coerced into the other variants - Unparsed(String), +impl FormatChar for EscapedChar { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + match self { + EscapedChar::Char(c) => { + writer.write(&[*c])?; + } + EscapedChar::Backslash(c) => { + writer.write(&[b'\\', *c])?; + } + EscapedChar::End => return Ok(ControlFlow::Break(())), + } + Ok(ControlFlow::Continue(())) + } } -impl FormatItem { - fn write<'a>( +impl FormatItem { + pub fn write<'a>( &self, - mut writer: impl Write, + writer: impl Write, args: &mut impl Iterator, - ) -> Result<(), FormatError> { + ) -> Result, FormatError> { match self { - FormatItem::Spec(spec) => spec.write(writer, args), - FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), - FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError), - } + FormatItem::Spec(spec) => spec.write(writer, args)?, + FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError), + }; + Ok(ControlFlow::Continue(())) } } -fn parse_iter(fmt: &[u8]) -> impl Iterator> + '_ { - let mut rest = fmt; - std::iter::from_fn(move || { - if rest.is_empty() { - return None; +pub fn parse_spec_and_escape( + fmt: &[u8], +) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Char(b'%')))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) } + [b'\\', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(parse_escape_code(&mut current)))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Char(*c)))) + } + }) +} - match rest.iter().position(|c| *c == b'%') { - None => { - let final_text = rest; - rest = &[]; - Some(Ok(FormatItem::Text(final_text.into()))) - } - Some(0) => { - // Handle the spec - rest = &rest[1..]; - match rest.get(0) { - None => Some(Ok(FormatItem::Char(b'%'))), - Some(b'%') => { - rest = &rest[1..]; - Some(Ok(FormatItem::Char(b'%'))) - } - Some(_) => { - let spec = match Spec::parse(&mut rest) { - Some(spec) => spec, - None => return Some(Err(dbg!(FormatError::SpecError))), - }; - Some(Ok(FormatItem::Spec(spec))) - } - } - } - Some(i) => { - // The `after` slice includes the % so it will be handled correctly - // in the next iteration. - let (before, after) = rest.split_at(i); - rest = after; - return Some(Ok(FormatItem::Text(before.into()))); - } +fn parse_spec_only(fmt: &[u8]) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(b'%'))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(*c))) + } + }) +} + +fn parse_escape_only(fmt: &[u8]) -> impl Iterator> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'\\', rest @ ..] => { + current = rest; + Some(Ok(parse_escape_code(&mut current))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(EscapedChar::Char(*c))) } }) } @@ -144,7 +196,7 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator( @@ -160,7 +212,7 @@ fn printf_writer<'a>( args: impl IntoIterator, ) -> Result<(), FormatError> { let mut args = args.into_iter(); - for item in parse_iter(format_string.as_ref()) { + for item in parse_spec_only(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } Ok(()) @@ -191,10 +243,10 @@ pub fn sprintf<'a>( } /// A parsed format for a single float value -/// +/// /// This is used by `seq`. It can be constructed with [`FloatFormat::parse`] /// and can write a value with [`FloatFormat::fmt`]. -/// +/// /// It can only accept a single specification without any asterisk parameters. /// If it does get more specifications, it will return an error. pub struct Format { @@ -205,7 +257,7 @@ pub struct Format { impl Format { pub fn parse(format_string: impl AsRef<[u8]>) -> Result { - let mut iter = parse_iter(format_string.as_ref()); + let mut iter = parse_spec_only(format_string.as_ref()); let mut prefix = Vec::new(); let mut spec = None; @@ -215,7 +267,6 @@ impl Format { spec = Some(s); break; } - FormatItem::Text(t) => prefix.extend_from_slice(&t), FormatItem::Char(c) => prefix.push(c), } } @@ -230,9 +281,8 @@ impl Format { for item in &mut iter { match item? { FormatItem::Spec(_) => { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); } - FormatItem::Text(t) => suffix.extend_from_slice(&t), FormatItem::Char(c) => suffix.push(c), } } diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 046249a13d2..339b522091d 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -93,7 +93,7 @@ impl Formatter for SignedInt { alignment, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { @@ -152,7 +152,7 @@ impl Formatter for UnsignedInt { alignment, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { @@ -241,19 +241,19 @@ impl Formatter for Float { precision, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; Ok(Self { diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index abc9b7a875a..258005bb5e6 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -212,10 +212,7 @@ impl Spec { (false, false) => PositiveSign::None, }, }, - x => { - dbg!("{:b}", x); - return dbg!(None) - }, + _ => return None, }) } @@ -228,16 +225,16 @@ impl Spec { &Spec::Char { width, align_left } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - match arg { - FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), + match arg.get_char() { + Some(c) => write_padded(writer, c, width, false, align_left), _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::String { width, align_left } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - match arg { - FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), + match arg.get_str() { + Some(s) => write_padded(writer, s, width, false, align_left), _ => Err(FormatError::InvalidArgument(arg.clone())), } } @@ -249,7 +246,7 @@ impl Spec { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - let FormatArgument::SignedInt(i) = arg else { + let Some(i) = arg.get_i64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -258,7 +255,7 @@ impl Spec { positive_sign, alignment, } - .fmt(writer, *i) + .fmt(writer, i) .map_err(FormatError::IoError) } &Spec::UnsignedInt { @@ -269,7 +266,7 @@ impl Spec { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(args)?; - let FormatArgument::UnsignedInt(i) = arg else { + let Some(i) = arg.get_u64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -278,7 +275,7 @@ impl Spec { width, alignment, } - .fmt(writer, *i) + .fmt(writer, i) .map_err(FormatError::IoError) } &Spec::Float { @@ -294,7 +291,7 @@ impl Spec { let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); let arg = next_arg(args)?; - let FormatArgument::Float(f) = arg else { + let Some(f) = arg.get_f64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -307,7 +304,7 @@ impl Spec { alignment, precision, } - .fmt(writer, *f) + .fmt(writer, f) .map_err(FormatError::IoError) } } @@ -322,8 +319,8 @@ fn resolve_asterisk<'a>( None => None, Some(CanAsterisk::Asterisk) => { let arg = next_arg(args)?; - match arg { - FormatArgument::UnsignedInt(u) => match usize::try_from(*u) { + match arg.get_u64() { + Some(u) => match usize::try_from(u) { Ok(u) => Some(u), Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), }, From a45ff8ca73060939983379f5d973ac3da4c75330 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:39:39 +0100 Subject: [PATCH 030/429] printf: more flexible parsing of unparsed arguments --- .../src/lib/features/format/argument.rs | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 007f519c2a4..644546c38b7 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -30,23 +30,51 @@ impl FormatArgument { pub fn get_u64(&self) -> Option { match self { Self::UnsignedInt(n) => Some(*n), - Self::Unparsed(s) => s.parse().ok(), + Self::Unparsed(s) => { + if let Some(s) = s.strip_prefix("0x") { + u64::from_str_radix(s, 16).ok() + } else if let Some(s) = s.strip_prefix("0") { + u64::from_str_radix(s, 8).ok() + } else if let Some(s) = s.strip_prefix('\'') { + Some(s.chars().next()? as u64) + } else { + s.parse().ok() + } + } _ => None, } } - + pub fn get_i64(&self) -> Option { match self { Self::SignedInt(n) => Some(*n), - Self::Unparsed(s) => s.parse().ok(), + Self::Unparsed(s) => { + if let Some(s) = s.strip_prefix("0x") { + i64::from_str_radix(s, 16).ok() + } else if let Some(s) = s.strip_prefix("0") { + i64::from_str_radix(s, 8).ok() + } else if let Some(s) = s.strip_prefix('\'') { + Some(s.chars().next()? as i64) + } else { + s.parse().ok() + } + } _ => None, } } - + pub fn get_f64(&self) -> Option { match self { Self::Float(n) => Some(*n), - Self::Unparsed(s) => s.parse().ok(), + Self::Unparsed(s) => { + if s.starts_with("0x") || s.starts_with("-0x") { + unimplemented!("Hexadecimal floats are unimplemented!") + } else if let Some(s) = s.strip_prefix('\'') { + Some(s.chars().next()? as u64 as f64) + } else { + s.parse().ok() + } + } _ => None, } } @@ -57,4 +85,4 @@ impl FormatArgument { _ => None, } } -} \ No newline at end of file +} From cd0c24af07d1412a746c5dbfe8a3df0a8cb56191 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:41:14 +0100 Subject: [PATCH 031/429] printf: implement %b --- src/uucore/src/lib/features/format/mod.rs | 6 +-- src/uucore/src/lib/features/format/spec.rs | 43 +++++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 8fa8d0717e1..7417d48fad5 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -169,17 +169,17 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo }) } -fn parse_escape_only(fmt: &[u8]) -> impl Iterator> + '_ { +fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => return None, [b'\\', rest @ ..] => { current = rest; - Some(Ok(parse_escape_code(&mut current))) + Some(parse_escape_code(&mut current)) } [c, rest @ ..] => { current = rest; - Some(Ok(EscapedChar::Char(*c))) + Some(EscapedChar::Char(*c)) } }) } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 258005bb5e6..dc55bc653e6 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -5,9 +5,9 @@ use super::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, - FormatArgument, FormatError, + parse_escape_only, FormatArgument, FormatChar, FormatError, }; -use std::{fmt::Display, io::Write}; +use std::{fmt::Display, io::Write, ops::ControlFlow}; #[derive(Debug)] pub enum Spec { @@ -17,6 +17,7 @@ pub enum Spec { }, String { width: Option>, + parse_escape: bool, align_left: bool, }, SignedInt { @@ -145,6 +146,12 @@ impl Spec { }, b's' => Spec::String { width, + parse_escape: false, + align_left: minus, + }, + b'b' => Spec::String { + width, + parse_escape: true, align_left: minus, }, b'd' | b'i' => Spec::SignedInt { @@ -230,12 +237,36 @@ impl Spec { _ => Err(FormatError::InvalidArgument(arg.clone())), } } - &Spec::String { width, align_left } => { + &Spec::String { + width, + parse_escape, + align_left, + } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - match arg.get_str() { - Some(s) => write_padded(writer, s, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg.clone())), + let Some(s) = arg.get_str() else { + return Err(FormatError::InvalidArgument(arg.clone())); + }; + if parse_escape { + let mut parsed = Vec::new(); + for c in parse_escape_only(s.as_bytes()) { + match c.write(&mut parsed)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => { + // TODO: This should break the _entire execution_ of printf + break; + } + }; + } + write_padded( + writer, + std::str::from_utf8(&parsed).expect("TODO: Accept invalid utf8"), + width, + false, + align_left, + ) + } else { + write_padded(writer, s, width, false, align_left) } } &Spec::SignedInt { From f83e0d1b04a55feaf7a4dbb810c9dda7a007dc40 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:41:42 +0100 Subject: [PATCH 032/429] printf: accept multiple length parameters --- src/uucore/src/lib/features/format/spec.rs | 63 +++++++++++++--------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index dc55bc653e6..0f48cdafe9f 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -107,34 +107,45 @@ impl Spec { None }; - let length = rest.get(0).and_then(|c| { - Some(match c { - b'h' => { - if let Some(b'h') = rest.get(1) { - *rest = &rest[1..]; - Length::Char - } else { - Length::Short + // Parse 0..N length options, keep the last one + // Even though it is just ignored. We might want to use it later and we + // should parse those characters. + // + // TODO: This needs to be configurable: `seq` accepts only one length + // param + let mut _length = None; + loop { + let new_length = rest.get(0).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(1) { + *rest = &rest[1..]; + Length::Char + } else { + Length::Short + } } - } - b'l' => { - if let Some(b'l') = rest.get(1) { - *rest = &rest[1..]; - Length::Long - } else { - Length::LongLong + b'l' => { + if let Some(b'l') = rest.get(1) { + *rest = &rest[1..]; + Length::Long + } else { + Length::LongLong + } } - } - b'j' => Length::IntMaxT, - b'z' => Length::SizeT, - b't' => Length::PtfDiffT, - b'L' => Length::LongDouble, - _ => return None, - }) - }); - - if length.is_some() { - *rest = &rest[1..]; + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + if new_length.is_some() { + *rest = &rest[1..]; + _length = new_length; + } else { + break; + } } let type_spec = rest.get(0)?; From f3da0817a57d740a527d07b5a74368ed41fd3d08 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:42:52 +0100 Subject: [PATCH 033/429] printf: support precision for integers --- .../src/lib/features/format/num_format.rs | 32 ++++++++++++++++--- src/uucore/src/lib/features/format/spec.rs | 10 ++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 339b522091d..fab81c456fc 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -63,6 +63,7 @@ pub enum NumberAlignment { pub struct SignedInt { pub width: usize, + pub precision: usize, pub positive_sign: PositiveSign, pub alignment: NumberAlignment, } @@ -79,16 +80,19 @@ impl Formatter for SignedInt { }?; } + let s = format!("{:0width$}", x, width = self.precision); + match self.alignment { - NumberAlignment::Left => write!(writer, "{x: write!(writer, "{x:>width$}", width = self.width), - NumberAlignment::RightZero => write!(writer, "{x:0>width$}", width = self.width), + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}", width = self.width), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), } } fn try_from_spec(s: Spec) -> Result { let Spec::SignedInt { width, + precision, positive_sign, alignment, } = s @@ -102,8 +106,15 @@ impl Formatter for SignedInt { Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + Ok(Self { width, + precision, positive_sign, alignment, }) @@ -113,6 +124,7 @@ impl Formatter for SignedInt { pub struct UnsignedInt { pub variant: UnsignedIntVariant, pub width: usize, + pub precision: usize, pub alignment: NumberAlignment, } @@ -120,7 +132,7 @@ impl Formatter for UnsignedInt { type Input = u64; fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> { - let s = match self.variant { + let mut s = match self.variant { UnsignedIntVariant::Decimal => format!("{x}"), UnsignedIntVariant::Octal(Prefix::No) => format!("{x:o}"), UnsignedIntVariant::Octal(Prefix::Yes) => format!("{x:#o}"), @@ -138,6 +150,10 @@ impl Formatter for UnsignedInt { } }; + if self.precision > s.len() { + s = format!("{:0width$}", s, width = self.precision) + } + match self.alignment { NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}", width = self.width), @@ -149,6 +165,7 @@ impl Formatter for UnsignedInt { let Spec::UnsignedInt { variant, width, + precision, alignment, } = s else { @@ -161,8 +178,15 @@ impl Formatter for UnsignedInt { Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + }; + Ok(Self { width, + precision, variant, alignment, }) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 0f48cdafe9f..06f0ca1d6e7 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -22,12 +22,14 @@ pub enum Spec { }, SignedInt { width: Option>, + precision: Option>, positive_sign: PositiveSign, alignment: NumberAlignment, }, UnsignedInt { variant: UnsignedIntVariant, width: Option>, + precision: Option>, alignment: NumberAlignment, }, Float { @@ -167,6 +169,7 @@ impl Spec { }, b'd' | b'i' => Spec::SignedInt { width, + precision, alignment: match (minus, zero) { (true, _) => NumberAlignment::Left, (false, true) => NumberAlignment::RightZero, @@ -197,6 +200,7 @@ impl Spec { }; Spec::UnsignedInt { variant, + precision, width, alignment, } @@ -282,10 +286,12 @@ impl Spec { } &Spec::SignedInt { width, + precision, positive_sign, alignment, } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; let Some(i) = arg.get_i64() else { @@ -294,6 +300,7 @@ impl Spec { num_format::SignedInt { width, + precision, positive_sign, alignment, } @@ -303,9 +310,11 @@ impl Spec { &Spec::UnsignedInt { variant, width, + precision, alignment, } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); let arg = next_arg(args)?; let Some(i) = arg.get_u64() else { @@ -314,6 +323,7 @@ impl Spec { num_format::UnsignedInt { variant, + precision, width, alignment, } From 76eca8d9996cd96751a1ab8a0e7577bff967fd6a Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:43:25 +0100 Subject: [PATCH 034/429] uucore/format: fix doctests --- src/uucore/src/lib/features/format/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 7417d48fad5..47e6fa4cc0b 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -194,9 +194,9 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { /// # Examples /// /// ```rust -/// use uucore::format::printf; +/// use uucore::format::{printf, FormatArgument}; /// -/// printf("hello %s", &[FormatArgument::String("world")]).unwrap(); +/// printf("hello %s", &[FormatArgument::String("world".into())]).unwrap(); /// // prints "hello world" /// ``` pub fn printf<'a>( @@ -228,10 +228,11 @@ fn printf_writer<'a>( /// # Examples /// /// ```rust -/// use uucore::format::sprintf; +/// use uucore::format::{sprintf, FormatArgument}; /// -/// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); -/// assert_eq!(s, "hello world".to_string()); +/// let s = sprintf("hello %s", &[FormatArgument::String("world".into())]).unwrap(); +/// let s = std::str::from_utf8(&s).unwrap(); +/// assert_eq!(s, "hello world"); /// ``` pub fn sprintf<'a>( format_string: impl AsRef<[u8]>, From 4aafb3f88ba2e3113df29ab264fe507ab78fdfb1 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:46:00 +0100 Subject: [PATCH 035/429] printf: exit correctly on \c --- src/uu/printf/src/printf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 00d03816e4c..663411b8952 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -43,7 +43,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { for item in parse_spec_and_escape(format_string.as_ref()) { match item?.write(stdout(), &mut args)? { ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => break, + ControlFlow::Break(()) => return Ok(()), }; } @@ -51,7 +51,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { for item in parse_spec_and_escape(format_string.as_ref()) { match item?.write(stdout(), &mut args)? { ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => break, + ControlFlow::Break(()) => return Ok(()), }; } } From 955640aac8a9b1b40932bf6f282ea48ce30398b8 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:46:38 +0100 Subject: [PATCH 036/429] printf: fix and test float formatting --- .../src/lib/features/format/num_format.rs | 205 +++++++++++++++++- tests/by-util/test_printf.rs | 2 +- 2 files changed, 198 insertions(+), 9 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index fab81c456fc..49edecce085 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -329,8 +329,24 @@ fn format_float_scientific( return format_float_nonfinite(f, case); } - let exponent: i32 = f.log10().floor() as i32; - let normalized = f / 10.0_f64.powi(exponent); + if f == 0.0 { + return if force_decimal == ForceDecimal::Yes && precision == 0 { + "0.e+00".into() + } else { + format!("{:.*}e+00", precision, 0.0) + }; + } + + + let mut exponent: i32 = f.log10().floor() as i32; + let mut normalized = f / 10.0_f64.powi(exponent); + + // If the normalized value will be rounded to a value greater than 10 + // we need to correct. + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + normalized /= 10.0; + exponent += 1; + } let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { "." @@ -349,20 +365,89 @@ fn format_float_scientific( ) } -// TODO: This could be optimized. It's not terribly important though. fn format_float_shortest( f: f64, precision: usize, case: Case, force_decimal: ForceDecimal, ) -> String { - let a = format_float_decimal(f, precision, case, force_decimal); - let b = format_float_scientific(f, precision, case, force_decimal); + // If the float is NaN, -Nan, Inf or -Inf, format like any other float + if !f.is_finite() { + return format_float_nonfinite(f, case); + } - if a.len() > b.len() { - b + // Precision here is about how many digits should be displayed + // instead of how many digits for the fractional part, this means that if + // we pass this to rust's format string, it's always gonna be one less. + let precision = precision.saturating_sub(1); + + if f == 0.0 { + return match (force_decimal, precision) { + (ForceDecimal::Yes, 0) => "0.".into(), + (ForceDecimal::Yes, _) => { + format!("{:.*}", precision, 0.0) + } + (ForceDecimal::No, _) => "0".into(), + }; + } + + let mut exponent = f.log10().floor() as i32; + if f != 0.0 && exponent <= -4 || exponent > precision as i32 { + // Scientific-ish notation (with a few differences) + let mut normalized = f / 10.0_f64.powi(exponent); + + // If the normalized value will be rounded to a value greater than 10 + // we need to correct. + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + normalized /= 10.0; + exponent += 1; + } + + let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { + "." + } else { + "" + }; + + let mut normalized = format!("{normalized:.*}", precision); + + if force_decimal == ForceDecimal::No { + while normalized.ends_with('0') { + normalized.pop(); + } + if normalized.ends_with('.') { + normalized.pop(); + } + } + + let exp_char = match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }; + + format!("{normalized}{additional_dot}{exp_char}{exponent:+03}") } else { - a + // Decimal-ish notation with a few differences: + // - The precision works differently and specifies the total number + // of digits instead of the digits in the fractional part. + // - If we don't force the decimal, '0' and `.` are trimmed. + let decimal_places = (precision as i32).saturating_sub(exponent) as usize; + let mut formatted = if decimal_places == 0 && force_decimal == ForceDecimal::Yes { + format!("{f:.0}.") + } else { + format!("{f:.*}", decimal_places) + }; + + if force_decimal == ForceDecimal::No { + while formatted.ends_with('0') { + formatted.pop(); + } + if formatted.ends_with('.') { + formatted.pop(); + } + } + + formatted } } @@ -398,3 +483,107 @@ fn format_float_hexadecimal( return s; } + +#[cfg(test)] +mod test { + use crate::format::num_format::{Case, ForceDecimal}; + + #[test] + fn decimal_float() { + use super::format_float_decimal; + let f = |x| format_float_decimal(x, 6, Case::Lowercase, ForceDecimal::No); + assert_eq!(f(0.0), "0.000000"); + assert_eq!(f(1.0), "1.000000"); + assert_eq!(f(100.0), "100.000000"); + assert_eq!(f(123456.789), "123456.789000"); + assert_eq!(f(12.3456789), "12.345679"); + assert_eq!(f(1000000.0), "1000000.000000"); + assert_eq!(f(99999999.0), "99999999.000000"); + assert_eq!(f(1.9999995), "1.999999"); + assert_eq!(f(1.9999996), "2.000000"); + } + + #[test] + fn scientific_float() { + use super::format_float_scientific; + let f = |x| format_float_scientific(x, 6, Case::Lowercase, ForceDecimal::No); + assert_eq!(f(0.0), "0.000000e+00"); + assert_eq!(f(1.0), "1.000000e+00"); + assert_eq!(f(100.0), "1.000000e+02"); + assert_eq!(f(123456.789), "1.234568e+05"); + assert_eq!(f(12.3456789), "1.234568e+01"); + assert_eq!(f(1000000.0), "1.000000e+06"); + assert_eq!(f(99999999.0), "1.000000e+08"); + } + + #[test] + fn scientific_float_zero_precision() { + use super::format_float_scientific; + + let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::No); + assert_eq!(f(0.0), "0e+00"); + assert_eq!(f(1.0), "1e+00"); + assert_eq!(f(100.0), "1e+02"); + assert_eq!(f(123456.789), "1e+05"); + assert_eq!(f(12.3456789), "1e+01"); + assert_eq!(f(1000000.0), "1e+06"); + assert_eq!(f(99999999.0), "1e+08"); + + let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::Yes); + assert_eq!(f(0.0), "0.e+00"); + assert_eq!(f(1.0), "1.e+00"); + assert_eq!(f(100.0), "1.e+02"); + assert_eq!(f(123456.789), "1.e+05"); + assert_eq!(f(12.3456789), "1.e+01"); + assert_eq!(f(1000000.0), "1.e+06"); + assert_eq!(f(99999999.0), "1.e+08"); + } + + #[test] + fn shortest_float() { + use super::format_float_shortest; + let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::No); + assert_eq!(f(0.0), "0"); + assert_eq!(f(1.0), "1"); + assert_eq!(f(100.0), "100"); + assert_eq!(f(123456.789), "123457"); + assert_eq!(f(12.3456789), "12.3457"); + assert_eq!(f(1000000.0), "1e+06"); + assert_eq!(f(99999999.0), "1e+08"); + } + + #[test] + fn shortest_float_force_decimal() { + use super::format_float_shortest; + let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::Yes); + assert_eq!(f(0.0), "0.00000"); + assert_eq!(f(1.0), "1.00000"); + assert_eq!(f(100.0), "100.000"); + assert_eq!(f(123456.789), "123457."); + assert_eq!(f(12.3456789), "12.3457"); + assert_eq!(f(1000000.0), "1.00000e+06"); + assert_eq!(f(99999999.0), "1.00000e+08"); + } + + #[test] + fn shortest_float_force_decimal_zero_precision() { + use super::format_float_shortest; + let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::No); + assert_eq!(f(0.0), "0"); + assert_eq!(f(1.0), "1"); + assert_eq!(f(100.0), "1e+02"); + assert_eq!(f(123456.789), "1e+05"); + assert_eq!(f(12.3456789), "1e+01"); + assert_eq!(f(1000000.0), "1e+06"); + assert_eq!(f(99999999.0), "1e+08"); + + let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes); + assert_eq!(f(0.0), "0."); + assert_eq!(f(1.0), "1."); + assert_eq!(f(100.0), "1.e+02"); + assert_eq!(f(123456.789), "1.e+05"); + assert_eq!(f(12.3456789), "1.e+01"); + assert_eq!(f(1000000.0), "1.e+06"); + assert_eq!(f(99999999.0), "1.e+08"); + } +} diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index d7ba5679ecf..5a74aa724e4 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -251,7 +251,7 @@ fn sub_num_float_e_no_round() { #[test] fn sub_num_float_round() { new_ucmd!() - .args(&["two is %f", "1.9999995"]) + .args(&["two is %f", "1.9999996"]) .succeeds() .stdout_only("two is 2.000000"); } From fef84f72030fdd13065bd6735b1b01a40a36e52f Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:47:08 +0100 Subject: [PATCH 037/429] printf: add emoji character test --- tests/by-util/test_printf.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 5a74aa724e4..7573b5fb983 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -190,6 +190,11 @@ fn sub_num_int_char_const_in() { .args(&["ninety seven is %i", "'a"]) .succeeds() .stdout_only("ninety seven is 97"); + + new_ucmd!() + .args(&["emoji is %i", "'🙃"]) + .succeeds() + .stdout_only("emoji is 128579"); } #[test] From ce18e0ab9702c73d778b6b4bb9218926562c83fa Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:47:41 +0100 Subject: [PATCH 038/429] printf: ignore hexadecimal floats test This can be un-ignored when it is implemented --- tests/by-util/test_printf.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 7573b5fb983..436bc498b7c 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -376,6 +376,7 @@ fn sub_float_dec_places() { } #[test] +#[ignore = "hexadecimal floats are unimplemented"] fn sub_float_hex_in() { new_ucmd!() .args(&["%f", "0xF1.1F"]) From 5f2374b33960f42fd1cb575ee4801ef50f342cf8 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 17 Nov 2023 14:57:09 +0100 Subject: [PATCH 039/429] printf: fix negative hex argument parsing --- src/uucore/src/lib/features/format/argument.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 644546c38b7..120b59aa4b5 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -49,9 +49,13 @@ impl FormatArgument { match self { Self::SignedInt(n) => Some(*n), Self::Unparsed(s) => { - if let Some(s) = s.strip_prefix("0x") { - i64::from_str_radix(s, 16).ok() - } else if let Some(s) = s.strip_prefix("0") { + // For hex, we parse `u64` because we do not allow another + // minus sign. We might need to do more precise parsing here. + if let Some(s) = s.strip_prefix("-0x") { + Some(- (u64::from_str_radix(s, 16).ok()? as i64)) + } else if let Some(s) = s.strip_prefix("0x") { + Some(u64::from_str_radix(s, 16).ok()? as i64) + } else if s.starts_with("-0") || s.starts_with('0') { i64::from_str_radix(s, 8).ok() } else if let Some(s) = s.strip_prefix('\'') { Some(s.chars().next()? as i64) From eb00c195c6c9e014c89d53b7d0e6feade507da8d Mon Sep 17 00:00:00 2001 From: Yury Zhytkou <54360928+zhitkoff@users.noreply.github.com> Date: Fri, 17 Nov 2023 11:19:10 -0500 Subject: [PATCH 040/429] split: pass GNU tests/b-chunk.sh (#5475) --------- Co-authored-by: Terts Diepraam Co-authored-by: Daniel Hofstetter Co-authored-by: Brandon Elam Barker Co-authored-by: Kostiantyn Hryshchuk Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- src/uu/split/src/split.rs | 669 +++++++++++++++++++----------------- tests/by-util/test_split.rs | 130 ++++--- 2 files changed, 435 insertions(+), 364 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 17a783d72f2..592e4eedde9 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -18,11 +18,12 @@ use std::ffi::OsString; use std::fmt; use std::fs::{metadata, File}; use std::io; -use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write}; +use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::u64; use uucore::display::Quotable; use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError}; +use uucore::parse_size::parse_size_u64; use uucore::uio_error; use uucore::{format_usage, help_about, help_section, help_usage}; @@ -40,11 +41,20 @@ static OPT_HEX_SUFFIXES_SHORT: &str = "-x"; static OPT_SUFFIX_LENGTH: &str = "suffix-length"; static OPT_VERBOSE: &str = "verbose"; static OPT_SEPARATOR: &str = "separator"; -//The ---io and ---io-blksize parameters are consumed and ignored. -//The parameter is included to make GNU coreutils tests pass. -static OPT_IO: &str = "-io"; -static OPT_IO_BLKSIZE: &str = "-io-blksize"; static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files"; +static OPT_IO_BLKSIZE: &str = "-io-blksize"; +// Cap ---io-blksize value +// For 64bit systems the max value is the same as in GNU +// and is equivalent of `i32::MAX >> 20 << 20` operation. +// On 32bit systems however, even though it fits within `u32` and `i32`, +// it causes rust-lang `library/alloc/src/raw_vec.rs` to panic with 'capacity overflow' error. +// Could be due to how `std::io::BufReader` handles internal buffers. +// So we use much smaller value for those +static OPT_IO_BLKSIZE_MAX: usize = if usize::BITS >= 64 { + 2_146_435_072 +} else { + 1_000_000_000 +}; static ARG_INPUT: &str = "input"; static ARG_PREFIX: &str = "prefix"; @@ -311,7 +321,6 @@ pub fn uu_app() -> Command { .arg( Arg::new(OPT_NUMERIC_SUFFIXES) .long(OPT_NUMERIC_SUFFIXES) - .alias("numeric") .require_equals(true) .num_args(0..=1) .overrides_with_all([ @@ -338,7 +347,6 @@ pub fn uu_app() -> Command { .arg( Arg::new(OPT_HEX_SUFFIXES) .long(OPT_HEX_SUFFIXES) - .alias("hex") .require_equals(true) .num_args(0..=1) .overrides_with_all([ @@ -373,12 +381,6 @@ pub fn uu_app() -> Command { .action(ArgAction::Append) .help("use SEP instead of newline as the record separator; '\\0' (zero) specifies the NUL character"), ) - .arg( - Arg::new(OPT_IO) - .long("io") - .alias(OPT_IO) - .hide(true), - ) .arg( Arg::new(OPT_IO_BLKSIZE) .long("io-blksize") @@ -419,6 +421,7 @@ struct Settings { /// chunks. If this is `false`, then empty files will not be /// created. elide_empty_files: bool, + io_blksize: Option, } /// An error when parsing settings from command-line arguments. @@ -441,6 +444,9 @@ enum SettingsError { /// r/K/N FilterWithKthChunkNumber, + /// Invalid IO block size + InvalidIOBlockSize(String), + /// The `--filter` option is not supported on Windows. #[cfg(windows)] NotSupported, @@ -471,6 +477,7 @@ impl fmt::Display for SettingsError { Self::FilterWithKthChunkNumber => { write!(f, "--filter does not process a chunk extracted to stdout") } + Self::InvalidIOBlockSize(s) => write!(f, "invalid IO block size: {}", s.quote()), #[cfg(windows)] Self::NotSupported => write!( f, @@ -499,12 +506,29 @@ impl Settings { match first.as_str() { "\\0" => b'\0', s if s.as_bytes().len() == 1 => s.as_bytes()[0], - s => return Err(SettingsError::MultiCharacterSeparator(s.to_owned())), + s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())), } } None => b'\n', }; + let io_blksize: Option = if let Some(s) = matches.get_one::(OPT_IO_BLKSIZE) { + match parse_size_u64(s) { + Ok(n) => { + let n: usize = n + .try_into() + .map_err(|_| SettingsError::InvalidIOBlockSize(s.to_string()))?; + if n > OPT_IO_BLKSIZE_MAX { + return Err(SettingsError::InvalidIOBlockSize(s.to_string())); + } + Some(n) + } + _ => return Err(SettingsError::InvalidIOBlockSize(s.to_string())), + } + } else { + None + }; + let result = Self { prefix: matches.get_one::(ARG_PREFIX).unwrap().clone(), suffix, @@ -514,6 +538,7 @@ impl Settings { verbose: matches.value_source(OPT_VERBOSE) == Some(ValueSource::CommandLine), separator, elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES), + io_blksize, }; #[cfg(windows)] @@ -591,6 +616,93 @@ fn custom_write_all( } } +/// Get the size of the input file in bytes +/// Used only for subset of `--number=CHUNKS` strategy, as there is a need +/// to determine input file size upfront in order to know chunk size +/// to be written into each of N files/chunks: +/// * N split into N files based on size of input +/// * K/N output Kth of N to stdout +/// * l/N split into N files without splitting lines/records +/// * l/K/N output Kth of N to stdout without splitting lines/records +/// +/// For most files the size will be determined by either reading entire file content into a buffer +/// or by `len()` function of [`std::fs::metadata`]. +/// +/// However, for some files which report filesystem metadata size that does not match +/// their actual content size, we will need to attempt to find the end of file +/// with direct `seek()` on [`std::fs::File`]. +/// +/// For STDIN stream - read into a buffer up to a limit +/// If input stream does not EOF before that - return an error +/// (i.e. "infinite" input as in `cat /dev/zero | split ...`, `yes | split ...` etc.). +/// +/// Note: The `buf` might end up with either partial or entire input content. +fn get_input_size( + input: &String, + reader: &mut R, + buf: &mut Vec, + io_blksize: &Option, +) -> std::io::Result +where + R: BufRead, +{ + // Set read limit to io_blksize if specified + // Otherwise to OPT_IO_BLKSIZE_MAX + let read_limit = io_blksize.unwrap_or(OPT_IO_BLKSIZE_MAX) as u64; + + // Try to read into buffer up to a limit + let num_bytes = reader + .by_ref() + .take(read_limit) + .read_to_end(buf) + .map(|n| n as u64)?; + + if num_bytes < read_limit { + // Finite file or STDIN stream that fits entirely + // into a buffer within the limit + // Note: files like /dev/null or similar, + // empty STDIN stream, + // and files with true file size 0 + // will also fit here + Ok(num_bytes) + } else if input == "-" { + // STDIN stream that did not fit all content into a buffer + // Most likely continuous/infinite input stream + return Err(io::Error::new( + ErrorKind::Other, + format!("{}: cannot determine input size", input), + )); + } else { + // Could be that file size is larger than set read limit + // Get the file size from filesystem metadata + let metadata = metadata(input)?; + let metadata_size = metadata.len(); + if num_bytes <= metadata_size { + Ok(metadata_size) + } else { + // Could be a file from locations like /dev, /sys, /proc or similar + // which report filesystem metadata size that does not match + // their actual content size + // Attempt direct `seek()` for the end of a file + let mut tmp_fd = File::open(Path::new(input))?; + let end = tmp_fd.seek(SeekFrom::End(0))?; + if end > 0 { + Ok(end) + } else { + // Edge case of either "infinite" file (i.e. /dev/zero) + // or some other "special" non-standard file type + // Give up and return an error + // TODO It might be possible to do more here + // to address all possible file types and edge cases + return Err(io::Error::new( + ErrorKind::Other, + format!("{}: cannot determine file size", input), + )); + } + } + } +} + /// Write a certain number of bytes to one file, then move on to another one. /// /// This struct maintains an underlying writer representing the @@ -1018,155 +1130,110 @@ impl<'a> Write for LineBytesChunkWriter<'a> { } } -/// Split a file into a specific number of chunks by byte. +/// Split a file or STDIN into a specific number of chunks by byte. +/// If in Kth chunk of N mode - print the k-th chunk to STDOUT. /// -/// This function always creates one output file for each chunk, even +/// When file size cannot be evenly divided into the number of chunks of the same size, +/// the first X chunks are 1 byte longer than the rest, +/// where X is a modulus reminder of (file size % number of chunks) +/// +/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` +/// +/// In N chunks mode - this function always creates one output file for each chunk, even /// if there is an error reading or writing one of the chunks or if -/// the input file is truncated. However, if the `filter` option is -/// being used, then no files are created. +/// the input file is truncated. However, if the `--filter` option is +/// being used, then files will only be created if `$FILE` variable was used +/// in filter command, +/// i.e. `split -n 10 --filter='head -c1 > $FILE' in` /// /// # Errors /// /// This function returns an error if there is a problem reading from -/// `reader` or writing to one of the output files. +/// `reader` or writing to one of the output files or stdout. +/// +/// # See also +/// +/// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line. /// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * N -fn split_into_n_chunks_by_byte( +/// * K/N +fn n_chunks_by_byte( settings: &Settings, reader: &mut R, num_chunks: u64, + kth_chunk: Option, ) -> UResult<()> where - R: Read, + R: BufRead, { - // Get the size of the input file in bytes and compute the number - // of bytes per chunk. - // + // Get the size of the input in bytes + let initial_buf = &mut Vec::new(); + let mut num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; + let mut reader = initial_buf.chain(reader); + + // If input file is empty and we would not have determined the Kth chunk + // in the Kth chunk of N chunk mode, then terminate immediately. + // This happens on `split -n 3/10 /dev/null`, for example. + if kth_chunk.is_some() && num_bytes == 0 { + return Ok(()); + } + // If the requested number of chunks exceeds the number of bytes - // in the file *and* the `elide_empty_files` parameter is enabled, - // then behave as if the number of chunks was set to the number of - // bytes in the file. This ensures that we don't write empty - // files. Otherwise, just write the `num_chunks - num_bytes` empty - // files. - let metadata = metadata(&settings.input).map_err(|_| { - USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) - })?; - - let num_bytes = metadata.len(); - let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes; - let (num_chunks, chunk_size) = if will_have_empty_files { - let num_chunks = num_bytes; - let chunk_size = 1; - (num_chunks, chunk_size) + // in the input: + // * in Kth chunk of N mode - just write empty byte string to stdout + // NOTE: the `elide_empty_files` parameter is ignored here + // as we do not generate any files + // and instead writing to stdout + // * In N chunks mode - if the `elide_empty_files` parameter is enabled, + // then behave as if the number of chunks was set to the number of + // bytes in the file. This ensures that we don't write empty + // files. Otherwise, just write the `num_chunks - num_bytes` empty files. + let num_chunks = if kth_chunk.is_none() && settings.elide_empty_files && num_chunks > num_bytes + { + num_bytes } else { - let chunk_size = (num_bytes / (num_chunks)).max(1); - (num_chunks, chunk_size) + num_chunks }; // If we would have written zero chunks of output, then terminate // immediately. This happens on `split -e -n 3 /dev/null`, for // example. - if num_chunks == 0 || num_bytes == 0 { + if num_chunks == 0 { return Ok(()); } - let num_chunks: usize = num_chunks - .try_into() - .map_err(|_| USimpleError::new(1, "Number of chunks too big"))?; - - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?; - - // Create one writer for each chunk. This will create each - // of the underlying files (if not in `--filter` mode). + // In Kth chunk of N mode - we will write to stdout instead of to a file. + let mut stdout_writer = std::io::stdout().lock(); + // In N chunks mode - we will write to `num_chunks` files let mut writers = vec![]; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); - } - - // Write `chunk_size` bytes from the reader into each writer - // except the last. - // - // The last writer gets all remaining bytes so that if the number - // of bytes in the input file was not evenly divisible by - // `num_chunks`, we don't leave any bytes behind. - for writer in writers.iter_mut().take(num_chunks - 1) { - match io::copy(&mut reader.by_ref().take(chunk_size), writer) { - Ok(_) => continue, - Err(e) if ignorable_io_error(&e, settings) => continue, - Err(e) => return Err(uio_error!(e, "input/output error")), - }; - } - - // Write all the remaining bytes to the last chunk. - let i = num_chunks - 1; - let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1)); - match io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i]) { - Ok(_) => Ok(()), - Err(e) if ignorable_io_error(&e, settings) => Ok(()), - Err(e) => Err(uio_error!(e, "input/output error")), - } -} -/// Print the k-th chunk of a file to stdout, splitting by byte. -/// -/// This function is like [`split_into_n_chunks_by_byte`], but instead -/// of writing each chunk to its own file, it only writes to stdout -/// the contents of the chunk identified by `chunk_number` -/// -/// # Errors -/// -/// This function returns an error if there is a problem reading from -/// `reader` or writing to stdout. -/// -/// Implements `--number=CHUNKS` -/// Where CHUNKS -/// * K/N -fn kth_chunks_by_byte( - settings: &Settings, - reader: &mut R, - chunk_number: u64, - num_chunks: u64, -) -> UResult<()> -where - R: BufRead, -{ - // Get the size of the input file in bytes and compute the number - // of bytes per chunk. - // - // If the requested number of chunks exceeds the number of bytes - // in the file - just write empty byte string to stdout - // NOTE: the `elide_empty_files` parameter is ignored here - // as we do not generate any files - // and instead writing to stdout - let metadata = metadata(&settings.input).map_err(|_| { - USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) - })?; - - let num_bytes = metadata.len(); - // If input file is empty and we would have written zero chunks of output, - // then terminate immediately. - // This happens on `split -e -n 3 /dev/null`, for example. - if num_bytes == 0 { - return Ok(()); + // Calculate chunk size base and modulo reminder + // to be used in calculating chunk_size later on + let chunk_size_base = num_bytes / num_chunks; + let chunk_size_reminder = num_bytes % num_chunks; + + // If in N chunks mode + // Create one writer for each chunk. + // This will create each of the underlying files + // or stdin pipes to child shell/command processes if in `--filter` mode + if kth_chunk.is_none() { + // This object is responsible for creating the filename for each chunk. + let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) + .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; + for _ in 0..num_chunks { + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let writer = settings.instantiate_current_writer(filename.as_str())?; + writers.push(writer); + } } - // Write to stdout instead of to a file. - let stdout = std::io::stdout(); - let mut writer = stdout.lock(); - - let chunk_size = (num_bytes / (num_chunks)).max(1); - let mut num_bytes: usize = num_bytes.try_into().unwrap(); - - let mut i = 1; - loop { - let buf: &mut Vec = &mut vec![]; + for i in 1_u64..=num_chunks { + let chunk_size = chunk_size_base + (chunk_size_reminder > i - 1) as u64; + let buf = &mut Vec::new(); if num_bytes > 0 { // Read `chunk_size` bytes from the reader into `buf` // except the last. @@ -1176,15 +1243,17 @@ where // `num_chunks`, we don't leave any bytes behind. let limit = { if i == num_chunks { - num_bytes.try_into().unwrap() + num_bytes } else { chunk_size } }; + let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf); + match n_bytes_read { Ok(n_bytes) => { - num_bytes -= n_bytes; + num_bytes -= n_bytes as u64; } Err(error) => { return Err(USimpleError::new( @@ -1193,11 +1262,20 @@ where )); } } - if i == chunk_number { - writer.write_all(buf)?; - break; + + match kth_chunk { + Some(chunk_number) => { + if i == chunk_number { + stdout_writer.write_all(buf)?; + break; + } + } + None => { + let idx = (i - 1) as usize; + let writer = writers.get_mut(idx).unwrap(); + writer.write_all(buf)?; + } } - i += 1; } else { break; } @@ -1205,12 +1283,17 @@ where Ok(()) } -/// Split a file into a specific number of chunks by line. +/// Split a file or STDIN into a specific number of chunks by line. +/// If in Kth chunk of N mode - print the k-th chunk to STDOUT. +/// +/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` /// -/// This function always creates one output file for each chunk, even +/// In N chunks mode - this function always creates one output file for each chunk, even /// if there is an error reading or writing one of the chunks or if -/// the input file is truncated. However, if the `filter` option is -/// being used, then no files are created. +/// the input file is truncated. However, if the `--filter` option is +/// being used, then files will only be created if `$FILE` variable was used +/// in filter command, +/// i.e. `split -n l/10 --filter='head -c1 > $FILE' in` /// /// # Errors /// @@ -1219,119 +1302,82 @@ where /// /// # See also /// -/// * [`kth_chunk_by_line`], which splits its input in the same way, -/// but writes only one specified chunk to stdout. +/// * [`n_chunks_by_byte`], which splits its input into a specific number of chunks by byte. /// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * l/N -fn split_into_n_chunks_by_line( +/// * l/K/N +fn n_chunks_by_line( settings: &Settings, reader: &mut R, num_chunks: u64, + kth_chunk: Option, ) -> UResult<()> where R: BufRead, { - // Get the size of the input file in bytes and compute the number + // Get the size of the input in bytes and compute the number // of bytes per chunk. - let metadata = metadata(&settings.input).map_err(|_| { - USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) - })?; - let num_bytes = metadata.len(); + let initial_buf = &mut Vec::new(); + let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; + let reader = initial_buf.chain(reader); let chunk_size = (num_bytes / num_chunks) as usize; - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?; - - // Create one writer for each chunk. This will create each - // of the underlying files (if not in `--filter` mode). - let mut writers = vec![]; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); + // If input file is empty and we would not have determined the Kth chunk + // in the Kth chunk of N chunk mode, then terminate immediately. + // This happens on `split -n l/3/10 /dev/null`, for example. + if kth_chunk.is_some() && num_bytes == 0 { + return Ok(()); } - let mut num_bytes_remaining_in_current_chunk = chunk_size; - let mut i = 0; - let sep = settings.separator; - for line_result in reader.split(sep) { - let line = line_result.unwrap(); - let maybe_writer = writers.get_mut(i); - let writer = maybe_writer.unwrap(); - let bytes = line.as_slice(); - custom_write_all(bytes, writer, settings)?; - custom_write_all(&[sep], writer, settings)?; + // In Kth chunk of N mode - we will write to stdout instead of to a file. + let mut stdout_writer = std::io::stdout().lock(); + // In N chunks mode - we will write to `num_chunks` files + let mut writers = vec![]; - // Add one byte for the separator character. - let num_bytes = bytes.len() + 1; - if num_bytes > num_bytes_remaining_in_current_chunk { - num_bytes_remaining_in_current_chunk = chunk_size; - i += 1; - } else { - num_bytes_remaining_in_current_chunk -= num_bytes; + // If in N chunks mode + // Create one writer for each chunk. + // This will create each of the underlying files + // or stdin pipes to child shell/command processes if in `--filter` mode + if kth_chunk.is_none() { + // This object is responsible for creating the filename for each chunk. + let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) + .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; + for _ in 0..num_chunks { + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let writer = settings.instantiate_current_writer(filename.as_str())?; + writers.push(writer); } } - Ok(()) -} - -/// Print the k-th chunk of a file, splitting by line. -/// -/// This function is like [`split_into_n_chunks_by_line`], but instead -/// of writing each chunk to its own file, it only writes to stdout -/// the contents of the chunk identified by `chunk_number`. -/// -/// # Errors -/// -/// This function returns an error if there is a problem reading from -/// `reader` or writing to one of the output files. -/// -/// # See also -/// -/// * [`split_into_n_chunks_by_line`], which splits its input in the -/// same way, but writes each chunk to its own file. -/// -/// Implements `--number=CHUNKS` -/// Where CHUNKS -/// * l/K/N -fn kth_chunk_by_line( - settings: &Settings, - reader: &mut R, - chunk_number: u64, - num_chunks: u64, -) -> UResult<()> -where - R: BufRead, -{ - // Get the size of the input file in bytes and compute the number - // of bytes per chunk. - let metadata = metadata(&settings.input).map_err(|_| { - USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) - })?; - let num_bytes = metadata.len(); - let chunk_size = (num_bytes / num_chunks) as usize; - - // Write to stdout instead of to a file. - let stdout = std::io::stdout(); - let mut writer = stdout.lock(); - let mut num_bytes_remaining_in_current_chunk = chunk_size; let mut i = 1; let sep = settings.separator; + for line_result in reader.split(sep) { - let line = line_result?; + // add separator back in at the end of the line + let mut line = line_result?; + line.push(sep); let bytes = line.as_slice(); - if i == chunk_number { - writer.write_all(bytes)?; - writer.write_all(&[sep])?; + + match kth_chunk { + Some(chunk_number) => { + if i == chunk_number { + stdout_writer.write_all(bytes)?; + } + } + None => { + let idx = (i - 1) as usize; + let maybe_writer = writers.get_mut(idx); + let writer = maybe_writer.unwrap(); + custom_write_all(bytes, writer, settings)?; + } } - // Add one byte for the separator character. - let num_bytes = bytes.len() + 1; + let num_bytes = bytes.len(); if num_bytes >= num_bytes_remaining_in_current_chunk { num_bytes_remaining_in_current_chunk = chunk_size; i += 1; @@ -1339,21 +1385,27 @@ where num_bytes_remaining_in_current_chunk -= num_bytes; } - if i > chunk_number { - break; + if let Some(chunk_number) = kth_chunk { + if i > chunk_number { + break; + } } } Ok(()) } -/// Split a file into a specific number of chunks by line, but +/// Split a file or STDIN into a specific number of chunks by line, but /// assign lines via round-robin /// -/// This function always creates one output file for each chunk, even +/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` +/// +/// In N chunks mode - this function always creates one output file for each chunk, even /// if there is an error reading or writing one of the chunks or if -/// the input file is truncated. However, if the `filter` option is -/// being used, then no files are created. +/// the input file is truncated. However, if the `--filter` option is +/// being used, then files will only be created if `$FILE` variable was used +/// in filter command, +/// i.e. `split -n r/10 --filter='head -c1 > $FILE' in` /// /// # Errors /// @@ -1362,50 +1414,73 @@ where /// /// # See also /// -/// * [`split_into_n_chunks_by_line`], which splits its input in the same way, -/// but without round robin distribution. +/// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line. /// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * r/N -fn split_into_n_chunks_by_line_round_robin( +/// * r/K/N +fn n_chunks_by_line_round_robin( settings: &Settings, reader: &mut R, num_chunks: u64, + kth_chunk: Option, ) -> UResult<()> where R: BufRead, { - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) - .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; - - // Create one writer for each chunk. This will create each - // of the underlying files (if not in `--filter` mode). + // In Kth chunk of N mode - we will write to stdout instead of to a file. + let mut stdout_writer = std::io::stdout().lock(); + // In N chunks mode - we will write to `num_chunks` files let mut writers = vec![]; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| io::Error::new(ErrorKind::Other, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); + + // If in N chunks mode + // Create one writer for each chunk. + // This will create each of the underlying files + // or stdin pipes to child shell/command processes if in `--filter` mode + if kth_chunk.is_none() { + // This object is responsible for creating the filename for each chunk. + let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) + .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; + for _ in 0..num_chunks { + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let writer = settings.instantiate_current_writer(filename.as_str())?; + writers.push(writer); + } } let num_chunks: usize = num_chunks.try_into().unwrap(); let sep = settings.separator; let mut closed_writers = 0; for (i, line_result) in reader.split(sep).enumerate() { - let maybe_writer = writers.get_mut(i % num_chunks); - let writer = maybe_writer.unwrap(); - let mut line = line_result.unwrap(); + // add separator back in at the end of the line + let mut line = line_result?; line.push(sep); let bytes = line.as_slice(); - let writer_stdin_open = custom_write_all(bytes, writer, settings)?; - if !writer_stdin_open { - closed_writers += 1; - if closed_writers == num_chunks { - // all writers are closed - stop reading - break; + + match kth_chunk { + Some(chunk_number) => { + // The `.enumerate()` method returns index `i` starting with 0, + // but chunk number is given as a 1-indexed number, + // so compare to `chunk_number - 1` + if (i % num_chunks) == (chunk_number - 1) as usize { + stdout_writer.write_all(bytes)?; + } + } + None => { + let maybe_writer = writers.get_mut(i % num_chunks); + let writer = maybe_writer.unwrap(); + + let writer_stdin_open = custom_write_all(bytes, writer, settings)?; + if !writer_stdin_open { + closed_writers += 1; + if closed_writers == num_chunks { + // all writers are closed - stop reading + break; + } + } } } } @@ -1413,62 +1488,9 @@ where Ok(()) } -/// Print the k-th chunk of a file, splitting by line, but -/// assign lines via round-robin to the specified number of output -/// chunks, but output only the *k*th chunk. -/// -/// This function is like [`kth_chunk_by_line`], as it only writes to stdout and -/// prints out only *k*th chunk -/// It is also like [`split_into_n_chunks_by_line_round_robin`], as it is assigning chunks -/// using round robin distribution -/// -/// # Errors -/// -/// This function returns an error if there is a problem reading from -/// `reader` or writing to one of the output files. -/// -/// # See also -/// -/// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the -/// same way, but writes each chunk to its own file. -/// -/// Implements `--number=CHUNKS` -/// Where CHUNKS -/// * r/K/N -fn kth_chunk_by_line_round_robin( - settings: &Settings, - reader: &mut R, - chunk_number: u64, - num_chunks: u64, -) -> UResult<()> -where - R: BufRead, -{ - // Write to stdout instead of to a file. - let stdout = std::io::stdout(); - let mut writer = stdout.lock(); - - let num_chunks: usize = num_chunks.try_into().unwrap(); - let chunk_number: usize = chunk_number.try_into().unwrap(); - let sep = settings.separator; - // The chunk number is given as a 1-indexed number, but it - // is a little easier to deal with a 0-indexed number - // since `.enumerate()` returns index `i` starting with 0 - let chunk_number = chunk_number - 1; - for (i, line_result) in reader.split(sep).enumerate() { - let line = line_result?; - let bytes = line.as_slice(); - if (i % num_chunks) == chunk_number { - writer.write_all(bytes)?; - writer.write_all(&[sep])?; - } - } - Ok(()) -} - #[allow(clippy::cognitive_complexity)] fn split(settings: &Settings) -> UResult<()> { - let mut reader = BufReader::new(if settings.input == "-" { + let r_box = if settings.input == "-" { Box::new(stdin()) as Box } else { let r = File::open(Path::new(&settings.input)).map_err_context(|| { @@ -1478,26 +1500,33 @@ fn split(settings: &Settings) -> UResult<()> { ) })?; Box::new(r) as Box - }); + }; + let mut reader = if let Some(c) = settings.io_blksize { + BufReader::with_capacity(c, r_box) + } else { + BufReader::new(r_box) + }; match settings.strategy { Strategy::Number(NumberType::Bytes(num_chunks)) => { - split_into_n_chunks_by_byte(settings, &mut reader, num_chunks) + // split_into_n_chunks_by_byte(settings, &mut reader, num_chunks) + n_chunks_by_byte(settings, &mut reader, num_chunks, None) } Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => { - kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks) + // kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks) + n_chunks_by_byte(settings, &mut reader, num_chunks, Some(chunk_number)) } Strategy::Number(NumberType::Lines(num_chunks)) => { - split_into_n_chunks_by_line(settings, &mut reader, num_chunks) + n_chunks_by_line(settings, &mut reader, num_chunks, None) } Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => { - kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks) + n_chunks_by_line(settings, &mut reader, num_chunks, Some(chunk_number)) } Strategy::Number(NumberType::RoundRobin(num_chunks)) => { - split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks) + n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, None) } Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => { - kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks) + n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, Some(chunk_number)) } Strategy::Lines(chunk_size) => { let mut writer = LineChunkWriter::new(chunk_size, settings)?; diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index aec6f059441..0ae2af5cb92 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc use crate::common::util::{AtPath, TestScenario}; use rand::{thread_rng, Rng, SeedableRng}; @@ -704,54 +704,41 @@ fn test_split_overflow_bytes_size() { assert_eq!(glob.collate(), at.read_bytes(name)); } -#[test] -#[cfg(target_pointer_width = "32")] -fn test_split_chunks_num_chunks_oversized_32() { - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; - at.touch("file"); - scene - .ucmd() - .args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"]) - .fails() - .code_is(1) - .stderr_only("split: Number of chunks too big\n"); -} - #[test] fn test_split_stdin_num_chunks() { - new_ucmd!() - .args(&["--number=1"]) - .fails() - .code_is(1) - .stderr_only("split: -: cannot determine file size\n"); + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["--number=1"]).pipe_in("").succeeds(); + assert_eq!(file_read(&at, "xaa"), ""); + assert!(!at.plus("xab").exists()); } #[test] fn test_split_stdin_num_kth_chunk() { new_ucmd!() .args(&["--number=1/2"]) - .fails() - .code_is(1) - .stderr_only("split: -: cannot determine file size\n"); + .pipe_in("1\n2\n3\n4\n5\n") + .succeeds() + .stdout_only("1\n2\n3"); } #[test] fn test_split_stdin_num_line_chunks() { - new_ucmd!() - .args(&["--number=l/2"]) - .fails() - .code_is(1) - .stderr_only("split: -: cannot determine file size\n"); + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["--number=l/2"]) + .pipe_in("1\n2\n3\n4\n5\n") + .succeeds(); + assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n"); + assert_eq!(file_read(&at, "xab"), "4\n5\n"); + assert!(!at.plus("xac").exists()); } #[test] fn test_split_stdin_num_kth_line_chunk() { new_ucmd!() .args(&["--number=l/2/5"]) - .fails() - .code_is(1) - .stderr_only("split: -: cannot determine file size\n"); + .pipe_in("1\n2\n3\n4\n5\n") + .succeeds() + .stdout_only("2\n"); } fn file_read(at: &AtPath, filename: &str) -> String { @@ -912,6 +899,14 @@ fn test_suffixes_exhausted() { .stderr_only("split: output file suffixes exhausted\n"); } +#[test] +fn test_suffix_length_req() { + new_ucmd!() + .args(&["-n", "100", "-a", "1", "asciilowercase.txt"]) + .fails() + .stderr_only("split: the suffix length needs to be at least 2\n"); +} + #[test] fn test_verbose() { new_ucmd!() @@ -937,11 +932,11 @@ fn test_number_n() { s }; ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds(); - assert_eq!(file_read("xaa"), "abcde"); - assert_eq!(file_read("xab"), "fghij"); - assert_eq!(file_read("xac"), "klmno"); - assert_eq!(file_read("xad"), "pqrst"); - assert_eq!(file_read("xae"), "uvwxyz\n"); + assert_eq!(file_read("xaa"), "abcdef"); + assert_eq!(file_read("xab"), "ghijkl"); + assert_eq!(file_read("xac"), "mnopq"); + assert_eq!(file_read("xad"), "rstuv"); + assert_eq!(file_read("xae"), "wxyz\n"); #[cfg(unix)] new_ucmd!() .args(&["--number=100", "/dev/null"]) @@ -954,11 +949,11 @@ fn test_number_kth_of_n() { new_ucmd!() .args(&["--number=3/5", "asciilowercase.txt"]) .succeeds() - .stdout_only("klmno"); + .stdout_only("mnopq"); new_ucmd!() .args(&["--number=5/5", "asciilowercase.txt"]) .succeeds() - .stdout_only("uvwxyz\n"); + .stdout_only("wxyz\n"); new_ucmd!() .args(&["-e", "--number=99/100", "asciilowercase.txt"]) .succeeds() @@ -1046,11 +1041,11 @@ fn test_split_number_with_io_blksize() { }; ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"]) .succeeds(); - assert_eq!(file_read("xaa"), "abcde"); - assert_eq!(file_read("xab"), "fghij"); - assert_eq!(file_read("xac"), "klmno"); - assert_eq!(file_read("xad"), "pqrst"); - assert_eq!(file_read("xae"), "uvwxyz\n"); + assert_eq!(file_read("xaa"), "abcdef"); + assert_eq!(file_read("xab"), "ghijkl"); + assert_eq!(file_read("xac"), "mnopq"); + assert_eq!(file_read("xad"), "rstuv"); + assert_eq!(file_read("xae"), "wxyz\n"); } #[test] @@ -1065,6 +1060,32 @@ fn test_split_default_with_io_blksize() { assert_eq!(glob.collate(), at.read_bytes(name)); } +#[test] +fn test_split_invalid_io_blksize() { + new_ucmd!() + .args(&["---io-blksize=XYZ", "threebytes.txt"]) + .fails() + .stderr_only("split: invalid IO block size: 'XYZ'\n"); + new_ucmd!() + .args(&["---io-blksize=5000000000", "threebytes.txt"]) + .fails() + .stderr_only("split: invalid IO block size: '5000000000'\n"); + #[cfg(target_pointer_width = "32")] + new_ucmd!() + .args(&["---io-blksize=2146435072", "threebytes.txt"]) + .fails() + .stderr_only("split: invalid IO block size: '2146435072'\n"); +} + +#[test] +fn test_split_number_oversized_stdin() { + new_ucmd!() + .args(&["--number=3", "---io-blksize=600"]) + .pipe_in_fixture("sixhundredfiftyonebytes.txt") + .fails() + .stderr_only("split: -: cannot determine input size\n"); +} + #[test] fn test_invalid_suffix_length() { new_ucmd!() @@ -1157,6 +1178,18 @@ fn test_elide_dev_null() { assert!(!at.plus("xac").exists()); } +#[test] +#[cfg(unix)] +fn test_dev_zero() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "3", "/dev/zero"]) + .fails() + .stderr_only("split: /dev/zero: cannot determine file size\n"); + assert!(!at.plus("xaa").exists()); + assert!(!at.plus("xab").exists()); + assert!(!at.plus("xac").exists()); +} + #[test] fn test_lines() { let (at, mut ucmd) = at_and_ucmd!(); @@ -1182,6 +1215,15 @@ fn test_lines_kth() { .stdout_only("20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n"); } +#[test] +#[cfg(unix)] +fn test_lines_kth_dev_null() { + new_ucmd!() + .args(&["-n", "l/3/10", "/dev/null"]) + .succeeds() + .stdout_only(""); +} + #[test] fn test_line_bytes() { let (at, mut ucmd) = at_and_ucmd!(); @@ -1321,7 +1363,7 @@ fn test_numeric_suffix() { } #[test] -fn test_numeric_suffix_alias() { +fn test_numeric_suffix_inferred() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-n", "4", "--numeric=9", "threebytes.txt"]) .succeeds() From 8e796d3bb679bf7c064baca79b2ee0664b90762e Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 19 Nov 2023 15:10:28 +0100 Subject: [PATCH 041/429] mv: fix issue with -T and dest ending with "/" --- src/uu/mv/src/mv.rs | 2 +- tests/by-util/test_mv.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 036024f9907..f0529af7e6c 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -341,7 +341,7 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> let target_is_dir = target.is_dir(); - if path_ends_with_terminator(target) && !target_is_dir { + if path_ends_with_terminator(target) && !target_is_dir && !opts.no_target_dir { return Err(MvError::FailedToAccessNotADirectory(target.quote().to_string()).into()); } diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index c54d24ea906..571de769125 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -1158,6 +1158,32 @@ fn test_mv_overwrite_dir() { assert!(at.dir_exists(dir_b)); } +#[test] +fn test_mv_no_target_dir_with_dest_not_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + let dir_a = "a"; + let dir_b = "b"; + + at.mkdir(dir_a); + ucmd.arg("-T").arg(dir_a).arg(dir_b).succeeds().no_output(); + + assert!(!at.dir_exists(dir_a)); + assert!(at.dir_exists(dir_b)); +} + +#[test] +fn test_mv_no_target_dir_with_dest_not_existing_and_ending_with_slash() { + let (at, mut ucmd) = at_and_ucmd!(); + let dir_a = "a"; + let dir_b = "b/"; + + at.mkdir(dir_a); + ucmd.arg("-T").arg(dir_a).arg(dir_b).succeeds().no_output(); + + assert!(!at.dir_exists(dir_a)); + assert!(at.dir_exists(dir_b)); +} + #[test] fn test_mv_overwrite_nonempty_dir() { let (at, mut ucmd) = at_and_ucmd!(); From 285e580b6eb9475e5d61645594b7a75c13073eed Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 19 Nov 2023 15:59:38 +0100 Subject: [PATCH 042/429] cp: improve error msg if -r is not specified --- src/uu/cp/src/copydir.rs | 2 +- tests/by-util/test_cp.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/uu/cp/src/copydir.rs b/src/uu/cp/src/copydir.rs index 763d66c0b03..a903ed2aaff 100644 --- a/src/uu/cp/src/copydir.rs +++ b/src/uu/cp/src/copydir.rs @@ -324,7 +324,7 @@ pub(crate) fn copy_directory( source_in_command_line: bool, ) -> CopyResult<()> { if !options.recursive { - return Err(format!("omitting directory {}", root.quote()).into()); + return Err(format!("-r not specified; omitting directory {}", root.quote()).into()); } // if no-dereference is enabled and this is a symlink, copy it as a file diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index c8761fab8fb..14b68da3718 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -131,7 +131,9 @@ fn test_cp_directory_not_recursive() { .arg(TEST_COPY_TO_FOLDER) .arg(TEST_HELLO_WORLD_DEST) .fails() - .stderr_contains("omitting directory"); + .stderr_is(format!( + "cp: -r not specified; omitting directory '{TEST_COPY_TO_FOLDER}'\n" + )); } #[test] From 58087df02a30bb4c7d6e474dfeee20b9faa51b93 Mon Sep 17 00:00:00 2001 From: clara swanson <69856940+cswn@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:16:18 +0100 Subject: [PATCH 043/429] stdbuf: remove crash macro (#5549) * stdbuf: remove crash! macro * stdbuf: change target_vendor back to apple * tests/stdbuf: change stderr_only to usage_error in test_stdbuf_invalid_mode_fails * stdbuf: add exit code to check_option * stdbuf: remove set_exit_code line from error --- src/uu/stdbuf/src/stdbuf.rs | 29 +++++++++++++++-------------- tests/by-util/test_stdbuf.rs | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index 8578282752e..38c4451ca50 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -7,7 +7,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::fs::File; -use std::io::{self, Write}; +use std::io::Write; use std::os::unix::process::ExitStatusExt; use std::path::PathBuf; use std::process; @@ -15,7 +15,7 @@ use tempfile::tempdir; use tempfile::TempDir; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::parse_size::parse_size_u64; -use uucore::{crash, format_usage, help_about, help_section, help_usage}; +use uucore::{format_usage, help_about, help_section, help_usage}; const ABOUT: &str = help_about!("stdbuf.md"); const USAGE: &str = help_usage!("stdbuf.md"); @@ -66,13 +66,13 @@ struct ProgramOptionsError(String); target_os = "netbsd", target_os = "dragonflybsd" ))] -fn preload_strings() -> (&'static str, &'static str) { - ("LD_PRELOAD", "so") +fn preload_strings() -> UResult<(&'static str, &'static str)> { + Ok(("LD_PRELOAD", "so")) } #[cfg(target_vendor = "apple")] -fn preload_strings() -> (&'static str, &'static str) { - ("DYLD_LIBRARY_PATH", "dylib") +fn preload_strings() -> UResult<(&'static str, &'static str)> { + Ok(("DYLD_LIBRARY_PATH", "dylib")) } #[cfg(not(any( @@ -83,10 +83,11 @@ fn preload_strings() -> (&'static str, &'static str) { target_os = "dragonflybsd", target_vendor = "apple" )))] -fn preload_strings() -> (&'static str, &'static str) { - use uucore::crash; - - crash!(1, "Command not supported for this operating system!") +fn preload_strings() -> UResult<(&'static str, &'static str)> { + Err(USimpleError::new( + 1, + "Command not supported for this operating system!", + )) } fn check_option(matches: &ArgMatches, name: &str) -> Result { @@ -102,7 +103,7 @@ fn check_option(matches: &ArgMatches, name: &str) -> Result parse_size_u64(x).map_or_else( - |e| crash!(125, "invalid mode {}", e), + |e| Err(ProgramOptionsError(format!("invalid mode {e}"))), |m| { Ok(BufferType::Size(m.try_into().map_err(|_| { ProgramOptionsError(format!( @@ -128,8 +129,8 @@ fn set_command_env(command: &mut process::Command, buffer_name: &str, buffer_typ } } -fn get_preload_env(tmp_dir: &TempDir) -> io::Result<(String, PathBuf)> { - let (preload, extension) = preload_strings(); +fn get_preload_env(tmp_dir: &TempDir) -> UResult<(String, PathBuf)> { + let (preload, extension) = preload_strings()?; let inject_path = tmp_dir.path().join("libstdbuf").with_extension(extension); let mut file = File::create(&inject_path)?; @@ -151,7 +152,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let command_params: Vec<&str> = command_values.map(|s| s.as_ref()).collect(); let tmp_dir = tempdir().unwrap(); - let (preload_env, libstdbuf) = get_preload_env(&tmp_dir).map_err_context(String::new)?; + let (preload_env, libstdbuf) = get_preload_env(&tmp_dir)?; command.env(preload_env, libstdbuf); set_command_env(&mut command, "_STDBUF_I", &options.stdin); set_command_env(&mut command, "_STDBUF_O", &options.stdout); diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 9a67dad9e37..50de4c54696 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -65,7 +65,7 @@ fn test_stdbuf_invalid_mode_fails() { .args(&[*option, "1024R", "head"]) .fails() .code_is(125) - .stderr_only("stdbuf: invalid mode '1024R': Value too large for defined data type\n"); + .usage_error("invalid mode '1024R': Value too large for defined data type"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&[*option, "1Y", "head"]) From c43ee01d1920eeeb98d2cc38f61ef217ef89cf12 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 12:38:26 +0100 Subject: [PATCH 044/429] printf: allow precision in string --- src/uucore/src/lib/features/format/spec.rs | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 06f0ca1d6e7..0cd0f03b4c3 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -17,6 +17,7 @@ pub enum Spec { }, String { width: Option>, + precision: Option>, parse_escape: bool, align_left: bool, }, @@ -159,11 +160,13 @@ impl Spec { }, b's' => Spec::String { width, + precision, parse_escape: false, align_left: minus, }, b'b' => Spec::String { width, + precision, parse_escape: true, align_left: minus, }, @@ -254,10 +257,12 @@ impl Spec { } &Spec::String { width, + precision, parse_escape, align_left, } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(precision, &mut args)?; let arg = next_arg(&mut args)?; let Some(s) = arg.get_str() else { return Err(FormatError::InvalidArgument(arg.clone())); @@ -273,15 +278,29 @@ impl Spec { } }; } + // GNU does do this truncation on a byte level, see for instance: + // printf "%.1s" 🙃 + // > � + // For now, we let printf panic when we truncate within a code point. + // TODO: We need to not use Rust's formatting for aligning the output, + // so that we can just write bytes to stdout without panicking. + let truncated = match precision { + Some(p) if p < parsed.len() => &parsed[..p], + _ => &parsed, + }; write_padded( writer, - std::str::from_utf8(&parsed).expect("TODO: Accept invalid utf8"), + std::str::from_utf8(&truncated).expect("TODO: Accept invalid utf8"), width, false, align_left, ) } else { - write_padded(writer, s, width, false, align_left) + let truncated = match precision { + Some(p) if p < s.len() => &s[..p], + _ => s, + }; + write_padded(writer, truncated, width, false, align_left) } } &Spec::SignedInt { From 066d8ba73d0a08eb47c1fea9e1d446b47fbca1f6 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 13:25:20 +0100 Subject: [PATCH 045/429] printf: coerce missing and invalid arguments to 0 --- .../src/lib/features/format/argument.rs | 113 ++++++++++-------- src/uucore/src/lib/features/format/spec.rs | 52 ++------ 2 files changed, 75 insertions(+), 90 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 120b59aa4b5..96cfeddf3f6 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -9,84 +9,103 @@ pub enum FormatArgument { Unparsed(String), } -impl FormatArgument { - pub fn get_char(&self) -> Option { - match self { - Self::Char(c) => Some(*c), - Self::Unparsed(s) => { +pub trait ArgumentIter<'a>: Iterator { + fn get_char(&mut self) -> char; + fn get_i64(&mut self) -> i64; + fn get_u64(&mut self) -> u64; + fn get_f64(&mut self) -> f64; + fn get_str(&mut self) -> &'a str; +} + +impl<'a, T: Iterator> ArgumentIter<'a> for T { + fn get_char(&mut self) -> char { + let Some(next) = self.next() else { + return '\0'; + }; + match next { + FormatArgument::Char(c) => *c, + FormatArgument::Unparsed(s) => { let mut chars = s.chars(); let Some(c) = chars.next() else { - return None; + return '\0'; }; let None = chars.next() else { - return None; + return '\0'; }; - Some(c) + c } - _ => None, + _ => '\0', } } - pub fn get_u64(&self) -> Option { - match self { - Self::UnsignedInt(n) => Some(*n), - Self::Unparsed(s) => { - if let Some(s) = s.strip_prefix("0x") { - u64::from_str_radix(s, 16).ok() - } else if let Some(s) = s.strip_prefix("0") { - u64::from_str_radix(s, 8).ok() - } else if let Some(s) = s.strip_prefix('\'') { - Some(s.chars().next()? as u64) - } else { - s.parse().ok() - } + fn get_u64(&mut self) -> u64 { + let Some(next) = self.next() else { + return 0; + }; + match next { + FormatArgument::UnsignedInt(n) => *n, + FormatArgument::Unparsed(s) => if let Some(s) = s.strip_prefix("0x") { + u64::from_str_radix(s, 16).ok() + } else if let Some(s) = s.strip_prefix("0") { + u64::from_str_radix(s, 8).ok() + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|c| c as u64) + } else { + s.parse().ok() } - _ => None, + .unwrap_or(0), + _ => 0, } } - pub fn get_i64(&self) -> Option { - match self { - Self::SignedInt(n) => Some(*n), - Self::Unparsed(s) => { + fn get_i64(&mut self) -> i64 { + let Some(next) = self.next() else { + return 0; + }; + match next { + FormatArgument::SignedInt(n) => *n, + FormatArgument::Unparsed(s) => { // For hex, we parse `u64` because we do not allow another // minus sign. We might need to do more precise parsing here. if let Some(s) = s.strip_prefix("-0x") { - Some(- (u64::from_str_radix(s, 16).ok()? as i64)) + u64::from_str_radix(s, 16).ok().map(|x| -(x as i64)) } else if let Some(s) = s.strip_prefix("0x") { - Some(u64::from_str_radix(s, 16).ok()? as i64) + u64::from_str_radix(s, 16).ok().map(|x| x as i64) } else if s.starts_with("-0") || s.starts_with('0') { i64::from_str_radix(s, 8).ok() } else if let Some(s) = s.strip_prefix('\'') { - Some(s.chars().next()? as i64) + s.chars().next().map(|x| x as i64) } else { s.parse().ok() } + .unwrap_or(0) } - _ => None, + _ => 0, } } - pub fn get_f64(&self) -> Option { - match self { - Self::Float(n) => Some(*n), - Self::Unparsed(s) => { - if s.starts_with("0x") || s.starts_with("-0x") { - unimplemented!("Hexadecimal floats are unimplemented!") - } else if let Some(s) = s.strip_prefix('\'') { - Some(s.chars().next()? as u64 as f64) - } else { - s.parse().ok() - } + fn get_f64(&mut self) -> f64 { + let Some(next) = self.next() else { + return 0.0; + }; + match next { + FormatArgument::Float(n) => *n, + FormatArgument::Unparsed(s) => if s.starts_with("0x") || s.starts_with("-0x") { + unimplemented!("Hexadecimal floats are unimplemented!") + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|x| x as u64 as f64) + } else { + s.parse().ok() } - _ => None, + .unwrap_or(0.0), + _ => 0.0, } } - pub fn get_str(&self) -> Option<&str> { - match self { - Self::Unparsed(s) | Self::String(s) => Some(s), - _ => None, + fn get_str(&mut self) -> &'a str { + match self.next() { + Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s, + _ => "", } } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 0cd0f03b4c3..23c68c06669 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -5,7 +5,7 @@ use super::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, - parse_escape_only, FormatArgument, FormatChar, FormatError, + parse_escape_only, ArgumentIter, FormatChar, FormatError, }; use std::{fmt::Display, io::Write, ops::ControlFlow}; @@ -244,16 +244,12 @@ impl Spec { pub fn write<'a>( &self, writer: impl Write, - mut args: impl Iterator, + mut args: impl ArgumentIter<'a>, ) -> Result<(), FormatError> { match self { &Spec::Char { width, align_left } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let arg = next_arg(&mut args)?; - match arg.get_char() { - Some(c) => write_padded(writer, c, width, false, align_left), - _ => Err(FormatError::InvalidArgument(arg.clone())), - } + write_padded(writer, args.get_char(), width, false, align_left) } &Spec::String { width, @@ -263,10 +259,7 @@ impl Spec { } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(precision, &mut args)?; - let arg = next_arg(&mut args)?; - let Some(s) = arg.get_str() else { - return Err(FormatError::InvalidArgument(arg.clone())); - }; + let s = args.get_str(); if parse_escape { let mut parsed = Vec::new(); for c in parse_escape_only(s.as_bytes()) { @@ -311,11 +304,7 @@ impl Spec { } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); - - let arg = next_arg(&mut args)?; - let Some(i) = arg.get_i64() else { - return Err(FormatError::InvalidArgument(arg.clone())); - }; + let i = args.get_i64(); num_format::SignedInt { width, @@ -334,11 +323,7 @@ impl Spec { } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); - - let arg = next_arg(args)?; - let Some(i) = arg.get_u64() else { - return Err(FormatError::InvalidArgument(arg.clone())); - }; + let i = args.get_u64(); num_format::UnsignedInt { variant, @@ -360,11 +345,7 @@ impl Spec { } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); - - let arg = next_arg(args)?; - let Some(f) = arg.get_f64() else { - return Err(FormatError::InvalidArgument(arg.clone())); - }; + let f = args.get_f64(); num_format::Float { variant, @@ -384,30 +365,15 @@ impl Spec { fn resolve_asterisk<'a>( option: Option>, - args: impl Iterator, + mut args: impl ArgumentIter<'a>, ) -> Result, FormatError> { Ok(match option { None => None, - Some(CanAsterisk::Asterisk) => { - let arg = next_arg(args)?; - match arg.get_u64() { - Some(u) => match usize::try_from(u) { - Ok(u) => Some(u), - Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), - }, - _ => return Err(FormatError::InvalidArgument(arg.clone())), - } - } + Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)), Some(CanAsterisk::Fixed(w)) => Some(w), }) } -fn next_arg<'a>( - mut arguments: impl Iterator, -) -> Result<&'a FormatArgument, FormatError> { - arguments.next().ok_or(FormatError::NoMoreArguments) -} - fn write_padded( mut writer: impl Write, text: impl Display, From 68d036c9a299282f77143728063f2c7d52b67c5b Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 13:45:02 +0100 Subject: [PATCH 046/429] printf: basic support for unicode escape sequences --- src/uucore/src/lib/features/format/escape.rs | 49 ++++++++++++++------ src/uucore/src/lib/features/format/mod.rs | 15 +++--- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs index b8c21741caf..1e06a8176dd 100644 --- a/src/uucore/src/lib/features/format/escape.rs +++ b/src/uucore/src/lib/features/format/escape.rs @@ -1,6 +1,7 @@ #[derive(Debug)] pub enum EscapedChar { - Char(u8), + Byte(u8), + Char(char), Backslash(u8), End, } @@ -61,6 +62,24 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option { Some(ret) } +/// Parse `\uHHHH` and `\UHHHHHHHH` +// TODO: This should print warnings and possibly halt execution when it fails to parse +// TODO: If the character cannot be converted to u32, the input should be printed. +fn parse_unicode(input: &mut &[u8], digits: u8) -> Option { + let (c, rest) = input.split_first()?; + let mut ret = Base::Hex.to_digit(*c)? as u32; + *input = &rest[..]; + + for _ in 1..digits { + let (c, rest) = input.split_first()?; + let n = Base::Hex.to_digit(*c)?; + ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32); + *input = &rest[..]; + } + + char::from_u32(ret) +} + pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { if let [c, new_rest @ ..] = rest { // This is for the \NNN syntax for octal sequences. @@ -68,33 +87,35 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { // would be the \0NNN syntax. if let b'1'..=b'7' = c { if let Some(parsed) = parse_code(rest, Base::Oct) { - return EscapedChar::Char(parsed); + return EscapedChar::Byte(parsed); } } *rest = &new_rest[..]; match c { - b'\\' => EscapedChar::Char(b'\\'), - b'a' => EscapedChar::Char(b'\x07'), - b'b' => EscapedChar::Char(b'\x08'), + b'\\' => EscapedChar::Byte(b'\\'), + b'a' => EscapedChar::Byte(b'\x07'), + b'b' => EscapedChar::Byte(b'\x08'), b'c' => return EscapedChar::End, - b'e' => EscapedChar::Char(b'\x1b'), - b'f' => EscapedChar::Char(b'\x0c'), - b'n' => EscapedChar::Char(b'\n'), - b'r' => EscapedChar::Char(b'\r'), - b't' => EscapedChar::Char(b'\t'), - b'v' => EscapedChar::Char(b'\x0b'), + b'e' => EscapedChar::Byte(b'\x1b'), + b'f' => EscapedChar::Byte(b'\x0c'), + b'n' => EscapedChar::Byte(b'\n'), + b'r' => EscapedChar::Byte(b'\r'), + b't' => EscapedChar::Byte(b'\t'), + b'v' => EscapedChar::Byte(b'\x0b'), b'x' => { if let Some(c) = parse_code(rest, Base::Hex) { - EscapedChar::Char(c) + EscapedChar::Byte(c) } else { EscapedChar::Backslash(b'x') } } - b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')), + b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or(b'\0')), + b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0')), + b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0')), c => EscapedChar::Backslash(*c), } } else { - EscapedChar::Char(b'\\') + EscapedChar::Byte(b'\\') } } diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 47e6fa4cc0b..cfa9a034fd7 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -19,11 +19,12 @@ // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -mod escape; mod argument; +mod escape; pub mod num_format; mod spec; +pub use argument::*; use spec::Spec; use std::{ error::Error, @@ -31,7 +32,6 @@ use std::{ io::{stdout, Write}, ops::ControlFlow, }; -pub use argument::*; use crate::error::UError; @@ -91,9 +91,12 @@ impl FormatChar for u8 { impl FormatChar for EscapedChar { fn write(&self, mut writer: impl Write) -> std::io::Result> { match self { - EscapedChar::Char(c) => { + EscapedChar::Byte(c) => { writer.write(&[*c])?; } + EscapedChar::Char(c) => { + write!(writer, "{c}")?; + } EscapedChar::Backslash(c) => { writer.write(&[b'\\', *c])?; } @@ -125,7 +128,7 @@ pub fn parse_spec_and_escape( [] => return None, [b'%', b'%', rest @ ..] => { current = rest; - Some(Ok(FormatItem::Char(EscapedChar::Char(b'%')))) + Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%')))) } [b'%', rest @ ..] => { current = rest; @@ -141,7 +144,7 @@ pub fn parse_spec_and_escape( } [c, rest @ ..] => { current = rest; - Some(Ok(FormatItem::Char(EscapedChar::Char(*c)))) + Some(Ok(FormatItem::Char(EscapedChar::Byte(*c)))) } }) } @@ -179,7 +182,7 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { } [c, rest @ ..] => { current = rest; - Some(EscapedChar::Char(*c)) + Some(EscapedChar::Byte(*c)) } }) } From 90b61a8c5cb5768f93ef935cb6ee4c6d328ab128 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 20 Nov 2023 22:17:57 +0100 Subject: [PATCH 047/429] fuzzing: add a variable to state if we know it fails or not --- .github/workflows/fuzzing.yml | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index e7a9cb1e329..cbb0574dee2 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -37,16 +37,13 @@ jobs: strategy: matrix: test-target: - [ - fuzz_date, - fuzz_test, - fuzz_expr, - fuzz_parse_glob, - fuzz_parse_size, - fuzz_parse_time, - # adding more fuzz tests here. - # e.g. fuzz_test_a, - ] + - { name: fuzz_test, should_pass: true } + # https://github.com/uutils/coreutils/issues/5311 + - { name: fuzz_date, should_pass: false } + - { name: fuzz_expr, should_pass: true } + - { name: fuzz_parse_glob, should_pass: true } + - { name: fuzz_parse_size, should_pass: true } + - { name: fuzz_parse_time, should_pass: true } steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly @@ -59,16 +56,17 @@ jobs: - name: Restore Cached Corpus uses: actions/cache/restore@v3 with: - key: corpus-cache-${{ matrix.test-target }} + key: corpus-cache-${{ matrix.test-target.name }} path: | - fuzz/corpus/${{ matrix.test-target }} - - name: Run ${{ matrix.test-target }} for XX seconds + fuzz/corpus/${{ matrix.test-target.name }} + - name: Run ${{ matrix.test-target.name }} for XX seconds shell: bash + continue-on-error: ${{ !matrix.test-target.name.should_pass }} run: | - cargo +nightly fuzz run ${{ matrix.test-target }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0 + cargo +nightly fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0 - name: Save Corpus Cache uses: actions/cache/save@v3 with: - key: corpus-cache-${{ matrix.test-target }} + key: corpus-cache-${{ matrix.test-target.name }} path: | - fuzz/corpus/${{ matrix.test-target }} + fuzz/corpus/${{ matrix.test-target.name }} From 5c04283d6ee1a3fc7d4f7f88476809e760933f3c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 13:54:58 +0100 Subject: [PATCH 048/429] printf: address fmt, clippy, spelling and failing test --- src/uu/printf/src/printf.rs | 17 +- src/uu/seq/src/extendedbigdecimal.rs | 2 +- src/uu/seq/src/number.rs | 12 +- src/uu/seq/src/seq.rs | 4 +- .../src/lib/features/format/argument.rs | 65 ++- src/uucore/src/lib/features/format/escape.rs | 29 +- src/uucore/src/lib/features/format/mod.rs | 65 +-- .../src/lib/features/format/num_format.rs | 116 +++--- src/uucore/src/lib/features/format/spec.rs | 380 ++++++++++-------- 9 files changed, 379 insertions(+), 311 deletions(-) diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 663411b8952..cfb0315cfb0 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -10,9 +10,9 @@ use std::io::stdout; use std::ops::ControlFlow; use clap::{crate_version, Arg, ArgAction, Command}; -use uucore::error::{UResult, UUsageError}; +use uucore::error::{UError, UResult, UUsageError}; use uucore::format::{parse_spec_and_escape, FormatArgument}; -use uucore::{format_usage, help_about, help_section, help_usage}; +use uucore::{format_usage, help_about, help_section, help_usage, show}; const VERSION: &str = "version"; const HELP: &str = "help"; @@ -49,10 +49,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { while args.peek().is_some() { for item in parse_spec_and_escape(format_string.as_ref()) { - match item?.write(stdout(), &mut args)? { - ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => return Ok(()), - }; + match item { + Ok(item) => { + match item.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => return Ok(()), + }; + } + Err(e) => show!(e), + } } } Ok(()) diff --git a/src/uu/seq/src/extendedbigdecimal.rs b/src/uu/seq/src/extendedbigdecimal.rs index ecd460ceb73..4f9a0415218 100644 --- a/src/uu/seq/src/extendedbigdecimal.rs +++ b/src/uu/seq/src/extendedbigdecimal.rs @@ -70,7 +70,7 @@ pub enum ExtendedBigDecimal { impl ExtendedBigDecimal { #[cfg(test)] pub fn zero() -> Self { - Self::BigDecimal(1.into()) + Self::BigDecimal(0.into()) } pub fn one() -> Self { diff --git a/src/uu/seq/src/number.rs b/src/uu/seq/src/number.rs index 4da1146eff6..182431a9210 100644 --- a/src/uu/seq/src/number.rs +++ b/src/uu/seq/src/number.rs @@ -3,12 +3,6 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore extendedbigdecimal extendedbigint -//! A type to represent the possible start, increment, and end values for seq. -//! -//! The [`Number`] enumeration represents the possible values for the -//! start, increment, and end values for `seq`. These may be integers, -//! floating point numbers, negative zero, etc. A [`Number`] can be -//! parsed from a string by calling [`str::parse`]. use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; @@ -29,7 +23,11 @@ pub struct PreciseNumber { } impl PreciseNumber { - pub fn new(number: ExtendedBigDecimal, num_integral_digits: usize, num_fractional_digits: usize) -> Self { + pub fn new( + number: ExtendedBigDecimal, + num_integral_digits: usize, + num_fractional_digits: usize, + ) -> Self { Self { number, num_integral_digits, diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index a987405ce15..05338864545 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -122,7 +122,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { &options.terminator, options.equal_width, padding, - format, + &format, ); match result { Ok(_) => Ok(()), @@ -203,7 +203,7 @@ fn print_seq( terminator: &str, pad: bool, padding: usize, - format: Option>, + format: &Option>, ) -> std::io::Result<()> { let stdout = stdout(); let mut stdout = stdout.lock(); diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 96cfeddf3f6..6370c4177e8 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -1,3 +1,7 @@ +use os_display::Quotable; + +use crate::{error::set_exit_code, show_warning}; + #[derive(Clone, Debug)] pub enum FormatArgument { Char(char), @@ -44,16 +48,25 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => if let Some(s) = s.strip_prefix("0x") { - u64::from_str_radix(s, 16).ok() - } else if let Some(s) = s.strip_prefix("0") { - u64::from_str_radix(s, 8).ok() - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|c| c as u64) - } else { - s.parse().ok() + FormatArgument::Unparsed(s) => { + let opt = if let Some(s) = s.strip_prefix("0x") { + u64::from_str_radix(s, 16).ok() + } else if let Some(s) = s.strip_prefix('0') { + u64::from_str_radix(s, 8).ok() + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|c| c as u64) + } else { + s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0 + } + } } - .unwrap_or(0), _ => 0, } } @@ -67,7 +80,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { FormatArgument::Unparsed(s) => { // For hex, we parse `u64` because we do not allow another // minus sign. We might need to do more precise parsing here. - if let Some(s) = s.strip_prefix("-0x") { + let opt = if let Some(s) = s.strip_prefix("-0x") { u64::from_str_radix(s, 16).ok().map(|x| -(x as i64)) } else if let Some(s) = s.strip_prefix("0x") { u64::from_str_radix(s, 16).ok().map(|x| x as i64) @@ -77,8 +90,15 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { s.chars().next().map(|x| x as i64) } else { s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0 + } } - .unwrap_or(0) } _ => 0, } @@ -90,14 +110,23 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => if s.starts_with("0x") || s.starts_with("-0x") { - unimplemented!("Hexadecimal floats are unimplemented!") - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|x| x as u64 as f64) - } else { - s.parse().ok() + FormatArgument::Unparsed(s) => { + let opt = if s.starts_with("0x") || s.starts_with("-0x") { + unimplemented!("Hexadecimal floats are unimplemented!") + } else if let Some(s) = s.strip_prefix('\'') { + s.chars().next().map(|x| x as u64 as f64) + } else { + s.parse().ok() + }; + match opt { + Some(n) => n, + None => { + show_warning!("{}: expected a numeric value", s.quote()); + set_exit_code(1); + 0.0 + } + } } - .unwrap_or(0.0), _ => 0.0, } } diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs index 1e06a8176dd..188dd1892b5 100644 --- a/src/uucore/src/lib/features/format/escape.rs +++ b/src/uucore/src/lib/features/format/escape.rs @@ -21,16 +21,16 @@ impl Base { } } - fn to_digit(&self, c: u8) -> Option { + fn convert_digit(&self, c: u8) -> Option { match self { - Base::Oct => { + Self::Oct => { if matches!(c, b'0'..=b'7') { Some(c - b'0') } else { None } } - Base::Hex => match c { + Self::Hex => match c { b'0'..=b'9' => Some(c - b'0'), b'A'..=b'F' => Some(c - b'A' + 10), b'a'..=b'f' => Some(c - b'a' + 10), @@ -49,32 +49,35 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option { // yield incorrect results because it will interpret values larger than // `u8::MAX` as unicode. let [c, rest @ ..] = input else { return None }; - let mut ret = base.to_digit(*c)?; - *input = &rest[..]; + let mut ret = base.convert_digit(*c)?; + *input = rest; for _ in 1..base.max_digits() { let [c, rest @ ..] = input else { break }; - let Some(n) = base.to_digit(*c) else { break }; + let Some(n) = base.convert_digit(*c) else { + break; + }; ret = ret.wrapping_mul(base as u8).wrapping_add(n); - *input = &rest[..]; + *input = rest; } Some(ret) } +// spell-checker:disable-next /// Parse `\uHHHH` and `\UHHHHHHHH` // TODO: This should print warnings and possibly halt execution when it fails to parse // TODO: If the character cannot be converted to u32, the input should be printed. fn parse_unicode(input: &mut &[u8], digits: u8) -> Option { let (c, rest) = input.split_first()?; - let mut ret = Base::Hex.to_digit(*c)? as u32; - *input = &rest[..]; + let mut ret = Base::Hex.convert_digit(*c)? as u32; + *input = rest; for _ in 1..digits { let (c, rest) = input.split_first()?; - let n = Base::Hex.to_digit(*c)?; + let n = Base::Hex.convert_digit(*c)?; ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32); - *input = &rest[..]; + *input = rest; } char::from_u32(ret) @@ -91,12 +94,12 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { } } - *rest = &new_rest[..]; + *rest = new_rest; match c { b'\\' => EscapedChar::Byte(b'\\'), b'a' => EscapedChar::Byte(b'\x07'), b'b' => EscapedChar::Byte(b'\x08'), - b'c' => return EscapedChar::End, + b'c' => EscapedChar::End, b'e' => EscapedChar::Byte(b'\x1b'), b'f' => EscapedChar::Byte(b'\x0c'), b'n' => EscapedChar::Byte(b'\n'), diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index cfa9a034fd7..9045b8b90c3 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -42,10 +42,13 @@ use self::{ #[derive(Debug)] pub enum FormatError { - SpecError, + SpecError(Vec), IoError(std::io::Error), NoMoreArguments, InvalidArgument(FormatArgument), + TooManySpecs, + NeedAtLeastOneSpec, + WrongSpecType, } impl Error for FormatError {} @@ -53,18 +56,26 @@ impl UError for FormatError {} impl From for FormatError { fn from(value: std::io::Error) -> Self { - FormatError::IoError(value) + Self::IoError(value) } } impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO: Be more precise about these match self { - FormatError::SpecError => write!(f, "invalid spec"), - FormatError::IoError(_) => write!(f, "io error"), - FormatError::NoMoreArguments => write!(f, "no more arguments"), - FormatError::InvalidArgument(_) => write!(f, "invalid argument"), + Self::SpecError(s) => write!( + f, + "%{}: invalid conversion specification", + String::from_utf8_lossy(s) + ), + // TODO: The next two should print the spec as well + Self::TooManySpecs => write!(f, "format has too many % directives"), + Self::NeedAtLeastOneSpec => write!(f, "format has no % directive"), + // TODO: Error message below needs some work + Self::WrongSpecType => write!(f, "wrong % directive type was given"), + Self::IoError(_) => write!(f, "io error"), + Self::NoMoreArguments => write!(f, "no more arguments"), + Self::InvalidArgument(_) => write!(f, "invalid argument"), } } } @@ -83,7 +94,7 @@ pub trait FormatChar { impl FormatChar for u8 { fn write(&self, mut writer: impl Write) -> std::io::Result> { - writer.write(&[*self])?; + writer.write_all(&[*self])?; Ok(ControlFlow::Continue(())) } } @@ -91,16 +102,16 @@ impl FormatChar for u8 { impl FormatChar for EscapedChar { fn write(&self, mut writer: impl Write) -> std::io::Result> { match self { - EscapedChar::Byte(c) => { - writer.write(&[*c])?; + Self::Byte(c) => { + writer.write_all(&[*c])?; } - EscapedChar::Char(c) => { + Self::Char(c) => { write!(writer, "{c}")?; } - EscapedChar::Backslash(c) => { - writer.write(&[b'\\', *c])?; + Self::Backslash(c) => { + writer.write_all(&[b'\\', *c])?; } - EscapedChar::End => return Ok(ControlFlow::Break(())), + Self::End => return Ok(ControlFlow::Break(())), } Ok(ControlFlow::Continue(())) } @@ -113,8 +124,8 @@ impl FormatItem { args: &mut impl Iterator, ) -> Result, FormatError> { match self { - FormatItem::Spec(spec) => spec.write(writer, args)?, - FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError), + Self::Spec(spec) => spec.write(writer, args)?, + Self::Char(c) => return c.write(writer).map_err(FormatError::IoError), }; Ok(ControlFlow::Continue(())) } @@ -125,7 +136,7 @@ pub fn parse_spec_and_escape( ) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%')))) @@ -133,8 +144,8 @@ pub fn parse_spec_and_escape( [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { - Some(spec) => spec, - None => return Some(Err(FormatError::SpecError)), + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } @@ -152,7 +163,7 @@ pub fn parse_spec_and_escape( fn parse_spec_only(fmt: &[u8]) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(b'%'))) @@ -160,8 +171,8 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { - Some(spec) => spec, - None => return Some(Err(FormatError::SpecError)), + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } @@ -175,7 +186,7 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { let mut current = fmt; std::iter::from_fn(move || match current { - [] => return None, + [] => None, [b'\\', rest @ ..] => { current = rest; Some(parse_escape_code(&mut current)) @@ -248,8 +259,8 @@ pub fn sprintf<'a>( /// A parsed format for a single float value /// -/// This is used by `seq`. It can be constructed with [`FloatFormat::parse`] -/// and can write a value with [`FloatFormat::fmt`]. +/// This is used by `seq`. It can be constructed with [`Format::parse`] +/// and can write a value with [`Format::fmt`]. /// /// It can only accept a single specification without any asterisk parameters. /// If it does get more specifications, it will return an error. @@ -276,7 +287,7 @@ impl Format { } let Some(spec) = spec else { - return Err(FormatError::SpecError); + return Err(FormatError::NeedAtLeastOneSpec); }; let formatter = F::try_from_spec(spec)?; @@ -285,7 +296,7 @@ impl Format { for item in &mut iter { match item? { FormatItem::Spec(_) => { - return Err(FormatError::SpecError); + return Err(FormatError::TooManySpecs); } FormatItem::Char(c) => suffix.push(c), } diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 49edecce085..c9a2b8c166f 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -97,19 +97,19 @@ impl Formatter for SignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -151,7 +151,7 @@ impl Formatter for UnsignedInt { }; if self.precision > s.len() { - s = format!("{:0width$}", s, width = self.precision) + s = format!("{:0width$}", s, width = self.precision); } match self.alignment { @@ -169,19 +169,19 @@ impl Formatter for UnsignedInt { alignment, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -212,7 +212,7 @@ impl Default for Float { width: 0, positive_sign: PositiveSign::None, alignment: NumberAlignment::Left, - precision: 2, + precision: 6, } } } @@ -229,19 +229,23 @@ impl Formatter for Float { }?; } - let s = match self.variant { - FloatVariant::Decimal => { - format_float_decimal(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Scientific => { - format_float_scientific(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Shortest => { - format_float_shortest(x, self.precision, self.case, self.force_decimal) - } - FloatVariant::Hexadecimal => { - format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) + let s = if x.is_finite() { + match self.variant { + FloatVariant::Decimal => { + format_float_decimal(x, self.precision, self.force_decimal) + } + FloatVariant::Scientific => { + format_float_scientific(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Shortest => { + format_float_shortest(x, self.precision, self.case, self.force_decimal) + } + FloatVariant::Hexadecimal => { + format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) + } } + } else { + format_float_non_finite(x, self.case) }; match self.alignment { @@ -265,19 +269,19 @@ impl Formatter for Float { precision, } = s else { - return Err(FormatError::SpecError); + return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), + Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -292,25 +296,16 @@ impl Formatter for Float { } } -fn format_float_nonfinite(f: f64, case: Case) -> String { +fn format_float_non_finite(f: f64, case: Case) -> String { debug_assert!(!f.is_finite()); let mut s = format!("{f}"); if case == Case::Uppercase { s.make_ascii_uppercase(); } - return s; + s } -fn format_float_decimal( - f: f64, - precision: usize, - case: Case, - force_decimal: ForceDecimal, -) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - +fn format_float_decimal(f: f64, precision: usize, force_decimal: ForceDecimal) -> String { if precision == 0 && force_decimal == ForceDecimal::Yes { format!("{f:.0}.") } else { @@ -324,11 +319,6 @@ fn format_float_scientific( case: Case, force_decimal: ForceDecimal, ) -> String { - // If the float is NaN, -Nan, Inf or -Inf, format like any other float - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - if f == 0.0 { return if force_decimal == ForceDecimal::Yes && precision == 0 { "0.e+00".into() @@ -337,13 +327,13 @@ fn format_float_scientific( }; } - let mut exponent: i32 = f.log10().floor() as i32; let mut normalized = f / 10.0_f64.powi(exponent); // If the normalized value will be rounded to a value greater than 10 // we need to correct. - if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 + { normalized /= 10.0; exponent += 1; } @@ -371,11 +361,6 @@ fn format_float_shortest( case: Case, force_decimal: ForceDecimal, ) -> String { - // If the float is NaN, -Nan, Inf or -Inf, format like any other float - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - // Precision here is about how many digits should be displayed // instead of how many digits for the fractional part, this means that if // we pass this to rust's format string, it's always gonna be one less. @@ -398,7 +383,9 @@ fn format_float_shortest( // If the normalized value will be rounded to a value greater than 10 // we need to correct. - if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { + if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) + >= 10.0 + { normalized /= 10.0; exponent += 1; } @@ -412,12 +399,7 @@ fn format_float_shortest( let mut normalized = format!("{normalized:.*}", precision); if force_decimal == ForceDecimal::No { - while normalized.ends_with('0') { - normalized.pop(); - } - if normalized.ends_with('.') { - normalized.pop(); - } + strip_zeros_and_dot(&mut normalized); } let exp_char = match case { @@ -439,12 +421,7 @@ fn format_float_shortest( }; if force_decimal == ForceDecimal::No { - while formatted.ends_with('0') { - formatted.pop(); - } - if formatted.ends_with('.') { - formatted.pop(); - } + strip_zeros_and_dot(&mut formatted); } formatted @@ -457,10 +434,6 @@ fn format_float_hexadecimal( case: Case, force_decimal: ForceDecimal, ) -> String { - if !f.is_finite() { - return format_float_nonfinite(f, case); - } - let (first_digit, mantissa, exponent) = if f == 0.0 { (0, 0, 0) } else { @@ -481,7 +454,16 @@ fn format_float_hexadecimal( s.make_ascii_uppercase(); } - return s; + s +} + +fn strip_zeros_and_dot(s: &mut String) { + while s.ends_with('0') { + s.pop(); + } + if s.ends_with('.') { + s.pop(); + } } #[cfg(test)] @@ -491,7 +473,7 @@ mod test { #[test] fn decimal_float() { use super::format_float_decimal; - let f = |x| format_float_decimal(x, 6, Case::Lowercase, ForceDecimal::No); + let f = |x| format_float_decimal(x, 6, ForceDecimal::No); assert_eq!(f(0.0), "0.000000"); assert_eq!(f(1.0), "1.000000"); assert_eq!(f(100.0), "100.000000"); @@ -576,7 +558,7 @@ mod test { assert_eq!(f(12.3456789), "1e+01"); assert_eq!(f(1000000.0), "1e+06"); assert_eq!(f(99999999.0), "1e+08"); - + let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.0), "0."); assert_eq!(f(1.0), "1."); diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 23c68c06669..e74b6f8660f 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -1,4 +1,6 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety intmax ptrdiff + +use crate::quoting_style::{escape_name, QuotingStyle}; use super::{ num_format::{ @@ -16,11 +18,12 @@ pub enum Spec { align_left: bool, }, String { - width: Option>, precision: Option>, - parse_escape: bool, + width: Option>, align_left: bool, }, + EscapedString, + QuotedString, SignedInt { width: Option>, precision: Option>, @@ -76,12 +79,14 @@ enum Length { } impl Spec { - pub fn parse(rest: &mut &[u8]) -> Option { + pub fn parse<'a>(rest: &mut &'a [u8]) -> Result { // Based on the C++ reference, the spec format looks like: // // %[flags][width][.precision][length]specifier // // However, we have already parsed the '%'. + let mut index = 0; + let start = *rest; let mut minus = false; let mut plus = false; @@ -89,111 +94,101 @@ impl Spec { let mut hash = false; let mut zero = false; - while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) { + while let Some(x) = rest.get(index) { match x { b'-' => minus = true, b'+' => plus = true, b' ' => space = true, b'#' => hash = true, b'0' => zero = true, - _ => unreachable!(), + _ => break, } - *rest = &rest[1..] + index += 1; } - let width = eat_asterisk_or_number(rest); + let alignment = match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }; - let precision = if let Some(b'.') = rest.get(0) { - *rest = &rest[1..]; - Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0))) + let positive_sign = match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }; + + let width = eat_asterisk_or_number(rest, &mut index); + + let precision = if let Some(b'.') = rest.get(index) { + index += 1; + Some(eat_asterisk_or_number(rest, &mut index).unwrap_or(CanAsterisk::Fixed(0))) } else { None }; - // Parse 0..N length options, keep the last one - // Even though it is just ignored. We might want to use it later and we - // should parse those characters. - // - // TODO: This needs to be configurable: `seq` accepts only one length - // param - let mut _length = None; - loop { - let new_length = rest.get(0).and_then(|c| { - Some(match c { - b'h' => { - if let Some(b'h') = rest.get(1) { - *rest = &rest[1..]; - Length::Char - } else { - Length::Short - } - } - b'l' => { - if let Some(b'l') = rest.get(1) { - *rest = &rest[1..]; - Length::Long - } else { - Length::LongLong - } - } - b'j' => Length::IntMaxT, - b'z' => Length::SizeT, - b't' => Length::PtfDiffT, - b'L' => Length::LongDouble, - _ => return None, - }) - }); - if new_length.is_some() { - *rest = &rest[1..]; - _length = new_length; - } else { - break; - } - } + // We ignore the length. It's not really relevant to printf + let _ = Self::parse_length(rest, &mut index); - let type_spec = rest.get(0)?; - *rest = &rest[1..]; - Some(match type_spec { - b'c' => Spec::Char { - width, - align_left: minus, - }, - b's' => Spec::String { - width, - precision, - parse_escape: false, - align_left: minus, - }, - b'b' => Spec::String { - width, - precision, - parse_escape: true, - align_left: minus, - }, - b'd' | b'i' => Spec::SignedInt { - width, - precision, - alignment: match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }, - positive_sign: match (plus, space) { - (true, _) => PositiveSign::Plus, - (false, true) => PositiveSign::Space, - (false, false) => PositiveSign::None, - }, - }, + let Some(type_spec) = rest.get(index) else { + return Err(&start[..index]); + }; + index += 1; + *rest = &start[index..]; + + Ok(match type_spec { + // GNU accepts minus, plus and space even though they are not used + b'c' => { + if hash || precision.is_some() { + return Err(&start[..index]); + } + Self::Char { + width, + align_left: minus, + } + } + b's' => { + if hash { + return Err(&start[..index]); + } + Self::String { + precision, + width, + align_left: minus, + } + } + b'b' => { + if hash || minus || plus || space || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::EscapedString + } + b'q' => { + if hash || minus || plus || space || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::QuotedString + } + b'd' | b'i' => { + if hash { + return Err(&start[..index]); + } + Self::SignedInt { + width, + precision, + alignment, + positive_sign, + } + } c @ (b'u' | b'o' | b'x' | b'X') => { + // Normal unsigned integer cannot have a prefix + if *c == b'u' && hash { + return Err(&start[..index]); + } let prefix = match hash { false => Prefix::No, true => Prefix::Yes, }; - let alignment = match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }; let variant = match c { b'u' => UnsignedIntVariant::Decimal, b'o' => UnsignedIntVariant::Octal(prefix), @@ -201,14 +196,14 @@ impl Spec { b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), _ => unreachable!(), }; - Spec::UnsignedInt { + Self::UnsignedInt { variant, precision, width, alignment, } } - c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float { + c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float { width, precision, variant: match c { @@ -226,115 +221,157 @@ impl Spec { false => Case::Lowercase, true => Case::Uppercase, }, - alignment: match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }, - positive_sign: match (plus, space) { - (true, _) => PositiveSign::Plus, - (false, true) => PositiveSign::Space, - (false, false) => PositiveSign::None, - }, + alignment, + positive_sign, }, - _ => return None, + _ => return Err(&start[..index]), }) } + fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option { + // Parse 0..N length options, keep the last one + // Even though it is just ignored. We might want to use it later and we + // should parse those characters. + // + // TODO: This needs to be configurable: `seq` accepts only one length + // param + let mut length = None; + loop { + let new_length = rest.get(*index).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(*index + 1) { + *index += 1; + Length::Char + } else { + Length::Short + } + } + b'l' => { + if let Some(b'l') = rest.get(*index + 1) { + *index += 1; + Length::Long + } else { + Length::LongLong + } + } + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + if new_length.is_some() { + *index += 1; + length = new_length; + } else { + break; + } + } + length + } + pub fn write<'a>( &self, - writer: impl Write, + mut writer: impl Write, mut args: impl ArgumentIter<'a>, ) -> Result<(), FormatError> { match self { - &Spec::Char { width, align_left } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - write_padded(writer, args.get_char(), width, false, align_left) + Self::Char { width, align_left } => { + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + write_padded(writer, args.get_char(), width, false, *align_left) } - &Spec::String { + Self::String { width, - precision, - parse_escape, align_left, + precision, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?; + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + + // GNU does do this truncation on a byte level, see for instance: + // printf "%.1s" 🙃 + // > � + // For now, we let printf panic when we truncate within a code point. + // TODO: We need to not use Rust's formatting for aligning the output, + // so that we can just write bytes to stdout without panicking. + let precision = resolve_asterisk(*precision, &mut args)?; let s = args.get_str(); - if parse_escape { - let mut parsed = Vec::new(); - for c in parse_escape_only(s.as_bytes()) { - match c.write(&mut parsed)? { - ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => { - // TODO: This should break the _entire execution_ of printf - break; - } - }; - } - // GNU does do this truncation on a byte level, see for instance: - // printf "%.1s" 🙃 - // > � - // For now, we let printf panic when we truncate within a code point. - // TODO: We need to not use Rust's formatting for aligning the output, - // so that we can just write bytes to stdout without panicking. - let truncated = match precision { - Some(p) if p < parsed.len() => &parsed[..p], - _ => &parsed, - }; - write_padded( - writer, - std::str::from_utf8(&truncated).expect("TODO: Accept invalid utf8"), - width, - false, - align_left, - ) - } else { - let truncated = match precision { - Some(p) if p < s.len() => &s[..p], - _ => s, + let truncated = match precision { + Some(p) if p < s.len() => &s[..p], + _ => s, + }; + write_padded(writer, truncated, width, false, *align_left) + } + Self::EscapedString => { + let s = args.get_str(); + let mut parsed = Vec::new(); + for c in parse_escape_only(s.as_bytes()) { + match c.write(&mut parsed)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => { + // TODO: This should break the _entire execution_ of printf + break; + } }; - write_padded(writer, truncated, width, false, align_left) } + writer.write_all(&parsed).map_err(FormatError::IoError) + } + Self::QuotedString => { + let s = args.get_str(); + writer + .write_all( + escape_name( + s.as_ref(), + &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }, + ) + .as_bytes(), + ) + .map_err(FormatError::IoError) } - &Spec::SignedInt { + Self::SignedInt { width, precision, positive_sign, alignment, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); let i = args.get_i64(); num_format::SignedInt { width, precision, - positive_sign, - alignment, + positive_sign: *positive_sign, + alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } - &Spec::UnsignedInt { + Self::UnsignedInt { variant, width, precision, alignment, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(0); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); let i = args.get_u64(); num_format::UnsignedInt { - variant, + variant: *variant, precision, width, - alignment, + alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } - &Spec::Float { + Self::Float { variant, case, force_decimal, @@ -343,18 +380,18 @@ impl Spec { alignment, precision, } => { - let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); - let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); + let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6); let f = args.get_f64(); num_format::Float { - variant, - case, - force_decimal, width, - positive_sign, - alignment, precision, + variant: *variant, + case: *case, + force_decimal: *force_decimal, + positive_sign: *positive_sign, + alignment: *alignment, } .fmt(writer, f) .map_err(FormatError::IoError) @@ -390,23 +427,26 @@ fn write_padded( .map_err(FormatError::IoError) } -fn eat_asterisk_or_number(rest: &mut &[u8]) -> Option> { - if let Some(b'*') = rest.get(0) { - *rest = &rest[1..]; +fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option> { + if let Some(b'*') = rest.get(*index) { + *index += 1; Some(CanAsterisk::Asterisk) } else { - eat_number(rest).map(CanAsterisk::Fixed) + eat_number(rest, index).map(CanAsterisk::Fixed) } } -fn eat_number(rest: &mut &[u8]) -> Option { - match rest.iter().position(|b| !b.is_ascii_digit()) { +fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option { + match rest[*index..].iter().position(|b| !b.is_ascii_digit()) { None | Some(0) => None, Some(i) => { // TODO: This might need to handle errors better // For example in case of overflow. - let parsed = std::str::from_utf8(&rest[..i]).unwrap().parse().unwrap(); - *rest = &rest[i..]; + let parsed = std::str::from_utf8(&rest[*index..(*index + i)]) + .unwrap() + .parse() + .unwrap(); + *index += i; Some(parsed) } } From 131c310bcbd0c437f55ca0b45677b3d66be8c4ce Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 21 Nov 2023 08:49:19 +0100 Subject: [PATCH 049/429] Cargo.toml: default_features -> default-features --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2f3af2c83d1..ba701b2d5e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -330,7 +330,7 @@ walkdir = "2.4" winapi-util = "0.1.6" windows-sys = { version = "0.48.0", default-features = false } xattr = "1.0.1" -zip = { version = "0.6.6", default_features = false, features = ["deflate"] } +zip = { version = "0.6.6", default-features = false, features = ["deflate"] } hex = "0.4.3" md-5 = "0.10.6" From d2ede927367d87a79901cf45e4fee2dfa1c8f7dc Mon Sep 17 00:00:00 2001 From: ALXD Date: Mon, 20 Nov 2023 14:19:20 +0100 Subject: [PATCH 050/429] expr: make error messages align with GNU expr --- src/uu/expr/src/syntax_tree.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c55fb0bdc6a..b19c13c0adf 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -13,6 +13,7 @@ use num_bigint::BigInt; use num_traits::Zero; use onig::{Regex, RegexOptions, Syntax}; +use uucore::display::Quotable; use crate::tokens::Token; @@ -214,7 +215,7 @@ pub fn tokens_to_ast( assert!(op_stack.is_empty()); maybe_dump_rpn(&out_stack); - let result = ast_from_rpn(&mut out_stack); + let result = ast_from_rpn(&mut out_stack, None); if out_stack.is_empty() { maybe_dump_ast(&result); result @@ -253,9 +254,12 @@ fn maybe_dump_rpn(rpn: &TokenStack) { } } -fn ast_from_rpn(rpn: &mut TokenStack) -> Result, String> { +fn ast_from_rpn(rpn: &mut TokenStack, op_type: Option<&str>) -> Result, String> { match rpn.pop() { - None => Err("syntax error (premature end of expression)".to_owned()), + None => Err(match op_type { + Some(value) => format!("syntax error: unexpected argument {}", value.quote()), + None => "missing operand".to_owned(), + }), Some((token_idx, Token::Value { value })) => Ok(AstNode::new_leaf(token_idx, &value)), @@ -281,7 +285,7 @@ fn maybe_ast_node( ) -> Result, String> { let mut operands = Vec::with_capacity(arity); for _ in 0..arity { - let operand = ast_from_rpn(rpn)?; + let operand = ast_from_rpn(rpn, Some(op_type))?; operands.push(operand); } operands.reverse(); From 8b650a7a9b5c6356425c37fa0ef49c28c49aea76 Mon Sep 17 00:00:00 2001 From: ALXD Date: Mon, 20 Nov 2023 14:29:06 +0100 Subject: [PATCH 051/429] expr: add tests for precise error messages --- tests/by-util/test_expr.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 28cfcf0ec90..18125fe99dd 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -6,6 +6,14 @@ use crate::common::util::TestScenario; +#[test] +fn test_no_arguments() { + new_ucmd!() + .fails() + .code_is(2) + .stderr_only("expr: missing operand\n"); +} + #[test] fn test_simple_values() { // null or 0 => EXIT_VALUE == 1 @@ -275,6 +283,12 @@ fn test_substr() { #[test] fn test_invalid_substr() { + new_ucmd!() + .args(&["56", "substr"]) + .fails() + .code_is(2) + .stderr_only("expr: syntax error: unexpected argument 'substr'\n"); + new_ucmd!() .args(&["substr", "abc", "0", "1"]) .fails() From a0ac3dd22975ceadfb42498d62b216cc264f0a86 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 21 Nov 2023 12:38:12 +0100 Subject: [PATCH 052/429] fuzz printf (#5556) Co-authored-by: Daniel Hofstetter --- .github/workflows/fuzzing.yml | 1 + fuzz/Cargo.toml | 7 ++ fuzz/fuzz_targets/fuzz_printf.rs | 110 +++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 fuzz/fuzz_targets/fuzz_printf.rs diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index cbb0574dee2..2274f6905c1 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -41,6 +41,7 @@ jobs: # https://github.com/uutils/coreutils/issues/5311 - { name: fuzz_date, should_pass: false } - { name: fuzz_expr, should_pass: true } + - { name: fuzz_printf, should_pass: false } - { name: fuzz_parse_glob, should_pass: true } - { name: fuzz_parse_size, should_pass: true } - { name: fuzz_parse_time, should_pass: true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 549f9a6b762..630af4650b6 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,7 @@ uucore = { path = "../src/uucore/" } uu_date = { path = "../src/uu/date/" } uu_test = { path = "../src/uu/test/" } uu_expr = { path = "../src/uu/expr/" } +uu_printf = { path = "../src/uu/printf/" } # Prevent this from interfering with workspaces @@ -28,6 +29,12 @@ path = "fuzz_targets/fuzz_date.rs" test = false doc = false +[[bin]] +name = "fuzz_printf" +path = "fuzz_targets/fuzz_printf.rs" +test = false +doc = false + [[bin]] name = "fuzz_expr" path = "fuzz_targets/fuzz_expr.rs" diff --git a/fuzz/fuzz_targets/fuzz_printf.rs b/fuzz/fuzz_targets/fuzz_printf.rs new file mode 100644 index 00000000000..78bb3e3ce3b --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_printf.rs @@ -0,0 +1,110 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore parens + +#![no_main] +use libfuzzer_sys::fuzz_target; +use uu_printf::uumain; + +use rand::seq::SliceRandom; +use rand::Rng; +use std::ffi::OsString; + +mod fuzz_common; +use crate::fuzz_common::CommandResult; +use crate::fuzz_common::{ + compare_result, generate_and_run_uumain, generate_random_string, run_gnu_cmd, +}; + +static CMD_PATH: &str = "printf"; + +fn generate_escape_sequence(rng: &mut impl Rng) -> String { + let escape_sequences = [ + "\\\"", + "\\\\", + "\\a", + "\\b", + "\\c", + "\\e", + "\\f", + "\\n", + "\\r", + "\\t", + "\\v", + "\\000", + "\\x00", + "\\u0000", + "\\U00000000", + "%%", + ]; + escape_sequences.choose(rng).unwrap().to_string() +} + +fn generate_printf() -> String { + let mut rng = rand::thread_rng(); + let format_specifiers = ["%s", "%d", "%f", "%x", "%o", "%c", "%b", "%q"]; + let mut printf_str = String::new(); + // Add a 20% chance of generating an invalid format specifier + if rng.gen_bool(0.2) { + printf_str.push_str("%z"); // Invalid format specifier + } else { + let specifier = *format_specifiers.choose(&mut rng).unwrap(); + printf_str.push_str(specifier); + + // Add a 20% chance of introducing complex format strings + if rng.gen_bool(0.2) { + printf_str.push_str(&format!(" %{}", rng.gen_range(1..=1000))); + } else { + // Add a random string or number after the specifier + if specifier == "%s" { + printf_str.push_str(&format!( + " {}", + generate_random_string(rng.gen_range(1..=10)) + )); + } else { + printf_str.push_str(&format!(" {}", rng.gen_range(1..=1000))); + } + } + } + + // Add a 10% chance of including an escape sequence + if rng.gen_bool(0.1) { + printf_str.push_str(&generate_escape_sequence(&mut rng)); + } + printf_str +} + +fuzz_target!(|_data: &[u8]| { + let printf_input = generate_printf(); + let mut args = vec![OsString::from("printf")]; + args.extend(printf_input.split_whitespace().map(OsString::from)); + let rust_result = generate_and_run_uumain(&args, uumain); + + let gnu_result = match run_gnu_cmd(CMD_PATH, &args[1..], false) { + Ok(result) => result, + Err(error_result) => { + eprintln!("Failed to run GNU command:"); + eprintln!("Stderr: {}", error_result.stderr); + eprintln!("Exit Code: {}", error_result.exit_code); + CommandResult { + stdout: String::new(), + stderr: error_result.stderr, + exit_code: error_result.exit_code, + } + } + }; + + compare_result( + "printf", + &format!("{:?}", &args[1..]), + &rust_result.stdout, + &gnu_result.stdout, + &rust_result.stderr, + &gnu_result.stderr, + rust_result.exit_code, + gnu_result.exit_code, + false, // Set to true if you want to fail on stderr diff + ); +}); From da7c168f9d7c27e09c582a99e6dba9c68955c3ec Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 20 Nov 2023 17:46:47 +0100 Subject: [PATCH 053/429] all: reduce imports needed for show and show_if_err macros --- src/uu/cut/src/cut.rs | 2 +- src/uu/dd/src/dd.rs | 4 ++-- src/uu/head/src/head.rs | 2 +- src/uu/mkdir/src/mkdir.rs | 2 +- src/uu/mv/src/mv.rs | 2 +- src/uu/numfmt/src/numfmt.rs | 2 +- src/uu/realpath/src/realpath.rs | 4 ++-- src/uu/shred/src/shred.rs | 2 +- src/uu/tail/src/tail.rs | 2 +- src/uu/touch/src/touch.rs | 2 +- src/uucore/src/lib/macros.rs | 3 ++- 11 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 05e8bc6e424..1a2a8ea01e9 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -17,7 +17,7 @@ use uucore::line_ending::LineEnding; use self::searcher::Searcher; use matcher::{ExactMatcher, Matcher, WhitespaceMatcher}; use uucore::ranges::Range; -use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_if_err}; +use uucore::{format_usage, help_about, help_section, help_usage, show_error, show_if_err}; mod matcher; mod searcher; diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index b79ae22da4e..2472fba9b0b 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -52,9 +52,9 @@ use uucore::display::Quotable; #[cfg(unix)] use uucore::error::set_exit_code; use uucore::error::{FromIo, UResult}; -use uucore::{format_usage, help_about, help_section, help_usage, show_error}; #[cfg(target_os = "linux")] -use uucore::{show, show_if_err}; +use uucore::show_if_err; +use uucore::{format_usage, help_about, help_section, help_usage, show_error}; const ABOUT: &str = help_about!("dd.md"); const AFTER_HELP: &str = help_section!("after help", "dd.md"); diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 5d0d3beddc9..83bdd37390a 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -9,7 +9,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::ffi::OsString; use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write}; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::error::{FromIo, UResult, USimpleError}; use uucore::line_ending::LineEnding; use uucore::lines::lines; use uucore::{format_usage, help_about, help_usage, show}; diff --git a/src/uu/mkdir/src/mkdir.rs b/src/uu/mkdir/src/mkdir.rs index 4121278b69a..c29905ef4a7 100644 --- a/src/uu/mkdir/src/mkdir.rs +++ b/src/uu/mkdir/src/mkdir.rs @@ -16,7 +16,7 @@ use uucore::error::{UResult, USimpleError}; #[cfg(not(windows))] use uucore::mode; use uucore::{display::Quotable, fs::dir_strip_dot_for_creation}; -use uucore::{format_usage, help_about, help_section, help_usage, show, show_if_err}; +use uucore::{format_usage, help_about, help_section, help_usage, show_if_err}; static DEFAULT_PERM: u32 = 0o777; diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 036024f9907..60dabf34582 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -21,7 +21,7 @@ use std::os::windows; use std::path::{Path, PathBuf}; use uucore::backup_control::{self, source_is_target_backup}; use uucore::display::Quotable; -use uucore::error::{set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError}; +use uucore::error::{set_exit_code, FromIo, UResult, USimpleError, UUsageError}; use uucore::fs::{are_hardlinks_or_one_way_symlink_to_same_file, are_hardlinks_to_same_file}; use uucore::update_control; // These are exposed for projects (e.g. nushell) that want to create an `Options` value, which diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index d1785209d06..d158072fbb4 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -13,7 +13,7 @@ use std::str::FromStr; use units::{IEC_BASES, SI_BASES}; use uucore::display::Quotable; -use uucore::error::{UError, UResult}; +use uucore::error::UResult; use uucore::ranges::Range; use uucore::{format_usage, help_about, help_section, help_usage, show, show_error}; diff --git a/src/uu/realpath/src/realpath.rs b/src/uu/realpath/src/realpath.rs index 64806fbabf6..b099a5f377c 100644 --- a/src/uu/realpath/src/realpath.rs +++ b/src/uu/realpath/src/realpath.rs @@ -15,13 +15,13 @@ use std::{ use uucore::fs::make_path_relative_to; use uucore::{ display::{print_verbatim, Quotable}, - error::{FromIo, UResult}, + error::{FromIo, UClapError, UResult}, format_usage, fs::{canonicalize, MissingHandling, ResolveMode}, help_about, help_usage, line_ending::LineEnding, + show_if_err, }; -use uucore::{error::UClapError, show, show_if_err}; static ABOUT: &str = help_about!("realpath.md"); const USAGE: &str = help_usage!("realpath.md"); diff --git a/src/uu/shred/src/shred.rs b/src/uu/shred/src/shred.rs index eb63f0e5f2e..04f81bf23fd 100644 --- a/src/uu/shred/src/shred.rs +++ b/src/uu/shred/src/shred.rs @@ -17,7 +17,7 @@ use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::parse_size::parse_size_u64; -use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_if_err}; +use uucore::{format_usage, help_about, help_section, help_usage, show_error, show_if_err}; const ABOUT: &str = help_about!("shred.md"); const USAGE: &str = help_usage!("shred.md"); diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 0488e0808ed..edac4b151cb 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -31,7 +31,7 @@ use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; -use uucore::error::{get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError}; +use uucore::error::{get_exit_code, set_exit_code, FromIo, UResult, USimpleError}; use uucore::{show, show_error}; #[uucore::main] diff --git a/src/uu/touch/src/touch.rs b/src/uu/touch/src/touch.rs index d9399a051f6..51c09801b0a 100644 --- a/src/uu/touch/src/touch.rs +++ b/src/uu/touch/src/touch.rs @@ -17,7 +17,7 @@ use std::ffi::OsString; use std::fs::{self, File}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::error::{FromIo, UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show}; const ABOUT: &str = help_about!("touch.md"); diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index ad86d530828..d1a09c281ab 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -91,6 +91,7 @@ pub static UTILITY_IS_SECOND_ARG: AtomicBool = AtomicBool::new(false); #[macro_export] macro_rules! show( ($err:expr) => ({ + use $crate::error::UError; let e = $err; $crate::error::set_exit_code(e.code()); eprintln!("{}: {}", $crate::util_name(), e); @@ -131,7 +132,7 @@ macro_rules! show( macro_rules! show_if_err( ($res:expr) => ({ if let Err(e) = $res { - show!(e); + $crate::show!(e); } }) ); From 17d21d2d9c7c421202cd692e427ab13bfd01bf60 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Tue, 21 Nov 2023 22:05:41 +0800 Subject: [PATCH 054/429] expr: check prefix operation --- src/uu/expr/src/syntax_tree.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c55fb0bdc6a..119f325a3d1 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -332,8 +332,12 @@ fn push_token_to_either_stack( } Token::PrefixOp { .. } | Token::ParOpen => { - op_stack.push((token_idx, token.clone())); - Ok(()) + if out_stack.is_empty() { + op_stack.push((token_idx, token.clone())); + Ok(()) + } else { + Err(String::from("syntax error (operation should be prefix)")) + } } Token::ParClose => move_till_match_paren(out_stack, op_stack), From 44702940d2437a4427638f94870f57cde5d5f8c0 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Tue, 21 Nov 2023 22:06:20 +0800 Subject: [PATCH 055/429] tests/expr: check prefix operation --- tests/by-util/test_expr.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 28cfcf0ec90..8582082c483 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -100,6 +100,11 @@ fn test_parenthesis() { .args(&["(", "1", "+", "1", ")", "*", "2"]) .succeeds() .stdout_only("4\n"); + + new_ucmd!() + .args(&["1", "(", ")"]) + .fails() + .stderr_only("expr: syntax error (operation should be prefix)\n"); } #[test] @@ -221,6 +226,11 @@ fn test_index() { .args(&["index", "αbcdef_f", "f"]) .succeeds() .stdout_only("6\n"); + + new_ucmd!() + .args(&["αbcdef", "index", "α"]) + .fails() + .stderr_only("expr: syntax error (operation should be prefix)\n"); } #[test] @@ -234,6 +244,11 @@ fn test_length() { .args(&["length", "abcdef"]) .succeeds() .stdout_only("6\n"); + + new_ucmd!() + .args(&["abcdef", "length"]) + .fails() + .stderr_only("expr: syntax error (operation should be prefix)\n"); } #[test] @@ -271,6 +286,11 @@ fn test_substr() { .args(&["substr", "abc", "1", "1"]) .succeeds() .stdout_only("a\n"); + + new_ucmd!() + .args(&["abc", "substr", "1", "1"]) + .fails() + .stderr_only("expr: syntax error (operation should be prefix)\n"); } #[test] From 0822511fdcd00d95554601983c2d2e72becaa2ce Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 21 Nov 2023 16:49:20 +0100 Subject: [PATCH 056/429] test/printf: ignoring rounding up to 2 This is a limitation of the current implementation, which should ultimately use "long double" precision instead of f64. --- tests/by-util/test_printf.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 875896a9f02..dfd13159043 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -296,9 +296,18 @@ fn sub_num_float_e_no_round() { } #[test] -fn sub_num_float_round() { +fn sub_num_float_round_to_one() { new_ucmd!() - .args(&["two is %f", "1.9999996"]) + .args(&["one is %f", "0.9999995"]) + .succeeds() + .stdout_only("one is 1.000000"); +} + +#[test] +#[ignore = "Requires 'long double' precision floats to be used internally"] +fn sub_num_float_round_to_two() { + new_ucmd!() + .args(&["two is %f", "1.9999995"]) .succeeds() .stdout_only("two is 2.000000"); } From 18b5c22567f76fdafd138d31bf1dcaf3c3cce7c2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 22:24:11 +0000 Subject: [PATCH 057/429] fix(deps): update rust crate data-encoding to 2.5 --- Cargo.lock | 4 ++-- src/uucore/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c89c87df41c..3cf5dbcc551 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -690,9 +690,9 @@ checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" [[package]] name = "data-encoding" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" +checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" [[package]] name = "data-encoding-macro" diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 370c8a3864c..cbe34d5f7c8 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -33,7 +33,7 @@ time = { workspace = true, optional = true, features = [ "macros", ] } # * "problem" dependencies (pinned) -data-encoding = { version = "2.4", optional = true } +data-encoding = { version = "2.5", optional = true } data-encoding-macro = { version = "0.1.13", optional = true } z85 = { version = "3.0.5", optional = true } libc = { workspace = true, optional = true } From 3425ee8d5957e0ad75d9b7fad873ca56ac3d2721 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 22 Nov 2023 06:09:44 +0000 Subject: [PATCH 058/429] fix(deps): update rust crate data-encoding-macro to 0.1.14 --- Cargo.lock | 8 ++++---- src/uucore/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cf5dbcc551..d7682f839e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -696,9 +696,9 @@ checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" [[package]] name = "data-encoding-macro" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c904b33cc60130e1aeea4956ab803d08a3f4a0ca82d64ed757afac3891f2bb99" +checksum = "20c01c06f5f429efdf2bae21eb67c28b3df3cf85b7dd2d8ef09c0838dac5d33e" dependencies = [ "data-encoding", "data-encoding-macro-internal", @@ -706,9 +706,9 @@ dependencies = [ [[package]] name = "data-encoding-macro-internal" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fdf3fce3ce863539ec1d7fd1b6dcc3c645663376b43ed376bbf887733e4f772" +checksum = "0047d07f2c89b17dd631c80450d69841a6b5d7fb17278cbc43d7e4cfcf2576f3" dependencies = [ "data-encoding", "syn 1.0.109", diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index cbe34d5f7c8..fabf068bb4d 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -34,7 +34,7 @@ time = { workspace = true, optional = true, features = [ ] } # * "problem" dependencies (pinned) data-encoding = { version = "2.5", optional = true } -data-encoding-macro = { version = "0.1.13", optional = true } +data-encoding-macro = { version = "0.1.14", optional = true } z85 = { version = "3.0.5", optional = true } libc = { workspace = true, optional = true } once_cell = { workspace = true } From e95add794066612aa007f2c7e49855ec30693c4d Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 22 Nov 2023 12:38:10 +0100 Subject: [PATCH 059/429] uucore/format: fix license headers and improve docs --- .../src/lib/features/format/argument.rs | 12 +++++ src/uucore/src/lib/features/format/escape.rs | 11 +++++ src/uucore/src/lib/features/format/mod.rs | 46 +++++++++++++------ .../src/lib/features/format/num_format.rs | 7 +++ src/uucore/src/lib/features/format/spec.rs | 11 ++++- 5 files changed, 71 insertions(+), 16 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 6370c4177e8..db18cf51890 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -1,7 +1,19 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + use os_display::Quotable; use crate::{error::set_exit_code, show_warning}; +/// An argument for formatting +/// +/// Each of these variants is only accepted by their respective directives. For +/// example, [`FormatArgument::Char`] requires a `%c` directive. +/// +/// The [`FormatArgument::Unparsed`] variant contains a string that can be +/// parsed into other types. This is used by the `printf` utility. #[derive(Clone, Debug)] pub enum FormatArgument { Char(char), diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs index 188dd1892b5..d20da3e7e38 100644 --- a/src/uucore/src/lib/features/format/escape.rs +++ b/src/uucore/src/lib/features/format/escape.rs @@ -1,8 +1,19 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Parsing of escape sequences + #[derive(Debug)] pub enum EscapedChar { + /// A single byte Byte(u8), + /// A unicode character Char(char), + /// A character prefixed with a backslash (i.e. an invalid escape sequence) Backslash(u8), + /// Specifies that the string should stop (`\c`) End, } diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 9045b8b90c3..d213d0359cf 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -1,23 +1,34 @@ -//! Main entry point for our implementation of printf. +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! `printf`-style formatting +//! +//! Rust has excellent formatting capabilities, but the coreutils require very +//! specific formatting that needs to work exactly like the GNU utilities. +//! Naturally, the GNU behavior is based on the C `printf` functionality. //! -//! The [`printf`] and [`sprintf`] closely match the behavior of the +//! Additionally, we need support for escape sequences for the `printf` utility. +//! +//! The [`printf`] and [`sprintf`] functions closely match the behavior of the //! corresponding C functions: the former renders a formatted string //! to stdout, the latter renders to a new [`String`] object. //! -//! In addition to the [`printf`] and [`sprintf`] functions, we expose the -//! [`Format`] struct, which represents a parsed format string. This reduces -//! the need for parsing a format string multiple times and assures that no -//! parsing errors occur during writing. -//! //! There are three kinds of parsing that we might want to do: //! -//! 1. Only `printf` specifiers (for e.g. `seq`, `dd`) -//! 2. Only escape sequences (for e.g. `echo`) -//! 3. Both `printf` specifiers and escape sequences (for e.g. `printf`) +//! 1. Parse only `printf` directives (for e.g. `seq`, `dd`) +//! 2. Parse only escape sequences (for e.g. `echo`) +//! 3. Parse both `printf` specifiers and escape sequences (for e.g. `printf`) //! -//! This module aims to combine all three use cases. - -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety +//! This module aims to combine all three use cases. An iterator parsing each +//! of these cases is provided by [`parse_escape_only`], [`parse_spec_only`] +//! and [`parse_spec_and_escape`], respectively. +//! +//! There is a special [`Format`] type, which can be used to parse a format +//! string containing exactly one directive and does not use any `*` in that +//! directive. This format can be printed in a type-safe manner without failing +//! (modulo IO errors). mod argument; mod escape; @@ -131,6 +142,7 @@ impl FormatItem { } } +/// Parse a format string containing % directives and escape sequences pub fn parse_spec_and_escape( fmt: &[u8], ) -> impl Iterator, FormatError>> + '_ { @@ -160,7 +172,10 @@ pub fn parse_spec_and_escape( }) } -fn parse_spec_only(fmt: &[u8]) -> impl Iterator, FormatError>> + '_ { +/// Parse a format string containing % directives +pub fn parse_spec_only( + fmt: &[u8], +) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => None, @@ -183,7 +198,8 @@ fn parse_spec_only(fmt: &[u8]) -> impl Iterator, Fo }) } -fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { +/// Parse a format string containing escape sequences +pub fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => None, diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index c9a2b8c166f..6fd177d1325 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -1,3 +1,10 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Utilities for formatting numbers in various formats + use std::io::Write; use super::{ diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index e74b6f8660f..7c0d0236764 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -1,4 +1,9 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety intmax ptrdiff +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore (vars) intmax ptrdiff use crate::quoting_style::{escape_name, QuotingStyle}; @@ -11,6 +16,10 @@ use super::{ }; use std::{fmt::Display, io::Write, ops::ControlFlow}; +/// A parsed specification for formatting a value +/// +/// This might require more than one argument to resolve width or precision +/// values that are given as `*`. #[derive(Debug)] pub enum Spec { Char { From 2e77d99dd4258a853e172a96ed5349d6bd2e169b Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 23 Nov 2023 14:35:02 +0100 Subject: [PATCH 060/429] expr: fail fast if there are no operands --- src/uu/expr/src/expr.rs | 6 +++++- tests/by-util/test_expr.rs | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index ea559090c92..909c4c37653 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -5,7 +5,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use uucore::{ - error::{UResult, USimpleError}, + error::{UResult, USimpleError, UUsageError}, format_usage, help_about, help_section, help_usage, }; @@ -58,6 +58,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(|v| v.into_iter().map(|s| s.as_ref()).collect::>()) .unwrap_or_default(); + if token_strings.is_empty() { + return Err(UUsageError::new(2, "missing operand")); + } + match process_expr(&token_strings[..]) { Ok(expr_result) => print_expr_ok(&expr_result), Err(expr_error) => Err(USimpleError::new(2, &expr_error)), diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 72d7687b7d1..41fc8d4540b 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -11,7 +11,7 @@ fn test_no_arguments() { new_ucmd!() .fails() .code_is(2) - .stderr_only("expr: missing operand\n"); + .usage_error("missing operand"); } #[test] From c2bfb6a465aac1dce51cc04510820893fad0c1dd Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 23 Nov 2023 15:03:53 +0100 Subject: [PATCH 061/429] expr: adapt error messages, revert most of #5559 --- src/uu/expr/src/syntax_tree.rs | 28 ++++++++++++++++++---------- tests/by-util/test_expr.rs | 18 ++++++++---------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 89c23e41249..0654f2ac331 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -215,7 +215,7 @@ pub fn tokens_to_ast( assert!(op_stack.is_empty()); maybe_dump_rpn(&out_stack); - let result = ast_from_rpn(&mut out_stack, None); + let result = ast_from_rpn(&mut out_stack); if out_stack.is_empty() { maybe_dump_ast(&result); result @@ -254,13 +254,9 @@ fn maybe_dump_rpn(rpn: &TokenStack) { } } -fn ast_from_rpn(rpn: &mut TokenStack, op_type: Option<&str>) -> Result, String> { +fn ast_from_rpn(rpn: &mut TokenStack) -> Result, String> { match rpn.pop() { - None => Err(match op_type { - Some(value) => format!("syntax error: unexpected argument {}", value.quote()), - None => "missing operand".to_owned(), - }), - + None => Err("syntax error (premature end of expression)".to_owned()), Some((token_idx, Token::Value { value })) => Ok(AstNode::new_leaf(token_idx, &value)), Some((token_idx, Token::InfixOp { value, .. })) => { @@ -285,7 +281,7 @@ fn maybe_ast_node( ) -> Result, String> { let mut operands = Vec::with_capacity(arity); for _ in 0..arity { - let operand = ast_from_rpn(rpn, Some(op_type))?; + let operand = ast_from_rpn(rpn)?; operands.push(operand); } operands.reverse(); @@ -335,12 +331,24 @@ fn push_token_to_either_stack( } } - Token::PrefixOp { .. } | Token::ParOpen => { + Token::ParOpen => { if out_stack.is_empty() { op_stack.push((token_idx, token.clone())); Ok(()) } else { - Err(String::from("syntax error (operation should be prefix)")) + Err("syntax error: unexpected argument '('".to_string()) + } + } + + Token::PrefixOp { value, .. } => { + if out_stack.is_empty() { + op_stack.push((token_idx, token.clone())); + Ok(()) + } else { + Err(format!( + "syntax error: unexpected argument {}", + value.quote() + )) } } diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 41fc8d4540b..f29752f66c9 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -112,7 +112,8 @@ fn test_parenthesis() { new_ucmd!() .args(&["1", "(", ")"]) .fails() - .stderr_only("expr: syntax error (operation should be prefix)\n"); + .code_is(2) + .stderr_only("expr: syntax error: unexpected argument '('\n"); } #[test] @@ -238,7 +239,8 @@ fn test_index() { new_ucmd!() .args(&["αbcdef", "index", "α"]) .fails() - .stderr_only("expr: syntax error (operation should be prefix)\n"); + .code_is(2) + .stderr_only("expr: syntax error: unexpected argument 'index'\n"); } #[test] @@ -256,7 +258,8 @@ fn test_length() { new_ucmd!() .args(&["abcdef", "length"]) .fails() - .stderr_only("expr: syntax error (operation should be prefix)\n"); + .code_is(2) + .stderr_only("expr: syntax error: unexpected argument 'length'\n"); } #[test] @@ -298,17 +301,12 @@ fn test_substr() { new_ucmd!() .args(&["abc", "substr", "1", "1"]) .fails() - .stderr_only("expr: syntax error (operation should be prefix)\n"); + .code_is(2) + .stderr_only("expr: syntax error: unexpected argument 'substr'\n"); } #[test] fn test_invalid_substr() { - new_ucmd!() - .args(&["56", "substr"]) - .fails() - .code_is(2) - .stderr_only("expr: syntax error: unexpected argument 'substr'\n"); - new_ucmd!() .args(&["substr", "abc", "0", "1"]) .fails() From 7efe33108a8bebada31e962cfb8c0d5a70b07efe Mon Sep 17 00:00:00 2001 From: Coba Weel <122735+cobaweel@users.noreply.github.com> Date: Wed, 22 Nov 2023 16:15:59 -0800 Subject: [PATCH 062/429] Fix issue 5576 (regex matching bug in expr) Issue 5576 reported a bug in expr, found by the fuzzer. The problem turns out to be with the regex match operator `:`, which is defined in POSIX and the GNU manual to match the pattern only when it occurs at the beginning of the string, i.e., the regex has an implicit `^` prepended to it. We hadn't been doing that. --- src/uu/expr/src/syntax_tree.rs | 3 ++- tests/by-util/test_expr.rs | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 0654f2ac331..2260b2e2186 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -498,7 +498,8 @@ fn infix_operator_and(values: &[String]) -> String { fn operator_match(values: &[String]) -> Result { assert!(values.len() == 2); - let re = Regex::with_options(&values[1], RegexOptions::REGEX_OPTION_NONE, Syntax::grep()) + let re_string = format!("^{}", &values[1]); + let re = Regex::with_options(&re_string, RegexOptions::REGEX_OPTION_NONE, Syntax::grep()) .map_err(|err| err.description().to_string())?; Ok(if re.captures_len() > 0 { re.captures(&values[0]) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index f29752f66c9..ebc2c832feb 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -289,6 +289,10 @@ fn test_regex() { .args(&["-5", ":", "-\\{0,1\\}[0-9]*$"]) .succeeds() .stdout_only("2\n"); + new_ucmd!() + .args(&["abc", ":", "bc"]) + .fails() + .stdout_only("0\n"); } #[test] From 550f3b0c488451bbe9f930e0520d558f01e3c891 Mon Sep 17 00:00:00 2001 From: zoze0 Date: Fri, 24 Nov 2023 00:41:11 +0800 Subject: [PATCH 063/429] uucore: add support for loongarch64 (#5574) * uucore: add support for loongarch64 * add loongarch --------- Co-authored-by: Sylvestre Ledru --- .vscode/cspell.dictionaries/acronyms+names.wordlist.txt | 1 + src/uucore/src/lib/features/fs.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt index c004ea2f822..4a59ed094bd 100644 --- a/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt +++ b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt @@ -37,6 +37,7 @@ aarch flac impls lzma +loongarch # * names BusyBox diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index f8593dfede5..de4c0b08dbe 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -119,6 +119,7 @@ impl FileInformation { not(target_os = "solaris"), not(target_arch = "aarch64"), not(target_arch = "riscv64"), + not(target_arch = "loongarch64"), target_pointer_width = "64" ))] return self.0.st_nlink; @@ -133,6 +134,7 @@ impl FileInformation { target_os = "solaris", target_arch = "aarch64", target_arch = "riscv64", + target_arch = "loongarch64", not(target_pointer_width = "64") ) ))] From 4dc46f10e9f3636ad8ac68b579b73461b8035bee Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 21 Nov 2023 11:04:38 -0500 Subject: [PATCH 064/429] split: pass GNU test l-chunk --- src/uu/split/src/split.rs | 188 ++++++++++++++++++---------- src/uu/split/src/strategy.rs | 9 +- tests/by-util/test_split.rs | 233 +++++++++++++++++++---------------- 3 files changed, 259 insertions(+), 171 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 592e4eedde9..4e2af0be4d8 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -1130,14 +1130,68 @@ impl<'a> Write for LineBytesChunkWriter<'a> { } } +/// Output file parameters +struct OutFile { + filename: String, + maybe_writer: Option>>, +} + +impl OutFile { + /// Get the writer for the output file + /// Instantiate the writer if it has not been instantiated upfront + fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter>> { + if self.maybe_writer.is_some() { + Ok(self.maybe_writer.as_mut().unwrap()) + } else { + // Writer was not instantiated upfront + // Instantiate it and record for future use + self.maybe_writer = Some(settings.instantiate_current_writer(self.filename.as_str())?); + Ok(self.maybe_writer.as_mut().unwrap()) + } + } +} + +/// Generate a set of Output Files +/// This is a helper function to [`n_chunks_by_byte`], [`n_chunks_by_line`] +/// and [`n_chunks_by_line_round_robin`]. +/// Each OutFile is generated with filename, while the writer for it could be +/// optional, to be instantiated later by the calling function as needed. +/// Optional writers could happen in [`n_chunks_by_line`] +/// if `elide_empty_files` parameter is set to `true`. +fn get_out_files( + num_files: u64, + settings: &Settings, + is_writer_optional: bool, +) -> UResult> { + // This object is responsible for creating the filename for each chunk + let mut filename_iterator: FilenameIterator<'_> = + FilenameIterator::new(&settings.prefix, &settings.suffix) + .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; + let mut out_files: Vec = Vec::new(); + for _ in 0..num_files { + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let maybe_writer = if is_writer_optional { + None + } else { + Some(settings.instantiate_current_writer(filename.as_str())?) + }; + out_files.push(OutFile { + filename, + maybe_writer, + }); + } + Ok(out_files) +} + /// Split a file or STDIN into a specific number of chunks by byte. -/// If in Kth chunk of N mode - print the k-th chunk to STDOUT. /// /// When file size cannot be evenly divided into the number of chunks of the same size, /// the first X chunks are 1 byte longer than the rest, /// where X is a modulus reminder of (file size % number of chunks) /// -/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` +/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk` /// /// In N chunks mode - this function always creates one output file for each chunk, even /// if there is an error reading or writing one of the chunks or if @@ -1207,7 +1261,7 @@ where // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut writers = vec![]; + let mut out_files: Vec = Vec::new(); // Calculate chunk size base and modulo reminder // to be used in calculating chunk_size later on @@ -1219,16 +1273,7 @@ where // This will create each of the underlying files // or stdin pipes to child shell/command processes if in `--filter` mode if kth_chunk.is_none() { - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) - .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); - } + out_files = get_out_files(num_chunks, settings, false)?; } for i in 1_u64..=num_chunks { @@ -1272,7 +1317,7 @@ where } None => { let idx = (i - 1) as usize; - let writer = writers.get_mut(idx).unwrap(); + let writer = out_files[idx].get_writer(settings)?; writer.write_all(buf)?; } } @@ -1284,9 +1329,14 @@ where } /// Split a file or STDIN into a specific number of chunks by line. -/// If in Kth chunk of N mode - print the k-th chunk to STDOUT. /// -/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` +/// It is most likely that input cannot be evenly divided into the number of chunks +/// of the same size in bytes or number of lines, since we cannot break lines. +/// It is also likely that there could be empty files (having `elide_empty_files` is disabled) +/// when a long line overlaps one or more chunks. +/// +/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk` +/// Note: the `elide_empty_files` flag is ignored in this mode /// /// In N chunks mode - this function always creates one output file for each chunk, even /// if there is an error reading or writing one of the chunks or if @@ -1322,76 +1372,97 @@ where let initial_buf = &mut Vec::new(); let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; let reader = initial_buf.chain(reader); - let chunk_size = (num_bytes / num_chunks) as usize; // If input file is empty and we would not have determined the Kth chunk // in the Kth chunk of N chunk mode, then terminate immediately. // This happens on `split -n l/3/10 /dev/null`, for example. - if kth_chunk.is_some() && num_bytes == 0 { + // Similarly, if input file is empty and `elide_empty_files` parameter is enabled, + // then we would have written zero chunks of output, + // so terminate immediately as well. + // This happens on `split -e -n l/3 /dev/null`, for example. + if num_bytes == 0 && (kth_chunk.is_some() || settings.elide_empty_files) { return Ok(()); } // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut writers = vec![]; + let mut out_files: Vec = Vec::new(); + + // Calculate chunk size base and modulo reminder + // to be used in calculating `num_bytes_should_be_written` later on + let chunk_size_base = num_bytes / num_chunks; + let chunk_size_reminder = num_bytes % num_chunks; // If in N chunks mode - // Create one writer for each chunk. - // This will create each of the underlying files - // or stdin pipes to child shell/command processes if in `--filter` mode + // Generate filenames for each file and + // if `elide_empty_files` parameter is NOT enabled - instantiate the writer + // which will create each of the underlying files or stdin pipes + // to child shell/command processes if in `--filter` mode. + // Otherwise keep writer optional, to be instantiated later if there is data + // to write for the associated chunk. if kth_chunk.is_none() { - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) - .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); - } + out_files = get_out_files(num_chunks, settings, settings.elide_empty_files)?; } - let mut num_bytes_remaining_in_current_chunk = chunk_size; - let mut i = 1; + let mut chunk_number = 1; let sep = settings.separator; + let mut num_bytes_should_be_written = chunk_size_base + (chunk_size_reminder > 0) as u64; + let mut num_bytes_written = 0; for line_result in reader.split(sep) { - // add separator back in at the end of the line let mut line = line_result?; - line.push(sep); + // add separator back in at the end of the line, + // since `reader.split(sep)` removes it, + // except if the last line did not end with separator character + if (num_bytes_written + line.len() as u64) < num_bytes { + line.push(sep); + } let bytes = line.as_slice(); match kth_chunk { - Some(chunk_number) => { - if i == chunk_number { + Some(kth) => { + if chunk_number == kth { stdout_writer.write_all(bytes)?; } } None => { - let idx = (i - 1) as usize; - let maybe_writer = writers.get_mut(idx); - let writer = maybe_writer.unwrap(); + // Should write into a file + let idx = (chunk_number - 1) as usize; + let writer = out_files[idx].get_writer(settings)?; custom_write_all(bytes, writer, settings)?; } } - let num_bytes = bytes.len(); - if num_bytes >= num_bytes_remaining_in_current_chunk { - num_bytes_remaining_in_current_chunk = chunk_size; - i += 1; - } else { - num_bytes_remaining_in_current_chunk -= num_bytes; + // Advance to the next chunk if the current one is filled. + // There could be a situation when a long line, which started in current chunk, + // would overlap the next chunk (or even several next chunks), + // and since we cannot break lines for this split strategy, we could end up with + // empty files in place(s) of skipped chunk(s) + let num_line_bytes = bytes.len() as u64; + num_bytes_written += num_line_bytes; + let mut skipped = -1; + while num_bytes_should_be_written <= num_bytes_written { + num_bytes_should_be_written += + chunk_size_base + (chunk_size_reminder > chunk_number) as u64; + chunk_number += 1; + skipped += 1; } - if let Some(chunk_number) = kth_chunk { - if i > chunk_number { + // If a chunk was skipped and `elide_empty_files` flag is set, + // roll chunk_number back to preserve sequential continuity + // of file names for files written to, + // except for Kth chunk of N mode + if settings.elide_empty_files && skipped > 0 && kth_chunk.is_none() { + chunk_number -= skipped as u64; + } + + if let Some(kth) = kth_chunk { + if chunk_number > kth { break; } } } - Ok(()) } @@ -1432,23 +1503,14 @@ where // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut writers = vec![]; + let mut out_files: Vec = Vec::new(); // If in N chunks mode // Create one writer for each chunk. // This will create each of the underlying files // or stdin pipes to child shell/command processes if in `--filter` mode if kth_chunk.is_none() { - // This object is responsible for creating the filename for each chunk. - let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix) - .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; - for _ in 0..num_chunks { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = settings.instantiate_current_writer(filename.as_str())?; - writers.push(writer); - } + out_files = get_out_files(num_chunks, settings, false)?; } let num_chunks: usize = num_chunks.try_into().unwrap(); @@ -1470,9 +1532,7 @@ where } } None => { - let maybe_writer = writers.get_mut(i % num_chunks); - let writer = maybe_writer.unwrap(); - + let writer = out_files[i % num_chunks].get_writer(settings)?; let writer_stdin_open = custom_write_all(bytes, writer, settings)?; if !writer_stdin_open { closed_writers += 1; diff --git a/src/uu/split/src/strategy.rs b/src/uu/split/src/strategy.rs index e85abcee58b..7b934f72047 100644 --- a/src/uu/split/src/strategy.rs +++ b/src/uu/split/src/strategy.rs @@ -8,7 +8,10 @@ use crate::{OPT_BYTES, OPT_LINES, OPT_LINE_BYTES, OPT_NUMBER}; use clap::{parser::ValueSource, ArgMatches}; use std::fmt; -use uucore::parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError}; +use uucore::{ + display::Quotable, + parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError}, +}; /// Sub-strategy of the [`Strategy::Number`] /// Splitting a file into a specific number of chunks. @@ -208,10 +211,10 @@ impl fmt::Display for StrategyError { Self::Lines(e) => write!(f, "invalid number of lines: {e}"), Self::Bytes(e) => write!(f, "invalid number of bytes: {e}"), Self::NumberType(NumberTypeError::NumberOfChunks(s)) => { - write!(f, "invalid number of chunks: {s}") + write!(f, "invalid number of chunks: {}", s.quote()) } Self::NumberType(NumberTypeError::ChunkNumber(s)) => { - write!(f, "invalid chunk number: {s}") + write!(f, "invalid chunk number: {}", s.quote()) } Self::MultipleWays => write!(f, "cannot split in more than one way"), } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 0ae2af5cb92..2c9a56bddee 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -606,13 +606,13 @@ fn test_split_obs_lines_as_other_option_value() { .args(&["-n", "-200", "file"]) .fails() .code_is(1) - .stderr_contains("split: invalid number of chunks: -200\n"); + .stderr_contains("split: invalid number of chunks: '-200'\n"); scene .ucmd() .args(&["--number", "-e200", "file"]) .fails() .code_is(1) - .stderr_contains("split: invalid number of chunks: -e200\n"); + .stderr_contains("split: invalid number of chunks: '-e200'\n"); } /// Test for using more than one obsolete lines option (standalone) @@ -708,7 +708,7 @@ fn test_split_overflow_bytes_size() { fn test_split_stdin_num_chunks() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["--number=1"]).pipe_in("").succeeds(); - assert_eq!(file_read(&at, "xaa"), ""); + assert_eq!(at.read("xaa"), ""); assert!(!at.plus("xab").exists()); } @@ -727,8 +727,8 @@ fn test_split_stdin_num_line_chunks() { ucmd.args(&["--number=l/2"]) .pipe_in("1\n2\n3\n4\n5\n") .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n"); - assert_eq!(file_read(&at, "xab"), "4\n5\n"); + assert_eq!(at.read("xaa"), "1\n2\n3\n"); + assert_eq!(at.read("xab"), "4\n5\n"); assert!(!at.plus("xac").exists()); } @@ -741,12 +741,6 @@ fn test_split_stdin_num_kth_line_chunk() { .stdout_only("2\n"); } -fn file_read(at: &AtPath, filename: &str) -> String { - let mut s = String::new(); - at.open(filename).read_to_string(&mut s).unwrap(); - s -} - /// Test for the default suffix length behavior: dynamically increasing size. #[test] fn test_alphabetic_dynamic_suffix_length() { @@ -766,11 +760,11 @@ fn test_alphabetic_dynamic_suffix_length() { for i in b'a'..=b'y' { for j in b'a'..=b'z' { let filename = format!("x{}{}", i as char, j as char); - let contents = file_read(&at, &filename); + let contents = at.read(&filename); assert_eq!(contents, "a"); } } - assert_eq!(file_read(&at, "xzaaa"), "a"); + assert_eq!(at.read("xzaaa"), "a"); } /// Test for the default suffix length behavior: dynamically increasing size. @@ -790,10 +784,10 @@ fn test_numeric_dynamic_suffix_length() { .succeeds(); for i in 0..90 { let filename = format!("x{i:02}"); - let contents = file_read(&at, &filename); + let contents = at.read(&filename); assert_eq!(contents, "a"); } - assert_eq!(file_read(&at, "x9000"), "a"); + assert_eq!(at.read("x9000"), "a"); } #[test] @@ -812,10 +806,10 @@ fn test_hex_dynamic_suffix_length() { .succeeds(); for i in 0..240 { let filename = format!("x{i:02x}"); - let contents = file_read(&at, &filename); + let contents = at.read(&filename); assert_eq!(contents, "a"); } - assert_eq!(file_read(&at, "xf000"), "a"); + assert_eq!(at.read("xf000"), "a"); } /// Test for dynamic suffix length (auto-widening) disabled when suffix start number is specified @@ -833,7 +827,7 @@ fn test_dynamic_suffix_length_on_with_suffix_start_no_value() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-b", "1", "--numeric-suffixes", "ninetyonebytes.txt"]) .succeeds(); - assert_eq!(file_read(&at, "x9000"), "a"); + assert_eq!(at.read("x9000"), "a"); } /// Test for suffix auto-width with --number strategy and suffix start number @@ -845,8 +839,8 @@ fn test_suffix_auto_width_with_number() { let glob = Glob::new(&at, ".", r"x\d\d\d$"); assert_eq!(glob.count(), 100); assert_eq!(glob.collate(), at.read_bytes("fivelines.txt")); - assert_eq!(file_read(&at, "x001"), "1\n"); - assert_eq!(file_read(&at, "x100"), ""); + assert_eq!(at.read("x001"), "1\n"); + assert_eq!(at.read("x100"), ""); new_ucmd!() .args(&["--numeric-suffixes=100", "--number=r/100", "fivelines.txt"]) @@ -926,17 +920,12 @@ creating file 'xaf' #[test] fn test_number_n() { let (at, mut ucmd) = at_and_ucmd!(); - let file_read = |f| { - let mut s = String::new(); - at.open(f).read_to_string(&mut s).unwrap(); - s - }; ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds(); - assert_eq!(file_read("xaa"), "abcdef"); - assert_eq!(file_read("xab"), "ghijkl"); - assert_eq!(file_read("xac"), "mnopq"); - assert_eq!(file_read("xad"), "rstuv"); - assert_eq!(file_read("xae"), "wxyz\n"); + assert_eq!(at.read("xaa"), "abcdef"); + assert_eq!(at.read("xab"), "ghijkl"); + assert_eq!(at.read("xac"), "mnopq"); + assert_eq!(at.read("xad"), "rstuv"); + assert_eq!(at.read("xae"), "wxyz\n"); #[cfg(unix)] new_ucmd!() .args(&["--number=100", "/dev/null"]) @@ -974,11 +963,11 @@ fn test_number_kth_of_n() { new_ucmd!() .args(&["--number=0/5", "asciilowercase.txt"]) .fails() - .stderr_contains("split: invalid chunk number: 0"); + .stderr_contains("split: invalid chunk number: '0'"); new_ucmd!() .args(&["--number=10/5", "asciilowercase.txt"]) .fails() - .stderr_contains("split: invalid chunk number: 10"); + .stderr_contains("split: invalid chunk number: '10'"); #[cfg(target_pointer_width = "64")] new_ucmd!() .args(&[ @@ -986,7 +975,7 @@ fn test_number_kth_of_n() { "asciilowercase.txt", ]) .fails() - .stderr_contains("split: invalid number of chunks: 18446744073709551616"); + .stderr_contains("split: invalid number of chunks: '18446744073709551616'"); } #[test] @@ -1020,32 +1009,27 @@ fn test_number_kth_of_n_round_robin() { "fivelines.txt", ]) .fails() - .stderr_contains("split: invalid number of chunks: 18446744073709551616"); + .stderr_contains("split: invalid number of chunks: '18446744073709551616'"); new_ucmd!() .args(&["--number", "r/0/3", "fivelines.txt"]) .fails() - .stderr_contains("split: invalid chunk number: 0"); + .stderr_contains("split: invalid chunk number: '0'"); new_ucmd!() .args(&["--number", "r/10/3", "fivelines.txt"]) .fails() - .stderr_contains("split: invalid chunk number: 10"); + .stderr_contains("split: invalid chunk number: '10'"); } #[test] fn test_split_number_with_io_blksize() { let (at, mut ucmd) = at_and_ucmd!(); - let file_read = |f| { - let mut s = String::new(); - at.open(f).read_to_string(&mut s).unwrap(); - s - }; ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"]) .succeeds(); - assert_eq!(file_read("xaa"), "abcdef"); - assert_eq!(file_read("xab"), "ghijkl"); - assert_eq!(file_read("xac"), "mnopq"); - assert_eq!(file_read("xad"), "rstuv"); - assert_eq!(file_read("xae"), "wxyz\n"); + assert_eq!(at.read("xaa"), "abcdef"); + assert_eq!(at.read("xab"), "ghijkl"); + assert_eq!(at.read("xac"), "mnopq"); + assert_eq!(at.read("xad"), "rstuv"); + assert_eq!(at.read("xae"), "wxyz\n"); } #[test] @@ -1153,7 +1137,7 @@ fn test_allow_empty_files() { } #[test] -fn test_elide_empty_files() { +fn test_elide_empty_files_n_chunks() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-e", "-n", "4", "threebytes.txt"]) .succeeds() @@ -1167,7 +1151,7 @@ fn test_elide_empty_files() { #[test] #[cfg(unix)] -fn test_elide_dev_null() { +fn test_elide_dev_null_n_chunks() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-e", "-n", "3", "/dev/null"]) .succeeds() @@ -1191,24 +1175,58 @@ fn test_dev_zero() { } #[test] -fn test_lines() { +fn test_elide_empty_files_l_chunks() { let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-e", "-n", "l/7", "fivelines.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "1\n"); + assert_eq!(at.read("xab"), "2\n"); + assert_eq!(at.read("xac"), "3\n"); + assert_eq!(at.read("xad"), "4\n"); + assert_eq!(at.read("xae"), "5\n"); + assert!(!at.plus("xaf").exists()); + assert!(!at.plus("xag").exists()); +} - let file_read = |f| { - let mut s = String::new(); - at.open(f).read_to_string(&mut s).unwrap(); - s - }; +#[test] +#[cfg(unix)] +fn test_elide_dev_null_l_chunks() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-e", "-n", "l/3", "/dev/null"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert!(!at.plus("xaa").exists()); + assert!(!at.plus("xab").exists()); + assert!(!at.plus("xac").exists()); +} +#[test] +#[cfg(unix)] +fn test_number_by_bytes_dev_zero() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "3", "/dev/zero"]) + .fails() + .stderr_only("split: /dev/zero: cannot determine file size\n"); + assert!(!at.plus("xaa").exists()); + assert!(!at.plus("xab").exists()); + assert!(!at.plus("xac").exists()); +} + +#[test] +fn test_number_by_lines() { + let (at, mut ucmd) = at_and_ucmd!(); // Split into two files without splitting up lines. ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds(); - assert_eq!(file_read("xaa"), "1\n2\n3\n"); - assert_eq!(file_read("xab"), "4\n5\n"); + assert_eq!(at.read("xaa"), "1\n2\n3\n"); + assert_eq!(at.read("xab"), "4\n5\n"); } #[test] -fn test_lines_kth() { +fn test_number_by_lines_kth() { new_ucmd!() .args(&["-n", "l/3/10", "onehundredlines.txt"]) .succeeds() @@ -1217,13 +1235,27 @@ fn test_lines_kth() { #[test] #[cfg(unix)] -fn test_lines_kth_dev_null() { +fn test_number_by_lines_kth_dev_null() { new_ucmd!() .args(&["-n", "l/3/10", "/dev/null"]) .succeeds() .stdout_only(""); } +#[test] +fn test_number_by_lines_kth_no_end_sep() { + new_ucmd!() + .args(&["-n", "l/3/10"]) + .pipe_in("1\n2222\n3\n4") + .succeeds() + .stdout_only("2222\n"); + new_ucmd!() + .args(&["-e", "-n", "l/8/10"]) + .pipe_in("1\n2222\n3\n4") + .succeeds() + .stdout_only("3\n"); +} + #[test] fn test_line_bytes() { let (at, mut ucmd) = at_and_ucmd!(); @@ -1588,17 +1620,10 @@ fn test_effective_suffix_hex_last() { #[test] fn test_round_robin() { let (at, mut ucmd) = at_and_ucmd!(); - - let file_read = |f| { - let mut s = String::new(); - at.open(f).read_to_string(&mut s).unwrap(); - s - }; - ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds(); - assert_eq!(file_read("xaa"), "1\n3\n5\n"); - assert_eq!(file_read("xab"), "2\n4\n"); + assert_eq!(at.read("xaa"), "1\n3\n5\n"); + assert_eq!(at.read("xab"), "2\n4\n"); } #[test] @@ -1631,7 +1656,7 @@ fn test_split_invalid_input() { .args(&["-n", "0", "file"]) .fails() .no_stdout() - .stderr_contains("split: invalid number of chunks: 0"); + .stderr_contains("split: invalid number of chunks: '0'"); } /// Test if there are invalid (non UTF-8) in the arguments - unix @@ -1690,9 +1715,9 @@ fn test_split_separator_nl_lines() { .pipe_in("1\n2\n3\n4\n5\n") .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\n2\n"); - assert_eq!(file_read(&at, "xab"), "3\n4\n"); - assert_eq!(file_read(&at, "xac"), "5\n"); + assert_eq!(at.read("xaa"), "1\n2\n"); + assert_eq!(at.read("xab"), "3\n4\n"); + assert_eq!(at.read("xac"), "5\n"); assert!(!at.plus("xad").exists()); } @@ -1703,9 +1728,9 @@ fn test_split_separator_nl_line_bytes() { .pipe_in("1\n2\n3\n4\n5\n") .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\n2\n"); - assert_eq!(file_read(&at, "xab"), "3\n4\n"); - assert_eq!(file_read(&at, "xac"), "5\n"); + assert_eq!(at.read("xaa"), "1\n2\n"); + assert_eq!(at.read("xab"), "3\n4\n"); + assert_eq!(at.read("xac"), "5\n"); assert!(!at.plus("xad").exists()); } @@ -1715,9 +1740,9 @@ fn test_split_separator_nl_number_l() { ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\n2\n"); - assert_eq!(file_read(&at, "xab"), "3\n4\n"); - assert_eq!(file_read(&at, "xac"), "5\n"); + assert_eq!(at.read("xaa"), "1\n2\n"); + assert_eq!(at.read("xab"), "3\n4\n"); + assert_eq!(at.read("xac"), "5\n"); assert!(!at.plus("xad").exists()); } @@ -1727,9 +1752,9 @@ fn test_split_separator_nl_number_r() { ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\n4\n"); - assert_eq!(file_read(&at, "xab"), "2\n5\n"); - assert_eq!(file_read(&at, "xac"), "3\n"); + assert_eq!(at.read("xaa"), "1\n4\n"); + assert_eq!(at.read("xab"), "2\n5\n"); + assert_eq!(at.read("xac"), "3\n"); assert!(!at.plus("xad").exists()); } @@ -1739,9 +1764,9 @@ fn test_split_separator_nul_lines() { ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\x002\0"); - assert_eq!(file_read(&at, "xab"), "3\x004\0"); - assert_eq!(file_read(&at, "xac"), "5\0"); + assert_eq!(at.read("xaa"), "1\x002\0"); + assert_eq!(at.read("xab"), "3\x004\0"); + assert_eq!(at.read("xac"), "5\0"); assert!(!at.plus("xad").exists()); } @@ -1751,9 +1776,9 @@ fn test_split_separator_nul_line_bytes() { ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\x002\0"); - assert_eq!(file_read(&at, "xab"), "3\x004\0"); - assert_eq!(file_read(&at, "xac"), "5\0"); + assert_eq!(at.read("xaa"), "1\x002\0"); + assert_eq!(at.read("xab"), "3\x004\0"); + assert_eq!(at.read("xac"), "5\0"); assert!(!at.plus("xad").exists()); } @@ -1763,9 +1788,9 @@ fn test_split_separator_nul_number_l() { ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\x002\0"); - assert_eq!(file_read(&at, "xab"), "3\x004\0"); - assert_eq!(file_read(&at, "xac"), "5\0"); + assert_eq!(at.read("xaa"), "1\x002\0"); + assert_eq!(at.read("xab"), "3\x004\0"); + assert_eq!(at.read("xac"), "5\0"); assert!(!at.plus("xad").exists()); } @@ -1775,9 +1800,9 @@ fn test_split_separator_nul_number_r() { ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1\x004\0"); - assert_eq!(file_read(&at, "xab"), "2\x005\0"); - assert_eq!(file_read(&at, "xac"), "3\0"); + assert_eq!(at.read("xaa"), "1\x004\0"); + assert_eq!(at.read("xab"), "2\x005\0"); + assert_eq!(at.read("xac"), "3\0"); assert!(!at.plus("xad").exists()); } @@ -1787,9 +1812,9 @@ fn test_split_separator_semicolon_lines() { ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1;2;"); - assert_eq!(file_read(&at, "xab"), "3;4;"); - assert_eq!(file_read(&at, "xac"), "5;"); + assert_eq!(at.read("xaa"), "1;2;"); + assert_eq!(at.read("xab"), "3;4;"); + assert_eq!(at.read("xac"), "5;"); assert!(!at.plus("xad").exists()); } @@ -1799,9 +1824,9 @@ fn test_split_separator_semicolon_line_bytes() { ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1;2;"); - assert_eq!(file_read(&at, "xab"), "3;4;"); - assert_eq!(file_read(&at, "xac"), "5;"); + assert_eq!(at.read("xaa"), "1;2;"); + assert_eq!(at.read("xab"), "3;4;"); + assert_eq!(at.read("xac"), "5;"); assert!(!at.plus("xad").exists()); } @@ -1811,9 +1836,9 @@ fn test_split_separator_semicolon_number_l() { ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1;2;"); - assert_eq!(file_read(&at, "xab"), "3;4;"); - assert_eq!(file_read(&at, "xac"), "5;"); + assert_eq!(at.read("xaa"), "1;2;"); + assert_eq!(at.read("xab"), "3;4;"); + assert_eq!(at.read("xac"), "5;"); assert!(!at.plus("xad").exists()); } @@ -1823,9 +1848,9 @@ fn test_split_separator_semicolon_number_r() { ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"]) .succeeds(); - assert_eq!(file_read(&at, "xaa"), "1;4;"); - assert_eq!(file_read(&at, "xab"), "2;5;"); - assert_eq!(file_read(&at, "xac"), "3;"); + assert_eq!(at.read("xaa"), "1;4;"); + assert_eq!(at.read("xab"), "2;5;"); + assert_eq!(at.read("xac"), "3;"); assert!(!at.plus("xad").exists()); } From 84b5e6f0a1275ffd6816d4478284ed2ef27e5f1e Mon Sep 17 00:00:00 2001 From: Clint Teece Date: Sat, 25 Nov 2023 09:13:12 -0500 Subject: [PATCH 065/429] du: start printing output immediately (#5552) * du: very rough draft of continuously printing output * du: clean up printing logic, still needs some polishing * du: gracefully handle case where `du` returns no `Stat`s * du: print output using separate thread * du: clean up print thread implementation * du: send ownership of `Stat`s to printing thread as soon as `du` is done with them * du: add basic error handling for communication between threads, use `StatPrinter` to handle printing thread logic * du: move printing grand total into `StatPrinter`, and move initialization of printing-related variables into `StatPrinter::new` * du: clean up calculation of `convert_size` function, and separate printing a single stat our into its own method in `StatPrinter` * du: have printing thread handle printing IO-related errors, to ensure error messages and regular output message are written one at a time * du: add comment explaining print thread, remove outdated comments and clippy allows * du: restore clippy allows for cognitive complexity --------- Co-authored-by: clint --- src/uu/du/src/du.rs | 317 +++++++++++++++++++++++++++----------------- 1 file changed, 193 insertions(+), 124 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 148b197df33..dc03a64f218 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -16,8 +16,6 @@ use std::fs::File; use std::fs::Metadata; use std::io::BufRead; use std::io::BufReader; -use std::io::Result; -use std::iter; #[cfg(not(windows))] use std::os::unix::fs::MetadataExt; #[cfg(windows)] @@ -27,15 +25,17 @@ use std::os::windows::io::AsRawHandle; use std::path::Path; use std::path::PathBuf; use std::str::FromStr; +use std::sync::mpsc; +use std::thread; use std::time::{Duration, UNIX_EPOCH}; use std::{error::Error, fmt::Display}; use uucore::display::{print_verbatim, Quotable}; use uucore::error::FromIo; -use uucore::error::{set_exit_code, UError, UResult, USimpleError}; +use uucore::error::{UError, UResult, USimpleError}; use uucore::line_ending::LineEnding; use uucore::parse_glob; use uucore::parse_size::{parse_size_u64, ParseSizeError}; -use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_warning}; +use uucore::{format_usage, help_about, help_section, help_usage, show, show_warning}; #[cfg(windows)] use windows_sys::Win32::Foundation::HANDLE; #[cfg(windows)] @@ -81,6 +81,7 @@ const USAGE: &str = help_usage!("du.md"); // TODO: Support Z & Y (currently limited by size of u64) const UNITS: [(char, u32); 6] = [('E', 6), ('P', 5), ('T', 4), ('G', 3), ('M', 2), ('K', 1)]; +#[derive(Clone)] struct Options { all: bool, max_depth: Option, @@ -93,7 +94,7 @@ struct Options { verbose: bool, } -#[derive(PartialEq)] +#[derive(PartialEq, Clone)] enum Deref { All, Args(Vec), @@ -119,7 +120,7 @@ struct Stat { } impl Stat { - fn new(path: &Path, options: &Options) -> Result { + fn new(path: &Path, options: &Options) -> std::io::Result { // Determine whether to dereference (follow) the symbolic link let should_dereference = match &options.dereference { Deref::All => true, @@ -290,7 +291,6 @@ fn choose_size(matches: &ArgMatches, stat: &Stat) -> u64 { } // this takes `my_stat` to avoid having to stat files multiple times. -// XXX: this should use the impl Trait return type when it is stabilized #[allow(clippy::cognitive_complexity)] fn du( mut my_stat: Stat, @@ -298,18 +298,16 @@ fn du( depth: usize, seen_inodes: &mut HashSet, exclude: &[Pattern], -) -> Box> { - let mut stats = vec![]; - let mut futures = vec![]; - + print_tx: &mpsc::Sender>, +) -> Result>>> { if my_stat.is_dir { let read = match fs::read_dir(&my_stat.path) { Ok(read) => read, Err(e) => { - show!( - e.map_err_context(|| format!("cannot read directory {}", my_stat.path.quote())) - ); - return Box::new(iter::once(my_stat)); + print_tx.send(Err(e.map_err_context(|| { + format!("cannot read directory {}", my_stat.path.quote()) + })))?; + return Ok(my_stat); } }; @@ -354,44 +352,48 @@ fn du( } } } - futures.push(du( + + let this_stat = du( this_stat, options, depth + 1, seen_inodes, exclude, - )); + print_tx, + )?; + + if !options.separate_dirs { + my_stat.size += this_stat.size; + my_stat.blocks += this_stat.blocks; + my_stat.inodes += this_stat.inodes; + } + print_tx.send(Ok(StatPrintInfo { + stat: this_stat, + depth: depth + 1, + }))?; } else { my_stat.size += this_stat.size; my_stat.blocks += this_stat.blocks; my_stat.inodes += 1; if options.all { - stats.push(this_stat); + print_tx.send(Ok(StatPrintInfo { + stat: this_stat, + depth: depth + 1, + }))?; } } } - Err(e) => show!( - e.map_err_context(|| format!("cannot access {}", entry.path().quote())) - ), + Err(e) => print_tx.send(Err(e.map_err_context(|| { + format!("cannot access {}", entry.path().quote()) + })))?, } } - Err(error) => show_error!("{}", error), + Err(error) => print_tx.send(Err(error.into()))?, } } } - stats.extend(futures.into_iter().flatten().filter(|stat| { - if !options.separate_dirs && stat.path.parent().unwrap() == my_stat.path { - my_stat.size += stat.size; - my_stat.blocks += stat.blocks; - my_stat.inodes += stat.inodes; - } - options - .max_depth - .map_or(true, |max_depth| depth < max_depth) - })); - stats.push(my_stat); - Box::new(stats.into_iter()) + Ok(my_stat) } fn convert_size_human(size: u64, multiplier: u64, _block_size: u64) -> String { @@ -426,7 +428,7 @@ fn convert_size_other(size: u64, _multiplier: u64, block_size: u64) -> String { format!("{}", ((size as f64) / (block_size as f64)).ceil()) } -fn get_convert_size_fn(matches: &ArgMatches) -> Box String> { +fn get_convert_size_fn(matches: &ArgMatches) -> Box String + Send> { if matches.get_flag(options::HUMAN_READABLE) || matches.get_flag(options::SI) { Box::new(convert_size_human) } else if matches.get_flag(options::BYTES) { @@ -532,6 +534,137 @@ fn build_exclude_patterns(matches: &ArgMatches) -> UResult> { Ok(exclude_patterns) } +struct StatPrintInfo { + stat: Stat, + depth: usize, +} + +struct StatPrinter { + matches: ArgMatches, + threshold: Option, + summarize: bool, + time_format_str: String, + line_ending: LineEnding, + options: Options, + convert_size: Box String + Send>, +} + +impl StatPrinter { + fn new(matches: ArgMatches, options: Options, summarize: bool) -> UResult { + let block_size = read_block_size( + matches + .get_one::(options::BLOCK_SIZE) + .map(|s| s.as_str()), + )?; + + let multiplier: u64 = if matches.get_flag(options::SI) { + 1000 + } else { + 1024 + }; + + let convert_size_fn = get_convert_size_fn(&matches); + + let convert_size: Box String + Send> = if options.inodes { + Box::new(|size: u64| size.to_string()) + } else { + Box::new(move |size: u64| convert_size_fn(size, multiplier, block_size)) + }; + + let threshold = match matches.get_one::(options::THRESHOLD) { + Some(s) => match Threshold::from_str(s) { + Ok(t) => Some(t), + Err(e) => { + return Err(USimpleError::new( + 1, + format_error_message(&e, s, options::THRESHOLD), + )) + } + }, + None => None, + }; + + let time_format_str = + parse_time_style(matches.get_one::("time-style").map(|s| s.as_str()))? + .to_string(); + + let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::NULL)); + + Ok(Self { + matches, + threshold, + summarize, + time_format_str, + line_ending, + options, + convert_size, + }) + } + + fn print_stats(&self, rx: &mpsc::Receiver>) -> UResult<()> { + let mut grand_total = 0; + loop { + let received = rx.recv(); + + match received { + Ok(message) => match message { + Ok(stat_info) => { + let size = choose_size(&self.matches, &stat_info.stat); + + if stat_info.depth == 0 { + grand_total += size; + } + + if !self + .threshold + .map_or(false, |threshold| threshold.should_exclude(size)) + && self + .options + .max_depth + .map_or(true, |max_depth| stat_info.depth <= max_depth) + && (!self.summarize || stat_info.depth == 0) + { + self.print_stat(&stat_info.stat, size)?; + } + } + Err(e) => show!(e), + }, + Err(_) => break, + } + } + + if self.options.total { + print!("{}\ttotal", (self.convert_size)(grand_total)); + print!("{}", self.line_ending); + } + + Ok(()) + } + + fn print_stat(&self, stat: &Stat, size: u64) -> UResult<()> { + if self.matches.contains_id(options::TIME) { + let tm = { + let secs = self + .matches + .get_one::(options::TIME) + .map(|s| get_time_secs(s, stat)) + .transpose()? + .unwrap_or(stat.modified); + DateTime::::from(UNIX_EPOCH + Duration::from_secs(secs)) + }; + let time_str = tm.format(&self.time_format_str).to_string(); + print!("{}\t{}\t", (self.convert_size)(size), time_str); + } else { + print!("{}\t", (self.convert_size)(size)); + } + + print_verbatim(&stat.path).unwrap(); + print!("{}", self.line_ending); + + Ok(()) + } +} + #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { @@ -582,49 +715,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { show_warning!("options --apparent-size and -b are ineffective with --inodes"); } - let block_size = read_block_size( - matches - .get_one::(options::BLOCK_SIZE) - .map(|s| s.as_str()), - )?; - - let threshold = match matches.get_one::(options::THRESHOLD) { - Some(s) => match Threshold::from_str(s) { - Ok(t) => Some(t), - Err(e) => { - return Err(USimpleError::new( - 1, - format_error_message(&e, s, options::THRESHOLD), - )) - } - }, - None => None, - }; - - let multiplier: u64 = if matches.get_flag(options::SI) { - 1000 - } else { - 1024 - }; - - let convert_size_fn = get_convert_size_fn(&matches); - - let convert_size = |size: u64| { - if options.inodes { - size.to_string() - } else { - convert_size_fn(size, multiplier, block_size) - } - }; - - let time_format_str = - parse_time_style(matches.get_one::("time-style").map(|s| s.as_str()))?; - - let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::NULL)); + // Use separate thread to print output, so we can print finished results while computation is still running + let stat_printer = StatPrinter::new(matches.clone(), options.clone(), summarize)?; + let (print_tx, rx) = mpsc::channel::>(); + let printing_thread = thread::spawn(move || stat_printer.print_stats(&rx)); let excludes = build_exclude_patterns(&matches)?; - let mut grand_total = 0; 'loop_file: for path in files { // Skip if we don't want to ignore anything if !&excludes.is_empty() { @@ -647,63 +744,35 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if let Some(inode) = stat.inode { seen_inodes.insert(inode); } - let iter = du(stat, &options, 0, &mut seen_inodes, &excludes); - - // Sum up all the returned `Stat`s and display results - let (_, len) = iter.size_hint(); - let len = len.unwrap(); - for (index, stat) in iter.enumerate() { - let size = choose_size(&matches, &stat); + let stat = du(stat, &options, 0, &mut seen_inodes, &excludes, &print_tx) + .map_err(|e| USimpleError::new(1, e.to_string()))?; - if threshold.map_or(false, |threshold| threshold.should_exclude(size)) { - continue; - } - - if matches.contains_id(options::TIME) { - let tm = { - let secs = matches - .get_one::(options::TIME) - .map(|s| get_time_secs(s, &stat)) - .transpose()? - .unwrap_or(stat.modified); - DateTime::::from(UNIX_EPOCH + Duration::from_secs(secs)) - }; - if !summarize || index == len - 1 { - let time_str = tm.format(time_format_str).to_string(); - print!("{}\t{}\t", convert_size(size), time_str); - print_verbatim(stat.path).unwrap(); - print!("{line_ending}"); - } - } else if !summarize || index == len - 1 { - print!("{}\t", convert_size(size)); - print_verbatim(stat.path).unwrap(); - print!("{line_ending}"); - } - if options.total && index == (len - 1) { - // The last element will be the total size of the the path under - // path_string. We add it to the grand total. - grand_total += size; - } - } + print_tx + .send(Ok(StatPrintInfo { stat, depth: 0 })) + .map_err(|e| USimpleError::new(1, e.to_string()))?; } else { - show_error!( - "{}: {}", - path.to_string_lossy().maybe_quote(), - "No such file or directory" - ); - set_exit_code(1); + print_tx + .send(Err(USimpleError::new( + 1, + format!( + "{}: No such file or directory", + path.to_string_lossy().maybe_quote() + ), + ))) + .map_err(|e| USimpleError::new(1, e.to_string()))?; } } - if options.total { - print!("{}\ttotal", convert_size(grand_total)); - print!("{line_ending}"); - } + drop(print_tx); + + printing_thread + .join() + .map_err(|_| USimpleError::new(1, "Printing thread panicked."))??; Ok(()) } -fn get_time_secs(s: &str, stat: &Stat) -> std::result::Result { +fn get_time_secs(s: &str, stat: &Stat) -> Result { let secs = match s { "ctime" | "status" => stat.modified, "access" | "atime" | "use" => stat.accessed, @@ -966,7 +1035,7 @@ enum Threshold { impl FromStr for Threshold { type Err = ParseSizeError; - fn from_str(s: &str) -> std::result::Result { + fn from_str(s: &str) -> Result { let offset = usize::from(s.starts_with(&['-', '+'][..])); let size = parse_size_u64(&s[offset..])?; From dc92a434ef0f64a833d8b657268068eb3a25ab22 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Fri, 24 Nov 2023 17:25:16 -0500 Subject: [PATCH 066/429] split: handling system limit on open files --- src/uu/split/src/platform/unix.rs | 43 ++++-- src/uu/split/src/platform/windows.rs | 23 ++- src/uu/split/src/split.rs | 204 +++++++++++++++++++-------- 3 files changed, 191 insertions(+), 79 deletions(-) diff --git a/src/uu/split/src/platform/unix.rs b/src/uu/split/src/platform/unix.rs index c2bf7216b57..1fd990e0a91 100644 --- a/src/uu/split/src/platform/unix.rs +++ b/src/uu/split/src/platform/unix.rs @@ -117,22 +117,37 @@ impl Drop for FilterWriter { pub fn instantiate_current_writer( filter: &Option, filename: &str, + is_new: bool, ) -> Result>> { match filter { - None => Ok(BufWriter::new(Box::new( - // write to the next file - std::fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(std::path::Path::new(&filename)) - .map_err(|_| { - Error::new( - ErrorKind::Other, - format!("unable to open '{filename}'; aborting"), - ) - })?, - ) as Box)), + None => { + let file = if is_new { + // create new file + std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(std::path::Path::new(&filename)) + .map_err(|_| { + Error::new( + ErrorKind::Other, + format!("unable to open '{filename}'; aborting"), + ) + })? + } else { + // re-open file that we previously created to append to it + std::fs::OpenOptions::new() + .append(true) + .open(std::path::Path::new(&filename)) + .map_err(|_| { + Error::new( + ErrorKind::Other, + format!("unable to re-open '{filename}'; aborting"), + ) + })? + }; + Ok(BufWriter::new(Box::new(file) as Box)) + } Some(ref filter_command) => Ok(BufWriter::new(Box::new( // spawn a shell command and write to it FilterWriter::new(filter_command, filename)?, diff --git a/src/uu/split/src/platform/windows.rs b/src/uu/split/src/platform/windows.rs index 8b90789896f..a531d6abc1f 100644 --- a/src/uu/split/src/platform/windows.rs +++ b/src/uu/split/src/platform/windows.rs @@ -14,9 +14,10 @@ use uucore::fs; pub fn instantiate_current_writer( _filter: &Option, filename: &str, + is_new: bool, ) -> Result>> { - Ok(BufWriter::new(Box::new( - // write to the next file + let file = if is_new { + // create new file std::fs::OpenOptions::new() .write(true) .create(true) @@ -25,10 +26,22 @@ pub fn instantiate_current_writer( .map_err(|_| { Error::new( ErrorKind::Other, - format!("'{filename}' would overwrite input; aborting"), + format!("unable to open '{filename}'; aborting"), ) - })?, - ) as Box)) + })? + } else { + // re-open file that we previously created to append to it + std::fs::OpenOptions::new() + .append(true) + .open(std::path::Path::new(&filename)) + .map_err(|_| { + Error::new( + ErrorKind::Other, + format!("unable to re-open '{filename}'; aborting"), + ) + })? + }; + Ok(BufWriter::new(Box::new(file) as Box)) } pub fn paths_refer_to_same_file(p1: &str, p2: &str) -> bool { diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 4e2af0be4d8..e5f9032c947 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore nbbbb ncccc hexdigit +// spell-checker:ignore nbbbb ncccc hexdigit getmaxstdio mod filenames; mod number; @@ -563,7 +563,11 @@ impl Settings { Ok(result) } - fn instantiate_current_writer(&self, filename: &str) -> io::Result>> { + fn instantiate_current_writer( + &self, + filename: &str, + is_new: bool, + ) -> io::Result>> { if platform::paths_refer_to_same_file(&self.input, filename) { return Err(io::Error::new( ErrorKind::Other, @@ -571,7 +575,7 @@ impl Settings { )); } - platform::instantiate_current_writer(&self.filter, filename) + platform::instantiate_current_writer(&self.filter, filename, is_new) } } @@ -748,7 +752,7 @@ impl<'a> ByteChunkWriter<'a> { if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = settings.instantiate_current_writer(&filename)?; + let inner = settings.instantiate_current_writer(&filename, true)?; Ok(ByteChunkWriter { settings, chunk_size, @@ -786,7 +790,7 @@ impl<'a> Write for ByteChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = self.settings.instantiate_current_writer(&filename)?; + self.inner = self.settings.instantiate_current_writer(&filename, true)?; } // If the capacity of this chunk is greater than the number of @@ -872,7 +876,7 @@ impl<'a> LineChunkWriter<'a> { if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = settings.instantiate_current_writer(&filename)?; + let inner = settings.instantiate_current_writer(&filename, true)?; Ok(LineChunkWriter { settings, chunk_size, @@ -907,7 +911,7 @@ impl<'a> Write for LineChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = self.settings.instantiate_current_writer(&filename)?; + self.inner = self.settings.instantiate_current_writer(&filename, true)?; self.num_lines_remaining_in_current_chunk = self.chunk_size; } @@ -979,7 +983,7 @@ impl<'a> LineBytesChunkWriter<'a> { if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = settings.instantiate_current_writer(&filename)?; + let inner = settings.instantiate_current_writer(&filename, true)?; Ok(LineBytesChunkWriter { settings, chunk_size, @@ -1045,7 +1049,7 @@ impl<'a> Write for LineBytesChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = self.settings.instantiate_current_writer(&filename)?; + self.inner = self.settings.instantiate_current_writer(&filename, true)?; self.num_bytes_remaining_in_current_chunk = self.chunk_size.try_into().unwrap(); } @@ -1134,55 +1138,135 @@ impl<'a> Write for LineBytesChunkWriter<'a> { struct OutFile { filename: String, maybe_writer: Option>>, + is_new: bool, } -impl OutFile { - /// Get the writer for the output file - /// Instantiate the writer if it has not been instantiated upfront - fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter>> { - if self.maybe_writer.is_some() { - Ok(self.maybe_writer.as_mut().unwrap()) - } else { - // Writer was not instantiated upfront - // Instantiate it and record for future use - self.maybe_writer = Some(settings.instantiate_current_writer(self.filename.as_str())?); - Ok(self.maybe_writer.as_mut().unwrap()) +// impl OutFile { +// /// Get the writer for the output file. +// /// Instantiate the writer if it has not been instantiated upfront +// /// or temporarily closed to free up system resources +// fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter>> { +// if self.maybe_writer.is_some() { +// Ok(self.maybe_writer.as_mut().unwrap()) +// } else { +// // Writer was not instantiated upfront or was temporarily closed due to system resources constraints. +// // Instantiate it and record for future use. +// self.maybe_writer = +// Some(settings.instantiate_current_writer(self.filename.as_str(), self.is_new)?); +// Ok(self.maybe_writer.as_mut().unwrap()) +// } +// } +// } + +/// A set of output files +/// Used in [`n_chunks_by_byte`], [`n_chunks_by_line`] +/// and [`n_chunks_by_line_round_robin`] functions. +type OutFiles = Vec; +trait ManageOutFiles { + /// Initialize a new set of output files + /// Each OutFile is generated with filename, while the writer for it could be + /// optional, to be instantiated later by the calling function as needed. + /// Optional writers could happen in the following situations: + /// * in [`n_chunks_by_line`] if `elide_empty_files` parameter is set to `true` + /// * if the number of files is greater than system limit for open files + fn init(num_files: u64, settings: &Settings, is_writer_optional: bool) -> UResult + where + Self: Sized; + /// Get the writer for the output file by index. + /// If system limit of open files has been reached + /// it will try to close one of previously instantiated writers + /// to free up resources and re-try instantiating current writer, + /// except for `--filter` mode. + /// The writers that get closed to free up resources for the current writer + /// are flagged as `is_new=false`, so they can be re-opened for appending + /// instead of created anew if we need to keep writing into them later, + /// i.e. in case of round robin distribution as in [`n_chunks_by_line_round_robin`] + fn get_writer( + &mut self, + idx: usize, + settings: &Settings, + ) -> UResult<&mut BufWriter>>; +} + +impl ManageOutFiles for OutFiles { + fn init(num_files: u64, settings: &Settings, is_writer_optional: bool) -> UResult { + // This object is responsible for creating the filename for each chunk + let mut filename_iterator: FilenameIterator<'_> = + FilenameIterator::new(&settings.prefix, &settings.suffix) + .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; + let mut out_files: Self = Self::new(); + for _ in 0..num_files { + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let maybe_writer = if is_writer_optional { + None + } else { + let instantiated = settings.instantiate_current_writer(filename.as_str(), true); + // If there was an error instantiating the writer for a file, + // it could be due to hitting the system limit of open files, + // so record it as None and let [`get_writer`] function handle closing/re-opening + // of writers as needed within system limits. + // However, for `--filter` child process writers - propagate the error, + // as working around system limits of open files for child shell processes + // is currently not supported (same as in GNU) + match instantiated { + Ok(writer) => Some(writer), + Err(e) if settings.filter.is_some() => { + return Err(e.into()); + } + Err(_) => None, + } + }; + out_files.push(OutFile { + filename, + maybe_writer, + is_new: true, + }); } + Ok(out_files) } -} -/// Generate a set of Output Files -/// This is a helper function to [`n_chunks_by_byte`], [`n_chunks_by_line`] -/// and [`n_chunks_by_line_round_robin`]. -/// Each OutFile is generated with filename, while the writer for it could be -/// optional, to be instantiated later by the calling function as needed. -/// Optional writers could happen in [`n_chunks_by_line`] -/// if `elide_empty_files` parameter is set to `true`. -fn get_out_files( - num_files: u64, - settings: &Settings, - is_writer_optional: bool, -) -> UResult> { - // This object is responsible for creating the filename for each chunk - let mut filename_iterator: FilenameIterator<'_> = - FilenameIterator::new(&settings.prefix, &settings.suffix) - .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?; - let mut out_files: Vec = Vec::new(); - for _ in 0..num_files { - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let maybe_writer = if is_writer_optional { - None + fn get_writer( + &mut self, + idx: usize, + settings: &Settings, + ) -> UResult<&mut BufWriter>> { + if self[idx].maybe_writer.is_some() { + Ok(self[idx].maybe_writer.as_mut().unwrap()) } else { - Some(settings.instantiate_current_writer(filename.as_str())?) - }; - out_files.push(OutFile { - filename, - maybe_writer, - }); + // Writer was not instantiated upfront or was temporarily closed due to system resources constraints. + // Instantiate it and record for future use. + let maybe_writer = + settings.instantiate_current_writer(self[idx].filename.as_str(), self[idx].is_new); + if let Ok(writer) = maybe_writer { + self[idx].maybe_writer = Some(writer); + Ok(self[idx].maybe_writer.as_mut().unwrap()) + } else if settings.filter.is_some() { + // Propagate error if in `--filter` mode + Err(maybe_writer.err().unwrap().into()) + } else { + // Could have hit system limit for open files. + // Try to close one previously instantiated writer first + for (i, out_file) in self.iter_mut().enumerate() { + if i != idx && out_file.maybe_writer.is_some() { + out_file.maybe_writer.as_mut().unwrap().flush()?; + out_file.maybe_writer = None; + out_file.is_new = false; + break; + } + } + // And then try to instantiate the writer again + // If this fails - give up and propagate the error + self[idx].maybe_writer = + Some(settings.instantiate_current_writer( + self[idx].filename.as_str(), + self[idx].is_new, + )?); + Ok(self[idx].maybe_writer.as_mut().unwrap()) + } + } } - Ok(out_files) } /// Split a file or STDIN into a specific number of chunks by byte. @@ -1261,7 +1345,7 @@ where // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut out_files: Vec = Vec::new(); + let mut out_files: OutFiles = OutFiles::new(); // Calculate chunk size base and modulo reminder // to be used in calculating chunk_size later on @@ -1273,7 +1357,7 @@ where // This will create each of the underlying files // or stdin pipes to child shell/command processes if in `--filter` mode if kth_chunk.is_none() { - out_files = get_out_files(num_chunks, settings, false)?; + out_files = OutFiles::init(num_chunks, settings, false)?; } for i in 1_u64..=num_chunks { @@ -1317,7 +1401,7 @@ where } None => { let idx = (i - 1) as usize; - let writer = out_files[idx].get_writer(settings)?; + let writer = out_files.get_writer(idx, settings)?; writer.write_all(buf)?; } } @@ -1387,7 +1471,7 @@ where // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut out_files: Vec = Vec::new(); + let mut out_files: OutFiles = OutFiles::new(); // Calculate chunk size base and modulo reminder // to be used in calculating `num_bytes_should_be_written` later on @@ -1402,7 +1486,7 @@ where // Otherwise keep writer optional, to be instantiated later if there is data // to write for the associated chunk. if kth_chunk.is_none() { - out_files = get_out_files(num_chunks, settings, settings.elide_empty_files)?; + out_files = OutFiles::init(num_chunks, settings, settings.elide_empty_files)?; } let mut chunk_number = 1; @@ -1429,7 +1513,7 @@ where None => { // Should write into a file let idx = (chunk_number - 1) as usize; - let writer = out_files[idx].get_writer(settings)?; + let writer = out_files.get_writer(idx, settings)?; custom_write_all(bytes, writer, settings)?; } } @@ -1503,14 +1587,14 @@ where // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files - let mut out_files: Vec = Vec::new(); + let mut out_files: OutFiles = OutFiles::new(); // If in N chunks mode // Create one writer for each chunk. // This will create each of the underlying files // or stdin pipes to child shell/command processes if in `--filter` mode if kth_chunk.is_none() { - out_files = get_out_files(num_chunks, settings, false)?; + out_files = OutFiles::init(num_chunks, settings, false)?; } let num_chunks: usize = num_chunks.try_into().unwrap(); @@ -1532,7 +1616,7 @@ where } } None => { - let writer = out_files[i % num_chunks].get_writer(settings)?; + let writer = out_files.get_writer(i % num_chunks, settings)?; let writer_stdin_open = custom_write_all(bytes, writer, settings)?; if !writer_stdin_open { closed_writers += 1; From 440e7b1a597091b5b0e3d81007f55ea6fbe96ee3 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Fri, 24 Nov 2023 19:10:47 -0500 Subject: [PATCH 067/429] split: r-chunk test compliance --- src/uu/split/src/split.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index e5f9032c947..932013ad9b3 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -1167,7 +1167,7 @@ trait ManageOutFiles { /// Each OutFile is generated with filename, while the writer for it could be /// optional, to be instantiated later by the calling function as needed. /// Optional writers could happen in the following situations: - /// * in [`n_chunks_by_line`] if `elide_empty_files` parameter is set to `true` + /// * in [`n_chunks_by_line`] and [`n_chunks_by_line_round_robin`] if `elide_empty_files` parameter is set to `true` /// * if the number of files is greater than system limit for open files fn init(num_files: u64, settings: &Settings, is_writer_optional: bool) -> UResult where @@ -1584,6 +1584,12 @@ fn n_chunks_by_line_round_robin( where R: BufRead, { + // Get the size of the input in bytes and compute the number + // of bytes per chunk. + let initial_buf = &mut Vec::new(); + let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; + let reader = initial_buf.chain(reader); + // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files @@ -1594,16 +1600,22 @@ where // This will create each of the underlying files // or stdin pipes to child shell/command processes if in `--filter` mode if kth_chunk.is_none() { - out_files = OutFiles::init(num_chunks, settings, false)?; + out_files = OutFiles::init(num_chunks, settings, settings.elide_empty_files)?; } let num_chunks: usize = num_chunks.try_into().unwrap(); let sep = settings.separator; let mut closed_writers = 0; + let mut num_bytes_written = 0; + for (i, line_result) in reader.split(sep).enumerate() { - // add separator back in at the end of the line let mut line = line_result?; - line.push(sep); + // add separator back in at the end of the line, + // since `reader.split(sep)` removes it, + // except if the last line did not end with separator character + if (num_bytes_written + line.len() as u64) < num_bytes { + line.push(sep); + } let bytes = line.as_slice(); match kth_chunk { @@ -1627,6 +1639,8 @@ where } } } + let num_line_bytes = bytes.len() as u64; + num_bytes_written += num_line_bytes; } Ok(()) From 7b2a3e236e86786f35e3c9314cb1e8916ad37247 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Fri, 24 Nov 2023 20:56:05 -0500 Subject: [PATCH 068/429] split: r-chunk tests and infinite input --- src/uu/split/src/split.rs | 66 ++++++++++++------------------------- tests/by-util/test_split.rs | 26 +++++++++++++-- 2 files changed, 44 insertions(+), 48 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 932013ad9b3..a837bcb21ec 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -622,7 +622,7 @@ fn custom_write_all( /// Get the size of the input file in bytes /// Used only for subset of `--number=CHUNKS` strategy, as there is a need -/// to determine input file size upfront in order to know chunk size +/// to determine input file size upfront in order to estimate the chunk size /// to be written into each of N files/chunks: /// * N split into N files based on size of input /// * K/N output Kth of N to stdout @@ -1141,23 +1141,6 @@ struct OutFile { is_new: bool, } -// impl OutFile { -// /// Get the writer for the output file. -// /// Instantiate the writer if it has not been instantiated upfront -// /// or temporarily closed to free up system resources -// fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter>> { -// if self.maybe_writer.is_some() { -// Ok(self.maybe_writer.as_mut().unwrap()) -// } else { -// // Writer was not instantiated upfront or was temporarily closed due to system resources constraints. -// // Instantiate it and record for future use. -// self.maybe_writer = -// Some(settings.instantiate_current_writer(self.filename.as_str(), self.is_new)?); -// Ok(self.maybe_writer.as_mut().unwrap()) -// } -// } -// } - /// A set of output files /// Used in [`n_chunks_by_byte`], [`n_chunks_by_line`] /// and [`n_chunks_by_line_round_robin`] functions. @@ -1551,7 +1534,11 @@ where } /// Split a file or STDIN into a specific number of chunks by line, but -/// assign lines via round-robin +/// assign lines via round-robin. +/// Note: There is no need to know the size of the input upfront for this method, +/// since the lines are assigned to chunks randomly and the size of each chunk +/// does not need to be estimated. As a result, "infinite" inputs are supported +/// for this method, i.e. `yes | split -n r/10` or `yes | split -n r/3/11` /// /// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` /// @@ -1584,12 +1571,6 @@ fn n_chunks_by_line_round_robin( where R: BufRead, { - // Get the size of the input in bytes and compute the number - // of bytes per chunk. - let initial_buf = &mut Vec::new(); - let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; - let reader = initial_buf.chain(reader); - // In Kth chunk of N mode - we will write to stdout instead of to a file. let mut stdout_writer = std::io::stdout().lock(); // In N chunks mode - we will write to `num_chunks` files @@ -1606,23 +1587,20 @@ where let num_chunks: usize = num_chunks.try_into().unwrap(); let sep = settings.separator; let mut closed_writers = 0; - let mut num_bytes_written = 0; - for (i, line_result) in reader.split(sep).enumerate() { - let mut line = line_result?; - // add separator back in at the end of the line, - // since `reader.split(sep)` removes it, - // except if the last line did not end with separator character - if (num_bytes_written + line.len() as u64) < num_bytes { - line.push(sep); - } - let bytes = line.as_slice(); + let mut i = 0; + loop { + let line = &mut Vec::new(); + let num_bytes_read = reader.by_ref().read_until(sep, line)?; + // if there is nothing else to read - exit the loop + if num_bytes_read == 0 { + break; + }; + + let bytes = line.as_slice(); match kth_chunk { Some(chunk_number) => { - // The `.enumerate()` method returns index `i` starting with 0, - // but chunk number is given as a 1-indexed number, - // so compare to `chunk_number - 1` if (i % num_chunks) == (chunk_number - 1) as usize { stdout_writer.write_all(bytes)?; } @@ -1632,17 +1610,15 @@ where let writer_stdin_open = custom_write_all(bytes, writer, settings)?; if !writer_stdin_open { closed_writers += 1; - if closed_writers == num_chunks { - // all writers are closed - stop reading - break; - } } } } - let num_line_bytes = bytes.len() as u64; - num_bytes_written += num_line_bytes; + i += 1; + if closed_writers == num_chunks { + // all writers are closed - stop reading + break; + } } - Ok(()) } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 2c9a56bddee..acb8ab56140 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,11 +2,13 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc rlimit NOFILE use crate::common::util::{AtPath, TestScenario}; use rand::{thread_rng, Rng, SeedableRng}; use regex::Regex; +#[cfg(any(target_os = "linux", target_os = "android"))] +use rlimit::Resource; #[cfg(not(windows))] use std::env; use std::path::Path; @@ -1250,10 +1252,19 @@ fn test_number_by_lines_kth_no_end_sep() { .succeeds() .stdout_only("2222\n"); new_ucmd!() - .args(&["-e", "-n", "l/8/10"]) + .args(&["-e", "-n", "l/2/2"]) .pipe_in("1\n2222\n3\n4") .succeeds() - .stdout_only("3\n"); + .stdout_only("3\n4"); +} + +#[test] +fn test_number_by_lines_rr_kth_no_end_sep() { + new_ucmd!() + .args(&["-n", "r/2/3"]) + .pipe_in("1\n2\n3\n4\n5") + .succeeds() + .stdout_only("2\n5"); } #[test] @@ -1626,6 +1637,15 @@ fn test_round_robin() { assert_eq!(at.read("xab"), "2\n4\n"); } +#[test] +#[cfg(any(target_os = "linux", target_os = "android"))] +fn test_round_robin_limited_file_descriptors() { + new_ucmd!() + .args(&["-n", "r/40", "onehundredlines.txt"]) + .limit(Resource::NOFILE, 9, 9) + .succeeds(); +} + #[test] fn test_split_invalid_input() { // Test if stdout/stderr for '--lines' option is correct From f8f63461264208ccd26842ec02f61f120c751072 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Sat, 25 Nov 2023 18:16:34 -0500 Subject: [PATCH 069/429] wc: fix FilesDisabled error message --- src/uu/wc/src/wc.rs | 12 ++++++++---- tests/by-util/test_wc.rs | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 663bbda151b..ae9b24f5d84 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -167,7 +167,7 @@ impl<'a> Inputs<'a> { None => Ok(Self::Files0From(input)), } } - (Some(_), Some(_)) => Err(WcError::FilesDisabled.into()), + (Some(mut files), Some(_)) => Err(WcError::files_disabled(files.next()).into()), } } @@ -342,8 +342,8 @@ impl TotalWhen { #[derive(Debug, Error)] enum WcError { - #[error("file operands cannot be combined with --files0-from")] - FilesDisabled, + #[error("extra operand '{extra}'\nfile operands cannot be combined with --files0-from")] + FilesDisabled { extra: Cow<'static, str> }, #[error("when reading file names from stdin, no file name of '-' allowed")] StdinReprNotAllowed, #[error("invalid zero-length file name")] @@ -365,11 +365,15 @@ impl WcError { None => Self::ZeroLengthFileName, } } + fn files_disabled(first_extra: Option<&OsString>) -> Self { + let extra = first_extra.unwrap().to_string_lossy().into_owned().into(); + Self::FilesDisabled { extra } + } } impl UError for WcError { fn usage(&self) -> bool { - matches!(self, Self::FilesDisabled) + matches!(self, Self::FilesDisabled { .. }) } } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 6417470c57f..8358a542a23 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -423,7 +423,8 @@ fn test_files_from_pseudo_filesystem() { #[test] fn test_files0_disabled_files_argument() { - const MSG: &str = "file operands cannot be combined with --files0-from"; + const MSG: &str = + "extra operand 'lorem_ipsum.txt'\nfile operands cannot be combined with --files0-from"; new_ucmd!() .args(&["--files0-from=files0_list.txt"]) .arg("lorem_ipsum.txt") From 04e568db7354347a103f88e6a1a0c34c74d54128 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:47:15 +0000 Subject: [PATCH 070/429] chore(deps): update davidanson/markdownlint-cli2-action action to v14 --- .github/workflows/CICD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 66ee23168b8..8dfa0b1d103 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -133,7 +133,7 @@ jobs: shell: bash run: | RUSTDOCFLAGS="-Dwarnings" cargo doc ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-deps --workspace --document-private-items - - uses: DavidAnson/markdownlint-cli2-action@v13 + - uses: DavidAnson/markdownlint-cli2-action@v14 with: fix: "true" globs: | From b6553edfb7ecefe54aecc21bc130e878852c9ba5 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Sat, 25 Nov 2023 17:35:48 +0000 Subject: [PATCH 071/429] hashsum: Remove usage of crash! macro --- src/uu/hashsum/src/hashsum.rs | 204 +++++++++++++++++----------------- 1 file changed, 105 insertions(+), 99 deletions(-) diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index d27b09b98ed..22ef4152070 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -20,12 +20,13 @@ use std::io::{self, stdin, BufRead, BufReader, Read}; use std::iter; use std::num::ParseIntError; use std::path::Path; +use uucore::error::USimpleError; use uucore::error::{FromIo, UError, UResult}; use uucore::sum::{ Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256, }; -use uucore::{crash, display::Quotable, show_warning}; +use uucore::{display::Quotable, show_warning}; use uucore::{format_usage, help_about, help_usage}; const NAME: &str = "hashsum"; @@ -51,28 +52,32 @@ struct Options { /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. -/// -/// # Panics -/// -/// Panics if the length is not a multiple of 8 or if it is greater than 512. -fn create_blake2b(matches: &ArgMatches) -> (&'static str, Box, usize) { +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits or an Err if the length is not a multiple of 8 or if it is +/// greater than 512. +fn create_blake2b(matches: &ArgMatches) -> UResult<(&'static str, Box, usize)> { match matches.get_one::("length") { - Some(0) | None => ("BLAKE2", Box::new(Blake2b::new()) as Box, 512), + Some(0) | None => Ok(("BLAKE2", Box::new(Blake2b::new()) as Box, 512)), Some(length_in_bits) => { if *length_in_bits > 512 { - crash!(1, "Invalid length (maximum digest length is 512 bits)") + return Err(USimpleError::new( + 1, + "Invalid length (maximum digest length is 512 bits)", + )); } if length_in_bits % 8 == 0 { let length_in_bytes = length_in_bits / 8; - ( + Ok(( "BLAKE2", Box::new(Blake2b::with_output_bytes(length_in_bytes)), *length_in_bits, - ) + )) } else { - crash!(1, "Invalid length (expected a multiple of 8)") + Err(USimpleError::new( + 1, + "Invalid length (expected a multiple of 8)", + )) } } } @@ -82,38 +87,36 @@ fn create_blake2b(matches: &ArgMatches) -> (&'static str, Box, usize /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. -/// -/// # Panics -/// -/// Panics if an unsupported output size is provided, or if the `--bits` flag is missing. -fn create_sha3(matches: &ArgMatches) -> (&'static str, Box, usize) { +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits or an Err if an unsupported output size is provided, or if +/// the `--bits` flag is missing. +fn create_sha3(matches: &ArgMatches) -> UResult<(&'static str, Box, usize)> { match matches.get_one::("bits") { - Some(224) => ( + Some(224) => Ok(( "SHA3-224", Box::new(Sha3_224::new()) as Box, 224, - ), - Some(256) => ( + )), + Some(256) => Ok(( "SHA3-256", Box::new(Sha3_256::new()) as Box, 256, - ), - Some(384) => ( + )), + Some(384) => Ok(( "SHA3-384", Box::new(Sha3_384::new()) as Box, 384, - ), - Some(512) => ( + )), + Some(512) => Ok(( "SHA3-512", Box::new(Sha3_512::new()) as Box, 512, - ), - Some(_) => crash!( + )), + Some(_) => Err(USimpleError::new( 1, - "Invalid output size for SHA3 (expected 224, 256, 384, or 512)" - ), - None => crash!(1, "--bits required for SHA3"), + "Invalid output size for SHA3 (expected 224, 256, 384, or 512)", + )), + None => Err(USimpleError::new(1, "--bits required for SHA3")), } } @@ -121,19 +124,16 @@ fn create_sha3(matches: &ArgMatches) -> (&'static str, Box, usize) { /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. -/// -/// # Panics -/// -/// Panics if the `--bits` flag is missing. -fn create_shake128(matches: &ArgMatches) -> (&'static str, Box, usize) { +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits, or an Err if `--bits` flag is missing. +fn create_shake128(matches: &ArgMatches) -> UResult<(&'static str, Box, usize)> { match matches.get_one::("bits") { - Some(bits) => ( + Some(bits) => Ok(( "SHAKE128", Box::new(Shake128::new()) as Box, *bits, - ), - None => crash!(1, "--bits required for SHAKE-128"), + )), + None => Err(USimpleError::new(1, "--bits required for SHAKE-128")), } } @@ -141,19 +141,16 @@ fn create_shake128(matches: &ArgMatches) -> (&'static str, Box, usiz /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. -/// -/// # Panics -/// -/// Panics if the `--bits` flag is missing. -fn create_shake256(matches: &ArgMatches) -> (&'static str, Box, usize) { +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits, or an Err if the `--bits` flag is missing. +fn create_shake256(matches: &ArgMatches) -> UResult<(&'static str, Box, usize)> { match matches.get_one::("bits") { - Some(bits) => ( + Some(bits) => Ok(( "SHAKE256", Box::new(Shake256::new()) as Box, *bits, - ), - None => crash!(1, "--bits required for SHAKE-256"), + )), + None => Err(USimpleError::new(1, "--bits required for SHAKE-256")), } } @@ -166,46 +163,46 @@ fn create_shake256(matches: &ArgMatches) -> (&'static str, Box, usiz /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits, or an Err if a matching algorithm is not found. fn detect_algo( program: &str, matches: &ArgMatches, -) -> (&'static str, Box, usize) { - let (name, alg, output_bits) = match program { - "md5sum" => ("MD5", Box::new(Md5::new()) as Box, 128), - "sha1sum" => ("SHA1", Box::new(Sha1::new()) as Box, 160), - "sha224sum" => ("SHA224", Box::new(Sha224::new()) as Box, 224), - "sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box, 256), - "sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box, 384), - "sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box, 512), +) -> UResult<(&'static str, Box, usize)> { + match program { + "md5sum" => Ok(("MD5", Box::new(Md5::new()) as Box, 128)), + "sha1sum" => Ok(("SHA1", Box::new(Sha1::new()) as Box, 160)), + "sha224sum" => Ok(("SHA224", Box::new(Sha224::new()) as Box, 224)), + "sha256sum" => Ok(("SHA256", Box::new(Sha256::new()) as Box, 256)), + "sha384sum" => Ok(("SHA384", Box::new(Sha384::new()) as Box, 384)), + "sha512sum" => Ok(("SHA512", Box::new(Sha512::new()) as Box, 512)), "b2sum" => create_blake2b(matches), - "b3sum" => ("BLAKE3", Box::new(Blake3::new()) as Box, 256), + "b3sum" => Ok(("BLAKE3", Box::new(Blake3::new()) as Box, 256)), "sha3sum" => create_sha3(matches), - "sha3-224sum" => ( + "sha3-224sum" => Ok(( "SHA3-224", Box::new(Sha3_224::new()) as Box, 224, - ), - "sha3-256sum" => ( + )), + "sha3-256sum" => Ok(( "SHA3-256", Box::new(Sha3_256::new()) as Box, 256, - ), - "sha3-384sum" => ( + )), + "sha3-384sum" => Ok(( "SHA3-384", Box::new(Sha3_384::new()) as Box, 384, - ), - "sha3-512sum" => ( + )), + "sha3-512sum" => Ok(( "SHA3-512", Box::new(Sha3_512::new()) as Box, 512, - ), + )), "shake128sum" => create_shake128(matches), "shake256sum" => create_shake256(matches), _ => create_algorithm_from_flags(matches), - }; - (name, alg, output_bits) + } } /// Creates a hasher instance based on the command-line flags. @@ -216,80 +213,89 @@ fn detect_algo( /// /// # Returns /// -/// Returns a tuple containing the algorithm name, the hasher instance, and the output length in bits. -/// -/// # Panics -/// -/// Panics if multiple hash algorithms are specified or if a required flag is missing. +/// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and +/// the output length in bits or an Err if multiple hash algorithms are specified or if a +/// required flag is missing. #[allow(clippy::cognitive_complexity)] -fn create_algorithm_from_flags(matches: &ArgMatches) -> (&'static str, Box, usize) { +fn create_algorithm_from_flags( + matches: &ArgMatches, +) -> UResult<(&'static str, Box, usize)> { let mut alg: Option> = None; let mut name: &'static str = ""; let mut output_bits = 0; - let mut set_or_crash = |n, val, bits| { + let mut set_or_err = |n, val, bits| { if alg.is_some() { - crash!(1, "You cannot combine multiple hash algorithms!"); + return Err(USimpleError::new( + 1, + "You cannot combine multiple hash algorithms!", + )); }; name = n; alg = Some(val); output_bits = bits; + + Ok(()) }; if matches.get_flag("md5") { - set_or_crash("MD5", Box::new(Md5::new()), 128); + set_or_err("MD5", Box::new(Md5::new()), 128)?; } if matches.get_flag("sha1") { - set_or_crash("SHA1", Box::new(Sha1::new()), 160); + set_or_err("SHA1", Box::new(Sha1::new()), 160)?; } if matches.get_flag("sha224") { - set_or_crash("SHA224", Box::new(Sha224::new()), 224); + set_or_err("SHA224", Box::new(Sha224::new()), 224)?; } if matches.get_flag("sha256") { - set_or_crash("SHA256", Box::new(Sha256::new()), 256); + set_or_err("SHA256", Box::new(Sha256::new()), 256)?; } if matches.get_flag("sha384") { - set_or_crash("SHA384", Box::new(Sha384::new()), 384); + set_or_err("SHA384", Box::new(Sha384::new()), 384)?; } if matches.get_flag("sha512") { - set_or_crash("SHA512", Box::new(Sha512::new()), 512); + set_or_err("SHA512", Box::new(Sha512::new()), 512)?; } if matches.get_flag("b2sum") { - set_or_crash("BLAKE2", Box::new(Blake2b::new()), 512); + set_or_err("BLAKE2", Box::new(Blake2b::new()), 512)?; } if matches.get_flag("b3sum") { - set_or_crash("BLAKE3", Box::new(Blake3::new()), 256); + set_or_err("BLAKE3", Box::new(Blake3::new()), 256)?; } if matches.get_flag("sha3") { - let (n, val, bits) = create_sha3(matches); - set_or_crash(n, val, bits); + let (n, val, bits) = create_sha3(matches)?; + set_or_err(n, val, bits)?; } if matches.get_flag("sha3-224") { - set_or_crash("SHA3-224", Box::new(Sha3_224::new()), 224); + set_or_err("SHA3-224", Box::new(Sha3_224::new()), 224)?; } if matches.get_flag("sha3-256") { - set_or_crash("SHA3-256", Box::new(Sha3_256::new()), 256); + set_or_err("SHA3-256", Box::new(Sha3_256::new()), 256)?; } if matches.get_flag("sha3-384") { - set_or_crash("SHA3-384", Box::new(Sha3_384::new()), 384); + set_or_err("SHA3-384", Box::new(Sha3_384::new()), 384)?; } if matches.get_flag("sha3-512") { - set_or_crash("SHA3-512", Box::new(Sha3_512::new()), 512); + set_or_err("SHA3-512", Box::new(Sha3_512::new()), 512)?; } if matches.get_flag("shake128") { match matches.get_one::("bits") { - Some(bits) => set_or_crash("SHAKE128", Box::new(Shake128::new()), *bits), - None => crash!(1, "--bits required for SHAKE-128"), - } + Some(bits) => set_or_err("SHAKE128", Box::new(Shake128::new()), *bits)?, + None => return Err(USimpleError::new(1, "--bits required for SHAKE-128")), + }; } if matches.get_flag("shake256") { match matches.get_one::("bits") { - Some(bits) => set_or_crash("SHAKE256", Box::new(Shake256::new()), *bits), - None => crash!(1, "--bits required for SHAKE-256"), - } + Some(bits) => set_or_err("SHAKE256", Box::new(Shake256::new()), *bits)?, + None => return Err(USimpleError::new(1, "--bits required for SHAKE-256")), + }; } - let alg = alg.unwrap_or_else(|| crash!(1, "You must specify hash algorithm!")); - (name, alg, output_bits) + let alg = match alg { + Some(a) => a, + None => return Err(USimpleError::new(1, "You must specify hash algorithm!")), + }; + + Ok((name, alg, output_bits)) } // TODO: return custom error type @@ -319,7 +325,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { // least somewhat better from a user's perspective. let matches = command.try_get_matches_from(args)?; - let (name, algo, bits) = detect_algo(&binary_name, &matches); + let (name, algo, bits) = detect_algo(&binary_name, &matches)?; let binary = if matches.get_flag("binary") { true From 16f6a136971f79293cde5b615453eca237d9148c Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Sun, 26 Nov 2023 15:14:48 -0500 Subject: [PATCH 072/429] wc: change where to unwrap --- src/uu/wc/src/wc.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index ae9b24f5d84..83081124d1b 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -167,7 +167,9 @@ impl<'a> Inputs<'a> { None => Ok(Self::Files0From(input)), } } - (Some(mut files), Some(_)) => Err(WcError::files_disabled(files.next()).into()), + (Some(mut files), Some(_)) => { + Err(WcError::files_disabled(files.next().unwrap()).into()) + } } } @@ -365,8 +367,8 @@ impl WcError { None => Self::ZeroLengthFileName, } } - fn files_disabled(first_extra: Option<&OsString>) -> Self { - let extra = first_extra.unwrap().to_string_lossy().into_owned().into(); + fn files_disabled(first_extra: &OsString) -> Self { + let extra = first_extra.to_string_lossy().into_owned().into(); Self::FilesDisabled { extra } } } From 016ae34d50e2e6e5ec50fca6fc88ad257a4758a2 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 12 Mar 2023 19:02:48 -0400 Subject: [PATCH 073/429] dd: add Settings.buffered field Add the `Settings.buffered` field to indicate whether partial output blocks should be buffered until they are complete. --- src/uu/dd/src/dd.rs | 8 +++ src/uu/dd/src/parseargs.rs | 74 +++++++++++++++------------ src/uu/dd/src/parseargs/unit_tests.rs | 1 + 3 files changed, 49 insertions(+), 34 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index b79ae22da4e..7d9138791cf 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -76,6 +76,8 @@ struct Settings { oconv: OConvFlags, oflags: OFlags, status: Option, + /// Whether the output writer should buffer partial blocks until complete. + buffered: bool, } /// A timer which triggers on a given interval @@ -128,6 +130,12 @@ enum Num { Bytes(u64), } +impl Default for Num { + fn default() -> Self { + Self::Blocks(0) + } +} + impl Num { fn force_bytes_if(self, force: bool) -> Self { match self { diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 0ff6e752c02..60ce9a6971f 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -35,41 +35,28 @@ pub enum ParseError { } /// Contains a temporary state during parsing of the arguments -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Default)] pub struct Parser { infile: Option, outfile: Option, - ibs: usize, - obs: usize, + /// The block size option specified on the command-line, if any. + bs: Option, + /// The input block size option specified on the command-line, if any. + ibs: Option, + /// The output block size option specified on the command-line, if any. + obs: Option, cbs: Option, skip: Num, seek: Num, count: Option, conv: ConvFlags, + /// Whether a data-transforming `conv` option has been specified. + is_conv_specified: bool, iflag: IFlags, oflag: OFlags, status: Option, } -impl Default for Parser { - fn default() -> Self { - Self { - ibs: 512, - obs: 512, - cbs: None, - infile: None, - outfile: None, - skip: Num::Blocks(0), - seek: Num::Blocks(0), - count: None, - conv: ConvFlags::default(), - iflag: IFlags::default(), - oflag: OFlags::default(), - status: None, - } - } -} - #[derive(Debug, Default, PartialEq, Eq)] pub struct ConvFlags { ascii: bool, @@ -212,15 +199,34 @@ impl Parser { fsync: conv.fsync, }; + // Input and output block sizes. + // + // The `bs` option takes precedence. If either is not + // provided, `ibs` and `obs` are each 512 bytes by default. + let (ibs, obs) = match self.bs { + None => (self.ibs.unwrap_or(512), self.obs.unwrap_or(512)), + Some(bs) => (bs, bs), + }; + + // Whether to buffer partial output blocks until they are completed. + // + // From the GNU `dd` documentation for the `bs=BYTES` option: + // + // > [...] if no data-transforming 'conv' option is specified, + // > input is copied to the output as soon as it's read, even if + // > it is smaller than the block size. + // + let buffered = self.bs.is_none() || self.is_conv_specified; + let skip = self .skip .force_bytes_if(self.iflag.skip_bytes) - .to_bytes(self.ibs as u64); + .to_bytes(ibs as u64); let seek = self .seek .force_bytes_if(self.oflag.seek_bytes) - .to_bytes(self.obs as u64); + .to_bytes(obs as u64); let count = self.count.map(|c| c.force_bytes_if(self.iflag.count_bytes)); @@ -230,8 +236,9 @@ impl Parser { count, iconv, oconv, - ibs: self.ibs, - obs: self.obs, + ibs, + obs, + buffered, infile: self.infile, outfile: self.outfile, iflags: self.iflag, @@ -244,18 +251,17 @@ impl Parser { match operand.split_once('=') { None => return Err(ParseError::UnrecognizedOperand(operand.to_string())), Some((k, v)) => match k { - "bs" => { - let bs = Self::parse_bytes(k, v)?; - self.ibs = bs; - self.obs = bs; - } + "bs" => self.bs = Some(Self::parse_bytes(k, v)?), "cbs" => self.cbs = Some(Self::parse_bytes(k, v)?), - "conv" => self.parse_conv_flags(v)?, + "conv" => { + self.is_conv_specified = true; + self.parse_conv_flags(v)?; + } "count" => self.count = Some(Self::parse_n(v)?), - "ibs" => self.ibs = Self::parse_bytes(k, v)?, + "ibs" => self.ibs = Some(Self::parse_bytes(k, v)?), "if" => self.infile = Some(v.to_string()), "iflag" => self.parse_input_flags(v)?, - "obs" => self.obs = Self::parse_bytes(k, v)?, + "obs" => self.obs = Some(Self::parse_bytes(k, v)?), "of" => self.outfile = Some(v.to_string()), "oflag" => self.parse_output_flags(v)?, "seek" | "oseek" => self.seek = Self::parse_n(v)?, diff --git a/src/uu/dd/src/parseargs/unit_tests.rs b/src/uu/dd/src/parseargs/unit_tests.rs index 142e49fd0ba..51b0933e926 100644 --- a/src/uu/dd/src/parseargs/unit_tests.rs +++ b/src/uu/dd/src/parseargs/unit_tests.rs @@ -358,6 +358,7 @@ fn parse_icf_tokens_remaining() { fsync: true, ..Default::default() }, + is_conv_specified: true, ..Default::default() }) ); From 5142f35f8395130dd331cc5756a0cd0466a1e74c Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 17 Mar 2023 20:55:49 -0400 Subject: [PATCH 074/429] dd: add BufferedOutput to buffer partial blocks --- src/uu/dd/src/bufferedoutput.rs | 201 ++++++++++++++++++++++++++++++++ src/uu/dd/src/dd.rs | 19 ++- 2 files changed, 209 insertions(+), 11 deletions(-) create mode 100644 src/uu/dd/src/bufferedoutput.rs diff --git a/src/uu/dd/src/bufferedoutput.rs b/src/uu/dd/src/bufferedoutput.rs new file mode 100644 index 00000000000..1735ae10d80 --- /dev/null +++ b/src/uu/dd/src/bufferedoutput.rs @@ -0,0 +1,201 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// +// spell-checker:ignore wstat towrite cdefg bufferedoutput +//! Buffer partial output blocks until they are completed. +//! +//! Use the [`BufferedOutput`] struct to create a buffered form of the +//! [`Output`] writer. +use crate::{Output, WriteStat}; + +/// Buffer partial output blocks until they are completed. +/// +/// Complete blocks are written immediately to the inner [`Output`], +/// but partial blocks are stored in an internal buffer until they are +/// completed. +pub(crate) struct BufferedOutput<'a> { + /// The unbuffered inner block writer. + inner: Output<'a>, + + /// The internal buffer that stores a partial block. + /// + /// The size of this buffer is always less than the output block + /// size (that is, the value of the `obs` command-line option). + buf: Vec, +} + +impl<'a> BufferedOutput<'a> { + /// Add partial block buffering to the given block writer. + /// + /// The internal buffer size is at most the value of `obs` as + /// defined in `inner`. + pub(crate) fn new(inner: Output<'a>) -> Self { + let obs = inner.settings.obs; + Self { + inner, + buf: Vec::with_capacity(obs), + } + } + + pub(crate) fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { + self.inner.discard_cache(offset, len); + } + + /// Flush the partial block stored in the internal buffer. + pub(crate) fn flush(&mut self) -> std::io::Result { + let wstat = self.inner.write_blocks(&self.buf)?; + let n = wstat.bytes_total; + for _ in 0..n { + self.buf.remove(0); + } + Ok(wstat) + } + + /// Synchronize the inner block writer. + pub(crate) fn sync(&mut self) -> std::io::Result<()> { + self.inner.sync() + } + + /// Truncate the underlying file to the current stream position, if possible. + pub(crate) fn truncate(&mut self) -> std::io::Result<()> { + self.inner.dst.truncate() + } + + /// Write the given bytes one block at a time. + /// + /// Only complete blocks will be written. Partial blocks will be + /// buffered until enough bytes have been provided to complete a + /// block. The returned [`WriteStat`] object will include the + /// number of blocks written during execution of this function. + pub(crate) fn write_blocks(&mut self, buf: &[u8]) -> std::io::Result { + // Concatenate the old partial block with the new incoming bytes. + let towrite = [&self.buf, buf].concat(); + + // Write all complete blocks to the inner block writer. + // + // For example, if the output block size were 3, the buffered + // partial block were `b"ab"` and the new incoming bytes were + // `b"cdefg"`, then we would write blocks `b"abc"` and + // b`"def"` to the inner block writer. + let n = towrite.len(); + let rem = n % self.inner.settings.obs; + let wstat = self.inner.write_blocks(&towrite[..n - rem])?; + self.buf.clear(); + + // Buffer any remaining bytes as a partial block. + // + // Continuing the example above, the last byte `b"g"` would be + // buffered as a partial block until the next call to + // `write_blocks()`. + for byte in &towrite[n - rem..] { + self.buf.push(*byte); + } + + Ok(wstat) + } +} + +#[cfg(unix)] +#[cfg(test)] +mod tests { + use crate::bufferedoutput::BufferedOutput; + use crate::{Dest, Output, Settings}; + + #[test] + fn test_buffered_output_write_blocks_empty() { + let settings = Settings { + obs: 3, + ..Default::default() + }; + let inner = Output { + dst: Dest::Sink, + settings: &settings, + }; + let mut output = BufferedOutput::new(inner); + let wstat = output.write_blocks(&[]).unwrap(); + assert_eq!(wstat.writes_complete, 0); + assert_eq!(wstat.writes_partial, 0); + assert_eq!(wstat.bytes_total, 0); + assert_eq!(output.buf, vec![]); + } + + #[test] + fn test_buffered_output_write_blocks_partial() { + let settings = Settings { + obs: 3, + ..Default::default() + }; + let inner = Output { + dst: Dest::Sink, + settings: &settings, + }; + let mut output = BufferedOutput::new(inner); + let wstat = output.write_blocks(b"ab").unwrap(); + assert_eq!(wstat.writes_complete, 0); + assert_eq!(wstat.writes_partial, 0); + assert_eq!(wstat.bytes_total, 0); + assert_eq!(output.buf, b"ab"); + } + + #[test] + fn test_buffered_output_write_blocks_complete() { + let settings = Settings { + obs: 3, + ..Default::default() + }; + let inner = Output { + dst: Dest::Sink, + settings: &settings, + }; + let mut output = BufferedOutput::new(inner); + let wstat = output.write_blocks(b"abcd").unwrap(); + assert_eq!(wstat.writes_complete, 1); + assert_eq!(wstat.writes_partial, 0); + assert_eq!(wstat.bytes_total, 3); + assert_eq!(output.buf, b"d"); + } + + #[test] + fn test_buffered_output_write_blocks_append() { + let settings = Settings { + obs: 3, + ..Default::default() + }; + let inner = Output { + dst: Dest::Sink, + settings: &settings, + }; + let mut output = BufferedOutput { + inner, + buf: b"ab".to_vec(), + }; + let wstat = output.write_blocks(b"cdefg").unwrap(); + assert_eq!(wstat.writes_complete, 2); + assert_eq!(wstat.writes_partial, 0); + assert_eq!(wstat.bytes_total, 6); + assert_eq!(output.buf, b"g"); + } + + #[test] + fn test_buffered_output_flush() { + let settings = Settings { + obs: 10, + ..Default::default() + }; + let inner = Output { + dst: Dest::Sink, + settings: &settings, + }; + let mut output = BufferedOutput { + inner, + buf: b"abc".to_vec(), + }; + let wstat = output.flush().unwrap(); + assert_eq!(wstat.writes_complete, 0); + assert_eq!(wstat.writes_partial, 1); + assert_eq!(wstat.bytes_total, 3); + assert_eq!(output.buf, vec![]); + } +} diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 7d9138791cf..9374ca0cd37 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -3,23 +3,20 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fname, ftype, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE +// spell-checker:ignore fname, ftype, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE bufferedoutput +mod blocks; +mod bufferedoutput; +mod conversion_tables; mod datastructures; -use datastructures::*; - +mod numbers; mod parseargs; -use parseargs::Parser; - -mod conversion_tables; - mod progress; -use progress::{gen_prog_updater, ProgUpdate, ReadStat, StatusLevel, WriteStat}; -mod blocks; use blocks::conv_block_unblock_helper; - -mod numbers; +use datastructures::*; +use parseargs::Parser; +use progress::{gen_prog_updater, ProgUpdate, ReadStat, StatusLevel, WriteStat}; use std::cmp; use std::env; From b383e609988a7f171643a387a81c512cab1257be Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 17 Mar 2023 22:41:10 -0400 Subject: [PATCH 075/429] dd: implement Add for WriteStat --- src/uu/dd/src/progress.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index 4fe04cb0e67..ac7517c2c0c 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -379,6 +379,17 @@ impl std::ops::AddAssign for WriteStat { } } +impl std::ops::Add for WriteStat { + type Output = Self; + fn add(self, other: Self) -> Self { + Self { + writes_complete: self.writes_complete + other.writes_complete, + writes_partial: self.writes_partial + other.writes_partial, + bytes_total: self.bytes_total + other.bytes_total, + } + } +} + /// How much detail to report when printing transfer statistics. /// /// This corresponds to the available settings of the `status` From f343b7e964091507e9373c94da4619c962c8d23c Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 17 Mar 2023 22:41:56 -0400 Subject: [PATCH 076/429] dd: use read statistics for termination condition Correct the behavior of `dd` so that the termination condition of the main loop uses the number of bytes read, not the number of bytes written, when the `count` command-line option is given in bytes instead of blocks. --- src/uu/dd/src/dd.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 9374ca0cd37..b760d98e022 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -901,7 +901,7 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // blocks to this output. Read/write statistics are updated on // each iteration and cumulative statistics are reported to // the progress reporting thread. - while below_count_limit(&i.settings.count, &rstat, &wstat) { + while below_count_limit(&i.settings.count, &rstat) { // Read a block from the input then write the block to the output. // // As an optimization, make an educated guess about the @@ -1108,16 +1108,10 @@ fn calc_loop_bsize( // Decide if the current progress is below a count=N limit or return // true if no such limit is set. -fn below_count_limit(count: &Option, rstat: &ReadStat, wstat: &WriteStat) -> bool { +fn below_count_limit(count: &Option, rstat: &ReadStat) -> bool { match count { - Some(Num::Blocks(n)) => { - let n = *n; - rstat.reads_complete + rstat.reads_partial <= n - } - Some(Num::Bytes(n)) => { - let n = (*n).try_into().unwrap(); - wstat.bytes_total <= n - } + Some(Num::Blocks(n)) => rstat.reads_complete + rstat.reads_partial < *n, + Some(Num::Bytes(n)) => rstat.bytes_total < *n, None => true, } } From ceccd2ecc61ed83d9c66ac55f82913723ca4d96e Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 17 Mar 2023 22:42:24 -0400 Subject: [PATCH 077/429] dd: buffer partial blocks in the output writer Add buffering of partial blocks in the output block writer until they are completed. --- src/uu/dd/src/bufferedoutput.rs | 31 +++++---- src/uu/dd/src/dd.rs | 115 +++++++++++++++++++++++++++----- tests/by-util/test_dd.rs | 78 +++++++++++++++++++++- 3 files changed, 194 insertions(+), 30 deletions(-) diff --git a/src/uu/dd/src/bufferedoutput.rs b/src/uu/dd/src/bufferedoutput.rs index 1735ae10d80..6ac3b430046 100644 --- a/src/uu/dd/src/bufferedoutput.rs +++ b/src/uu/dd/src/bufferedoutput.rs @@ -46,10 +46,8 @@ impl<'a> BufferedOutput<'a> { /// Flush the partial block stored in the internal buffer. pub(crate) fn flush(&mut self) -> std::io::Result { let wstat = self.inner.write_blocks(&self.buf)?; - let n = wstat.bytes_total; - for _ in 0..n { - self.buf.remove(0); - } + let n = wstat.bytes_total.try_into().unwrap(); + self.buf.drain(0..n); Ok(wstat) } @@ -70,8 +68,19 @@ impl<'a> BufferedOutput<'a> { /// block. The returned [`WriteStat`] object will include the /// number of blocks written during execution of this function. pub(crate) fn write_blocks(&mut self, buf: &[u8]) -> std::io::Result { - // Concatenate the old partial block with the new incoming bytes. - let towrite = [&self.buf, buf].concat(); + // Split the incoming buffer into two parts: the bytes to write + // and the bytes to buffer for next time. + // + // If `buf` does not include enough bytes to form a full block, + // just buffer the whole thing and write zero blocks. + let n = self.buf.len() + buf.len(); + let rem = n % self.inner.settings.obs; + let i = buf.len().saturating_sub(rem); + let (to_write, to_buffer) = buf.split_at(i); + + // Concatenate the old partial block with the new bytes to form + // some number of complete blocks. + self.buf.extend_from_slice(to_write); // Write all complete blocks to the inner block writer. // @@ -79,19 +88,15 @@ impl<'a> BufferedOutput<'a> { // partial block were `b"ab"` and the new incoming bytes were // `b"cdefg"`, then we would write blocks `b"abc"` and // b`"def"` to the inner block writer. - let n = towrite.len(); - let rem = n % self.inner.settings.obs; - let wstat = self.inner.write_blocks(&towrite[..n - rem])?; - self.buf.clear(); + let wstat = self.inner.write_blocks(&self.buf)?; // Buffer any remaining bytes as a partial block. // // Continuing the example above, the last byte `b"g"` would be // buffered as a partial block until the next call to // `write_blocks()`. - for byte in &towrite[n - rem..] { - self.buf.push(*byte); - } + self.buf.clear(); + self.buf.extend_from_slice(to_buffer); Ok(wstat) } diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index b760d98e022..645c2496766 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -13,6 +13,7 @@ mod numbers; mod parseargs; mod progress; +use crate::bufferedoutput::BufferedOutput; use blocks::conv_block_unblock_helper; use datastructures::*; use parseargs::Parser; @@ -801,6 +802,68 @@ impl<'a> Output<'a> { Ok(()) } } + + /// Truncate the underlying file to the current stream position, if possible. + fn truncate(&mut self) -> std::io::Result<()> { + self.dst.truncate() + } +} + +/// The block writer either with or without partial block buffering. +enum BlockWriter<'a> { + /// Block writer with partial block buffering. + /// + /// Partial blocks are buffered until completed. + Buffered(BufferedOutput<'a>), + + /// Block writer without partial block buffering. + /// + /// Partial blocks are written immediately. + Unbuffered(Output<'a>), +} + +impl<'a> BlockWriter<'a> { + fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { + match self { + Self::Unbuffered(o) => o.discard_cache(offset, len), + Self::Buffered(o) => o.discard_cache(offset, len), + } + } + + fn flush(&mut self) -> io::Result { + match self { + Self::Unbuffered(_) => Ok(WriteStat::default()), + Self::Buffered(o) => o.flush(), + } + } + + fn sync(&mut self) -> io::Result<()> { + match self { + Self::Unbuffered(o) => o.sync(), + Self::Buffered(o) => o.sync(), + } + } + + /// Truncate the file to the final cursor location. + fn truncate(&mut self) { + // Calling `set_len()` may result in an error (for example, + // when calling it on `/dev/null`), but we don't want to + // terminate the process when that happens. Instead, we + // suppress the error by calling `Result::ok()`. This matches + // the behavior of GNU `dd` when given the command-line + // argument `of=/dev/null`. + match self { + Self::Unbuffered(o) => o.truncate().ok(), + Self::Buffered(o) => o.truncate().ok(), + }; + } + + fn write_blocks(&mut self, buf: &[u8]) -> std::io::Result { + match self { + Self::Unbuffered(o) => o.write_blocks(buf), + Self::Buffered(o) => o.write_blocks(buf), + } + } } /// Copy the given input data to this output, consuming both. @@ -814,7 +877,7 @@ impl<'a> Output<'a> { /// /// If there is a problem reading from the input or writing to /// this output. -fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { +fn dd_copy(mut i: Input, o: Output) -> std::io::Result<()> { // The read and write statistics. // // These objects are counters, initialized to zero. After each @@ -851,6 +914,9 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { let (prog_tx, rx) = mpsc::channel(); let output_thread = thread::spawn(gen_prog_updater(rx, i.settings.status)); + // Whether to truncate the output file after all blocks have been written. + let truncate = !o.settings.oconv.notrunc; + // Optimization: if no blocks are to be written, then don't // bother allocating any buffers. if let Some(Num::Blocks(0) | Num::Bytes(0)) = i.settings.count { @@ -875,7 +941,15 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { let len = o.dst.len()?.try_into().unwrap(); o.discard_cache(offset, len); } - return finalize(&mut o, rstat, wstat, start, &prog_tx, output_thread); + return finalize( + BlockWriter::Unbuffered(o), + rstat, + wstat, + start, + &prog_tx, + output_thread, + truncate, + ); }; // Create a common buffer with a capacity of the block size. @@ -895,6 +969,16 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { let mut read_offset = 0; let mut write_offset = 0; + let input_nocache = i.settings.iflags.nocache; + let output_nocache = o.settings.oflags.nocache; + + // Add partial block buffering, if needed. + let mut o = if o.settings.buffered { + BlockWriter::Buffered(BufferedOutput::new(o)) + } else { + BlockWriter::Unbuffered(o) + }; + // The main read/write loop. // // Each iteration reads blocks from the input and writes @@ -919,7 +1003,7 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // // TODO Better error handling for overflowing `offset` and `len`. let read_len = rstat_update.bytes_total; - if i.settings.iflags.nocache { + if input_nocache { let offset = read_offset.try_into().unwrap(); let len = read_len.try_into().unwrap(); i.discard_cache(offset, len); @@ -931,7 +1015,7 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // // TODO Better error handling for overflowing `offset` and `len`. let write_len = wstat_update.bytes_total; - if o.settings.oflags.nocache { + if output_nocache { let offset = write_offset.try_into().unwrap(); let len = write_len.try_into().unwrap(); o.discard_cache(offset, len); @@ -951,34 +1035,33 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { prog_tx.send(prog_update).unwrap_or(()); } } - finalize(&mut o, rstat, wstat, start, &prog_tx, output_thread) + finalize(o, rstat, wstat, start, &prog_tx, output_thread, truncate) } /// Flush output, print final stats, and join with the progress thread. fn finalize( - output: &mut Output, + mut output: BlockWriter, rstat: ReadStat, wstat: WriteStat, start: Instant, prog_tx: &mpsc::Sender, output_thread: thread::JoinHandle, + truncate: bool, ) -> std::io::Result<()> { - // Flush the output, if configured to do so. + // Flush the output in case a partial write has been buffered but + // not yet written. + let wstat_update = output.flush()?; + + // Sync the output, if configured to do so. output.sync()?; // Truncate the file to the final cursor location. - // - // Calling `set_len()` may result in an error (for example, - // when calling it on `/dev/null`), but we don't want to - // terminate the process when that happens. Instead, we - // suppress the error by calling `Result::ok()`. This matches - // the behavior of GNU `dd` when given the command-line - // argument `of=/dev/null`. - if !output.settings.oconv.notrunc { - output.dst.truncate().ok(); + if truncate { + output.truncate(); } // Print the final read/write statistics. + let wstat = wstat + wstat_update; let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed(), true); prog_tx.send(prog_update).unwrap_or(()); // Wait for the output thread to finish diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index d5ac8dc801c..a4c70097c8b 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi nabcde nabcdefg abcdefg +// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi nabcde nabcdefg abcdefg fifoname #[cfg(unix)] use crate::common::util::run_ucmd_as_root_with_stdin_stdout; @@ -15,6 +15,8 @@ use regex::Regex; use std::fs::{File, OpenOptions}; use std::io::{BufReader, Read, Write}; use std::path::PathBuf; +#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +use std::process::{Command, Stdio}; #[cfg(not(windows))] use std::thread::sleep; #[cfg(not(windows))] @@ -1582,3 +1584,77 @@ fn test_seek_past_dev() { print!("TEST SKIPPED"); } } + +#[test] +#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +fn test_reading_partial_blocks_from_fifo() { + // Create the FIFO. + let ts = TestScenario::new(util_name!()); + let at = ts.fixtures.clone(); + at.mkfifo("fifo"); + let fifoname = at.plus_as_string("fifo"); + + // Start a `dd` process that reads from the fifo (so it will wait + // until the writer process starts). + let mut reader_command = Command::new(TESTS_BINARY); + let child = reader_command + .args(["dd", "ibs=3", "obs=3", &format!("if={}", fifoname)]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + // Start different processes to write to the FIFO, with a small + // pause in between. + let mut writer_command = Command::new("sh"); + writer_command + .args([ + "-c", + &format!("(printf \"ab\"; sleep 0.1; printf \"cd\") > {}", fifoname), + ]) + .spawn() + .unwrap(); + + let output = child.wait_with_output().unwrap(); + assert_eq!(output.stdout, b"abcd"); + let expected = b"0+2 records in\n1+1 records out\n4 bytes copied"; + assert!(output.stderr.starts_with(expected)); +} + +#[test] +#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +fn test_reading_partial_blocks_from_fifo_unbuffered() { + // Create the FIFO. + let ts = TestScenario::new(util_name!()); + let at = ts.fixtures.clone(); + at.mkfifo("fifo"); + let fifoname = at.plus_as_string("fifo"); + + // Start a `dd` process that reads from the fifo (so it will wait + // until the writer process starts). + // + // `bs=N` takes precedence over `ibs=N` and `obs=N`. + let mut reader_command = Command::new(TESTS_BINARY); + let child = reader_command + .args(["dd", "bs=3", "ibs=1", "obs=1", &format!("if={}", fifoname)]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + // Start different processes to write to the FIFO, with a small + // pause in between. + let mut writer_command = Command::new("sh"); + writer_command + .args([ + "-c", + &format!("(printf \"ab\"; sleep 0.1; printf \"cd\") > {}", fifoname), + ]) + .spawn() + .unwrap(); + + let output = child.wait_with_output().unwrap(); + assert_eq!(output.stdout, b"abcd"); + let expected = b"0+2 records in\n0+2 records out\n4 bytes copied"; + assert!(output.stderr.starts_with(expected)); +} From 8eb66ab7ea93de366165f45127206da04cdb342d Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 27 Nov 2023 11:50:55 +0100 Subject: [PATCH 078/429] printf: remove whitespace, remove redundant spelling ignore and revert matching on result --- src/uu/printf/src/printf.rs | 17 ++++++----------- src/uu/seq/src/number.rs | 2 +- src/uu/seq/src/numberparse.rs | 2 +- src/uu/seq/src/seq.rs | 2 +- .../src/lib/features/format/num_format.rs | 1 - 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index cfb0315cfb0..663411b8952 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -10,9 +10,9 @@ use std::io::stdout; use std::ops::ControlFlow; use clap::{crate_version, Arg, ArgAction, Command}; -use uucore::error::{UError, UResult, UUsageError}; +use uucore::error::{UResult, UUsageError}; use uucore::format::{parse_spec_and_escape, FormatArgument}; -use uucore::{format_usage, help_about, help_section, help_usage, show}; +use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; const HELP: &str = "help"; @@ -49,15 +49,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { while args.peek().is_some() { for item in parse_spec_and_escape(format_string.as_ref()) { - match item { - Ok(item) => { - match item.write(stdout(), &mut args)? { - ControlFlow::Continue(()) => {} - ControlFlow::Break(()) => return Ok(()), - }; - } - Err(e) => show!(e), - } + match item?.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => return Ok(()), + }; } } Ok(()) diff --git a/src/uu/seq/src/number.rs b/src/uu/seq/src/number.rs index 182431a9210..314c842ba15 100644 --- a/src/uu/seq/src/number.rs +++ b/src/uu/seq/src/number.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore extendedbigdecimal extendedbigint +// spell-checker:ignore extendedbigdecimal use num_traits::Zero; use crate::extendedbigdecimal::ExtendedBigDecimal; diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index a82d1e88776..df7c1f7d1dd 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore extendedbigdecimal extendedbigint bigdecimal numberparse +// spell-checker:ignore extendedbigdecimal bigdecimal numberparse //! Parsing numbers for use in `seq`. //! //! This module provides an implementation of [`FromStr`] for the diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 05338864545..33b7636edbc 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse +// spell-checker:ignore (ToDO) extendedbigdecimal numberparse use std::io::{stdout, ErrorKind, Write}; use clap::{crate_version, Arg, ArgAction, Command}; diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 6fd177d1325..51f3336cf61 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -28,7 +28,6 @@ pub enum UnsignedIntVariant { } #[derive(Clone, Copy, Debug)] - pub enum FloatVariant { Decimal, Scientific, From 83784b2d96bc4b68a67186ac57014822a14034d7 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 27 Nov 2023 11:37:39 +0100 Subject: [PATCH 079/429] expr: refactor AST and parsing --- src/uu/expr/src/expr.rs | 87 ++-- src/uu/expr/src/syntax_tree.rs | 924 ++++++++++++++++----------------- src/uu/expr/src/tokens.rs | 147 ------ 3 files changed, 494 insertions(+), 664 deletions(-) delete mode 100644 src/uu/expr/src/tokens.rs diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 909c4c37653..c271f0935fd 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -3,14 +3,19 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use std::fmt::Display; + use clap::{crate_version, Arg, ArgAction, Command}; +use syntax_tree::AstNode; use uucore::{ - error::{UResult, USimpleError, UUsageError}, + display::Quotable, + error::{UError, UResult}, format_usage, help_about, help_section, help_usage, }; +use crate::syntax_tree::is_truthy; + mod syntax_tree; -mod tokens; mod options { pub const VERSION: &str = "version"; @@ -18,6 +23,51 @@ mod options { pub const EXPRESSION: &str = "expression"; } +pub type ExprResult = Result; + +#[derive(Debug, PartialEq, Eq)] +pub enum ExprError { + UnexpectedArgument(String), + MissingArgument(String), + NonIntegerArgument, + MissingOperand, + DivisionByZero, + InvalidRegexExpression, + ExpectedClosingBraceAfter(String), +} + +impl Display for ExprError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnexpectedArgument(s) => { + write!(f, "syntax error: unexpected argument {}", s.quote()) + } + Self::MissingArgument(s) => { + write!(f, "syntax error: missing argument after {}", s.quote()) + } + Self::NonIntegerArgument => write!(f, "non-integer argument"), + Self::MissingOperand => write!(f, "missing operand"), + Self::DivisionByZero => write!(f, "division by zero"), + Self::InvalidRegexExpression => write!(f, "Invalid regex expression"), + Self::ExpectedClosingBraceAfter(s) => { + write!(f, "expected ')' after {}", s.quote()) + } + } + } +} + +impl std::error::Error for ExprError {} + +impl UError for ExprError { + fn code(&self) -> i32 { + 2 + } + + fn usage(&self) -> bool { + *self == Self::MissingOperand + } +} + pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(crate_version!()) @@ -53,36 +103,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // For expr utility we do not want getopts. // The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)` let matches = uu_app().try_get_matches_from(args)?; - let token_strings = matches + let token_strings: Vec<&str> = matches .get_many::(options::EXPRESSION) .map(|v| v.into_iter().map(|s| s.as_ref()).collect::>()) .unwrap_or_default(); - if token_strings.is_empty() { - return Err(UUsageError::new(2, "missing operand")); - } - - match process_expr(&token_strings[..]) { - Ok(expr_result) => print_expr_ok(&expr_result), - Err(expr_error) => Err(USimpleError::new(2, &expr_error)), + let res = AstNode::parse(&token_strings)?.eval()?; + println!("{res}"); + if !is_truthy(&res) { + return Err(1.into()); } -} - -fn process_expr(token_strings: &[&str]) -> Result { - let maybe_tokens = tokens::strings_to_tokens(token_strings); - let maybe_ast = syntax_tree::tokens_to_ast(maybe_tokens); - evaluate_ast(maybe_ast) -} - -fn print_expr_ok(expr_result: &str) -> UResult<()> { - println!("{expr_result}"); - if expr_result.parse::() == Ok(0) || expr_result.is_empty() { - Err(1.into()) - } else { - Ok(()) - } -} - -fn evaluate_ast(maybe_ast: Result, String>) -> Result { - maybe_ast.and_then(|ast| ast.evaluate()) + Ok(()) } diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 2260b2e2186..f81f1da1ec4 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -3,574 +3,522 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -//! -//! Here we employ shunting-yard algorithm for building AST from tokens according to operators' precedence and associative-ness. -//! * `` -//! - // spell-checker:ignore (ToDO) ints paren prec multibytes use num_bigint::BigInt; -use num_traits::Zero; use onig::{Regex, RegexOptions, Syntax}; -use uucore::display::Quotable; -use crate::tokens::Token; +use crate::{ExprError, ExprResult}; -type TokenStack = Vec<(usize, Token)>; -pub type OperandsList = Vec>; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinOp { + Relation(RelationOp), + Numeric(NumericOp), + String(StringOp), +} -#[derive(Debug)] -pub enum AstNode { - Leaf { - token_idx: usize, - value: String, - }, - Node { - token_idx: usize, - op_type: String, - operands: OperandsList, - }, +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelationOp { + Lt, + Leq, + Eq, + Neq, + Gt, + Geq, } -impl AstNode { - fn debug_dump(&self) { - self.debug_dump_impl(1); - } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum NumericOp { + Add, + Sub, + Mul, + Div, + Mod, +} - fn debug_dump_impl(&self, depth: usize) { - for _ in 0..depth { - print!("\t",); - } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StringOp { + Match, + Index, + And, + Or, +} + +impl BinOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { match self { - Self::Leaf { token_idx, value } => println!( - "Leaf( {} ) at #{} ( evaluate -> {:?} )", - value, - token_idx, - self.evaluate() - ), - Self::Node { - token_idx, - op_type, - operands, - } => { - println!( - "Node( {} ) at #{} ( evaluate -> {:?} )", - op_type, - token_idx, - self.evaluate() - ); - for operand in operands { - operand.debug_dump_impl(depth + 1); - } - } + Self::Relation(op) => op.eval(left, right), + Self::Numeric(op) => op.eval(left, right), + Self::String(op) => op.eval(left, right), } } +} - fn new_node(token_idx: usize, op_type: &str, operands: OperandsList) -> Box { - Box::new(Self::Node { - token_idx, - op_type: op_type.into(), - operands, - }) - } - - fn new_leaf(token_idx: usize, value: &str) -> Box { - Box::new(Self::Leaf { - token_idx, - value: value.into(), - }) +impl RelationOp { + fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { + let a = a.eval()?; + let b = b.eval()?; + let b = if let (Ok(a), Ok(b)) = (a.parse::(), b.parse::()) { + match self { + Self::Lt => a < b, + Self::Leq => a <= b, + Self::Eq => a == b, + Self::Neq => a != b, + Self::Gt => a > b, + Self::Geq => a >= b, + } + } else { + // These comparisons should be using locale settings + match self { + Self::Lt => a < b, + Self::Leq => a <= b, + Self::Eq => a == b, + Self::Neq => a != b, + Self::Gt => a > b, + Self::Geq => a >= b, + } + }; + if b { + Ok("1".into()) + } else { + Ok("0".into()) + } } +} - pub fn evaluate(&self) -> Result { - match self { - Self::Leaf { value, .. } => Ok(value.clone()), - Self::Node { op_type, .. } => match self.operand_values() { - Err(reason) => Err(reason), - Ok(operand_values) => match op_type.as_ref() { - "+" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a + b), &operand_values) - } - "-" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a - b), &operand_values) - } - "*" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a * b), &operand_values) - } - "/" => infix_operator_two_ints( - |a: BigInt, b: BigInt| { - if b.is_zero() { - Err("division by zero".to_owned()) - } else { - Ok(a / b) - } - }, - &operand_values, - ), - "%" => infix_operator_two_ints( - |a: BigInt, b: BigInt| { - if b.is_zero() { - Err("division by zero".to_owned()) - } else { - Ok(a % b) - } - }, - &operand_values, - ), - "=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a == b)), - |a: &String, b: &String| Ok(bool_as_string(a == b)), - &operand_values, - ), - "!=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a != b)), - |a: &String, b: &String| Ok(bool_as_string(a != b)), - &operand_values, - ), - "<" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a < b)), - |a: &String, b: &String| Ok(bool_as_string(a < b)), - &operand_values, - ), - ">" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a > b)), - |a: &String, b: &String| Ok(bool_as_string(a > b)), - &operand_values, - ), - "<=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a <= b)), - |a: &String, b: &String| Ok(bool_as_string(a <= b)), - &operand_values, - ), - ">=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a >= b)), - |a: &String, b: &String| Ok(bool_as_string(a >= b)), - &operand_values, - ), - "|" => Ok(infix_operator_or(&operand_values)), - "&" => Ok(infix_operator_and(&operand_values)), - ":" | "match" => operator_match(&operand_values), - "length" => Ok(prefix_operator_length(&operand_values)), - "index" => Ok(prefix_operator_index(&operand_values)), - "substr" => Ok(prefix_operator_substr(&operand_values)), - - _ => Err(format!("operation not implemented: {op_type}")), - }, +impl NumericOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + let a: BigInt = left + .eval()? + .parse() + .map_err(|_| ExprError::NonIntegerArgument)?; + let b: BigInt = right + .eval()? + .parse() + .map_err(|_| ExprError::NonIntegerArgument)?; + Ok(match self { + Self::Add => a + b, + Self::Sub => a - b, + Self::Mul => a * b, + Self::Div => match a.checked_div(&b) { + Some(x) => x, + None => return Err(ExprError::DivisionByZero), }, + Self::Mod => { + if a.checked_div(&b).is_none() { + return Err(ExprError::DivisionByZero); + }; + a % b + } } + .to_string()) } +} - pub fn operand_values(&self) -> Result, String> { - if let Self::Node { - operands, op_type, .. - } = self - { - let mut out = Vec::with_capacity(operands.len()); - let mut operands = operands.iter(); - - if let Some(value) = operands.next() { - let value = value.evaluate()?; - out.push(value.clone()); - // short-circuit evaluation for `|` and `&` - // push dummy to pass `assert!(values.len() == 2);` - match op_type.as_ref() { - "|" => { - if value_as_bool(&value) { - out.push(String::from("dummy")); - return Ok(out); - } - } - "&" => { - if !value_as_bool(&value) { - out.push(String::from("dummy")); - return Ok(out); +impl StringOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + match self { + Self::Or => { + let left = left.eval()?; + if is_truthy(&left) { + return Ok(left); + } + let right = right.eval()?; + if is_truthy(&right) { + return Ok(right); + } + Ok("0".into()) + } + Self::And => { + let left = left.eval()?; + if !is_truthy(&left) { + return Ok("0".into()); + } + let right = right.eval()?; + if !is_truthy(&right) { + return Ok("0".into()); + } + Ok(left) + } + Self::Match => { + let left = left.eval()?; + let right = right.eval()?; + let re_string = format!("^{}", &right); + let re = Regex::with_options( + &re_string, + RegexOptions::REGEX_OPTION_NONE, + Syntax::grep(), + ) + .map_err(|_| ExprError::InvalidRegexExpression)?; + Ok(if re.captures_len() > 0 { + re.captures(&left) + .map(|captures| captures.at(1).unwrap()) + .unwrap_or("") + .to_string() + } else { + re.find(&left) + .map_or("0".to_string(), |(start, end)| (end - start).to_string()) + }) + } + Self::Index => { + let left = left.eval()?; + let right = right.eval()?; + for (current_idx, ch_h) in left.chars().enumerate() { + for ch_n in right.chars() { + if ch_n == ch_h { + return Ok((current_idx + 1).to_string()); } } - _ => {} } + Ok("0".to_string()) } - - for operand in operands { - let value = operand.evaluate()?; - out.push(value); - } - Ok(out) - } else { - panic!("Invoked .operand_values(&self) not with ASTNode::Node") } } } -pub fn tokens_to_ast( - maybe_tokens: Result, String>, -) -> Result, String> { - maybe_tokens.and_then(|tokens| { - let mut out_stack: TokenStack = Vec::new(); - let mut op_stack: TokenStack = Vec::new(); - - for (token_idx, token) in tokens { - push_token_to_either_stack(token_idx, &token, &mut out_stack, &mut op_stack)?; - } - move_rest_of_ops_to_out(&mut out_stack, &mut op_stack)?; - assert!(op_stack.is_empty()); - - maybe_dump_rpn(&out_stack); - let result = ast_from_rpn(&mut out_stack); - if out_stack.is_empty() { - maybe_dump_ast(&result); - result - } else { - Err( - "syntax error (first RPN token does not represent the root of the expression AST)" - .to_owned(), - ) - } - }) +/// Precedence for infix binary operators +const PRECEDENCE: &[&[(&str, BinOp)]] = &[ + &[("|", BinOp::String(StringOp::Or))], + &[("&", BinOp::String(StringOp::And))], + &[ + ("<", BinOp::Relation(RelationOp::Lt)), + ("<=", BinOp::Relation(RelationOp::Leq)), + ("=", BinOp::Relation(RelationOp::Eq)), + ("!=", BinOp::Relation(RelationOp::Neq)), + (">=", BinOp::Relation(RelationOp::Geq)), + (">", BinOp::Relation(RelationOp::Gt)), + ], + &[ + ("+", BinOp::Numeric(NumericOp::Add)), + ("-", BinOp::Numeric(NumericOp::Sub)), + ], + &[ + ("*", BinOp::Numeric(NumericOp::Mul)), + ("/", BinOp::Numeric(NumericOp::Div)), + ("%", BinOp::Numeric(NumericOp::Mod)), + ], + &[(":", BinOp::String(StringOp::Match))], +]; + +#[derive(Debug, PartialEq, Eq)] +pub enum AstNode { + Leaf { + value: String, + }, + BinOp { + op_type: BinOp, + left: Box, + right: Box, + }, + Substr { + string: Box, + pos: Box, + length: Box, + }, + Length { + string: Box, + }, } -fn maybe_dump_ast(result: &Result, String>) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") { - if debug_var == "1" { - println!("EXPR_DEBUG_AST"); - match result { - Ok(ast) => ast.debug_dump(), - Err(reason) => println!("\terr: {reason:?}"), - } - } +impl AstNode { + pub fn parse(input: &[&str]) -> ExprResult { + Parser::new(input).parse() } -} -#[allow(clippy::ptr_arg)] -fn maybe_dump_rpn(rpn: &TokenStack) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_RPN") { - if debug_var == "1" { - println!("EXPR_DEBUG_RPN"); - for token in rpn { - println!("\t{token:?}"); + pub fn eval(&self) -> ExprResult { + match self { + Self::Leaf { value } => Ok(value.into()), + Self::BinOp { + op_type, + left, + right, + } => op_type.eval(left, right), + Self::Substr { + string, + pos, + length, + } => { + let string = string.eval()?; + + // The GNU docs say: + // + // > If either position or length is negative, zero, or + // > non-numeric, returns the null string. + // + // So we coerce errors into 0 to make that the only case we + // have to care about. + let pos: usize = pos.eval()?.parse().unwrap_or(0); + let length: usize = length.eval()?.parse().unwrap_or(0); + + let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { + return Ok(String::new()); + }; + + Ok(string.chars().skip(pos).take(length).collect()) } + Self::Length { string } => Ok(string.eval()?.chars().count().to_string()), } } } -fn ast_from_rpn(rpn: &mut TokenStack) -> Result, String> { - match rpn.pop() { - None => Err("syntax error (premature end of expression)".to_owned()), - Some((token_idx, Token::Value { value })) => Ok(AstNode::new_leaf(token_idx, &value)), +struct Parser<'a> { + input: &'a [&'a str], + index: usize, +} + +impl<'a> Parser<'a> { + fn new(input: &'a [&'a str]) -> Self { + Self { input, index: 0 } + } - Some((token_idx, Token::InfixOp { value, .. })) => { - maybe_ast_node(token_idx, &value, 2, rpn) + fn next(&mut self) -> ExprResult<&'a str> { + let next = self.input.get(self.index); + if let Some(next) = next { + self.index += 1; + Ok(next) + } else { + // The indexing won't panic, because we know that the input size + // is greater than zero. + Err(ExprError::MissingArgument( + self.input[self.index - 1].into(), + )) } + } - Some((token_idx, Token::PrefixOp { value, arity })) => { - maybe_ast_node(token_idx, &value, arity, rpn) + fn accept(&mut self, f: impl Fn(&str) -> Option) -> Option { + let next = self.input.get(self.index)?; + let tok = f(next); + if let Some(tok) = tok { + self.index += 1; + Some(tok) + } else { + None } + } - Some((token_idx, unexpected_token)) => { - panic!("unexpected token at #{token_idx} {unexpected_token:?}") + fn parse(&mut self) -> ExprResult { + if self.input.is_empty() { + return Err(ExprError::MissingOperand); + } + let res = self.parse_expression()?; + if let Some(arg) = self.input.get(self.index) { + return Err(ExprError::UnexpectedArgument(arg.to_string())); } + Ok(res) } -} -fn maybe_ast_node( - token_idx: usize, - op_type: &str, - arity: usize, - rpn: &mut TokenStack, -) -> Result, String> { - let mut operands = Vec::with_capacity(arity); - for _ in 0..arity { - let operand = ast_from_rpn(rpn)?; - operands.push(operand); + fn parse_expression(&mut self) -> ExprResult { + self.parse_precedence(0) } - operands.reverse(); - Ok(AstNode::new_node(token_idx, op_type, operands)) -} -fn move_rest_of_ops_to_out( - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - loop { - match op_stack.pop() { - None => return Ok(()), - Some((token_idx, Token::ParOpen)) => { - return Err(format!( - "syntax error (Mismatched open-parenthesis at #{token_idx})" - )) - } - Some((token_idx, Token::ParClose)) => { - return Err(format!( - "syntax error (Mismatched close-parenthesis at #{token_idx})" - )) + fn parse_op(&mut self, precedence: usize) -> Option { + self.accept(|s| { + for (op_string, op) in PRECEDENCE[precedence] { + if s == *op_string { + return Some(*op); + } } - Some(other) => out_stack.push(other), - } + None + }) } -} -fn push_token_to_either_stack( - token_idx: usize, - token: &Token, - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - let result = match token { - Token::Value { .. } => { - out_stack.push((token_idx, token.clone())); - Ok(()) + fn parse_precedence(&mut self, precedence: usize) -> ExprResult { + if precedence >= PRECEDENCE.len() { + return self.parse_simple_expression(); } - Token::InfixOp { .. } => { - if op_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - push_op_to_stack(token_idx, token, out_stack, op_stack) - } + let mut left = self.parse_precedence(precedence + 1)?; + while let Some(op) = self.parse_op(precedence) { + let right = self.parse_precedence(precedence + 1)?; + left = AstNode::BinOp { + op_type: op, + left: Box::new(left), + right: Box::new(right), + }; } + Ok(left) + } - Token::ParOpen => { - if out_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - Err("syntax error: unexpected argument '('".to_string()) + fn parse_simple_expression(&mut self) -> ExprResult { + let first = self.next()?; + Ok(match first { + "match" => { + let left = self.parse_expression()?; + let right = self.parse_expression()?; + AstNode::BinOp { + op_type: BinOp::String(StringOp::Match), + left: Box::new(left), + right: Box::new(right), + } } - } - - Token::PrefixOp { value, .. } => { - if out_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - Err(format!( - "syntax error: unexpected argument {}", - value.quote() - )) + "substr" => { + let string = self.parse_expression()?; + let pos = self.parse_expression()?; + let length = self.parse_expression()?; + AstNode::Substr { + string: Box::new(string), + pos: Box::new(pos), + length: Box::new(length), + } } - } - - Token::ParClose => move_till_match_paren(out_stack, op_stack), - }; - maybe_dump_shunting_yard_step(token_idx, token, out_stack, op_stack, &result); - result -} - -#[allow(clippy::ptr_arg)] -fn maybe_dump_shunting_yard_step( - token_idx: usize, - token: &Token, - out_stack: &TokenStack, - op_stack: &TokenStack, - result: &Result<(), String>, -) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_SYA_STEP") { - if debug_var == "1" { - println!("EXPR_DEBUG_SYA_STEP"); - println!("\t{token_idx} => {token:?}"); - println!("\t\tout: {out_stack:?}"); - println!("\t\top : {op_stack:?}"); - println!("\t\tresult: {result:?}"); - } - } -} - -fn push_op_to_stack( - token_idx: usize, - token: &Token, - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - if let Token::InfixOp { - precedence: prec, - left_assoc: la, - .. - } = *token - { - loop { - match op_stack.last() { - None | Some(&(_, Token::ParOpen)) => { - op_stack.push((token_idx, token.clone())); - return Ok(()); + "index" => { + let left = self.parse_expression()?; + let right = self.parse_expression()?; + AstNode::BinOp { + op_type: BinOp::String(StringOp::Index), + left: Box::new(left), + right: Box::new(right), } - - Some(&( - _, - Token::InfixOp { - precedence: prev_prec, - .. - }, - )) => { - if la && prev_prec >= prec || !la && prev_prec > prec { - out_stack.push(op_stack.pop().unwrap()); - } else { - op_stack.push((token_idx, token.clone())); - return Ok(()); - } + } + "length" => { + let string = self.parse_expression()?; + AstNode::Length { + string: Box::new(string), } - - Some(&(_, Token::PrefixOp { .. })) => { - op_stack.push((token_idx, token.clone())); - return Ok(()); + } + "+" => AstNode::Leaf { + value: self.next()?.into(), + }, + "(" => { + let s = self.parse_expression()?; + let close_paren = self.next()?; + if close_paren != ")" { + // Since we have parsed at least a '(', there will be a token + // at `self.index - 1`. So this indexing won't panic. + return Err(ExprError::ExpectedClosingBraceAfter( + self.input[self.index - 1].into(), + )); } - - Some(_) => panic!("Non-operator on op_stack"), + s } - } - } else { - panic!("Expected infix-op") + s => AstNode::Leaf { value: s.into() }, + }) } } -fn move_till_match_paren( - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - loop { - let op = op_stack - .pop() - .ok_or_else(|| "syntax error (Mismatched close-parenthesis)".to_string())?; - match op { - (_, Token::ParOpen) => return Ok(()), - other => out_stack.push(other), - } +/// Determine whether `expr` should evaluate the string as "truthy" +/// +/// Truthy strings are either empty or match the regex "-?0+". +pub fn is_truthy(s: &str) -> bool { + // Edge case: `-` followed by nothing is truthy + if s == "-" { + return true; } + + let mut bytes = s.bytes(); + + // Empty string is falsy + let Some(first) = bytes.next() else { + return false; + }; + + let is_zero = (first == b'-' || first == b'0') && bytes.all(|b| b == b'0'); + !is_zero } -fn infix_operator_two_ints(f: F, values: &[String]) -> Result -where - F: Fn(BigInt, BigInt) -> Result, -{ - assert!(values.len() == 2); - if let Ok(left) = values[0].parse::() { - if let Ok(right) = values[1].parse::() { - return f(left, right).map(|big_int| big_int.to_string()); +#[cfg(test)] +mod test { + use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp}; + + impl From<&str> for AstNode { + fn from(value: &str) -> Self { + Self::Leaf { + value: value.into(), + } } } - Err("Expected an integer operand".to_string()) -} -fn infix_operator_two_ints_or_two_strings( - fi: FI, - fs: FS, - values: &[String], -) -> Result -where - FI: Fn(BigInt, BigInt) -> Result, - FS: Fn(&String, &String) -> Result, -{ - assert!(values.len() == 2); - if let (Some(a_int), Some(b_int)) = ( - values[0].parse::().ok(), - values[1].parse::().ok(), - ) { - match fi(a_int, b_int) { - Ok(result) => Ok(result.to_string()), - Err(reason) => Err(reason), + fn op(op_type: BinOp, left: impl Into, right: impl Into) -> AstNode { + AstNode::BinOp { + op_type, + left: Box::new(left.into()), + right: Box::new(right.into()), } - } else { - fs(&values[0], &values[1]) } -} -fn infix_operator_or(values: &[String]) -> String { - assert!(values.len() == 2); - if value_as_bool(&values[0]) { - values[0].clone() - } else if value_as_bool(&values[1]) { - values[1].clone() - } else { - 0.to_string() + fn length(string: impl Into) -> AstNode { + AstNode::Length { + string: Box::new(string.into()), + } } -} -fn infix_operator_and(values: &[String]) -> String { - assert!(values.len() == 2); - if value_as_bool(&values[0]) && value_as_bool(&values[1]) { - values[0].clone() - } else { - 0.to_string() + fn substr( + string: impl Into, + pos: impl Into, + length: impl Into, + ) -> AstNode { + AstNode::Substr { + string: Box::new(string.into()), + pos: Box::new(pos.into()), + length: Box::new(length.into()), + } } -} -fn operator_match(values: &[String]) -> Result { - assert!(values.len() == 2); - let re_string = format!("^{}", &values[1]); - let re = Regex::with_options(&re_string, RegexOptions::REGEX_OPTION_NONE, Syntax::grep()) - .map_err(|err| err.description().to_string())?; - Ok(if re.captures_len() > 0 { - re.captures(&values[0]) - .map(|captures| captures.at(1).unwrap()) - .unwrap_or("") - .to_string() - } else { - re.find(&values[0]) - .map_or("0".to_string(), |(start, end)| (end - start).to_string()) - }) -} - -fn prefix_operator_length(values: &[String]) -> String { - assert!(values.len() == 1); - // Use chars().count() as we can have some multibytes chars - // See https://github.com/uutils/coreutils/issues/3132 - values[0].chars().count().to_string() -} - -fn prefix_operator_index(values: &[String]) -> String { - assert!(values.len() == 2); - let haystack = &values[0]; - let needles = &values[1]; - - for (current_idx, ch_h) in haystack.chars().enumerate() { - for ch_n in needles.chars() { - if ch_n == ch_h { - return (current_idx + 1).to_string(); - } + #[test] + fn infix_operators() { + let cases = [ + ("|", BinOp::String(StringOp::Or)), + ("&", BinOp::String(StringOp::And)), + ("<", BinOp::Relation(RelationOp::Lt)), + ("<=", BinOp::Relation(RelationOp::Leq)), + ("=", BinOp::Relation(RelationOp::Eq)), + ("!=", BinOp::Relation(RelationOp::Neq)), + (">=", BinOp::Relation(RelationOp::Geq)), + (">", BinOp::Relation(RelationOp::Gt)), + ("+", BinOp::Numeric(NumericOp::Add)), + ("-", BinOp::Numeric(NumericOp::Sub)), + ("*", BinOp::Numeric(NumericOp::Mul)), + ("/", BinOp::Numeric(NumericOp::Div)), + ("%", BinOp::Numeric(NumericOp::Mod)), + (":", BinOp::String(StringOp::Match)), + ]; + for (string, value) in cases { + assert_eq!(AstNode::parse(&["1", string, "2"]), Ok(op(value, "1", "2"))); } } - "0".to_string() -} - -fn prefix_operator_substr(values: &[String]) -> String { - assert!(values.len() == 3); - let subj = &values[0]; - let idx = match values[1] - .parse::() - .ok() - .and_then(|v| v.checked_sub(1)) - { - Some(i) => i, - None => return String::new(), - }; - let len = match values[2].parse::() { - Ok(i) => i, - Err(_) => return String::new(), - }; - - subj.chars().skip(idx).take(len).collect() -} - -fn bool_as_int(b: bool) -> u8 { - u8::from(b) -} -fn bool_as_string(b: bool) -> String { - if b { - "1".to_string() - } else { - "0".to_string() + #[test] + fn other_operators() { + assert_eq!( + AstNode::parse(&["match", "1", "2"]), + Ok(op(BinOp::String(StringOp::Match), "1", "2")), + ); + assert_eq!( + AstNode::parse(&["index", "1", "2"]), + Ok(op(BinOp::String(StringOp::Index), "1", "2")), + ); + assert_eq!(AstNode::parse(&["length", "1"]), Ok(length("1")),); + assert_eq!( + AstNode::parse(&["substr", "1", "2", "3"]), + Ok(substr("1", "2", "3")), + ); } -} -fn value_as_bool(s: &str) -> bool { - if s.is_empty() { - return false; - } - match s.parse::() { - Ok(n) => n != Zero::zero(), - Err(_) => true, + #[test] + fn precedence() { + assert_eq!( + AstNode::parse(&["1", "+", "2", "*", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Add), + "1", + op(BinOp::Numeric(NumericOp::Mul), "2", "3") + )) + ); + assert_eq!( + AstNode::parse(&["(", "1", "+", "2", ")", "*", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Mul), + op(BinOp::Numeric(NumericOp::Add), "1", "2"), + "3" + )) + ); + assert_eq!( + AstNode::parse(&["1", "*", "2", "+", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Add), + op(BinOp::Numeric(NumericOp::Mul), "1", "2"), + "3" + )), + ); } } diff --git a/src/uu/expr/src/tokens.rs b/src/uu/expr/src/tokens.rs deleted file mode 100644 index f499881c138..00000000000 --- a/src/uu/expr/src/tokens.rs +++ /dev/null @@ -1,147 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. - -//! -//! The following tokens are present in the expr grammar: -//! * integer literal; -//! * string literal; -//! * infix binary operators; -//! * prefix operators. -//! -//! According to the man-page of expr we have expression split into tokens (each token -- separate CLI-argument). -//! Hence all we need is to map the strings into the Token structures, except for some ugly fiddling with +-escaping. -//! - -// spell-checker:ignore (ToDO) paren - -#[derive(Debug, Clone)] -pub enum Token { - Value { - value: String, - }, - - ParOpen, - ParClose, - - InfixOp { - precedence: u8, - left_assoc: bool, - value: String, - }, - - PrefixOp { - arity: usize, - value: String, - }, -} - -impl Token { - fn new_infix_op(v: &str, left_assoc: bool, precedence: u8) -> Self { - Self::InfixOp { - left_assoc, - precedence, - value: v.into(), - } - } - - fn new_value(v: &str) -> Self { - Self::Value { value: v.into() } - } - - fn is_infix_plus(&self) -> bool { - match self { - Self::InfixOp { value, .. } => value == "+", - _ => false, - } - } - - fn is_a_value(&self) -> bool { - matches!(*self, Self::Value { .. }) - } - - fn is_a_close_paren(&self) -> bool { - matches!(*self, Self::ParClose) - } -} - -pub fn strings_to_tokens(strings: &[&str]) -> Result, String> { - let mut tokens_acc = Vec::with_capacity(strings.len()); - let mut tok_idx = 1; - - for s in strings { - let token_if_not_escaped = match *s { - "(" => Token::ParOpen, - ")" => Token::ParClose, - - "^" => Token::new_infix_op(s, false, 7), - - ":" => Token::new_infix_op(s, true, 6), - - "*" | "/" | "%" => Token::new_infix_op(s, true, 5), - - "+" | "-" => Token::new_infix_op(s, true, 4), - - "=" | "!=" | "<" | ">" | "<=" | ">=" => Token::new_infix_op(s, true, 3), - - "&" => Token::new_infix_op(s, true, 2), - - "|" => Token::new_infix_op(s, true, 1), - - "match" | "index" => Token::PrefixOp { - arity: 2, - value: s.to_string(), - }, - "substr" => Token::PrefixOp { - arity: 3, - value: s.to_string(), - }, - "length" => Token::PrefixOp { - arity: 1, - value: s.to_string(), - }, - - _ => Token::new_value(s), - }; - push_token_if_not_escaped(&mut tokens_acc, tok_idx, token_if_not_escaped, s); - tok_idx += 1; - } - maybe_dump_tokens_acc(&tokens_acc); - - Ok(tokens_acc) -} - -fn maybe_dump_tokens_acc(tokens_acc: &[(usize, Token)]) { - use std::env; - - if let Ok(debug_var) = env::var("EXPR_DEBUG_TOKENS") { - if debug_var == "1" { - println!("EXPR_DEBUG_TOKENS"); - for token in tokens_acc { - println!("\t{token:?}"); - } - } - } -} - -fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, token: Token, s: &str) { - // `+` may be escaped such as `expr + 1` and `expr 1 + + 1` - let prev_is_plus = match acc.last() { - None => false, - Some(t) => t.1.is_infix_plus(), - }; - let should_use_as_escaped = if prev_is_plus && acc.len() >= 2 { - let pre_prev = &acc[acc.len() - 2]; - !(pre_prev.1.is_a_value() || pre_prev.1.is_a_close_paren()) - } else { - prev_is_plus - }; - - if should_use_as_escaped { - acc.pop(); - acc.push((tok_idx, Token::new_value(s))); - } else { - acc.push((tok_idx, token)); - } -} From 01c32a5220ef036bdc1d9bae8928336a815db619 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 11:40:33 +0100 Subject: [PATCH 080/429] fmt: clean up some small bits --- src/uu/fmt/src/linebreak.rs | 37 +++--- src/uu/fmt/src/parasplit.rs | 232 ++++++++++++++++++------------------ 2 files changed, 132 insertions(+), 137 deletions(-) diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index fbd990fff1e..7cd65d86149 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -46,7 +46,7 @@ pub fn break_lines( ostream: &mut BufWriter, ) -> std::io::Result<()> { // indent - let p_indent = ¶.indent_str[..]; + let p_indent = ¶.indent_str; let p_indent_len = para.indent_len; // words @@ -55,14 +55,12 @@ pub fn break_lines( // the first word will *always* appear on the first line // make sure of this here - let (w, w_len) = match p_words_words.next() { - Some(winfo) => (winfo.word, winfo.word_nchars), - None => { - return ostream.write_all(b"\n"); - } + let Some(winfo) = p_words_words.next() else { + return ostream.write_all(b"\n"); }; + // print the init, if it exists, and get its length - let p_init_len = w_len + let p_init_len = winfo.word_nchars + if opts.crown || opts.tagged { // handle "init" portion ostream.write_all(para.init_str.as_bytes())?; @@ -75,8 +73,9 @@ pub fn break_lines( // except that mail headers get no indent at all 0 }; + // write first word after writing init - ostream.write_all(w.as_bytes())?; + ostream.write_all(winfo.word.as_bytes())?; // does this paragraph require uniform spacing? let uniform = para.mail_header || opts.uniform; @@ -103,15 +102,16 @@ fn break_simple<'a, T: Iterator>>( mut iter: T, args: &mut BreakArgs<'a>, ) -> std::io::Result<()> { - iter.try_fold((args.init_len, false), |l, winfo| { - accum_words_simple(args, l, winfo) + iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| { + accum_words_simple(args, l, prev_punct, winfo) })?; args.ostream.write_all(b"\n") } fn accum_words_simple<'a>( args: &mut BreakArgs<'a>, - (l, prev_punct): (usize, bool), + l: usize, + prev_punct: bool, winfo: &'a WordInfo<'a>, ) -> std::io::Result<(usize, bool)> { // compute the length of this word, considering how tabs will expand at this position on the line @@ -233,14 +233,14 @@ fn find_kp_breakpoints<'a, T: Iterator>>( linebreak: None, break_before: false, demerits: 0, - prev_rat: 0.0f32, + prev_rat: 0.0, length: args.init_len, fresh: false, }]; // this vec holds the current active linebreaks; next_ holds the breaks that will be active for // the next word - let active_breaks = &mut vec![0]; - let next_active_breaks = &mut vec![]; + let mut active_breaks = vec![0]; + let mut next_active_breaks = vec![]; let stretch = (args.opts.width - args.opts.goal) as isize; let minlength = args.opts.goal - stretch as usize; @@ -248,10 +248,7 @@ fn find_kp_breakpoints<'a, T: Iterator>>( let mut is_sentence_start = false; let mut least_demerits = 0; loop { - let w = match iter.next() { - None => break, - Some(w) => w, - }; + let Some(w) = iter.next() else { break }; // if this is the last word, we don't add additional demerits for this break let (is_last_word, is_sentence_end) = match iter.peek() { @@ -358,13 +355,13 @@ fn find_kp_breakpoints<'a, T: Iterator>>( least_demerits = cmp::max(ld_next, 0); } // swap in new list of active breaks - mem::swap(active_breaks, next_active_breaks); + mem::swap(&mut active_breaks, &mut next_active_breaks); // If this was the last word in a sentence, the next one must be the first in the next. is_sentence_start = is_sentence_end; } // return the best path - build_best_path(&linebreaks, active_breaks) + build_best_path(&linebreaks, &active_breaks) } fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> { diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 68c8f78fa89..311ddbc9b83 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -52,18 +52,22 @@ impl Line { } } -// each line's prefix has to be considered to know whether to merge it with -// the next line or not +/// Each line's prefix has to be considered to know whether to merge it with +/// the next line or not #[derive(Debug)] pub struct FileLine { line: String, - indent_end: usize, // the end of the indent, always the start of the text - pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix - indent_len: usize, // display length of indent taking into account tabs - prefix_len: usize, // PREFIX indent length taking into account tabs + /// The end of the indent, always the start of the text + indent_end: usize, + /// The end of the PREFIX's indent, that is, the spaces before the prefix + pfxind_end: usize, + /// Display length of indent taking into account tabs + indent_len: usize, + /// PREFIX indent length taking into account tabs + prefix_len: usize, } -// iterator that produces a stream of Lines from a file +/// Iterator that produces a stream of Lines from a file pub struct FileLines<'a> { opts: &'a FmtOptions, lines: Lines<&'a mut FileOrStdReader>, @@ -74,7 +78,7 @@ impl<'a> FileLines<'a> { FileLines { opts, lines } } - // returns true if this line should be formatted + /// returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, usize) { if !self.opts.use_prefix { return (true, 0); @@ -83,7 +87,7 @@ impl<'a> FileLines<'a> { FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) } - // returns true if this line should be formatted + /// returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { if !self.opts.use_anti_prefix { return true; @@ -148,13 +152,7 @@ impl<'a> Iterator for FileLines<'a> { type Item = Line; fn next(&mut self) -> Option { - let n = match self.lines.next() { - Some(t) => match t { - Ok(tt) => tt, - Err(_) => return None, - }, - None => return None, - }; + let n = self.lines.next()?.ok()?; // if this line is entirely whitespace, // emit a blank line @@ -205,24 +203,33 @@ impl<'a> Iterator for FileLines<'a> { } } -// a paragraph : a collection of FileLines that are to be formatted -// plus info about the paragraph's indentation -// (but we only retain the String from the FileLine; the other info -// is only there to help us in deciding how to merge lines into Paragraphs +/// A paragraph : a collection of FileLines that are to be formatted +/// plus info about the paragraph's indentation +/// +/// We only retain the String from the FileLine; the other info +/// is only there to help us in deciding how to merge lines into Paragraphs #[derive(Debug)] pub struct Paragraph { - lines: Vec, // the lines of the file - pub init_str: String, // string representing the init, that is, the first line's indent - pub init_len: usize, // printable length of the init string considering TABWIDTH - init_end: usize, // byte location of end of init in first line String - pub indent_str: String, // string representing indent - pub indent_len: usize, // length of above - indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) - pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case + /// the lines of the file + lines: Vec, + /// string representing the init, that is, the first line's indent + pub init_str: String, + /// printable length of the init string considering TABWIDTH + pub init_len: usize, + /// byte location of end of init in first line String + init_end: usize, + /// string representing indent + pub indent_str: String, + /// length of above + pub indent_len: usize, + /// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) + indent_end: usize, + /// we need to know if this is a mail header because we do word splitting differently in that case + pub mail_header: bool, } -// an iterator producing a stream of paragraphs from a stream of lines -// given a set of options. +/// An iterator producing a stream of paragraphs from a stream of lines +/// given a set of options. pub struct ParagraphStream<'a> { lines: Peekable>, next_mail: bool, @@ -240,7 +247,7 @@ impl<'a> ParagraphStream<'a> { } } - // detect RFC822 mail header + /// Detect RFC822 mail header fn is_mail_header(line: &FileLine) -> bool { // a mail header begins with either "From " (envelope sender line) // or with a sequence of printable ASCII chars (33 to 126, inclusive, @@ -276,12 +283,9 @@ impl<'a> Iterator for ParagraphStream<'a> { #[allow(clippy::cognitive_complexity)] fn next(&mut self) -> Option> { // return a NoFormatLine in an Err; it should immediately be output - let noformat = match self.lines.peek() { - None => return None, - Some(l) => match *l { - Line::FormatLine(_) => false, - Line::NoFormatLine(_, _) => true, - }, + let noformat = match self.lines.peek()? { + Line::FormatLine(_) => false, + Line::NoFormatLine(_, _) => true, }; // found a NoFormatLine, immediately dump it out @@ -305,95 +309,89 @@ impl<'a> Iterator for ParagraphStream<'a> { let mut in_mail = false; let mut second_done = false; // for when we use crown or tagged mode loop { - { - // peek ahead - // need to explicitly force fl out of scope before we can call self.lines.next() - let fl = match self.lines.peek() { - None => break, - Some(l) => match *l { - Line::FormatLine(ref x) => x, - Line::NoFormatLine(..) => break, - }, - }; + // peek ahead + // need to explicitly force fl out of scope before we can call self.lines.next() + let Some(Line::FormatLine(fl)) = self.lines.peek() else { + break; + }; - if p_lines.is_empty() { - // first time through the loop, get things set up - // detect mail header - if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { - in_mail = true; - // there can't be any indent or pfxind because otherwise is_mail_header - // would fail since there cannot be any whitespace before the colon in a - // valid header field - indent_str.push_str(" "); - indent_len = 2; + if p_lines.is_empty() { + // first time through the loop, get things set up + // detect mail header + if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { + in_mail = true; + // there can't be any indent or pfxind because otherwise is_mail_header + // would fail since there cannot be any whitespace before the colon in a + // valid header field + indent_str.push_str(" "); + indent_len = 2; + } else { + if self.opts.crown || self.opts.tagged { + init_str.push_str(&fl.line[..fl.indent_end]); + init_len = fl.indent_len; + init_end = fl.indent_end; } else { - if self.opts.crown || self.opts.tagged { - init_str.push_str(&fl.line[..fl.indent_end]); - init_len = fl.indent_len; - init_end = fl.indent_end; - } else { - second_done = true; - } - - // these will be overwritten in the 2nd line of crown or tagged mode, but - // we are not guaranteed to get to the 2nd line, e.g., if the next line - // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around - indent_str.push_str(&fl.line[..fl.indent_end]); - indent_len = fl.indent_len; - indent_end = fl.indent_end; - - // save these to check for matching lines - prefix_len = fl.prefix_len; - pfxind_end = fl.pfxind_end; - - // in tagged mode, add 4 spaces of additional indenting by default - // (gnu fmt's behavior is different: it seems to find the closest column to - // indent_end that is divisible by 3. But honestly that behavior seems - // pretty arbitrary. - // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. - if self.opts.tagged { - indent_str.push_str(" "); - indent_len += 4; - } - } - } else if in_mail { - // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { - break; // this line does not begin with spaces + second_done = true; } - } else if !second_done { - // now we have enough info to handle crown margin and tagged mode - // in both crown and tagged modes we require that prefix_len is the same - if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { - break; - } - - // in tagged mode, indent has to be *different* on following lines - if self.opts.tagged - && indent_len - 4 == fl.indent_len - && indent_end == fl.indent_end - { - break; - } - - // this is part of the same paragraph, get the indent info from this line - indent_str.clear(); + // these will be overwritten in the 2nd line of crown or tagged mode, but + // we are not guaranteed to get to the 2nd line, e.g., if the next line + // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around indent_str.push_str(&fl.line[..fl.indent_end]); indent_len = fl.indent_len; indent_end = fl.indent_end; - second_done = true; - } else { - // detect mismatch - if indent_end != fl.indent_end - || pfxind_end != fl.pfxind_end - || indent_len != fl.indent_len - || prefix_len != fl.prefix_len - { - break; + // save these to check for matching lines + prefix_len = fl.prefix_len; + pfxind_end = fl.pfxind_end; + + // in tagged mode, add 4 spaces of additional indenting by default + // (gnu fmt's behavior is different: it seems to find the closest column to + // indent_end that is divisible by 3. But honestly that behavior seems + // pretty arbitrary. + // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. + if self.opts.tagged { + indent_str.push_str(" "); + indent_len += 4; } } + } else if in_mail { + // lines following mail headers must begin with spaces + if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { + break; // this line does not begin with spaces + } + } else if !second_done { + // now we have enough info to handle crown margin and tagged mode + + // in both crown and tagged modes we require that prefix_len is the same + if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { + break; + } + + // in tagged mode, indent has to be *different* on following lines + if self.opts.tagged + && indent_len - 4 == fl.indent_len + && indent_end == fl.indent_end + { + break; + } + + // this is part of the same paragraph, get the indent info from this line + indent_str.clear(); + indent_str.push_str(&fl.line[..fl.indent_end]); + indent_len = fl.indent_len; + indent_end = fl.indent_end; + + second_done = true; + } else { + // detect mismatch + if indent_end != fl.indent_end + || pfxind_end != fl.pfxind_end + || indent_len != fl.indent_len + || prefix_len != fl.prefix_len + { + break; + } } p_lines.push(self.lines.next().unwrap().get_formatline().line); @@ -429,7 +427,7 @@ pub struct ParaWords<'a> { } impl<'a> ParaWords<'a> { - pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> { + pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self { let mut pw = ParaWords { opts, para, From d78923e4ccda95db136c358913a09642f4ee9729 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 11:54:43 +0100 Subject: [PATCH 081/429] fmt: extract determining options to separate function --- src/uu/fmt/src/fmt.rs | 194 +++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 99 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index c30d923b76b..3a494c868e5 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix -use clap::{crate_version, Arg, ArgAction, Command}; +use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::cmp; use std::fs::File; use std::io::{stdin, stdout, Write}; @@ -40,6 +40,9 @@ static OPT_TAB_WIDTH: &str = "tab-width"; static ARG_FILES: &str = "files"; +// by default, goal is 93% of width +const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; + pub type FileOrStdReader = BufReader>; pub struct FmtOptions { crown: bool, @@ -59,25 +62,97 @@ pub struct FmtOptions { tabwidth: usize, } -impl Default for FmtOptions { - fn default() -> Self { - Self { - crown: false, - tagged: false, - mail: false, - uniform: false, - quick: false, - split_only: false, - use_prefix: false, - prefix: String::new(), - xprefix: false, - use_anti_prefix: false, - anti_prefix: String::new(), - xanti_prefix: false, - width: 75, - goal: 70, - tabwidth: 8, +impl FmtOptions { + fn from_matches(matches: &ArgMatches) -> UResult { + let mut tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); + let mut crown = matches.get_flag(OPT_CROWN_MARGIN); + + let mail = matches.get_flag(OPT_PRESERVE_HEADERS); + let uniform = matches.get_flag(OPT_UNIFORM_SPACING); + let quick = matches.get_flag(OPT_QUICK); + let split_only = matches.get_flag(OPT_SPLIT_ONLY); + + if crown { + tagged = false; + } + if split_only { + crown = false; + tagged = false; + } + + let xprefix = matches.contains_id(OPT_EXACT_PREFIX); + let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); + + let mut prefix = String::new(); + let mut use_prefix = false; + if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { + prefix = s; + use_prefix = true; + }; + + let mut anti_prefix = String::new(); + let mut use_anti_prefix = false; + if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { + anti_prefix = s; + use_anti_prefix = true; + }; + + let mut width = 75; + let mut goal = 70; + if let Some(w) = matches.get_one::(OPT_WIDTH) { + width = *w; + if width > MAX_WIDTH { + return Err(USimpleError::new( + 1, + format!("invalid width: '{}': Numerical result out of range", width), + )); + } + goal = cmp::min(width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, width - 3); + }; + + if let Some(g) = matches.get_one::(OPT_GOAL) { + goal = *g; + if !matches.contains_id(OPT_WIDTH) { + width = cmp::max(goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, goal + 3); + } else if goal > width { + return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + } + }; + + let mut tabwidth = 8; + if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { + tabwidth = match s.parse::() { + Ok(t) => t, + Err(e) => { + return Err(USimpleError::new( + 1, + format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e), + )); + } + }; + }; + + if tabwidth < 1 { + tabwidth = 1; } + + Ok(Self { + crown, + tagged, + mail, + uniform, + quick, + split_only, + use_prefix, + prefix, + xprefix, + use_anti_prefix, + anti_prefix, + xanti_prefix, + width, + goal, + tabwidth, + }) } } @@ -90,12 +165,7 @@ impl Default for FmtOptions { /// # Returns /// /// A tuple containing a vector of file names and a `FmtOptions` struct. -#[allow(clippy::cognitive_complexity)] -#[allow(clippy::field_reassign_with_default)] fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions)> { - // by default, goal is 93% of width - const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; - let matches = uu_app().try_get_matches_from(args)?; let mut files: Vec = matches @@ -103,81 +173,7 @@ fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); - let mut fmt_opts = FmtOptions::default(); - - fmt_opts.tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); - if matches.get_flag(OPT_CROWN_MARGIN) { - fmt_opts.crown = true; - fmt_opts.tagged = false; - } - fmt_opts.mail = matches.get_flag(OPT_PRESERVE_HEADERS); - fmt_opts.uniform = matches.get_flag(OPT_UNIFORM_SPACING); - fmt_opts.quick = matches.get_flag(OPT_QUICK); - if matches.get_flag(OPT_SPLIT_ONLY) { - fmt_opts.split_only = true; - fmt_opts.crown = false; - fmt_opts.tagged = false; - } - fmt_opts.xprefix = matches.contains_id(OPT_EXACT_PREFIX); - fmt_opts.xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); - - if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { - fmt_opts.prefix = s; - fmt_opts.use_prefix = true; - }; - - if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { - fmt_opts.anti_prefix = s; - fmt_opts.use_anti_prefix = true; - }; - - if let Some(width) = matches.get_one::(OPT_WIDTH) { - fmt_opts.width = *width; - if fmt_opts.width > MAX_WIDTH { - return Err(USimpleError::new( - 1, - format!( - "invalid width: '{}': Numerical result out of range", - fmt_opts.width, - ), - )); - } - fmt_opts.goal = cmp::min( - fmt_opts.width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, - fmt_opts.width - 3, - ); - }; - - if let Some(goal) = matches.get_one::(OPT_GOAL) { - fmt_opts.goal = *goal; - if !matches.contains_id(OPT_WIDTH) { - fmt_opts.width = cmp::max( - fmt_opts.goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, - fmt_opts.goal + 3, - ); - } else if fmt_opts.goal > fmt_opts.width { - return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); - } - }; - - if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { - fmt_opts.tabwidth = match s.parse::() { - Ok(t) => t, - Err(e) => { - return Err(USimpleError::new( - 1, - format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e), - )); - } - }; - }; - - if fmt_opts.tabwidth < 1 { - fmt_opts.tabwidth = 1; - } - - // immutable now - let fmt_opts = fmt_opts; + let fmt_opts = FmtOptions::from_matches(&matches)?; if files.is_empty() { files.push("-".to_owned()); From f5206ce783d1606432c20f67d8ab027fcab06e7c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:05:35 +0100 Subject: [PATCH 082/429] fmt: merge prefix and use_prefix options (same for anti_prefix) --- src/uu/fmt/src/fmt.rs | 23 ++++------------------- src/uu/fmt/src/parasplit.rs | 22 +++++++++------------- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3a494c868e5..3a02c642902 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -49,11 +49,9 @@ pub struct FmtOptions { tagged: bool, mail: bool, split_only: bool, - use_prefix: bool, - prefix: String, + prefix: Option, xprefix: bool, - use_anti_prefix: bool, - anti_prefix: String, + anti_prefix: Option, xanti_prefix: bool, uniform: bool, quick: bool, @@ -83,19 +81,8 @@ impl FmtOptions { let xprefix = matches.contains_id(OPT_EXACT_PREFIX); let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); - let mut prefix = String::new(); - let mut use_prefix = false; - if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { - prefix = s; - use_prefix = true; - }; - - let mut anti_prefix = String::new(); - let mut use_anti_prefix = false; - if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { - anti_prefix = s; - use_anti_prefix = true; - }; + let prefix = matches.get_one::(OPT_PREFIX).map(String::from); + let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); let mut width = 75; let mut goal = 70; @@ -143,10 +130,8 @@ impl FmtOptions { uniform, quick, split_only, - use_prefix, prefix, xprefix, - use_anti_prefix, anti_prefix, xanti_prefix, width, diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 311ddbc9b83..f22400dff20 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -80,24 +80,20 @@ impl<'a> FileLines<'a> { /// returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, usize) { - if !self.opts.use_prefix { + let Some(prefix) = &self.opts.prefix else { return (true, 0); - } + }; - FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) + FileLines::match_prefix_generic(prefix, line, self.opts.xprefix) } /// returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { - if !self.opts.use_anti_prefix { + let Some(anti_prefix) = &self.opts.anti_prefix else { return true; - } + }; - match FileLines::match_prefix_generic( - &self.opts.anti_prefix[..], - line, - self.opts.xanti_prefix, - ) { + match FileLines::match_prefix_generic(anti_prefix, line, self.opts.xanti_prefix) { (true, _) => false, (_, _) => true, } @@ -176,7 +172,7 @@ impl<'a> Iterator for FileLines<'a> { // not truly blank we will not allow mail headers on the // following line) if pmatch - && n[poffset + self.opts.prefix.len()..] + && n[poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len())..] .chars() .all(char::is_whitespace) { @@ -190,7 +186,7 @@ impl<'a> Iterator for FileLines<'a> { } // figure out the indent, prefix, and prefixindent ending points - let prefix_end = poffset + self.opts.prefix.len(); + let prefix_end = poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len()); let (indent_end, prefix_len, indent_len) = self.compute_indent(&n[..], prefix_end); Some(Line::FormatLine(FileLine { @@ -357,7 +353,7 @@ impl<'a> Iterator for ParagraphStream<'a> { } } else if in_mail { // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { + if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.pfxind_end == 0) { break; // this line does not begin with spaces } } else if !second_done { From 96ca5e609eacf4fd09316da2c4bbd165ff052273 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:22:46 +0100 Subject: [PATCH 083/429] fmt: refactor width and goal calculation --- src/uu/fmt/src/fmt.rs | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3a02c642902..0ed32641fbb 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -6,7 +6,6 @@ // spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; -use std::cmp; use std::fs::File; use std::io::{stdin, stdout, Write}; use std::io::{BufReader, BufWriter, Read, Stdout}; @@ -84,28 +83,33 @@ impl FmtOptions { let prefix = matches.get_one::(OPT_PREFIX).map(String::from); let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); - let mut width = 75; - let mut goal = 70; - if let Some(w) = matches.get_one::(OPT_WIDTH) { - width = *w; - if width > MAX_WIDTH { - return Err(USimpleError::new( - 1, - format!("invalid width: '{}': Numerical result out of range", width), - )); + let width_opt = matches.get_one::(OPT_WIDTH); + let goal_opt = matches.get_one::(OPT_GOAL); + let (width, goal) = match (width_opt, goal_opt) { + (Some(&w), Some(&g)) => { + if g > w { + return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + } + (w, g) } - goal = cmp::min(width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, width - 3); - }; - - if let Some(g) = matches.get_one::(OPT_GOAL) { - goal = *g; - if !matches.contains_id(OPT_WIDTH) { - width = cmp::max(goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, goal + 3); - } else if goal > width { - return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + (Some(&w), None) => { + let g = (w * DEFAULT_GOAL_TO_WIDTH_RATIO / 100).min(w - 3); + (w, g) } + (None, Some(&g)) => { + let w = (g * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO).max(g + 3); + (w, g) + } + (None, None) => (75, 70), }; + if width > MAX_WIDTH { + return Err(USimpleError::new( + 1, + format!("invalid width: '{}': Numerical result out of range", width), + )); + } + let mut tabwidth = 8; if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { tabwidth = match s.parse::() { From 8a494530572ca1a2221416a284144a9e44177f8e Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:24:18 +0100 Subject: [PATCH 084/429] fmt: clean up imports --- src/uu/fmt/src/fmt.rs | 4 ++-- src/uu/fmt/src/linebreak.rs | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 0ed32641fbb..3461a79ba7f 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -13,8 +13,8 @@ use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show_warning}; -use self::linebreak::break_lines; -use self::parasplit::ParagraphStream; +use linebreak::break_lines; +use parasplit::ParagraphStream; mod linebreak; mod parasplit; diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index 7cd65d86149..306c15f3614 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -5,10 +5,8 @@ // spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen -use std::cmp; -use std::i64; use std::io::{BufWriter, Stdout, Write}; -use std::mem; +use std::{cmp, i64, mem}; use uucore::crash; From 0b4d4b610cc510a7aff4095447ddf8195cf27072 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:30:01 +0100 Subject: [PATCH 085/429] fmt: put options into module and change static to const --- src/uu/fmt/src/fmt.rs | 100 +++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3461a79ba7f..e44b7e0e5be 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -7,8 +7,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::fs::File; -use std::io::{stdin, stdout, Write}; -use std::io::{BufReader, BufWriter, Read, Stdout}; +use std::io::{stdin, stdout, BufReader, BufWriter, Read, Stdout, Write}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show_warning}; @@ -19,25 +18,26 @@ use parasplit::ParagraphStream; mod linebreak; mod parasplit; -static ABOUT: &str = help_about!("fmt.md"); +const ABOUT: &str = help_about!("fmt.md"); const USAGE: &str = help_usage!("fmt.md"); -static MAX_WIDTH: usize = 2500; - -static OPT_CROWN_MARGIN: &str = "crown-margin"; -static OPT_TAGGED_PARAGRAPH: &str = "tagged-paragraph"; -static OPT_PRESERVE_HEADERS: &str = "preserve-headers"; -static OPT_SPLIT_ONLY: &str = "split-only"; -static OPT_UNIFORM_SPACING: &str = "uniform-spacing"; -static OPT_PREFIX: &str = "prefix"; -static OPT_SKIP_PREFIX: &str = "skip-prefix"; -static OPT_EXACT_PREFIX: &str = "exact-prefix"; -static OPT_EXACT_SKIP_PREFIX: &str = "exact-skip-prefix"; -static OPT_WIDTH: &str = "width"; -static OPT_GOAL: &str = "goal"; -static OPT_QUICK: &str = "quick"; -static OPT_TAB_WIDTH: &str = "tab-width"; - -static ARG_FILES: &str = "files"; +const MAX_WIDTH: usize = 2500; + +mod options { + pub const CROWN_MARGIN: &str = "crown-margin"; + pub const TAGGED_PARAGRAPH: &str = "tagged-paragraph"; + pub const PRESERVE_HEADERS: &str = "preserve-headers"; + pub const SPLIT_ONLY: &str = "split-only"; + pub const UNIFORM_SPACING: &str = "uniform-spacing"; + pub const PREFIX: &str = "prefix"; + pub const SKIP_PREFIX: &str = "skip-prefix"; + pub const EXACT_PREFIX: &str = "exact-prefix"; + pub const EXACT_SKIP_PREFIX: &str = "exact-skip-prefix"; + pub const WIDTH: &str = "width"; + pub const GOAL: &str = "goal"; + pub const QUICK: &str = "quick"; + pub const TAB_WIDTH: &str = "tab-width"; + pub const FILES: &str = "files"; +} // by default, goal is 93% of width const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; @@ -61,13 +61,13 @@ pub struct FmtOptions { impl FmtOptions { fn from_matches(matches: &ArgMatches) -> UResult { - let mut tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); - let mut crown = matches.get_flag(OPT_CROWN_MARGIN); + let mut tagged = matches.get_flag(options::TAGGED_PARAGRAPH); + let mut crown = matches.get_flag(options::CROWN_MARGIN); - let mail = matches.get_flag(OPT_PRESERVE_HEADERS); - let uniform = matches.get_flag(OPT_UNIFORM_SPACING); - let quick = matches.get_flag(OPT_QUICK); - let split_only = matches.get_flag(OPT_SPLIT_ONLY); + let mail = matches.get_flag(options::PRESERVE_HEADERS); + let uniform = matches.get_flag(options::UNIFORM_SPACING); + let quick = matches.get_flag(options::QUICK); + let split_only = matches.get_flag(options::SPLIT_ONLY); if crown { tagged = false; @@ -77,14 +77,16 @@ impl FmtOptions { tagged = false; } - let xprefix = matches.contains_id(OPT_EXACT_PREFIX); - let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); + let xprefix = matches.contains_id(options::EXACT_PREFIX); + let xanti_prefix = matches.contains_id(options::SKIP_PREFIX); - let prefix = matches.get_one::(OPT_PREFIX).map(String::from); - let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); + let prefix = matches.get_one::(options::PREFIX).map(String::from); + let anti_prefix = matches + .get_one::(options::SKIP_PREFIX) + .map(String::from); - let width_opt = matches.get_one::(OPT_WIDTH); - let goal_opt = matches.get_one::(OPT_GOAL); + let width_opt = matches.get_one::(options::WIDTH); + let goal_opt = matches.get_one::(options::GOAL); let (width, goal) = match (width_opt, goal_opt) { (Some(&w), Some(&g)) => { if g > w { @@ -111,7 +113,7 @@ impl FmtOptions { } let mut tabwidth = 8; - if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { + if let Some(s) = matches.get_one::(options::TAB_WIDTH) { tabwidth = match s.parse::() { Ok(t) => t, Err(e) => { @@ -158,7 +160,7 @@ fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions) let matches = uu_app().try_get_matches_from(args)?; let mut files: Vec = matches - .get_many::(ARG_FILES) + .get_many::(options::FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); @@ -242,9 +244,9 @@ pub fn uu_app() -> Command { .override_usage(format_usage(USAGE)) .infer_long_args(true) .arg( - Arg::new(OPT_CROWN_MARGIN) + Arg::new(options::CROWN_MARGIN) .short('c') - .long(OPT_CROWN_MARGIN) + .long(options::CROWN_MARGIN) .help( "First and second line of paragraph \ may have different indentations, in which \ @@ -254,7 +256,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_TAGGED_PARAGRAPH) + Arg::new(options::TAGGED_PARAGRAPH) .short('t') .long("tagged-paragraph") .help( @@ -264,7 +266,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_PRESERVE_HEADERS) + Arg::new(options::PRESERVE_HEADERS) .short('m') .long("preserve-headers") .help( @@ -274,14 +276,14 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_SPLIT_ONLY) + Arg::new(options::SPLIT_ONLY) .short('s') .long("split-only") .help("Split lines only, do not reflow.") .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_UNIFORM_SPACING) + Arg::new(options::UNIFORM_SPACING) .short('u') .long("uniform-spacing") .help( @@ -294,7 +296,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_PREFIX) + Arg::new(options::PREFIX) .short('p') .long("prefix") .help( @@ -306,7 +308,7 @@ pub fn uu_app() -> Command { .value_name("PREFIX"), ) .arg( - Arg::new(OPT_SKIP_PREFIX) + Arg::new(options::SKIP_PREFIX) .short('P') .long("skip-prefix") .help( @@ -317,7 +319,7 @@ pub fn uu_app() -> Command { .value_name("PSKIP"), ) .arg( - Arg::new(OPT_EXACT_PREFIX) + Arg::new(options::EXACT_PREFIX) .short('x') .long("exact-prefix") .help( @@ -327,7 +329,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_EXACT_SKIP_PREFIX) + Arg::new(options::EXACT_SKIP_PREFIX) .short('X') .long("exact-skip-prefix") .help( @@ -337,7 +339,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_WIDTH) + Arg::new(options::WIDTH) .short('w') .long("width") .help("Fill output lines up to a maximum of WIDTH columns, default 75.") @@ -345,7 +347,7 @@ pub fn uu_app() -> Command { .value_parser(clap::value_parser!(usize)), ) .arg( - Arg::new(OPT_GOAL) + Arg::new(options::GOAL) .short('g') .long("goal") .help("Goal width, default of 93% of WIDTH. Must be less than WIDTH.") @@ -353,7 +355,7 @@ pub fn uu_app() -> Command { .value_parser(clap::value_parser!(usize)), ) .arg( - Arg::new(OPT_QUICK) + Arg::new(options::QUICK) .short('q') .long("quick") .help( @@ -363,7 +365,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_TAB_WIDTH) + Arg::new(options::TAB_WIDTH) .short('T') .long("tab-width") .help( @@ -374,7 +376,7 @@ pub fn uu_app() -> Command { .value_name("TABWIDTH"), ) .arg( - Arg::new(ARG_FILES) + Arg::new(options::FILES) .action(ArgAction::Append) .value_hint(clap::ValueHint::FilePath), ) From 2a8f4ec294369c228bc8063676879a9ac9e436e5 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:34:04 +0100 Subject: [PATCH 086/429] fmt: inline parse_arguments function --- src/uu/fmt/src/fmt.rs | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index e44b7e0e5be..4380487814b 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -147,32 +147,6 @@ impl FmtOptions { } } -/// Parse the command line arguments and return the list of files and formatting options. -/// -/// # Arguments -/// -/// * `args` - Command line arguments. -/// -/// # Returns -/// -/// A tuple containing a vector of file names and a `FmtOptions` struct. -fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions)> { - let matches = uu_app().try_get_matches_from(args)?; - - let mut files: Vec = matches - .get_many::(options::FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); - - let fmt_opts = FmtOptions::from_matches(&matches)?; - - if files.is_empty() { - files.push("-".to_owned()); - } - - Ok((files, fmt_opts)) -} - /// Process the content of a file and format it according to the provided options. /// /// # Arguments @@ -226,7 +200,14 @@ fn process_file( #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let (files, fmt_opts) = parse_arguments(args)?; + let matches = uu_app().try_get_matches_from(args)?; + + let files: Vec = matches + .get_many::(options::FILES) + .map(|v| v.map(ToString::to_string).collect()) + .unwrap_or(vec!["-".into()]); + + let fmt_opts = FmtOptions::from_matches(&matches)?; let mut ostream = BufWriter::new(stdout()); From 2d5ea264106b26caea8133f8d3d1c31ade2ac3ba Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 28 Nov 2023 14:33:19 +0100 Subject: [PATCH 087/429] fmt: rename pfxind_end -> prefix_indent_end --- src/uu/fmt/src/parasplit.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index f22400dff20..1ae8ea34f42 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) INFTY MULT PSKIP accum aftertab beforetab breakwords fmt's formatline linebreak linebreaking linebreaks linelen maxlength minlength nchars noformat noformatline ostream overlen parasplit pfxind plass pmatch poffset posn powf prefixindent punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen wordsplits xanti xprefix +// spell-checker:ignore (ToDO) INFTY MULT PSKIP accum aftertab beforetab breakwords fmt's formatline linebreak linebreaking linebreaks linelen maxlength minlength nchars noformat noformatline ostream overlen parasplit plass pmatch poffset posn powf prefixindent punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen wordsplits xanti xprefix use std::io::{BufRead, Lines}; use std::iter::Peekable; @@ -60,7 +60,7 @@ pub struct FileLine { /// The end of the indent, always the start of the text indent_end: usize, /// The end of the PREFIX's indent, that is, the spaces before the prefix - pfxind_end: usize, + prefix_indent_end: usize, /// Display length of indent taking into account tabs indent_len: usize, /// PREFIX indent length taking into account tabs @@ -192,7 +192,7 @@ impl<'a> Iterator for FileLines<'a> { Some(Line::FormatLine(FileLine { line: n, indent_end, - pfxind_end: poffset, + prefix_indent_end: poffset, indent_len, prefix_len, })) @@ -210,7 +210,7 @@ pub struct Paragraph { lines: Vec, /// string representing the init, that is, the first line's indent pub init_str: String, - /// printable length of the init string considering TABWIDTH + /// printable length of the init string considering TABWIDTH pub init_len: usize, /// byte location of end of init in first line String init_end: usize, @@ -299,7 +299,7 @@ impl<'a> Iterator for ParagraphStream<'a> { let mut indent_end = 0; let mut indent_len = 0; let mut prefix_len = 0; - let mut pfxind_end = 0; + let mut prefix_indent_end = 0; let mut p_lines = Vec::new(); let mut in_mail = false; @@ -316,7 +316,7 @@ impl<'a> Iterator for ParagraphStream<'a> { // detect mail header if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { in_mail = true; - // there can't be any indent or pfxind because otherwise is_mail_header + // there can't be any indent or prefixindent because otherwise is_mail_header // would fail since there cannot be any whitespace before the colon in a // valid header field indent_str.push_str(" "); @@ -339,7 +339,7 @@ impl<'a> Iterator for ParagraphStream<'a> { // save these to check for matching lines prefix_len = fl.prefix_len; - pfxind_end = fl.pfxind_end; + prefix_indent_end = fl.prefix_indent_end; // in tagged mode, add 4 spaces of additional indenting by default // (gnu fmt's behavior is different: it seems to find the closest column to @@ -353,14 +353,14 @@ impl<'a> Iterator for ParagraphStream<'a> { } } else if in_mail { // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.pfxind_end == 0) { + if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.prefix_indent_end == 0) { break; // this line does not begin with spaces } } else if !second_done { // now we have enough info to handle crown margin and tagged mode // in both crown and tagged modes we require that prefix_len is the same - if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { + if prefix_len != fl.prefix_len || prefix_indent_end != fl.prefix_indent_end { break; } @@ -382,7 +382,7 @@ impl<'a> Iterator for ParagraphStream<'a> { } else { // detect mismatch if indent_end != fl.indent_end - || pfxind_end != fl.pfxind_end + || prefix_indent_end != fl.prefix_indent_end || indent_len != fl.indent_len || prefix_len != fl.prefix_len { From 7383820354d497b647bd721f3b147d9700676847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dylan=20A=C3=AFssi?= Date: Tue, 28 Nov 2023 16:51:20 +0100 Subject: [PATCH 088/429] uuhelp_parser: include missing LICENSE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dylan Aïssi --- src/uuhelp_parser/LICENSE | 1 + 1 file changed, 1 insertion(+) create mode 120000 src/uuhelp_parser/LICENSE diff --git a/src/uuhelp_parser/LICENSE b/src/uuhelp_parser/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/src/uuhelp_parser/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file From 6eec4fe8f9ce40e95199de2fc67170d2cd11bfae Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 29 Nov 2023 09:35:22 +0100 Subject: [PATCH 089/429] cut: add test & improve error message --- src/uu/cut/src/cut.rs | 2 +- tests/by-util/test_cut.rs | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 05e8bc6e424..0555be14f73 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -426,7 +426,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { delim = ""; } if delim.chars().count() > 1 { - Err("invalid input: The '--delimiter' ('-d') option expects empty or 1 character long, but was provided a value 2 characters or longer".into()) + Err("the delimiter must be a single character".into()) } else { let delim = if delim.is_empty() { "\0".to_owned() diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 184e413a867..112dc0fd3e5 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -126,7 +126,7 @@ fn test_too_large() { } #[test] -fn test_specify_delimiter() { +fn test_delimiter() { for param in ["-d", "--delimiter", "--del"] { new_ucmd!() .args(&[param, ":", "-f", COMPLEX_SEQUENCE.sequence, INPUT]) @@ -135,6 +135,15 @@ fn test_specify_delimiter() { } } +#[test] +fn test_delimiter_with_more_than_one_char() { + new_ucmd!() + .args(&["-d", "ab", "-f1"]) + .fails() + .stderr_contains("cut: the delimiter must be a single character") + .no_stdout(); +} + #[test] fn test_output_delimiter() { // we use -d here to ensure output delimiter From 66e0835e72a72d8ff0a97213b5320fc45973ecbc Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 29 Nov 2023 09:55:55 +0100 Subject: [PATCH 090/429] fuzz the echo command --- fuzz/Cargo.toml | 7 +++ fuzz/fuzz_targets/fuzz_echo.rs | 93 ++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 fuzz/fuzz_targets/fuzz_echo.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 630af4650b6..b27f5b58677 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -17,6 +17,7 @@ uu_date = { path = "../src/uu/date/" } uu_test = { path = "../src/uu/test/" } uu_expr = { path = "../src/uu/expr/" } uu_printf = { path = "../src/uu/printf/" } +uu_echo = { path = "../src/uu/echo/" } # Prevent this from interfering with workspaces @@ -35,6 +36,12 @@ path = "fuzz_targets/fuzz_printf.rs" test = false doc = false +[[bin]] +name = "fuzz_echo" +path = "fuzz_targets/fuzz_echo.rs" +test = false +doc = false + [[bin]] name = "fuzz_expr" path = "fuzz_targets/fuzz_expr.rs" diff --git a/fuzz/fuzz_targets/fuzz_echo.rs b/fuzz/fuzz_targets/fuzz_echo.rs new file mode 100644 index 00000000000..3d810085301 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_echo.rs @@ -0,0 +1,93 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use uu_echo::uumain; // Changed from uu_printf to uu_echo + +use rand::prelude::SliceRandom; +use rand::Rng; +use std::ffi::OsString; + +mod fuzz_common; +use crate::fuzz_common::CommandResult; +use crate::fuzz_common::{ + compare_result, generate_and_run_uumain, generate_random_string, run_gnu_cmd, +}; + +static CMD_PATH: &str = "/usr/bin/echo"; // Changed from "printf" to "echo" + +fn generate_echo() -> String { + let mut rng = rand::thread_rng(); + let mut echo_str = String::new(); + + // Randomly decide whether to include options + let include_n = rng.gen_bool(0.1); // 10% chance + let include_e = rng.gen_bool(0.1); // 10% chance + let include_E = rng.gen_bool(0.1); // 10% chance + // --help and --version are typically not included in fuzzing as they don't change output format + + if include_n { + echo_str.push_str("-n "); + } + if include_e { + echo_str.push_str("-e "); + } + if include_E { + echo_str.push_str("-E "); + } + + // Add a random string + echo_str.push_str(&generate_random_string(rng.gen_range(1..=10))); + + // Include escape sequences if -e is enabled + if include_e { + // Add a 10% chance of including an escape sequence + if rng.gen_bool(0.1) { + echo_str.push_str(&generate_escape_sequence(&mut rng)); // This function should handle echo-specific sequences + } + } + + echo_str +} + +// You should also modify the generate_escape_sequence function to include echo-specific sequences +fn generate_escape_sequence(rng: &mut impl Rng) -> String { + let escape_sequences = [ + "\\\\", "\\a", "\\b", "\\c", "\\e", "\\f", "\\n", "\\r", "\\t", "\\v", + "\\0NNN", // You can randomly generate NNN + "\\xHH", // You can randomly generate HH + // ... other sequences + ]; + escape_sequences.choose(rng).unwrap().to_string() +} + +fuzz_target!(|_data: &[u8]| { + let echo_input = generate_echo(); // Changed from generate_printf to generate_echo + let mut args = vec![OsString::from("echo")]; // Changed from "printf" to "echo" + args.extend(echo_input.split_whitespace().map(OsString::from)); + let rust_result = generate_and_run_uumain(&args, uumain); // uumain function from uu_echo + + let gnu_result = match run_gnu_cmd(CMD_PATH, &args[1..], false) { + Ok(result) => result, + Err(error_result) => { + eprintln!("Failed to run GNU command:"); + eprintln!("Stderr: {}", error_result.stderr); + eprintln!("Exit Code: {}", error_result.exit_code); + CommandResult { + stdout: String::new(), + stderr: error_result.stderr, + exit_code: error_result.exit_code, + } + } + }; + + compare_result( + "echo", + &format!("{:?}", &args[1..]), + &rust_result.stdout, + &gnu_result.stdout, + &rust_result.stderr, + &gnu_result.stderr, + rust_result.exit_code, + gnu_result.exit_code, + false, // Set to true if you want to fail on stderr diff + ); +}); From 203e79d74b406a02ab7202f36c66ed148f11411c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 29 Nov 2023 14:09:13 +0100 Subject: [PATCH 091/429] Fix unused import: `UError` --- src/uu/kill/src/kill.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/kill/src/kill.rs b/src/uu/kill/src/kill.rs index b0e18a79820..f353fd7cab3 100644 --- a/src/uu/kill/src/kill.rs +++ b/src/uu/kill/src/kill.rs @@ -10,7 +10,7 @@ use nix::sys::signal::{self, Signal}; use nix::unistd::Pid; use std::io::Error; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::error::{FromIo, UResult, USimpleError}; use uucore::signals::{signal_by_name_or_value, ALL_SIGNALS}; use uucore::{format_usage, help_about, help_usage, show}; From 420df3db3d14f89c1b11278fc485aac7fd1d9745 Mon Sep 17 00:00:00 2001 From: Laurent Cheylus Date: Wed, 29 Nov 2023 17:44:50 +0100 Subject: [PATCH 092/429] Add support in uucore for OpenBSD - uucore/src/lib/features/fs.rs: add target_os = OpenBSD when needed - uucore/src/lib/features/fsext.rs: implement FsUsage::new for OpenBSD - fixes uutils/coreutils#5448 - initial code by n1000 https://github.com/n1000/coreutils/tree/openbsd_compile_fixes Signed-off-by: Laurent Cheylus --- src/uucore/src/lib/features/fs.rs | 5 +++- src/uucore/src/lib/features/fsext.rs | 39 ++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index de4c0b08dbe..94ca82c8098 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -115,6 +115,7 @@ impl FileInformation { not(target_os = "android"), not(target_os = "freebsd"), not(target_os = "netbsd"), + not(target_os = "openbsd"), not(target_os = "illumos"), not(target_os = "solaris"), not(target_arch = "aarch64"), @@ -130,6 +131,7 @@ impl FileInformation { target_os = "android", target_os = "freebsd", target_os = "netbsd", + target_os = "openbsd", target_os = "illumos", target_os = "solaris", target_arch = "aarch64", @@ -146,13 +148,14 @@ impl FileInformation { #[cfg(unix)] pub fn inode(&self) -> u64 { #[cfg(all( - not(any(target_os = "freebsd", target_os = "netbsd")), + not(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd")), target_pointer_width = "64" ))] return self.0.st_ino; #[cfg(any( target_os = "freebsd", target_os = "netbsd", + target_os = "openbsd", not(target_pointer_width = "64") ))] return self.0.st_ino.into(); diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 8b1c42de6d1..93fedb44b7c 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -497,7 +497,10 @@ impl FsUsage { #[cfg(unix)] pub fn new(statvfs: StatFs) -> Self { { - #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] + #[cfg(all( + not(any(target_os = "freebsd", target_os = "openbsd")), + target_pointer_width = "64" + ))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks, @@ -507,7 +510,10 @@ impl FsUsage { files: statvfs.f_files, ffree: statvfs.f_ffree, }; - #[cfg(all(not(target_os = "freebsd"), not(target_pointer_width = "64")))] + #[cfg(all( + not(any(target_os = "freebsd", target_os = "openbsd")), + not(target_pointer_width = "64") + ))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks.into(), @@ -530,6 +536,19 @@ impl FsUsage { files: statvfs.f_files, ffree: statvfs.f_ffree.try_into().unwrap(), }; + #[cfg(target_os = "openbsd")] + return Self { + blocksize: statvfs.f_bsize.into(), + blocks: statvfs.f_blocks, + bfree: statvfs.f_bfree, + bavail: statvfs.f_bavail.try_into().unwrap(), + bavail_top_bit_set: ((std::convert::TryInto::::try_into(statvfs.f_bavail) + .unwrap()) + & (1u64.rotate_right(1))) + != 0, + files: statvfs.f_files, + ffree: statvfs.f_ffree, + }; } } #[cfg(not(unix))] @@ -617,6 +636,7 @@ impl FsMeta for StatFs { not(target_vendor = "apple"), not(target_os = "android"), not(target_os = "freebsd"), + not(target_os = "openbsd"), not(target_os = "illumos"), not(target_os = "solaris"), not(target_arch = "s390x"), @@ -630,6 +650,7 @@ impl FsMeta for StatFs { target_arch = "s390x", target_vendor = "apple", target_os = "android", + target_os = "openbsd", not(target_pointer_width = "64") ) ))] @@ -655,11 +676,19 @@ impl FsMeta for StatFs { return self.f_bfree.into(); } fn avail_blocks(&self) -> u64 { - #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] + #[cfg(all( + not(target_os = "freebsd"), + not(target_os = "openbsd"), + target_pointer_width = "64" + ))] return self.f_bavail; - #[cfg(all(not(target_os = "freebsd"), not(target_pointer_width = "64")))] + #[cfg(all( + not(target_os = "freebsd"), + not(target_os = "openbsd"), + not(target_pointer_width = "64") + ))] return self.f_bavail.into(); - #[cfg(target_os = "freebsd")] + #[cfg(any(target_os = "freebsd", target_os = "openbsd"))] return self.f_bavail.try_into().unwrap(); } fn total_file_nodes(&self) -> u64 { From 173153122b255bb57afbbad1872a26ebe9050c16 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 21:49:08 +0000 Subject: [PATCH 093/429] chore(deps): update rust crate lscolors to 0.16.0 --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d7682f839e4..3f5e4280964 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1240,9 +1240,9 @@ dependencies = [ [[package]] name = "lscolors" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7015a04103ad78abb77e4b79ed151e767922d1cfde5f62640471c629a2320d" +checksum = "ab0b209ec3976527806024406fe765474b9a1750a0ed4b8f0372364741f50e7b" dependencies = [ "nu-ansi-term", ] diff --git a/Cargo.toml b/Cargo.toml index ba701b2d5e5..14e700ee0a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -287,7 +287,7 @@ half = "2.3" indicatif = "0.17" itertools = "0.12.0" libc = "0.2.150" -lscolors = { version = "0.15.0", default-features = false, features = [ +lscolors = { version = "0.16.0", default-features = false, features = [ "nu-ansi-term", ] } memchr = "2" From 4d5c034eb10d0a07098abb75ddd5771b6d444c05 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 29 Nov 2023 14:11:43 +0100 Subject: [PATCH 094/429] Run the echo fuzzer in the CI --- .github/workflows/fuzzing.yml | 1 + fuzz/fuzz_targets/fuzz_echo.rs | 21 ++++++++------------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 2274f6905c1..cc1547c87a6 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -42,6 +42,7 @@ jobs: - { name: fuzz_date, should_pass: false } - { name: fuzz_expr, should_pass: true } - { name: fuzz_printf, should_pass: false } + - { name: fuzz_echo, should_pass: false } - { name: fuzz_parse_glob, should_pass: true } - { name: fuzz_parse_size, should_pass: true } - { name: fuzz_parse_time, should_pass: true } diff --git a/fuzz/fuzz_targets/fuzz_echo.rs b/fuzz/fuzz_targets/fuzz_echo.rs index 3d810085301..826fd6da3eb 100644 --- a/fuzz/fuzz_targets/fuzz_echo.rs +++ b/fuzz/fuzz_targets/fuzz_echo.rs @@ -1,6 +1,6 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use uu_echo::uumain; // Changed from uu_printf to uu_echo +use uu_echo::uumain; use rand::prelude::SliceRandom; use rand::Rng; @@ -12,7 +12,7 @@ use crate::fuzz_common::{ compare_result, generate_and_run_uumain, generate_random_string, run_gnu_cmd, }; -static CMD_PATH: &str = "/usr/bin/echo"; // Changed from "printf" to "echo" +static CMD_PATH: &str = "echo"; fn generate_echo() -> String { let mut rng = rand::thread_rng(); @@ -22,7 +22,6 @@ fn generate_echo() -> String { let include_n = rng.gen_bool(0.1); // 10% chance let include_e = rng.gen_bool(0.1); // 10% chance let include_E = rng.gen_bool(0.1); // 10% chance - // --help and --version are typically not included in fuzzing as they don't change output format if include_n { echo_str.push_str("-n "); @@ -41,29 +40,25 @@ fn generate_echo() -> String { if include_e { // Add a 10% chance of including an escape sequence if rng.gen_bool(0.1) { - echo_str.push_str(&generate_escape_sequence(&mut rng)); // This function should handle echo-specific sequences + echo_str.push_str(&generate_escape_sequence(&mut rng)); } } echo_str } -// You should also modify the generate_escape_sequence function to include echo-specific sequences fn generate_escape_sequence(rng: &mut impl Rng) -> String { let escape_sequences = [ - "\\\\", "\\a", "\\b", "\\c", "\\e", "\\f", "\\n", "\\r", "\\t", "\\v", - "\\0NNN", // You can randomly generate NNN - "\\xHH", // You can randomly generate HH - // ... other sequences + "\\\\", "\\a", "\\b", "\\c", "\\e", "\\f", "\\n", "\\r", "\\t", "\\v", "\\0NNN", "\\xHH", ]; escape_sequences.choose(rng).unwrap().to_string() } fuzz_target!(|_data: &[u8]| { - let echo_input = generate_echo(); // Changed from generate_printf to generate_echo - let mut args = vec![OsString::from("echo")]; // Changed from "printf" to "echo" + let echo_input = generate_echo(); + let mut args = vec![OsString::from("echo")]; args.extend(echo_input.split_whitespace().map(OsString::from)); - let rust_result = generate_and_run_uumain(&args, uumain); // uumain function from uu_echo + let rust_result = generate_and_run_uumain(&args, uumain); let gnu_result = match run_gnu_cmd(CMD_PATH, &args[1..], false) { Ok(result) => result, @@ -88,6 +83,6 @@ fuzz_target!(|_data: &[u8]| { &gnu_result.stderr, rust_result.exit_code, gnu_result.exit_code, - false, // Set to true if you want to fail on stderr diff + true, ); }); From 9061b2ba7e1267cd6c8466108ecb197614ce29ea Mon Sep 17 00:00:00 2001 From: clara swanson <69856940+cswn@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:01:31 +0100 Subject: [PATCH 095/429] libstdbuf: remove crash macro (#5565) * libstdbuf: remove crash macro * libstdbuf: remove uucore macro/struct and use gnu messages * libstdbuf: remove crash macro * libstdbuf: remove uucore macro/struct and use gnu messages * libstdbuf: remove :? from print by printing file descriptor instead of file * merge main into libstdbuf-remove-crash-macro * libstdbuf: remove uucore from dependencies --- Cargo.lock | 1 - src/uu/stdbuf/src/libstdbuf/Cargo.toml | 1 - src/uu/stdbuf/src/libstdbuf/src/libstdbuf.rs | 14 ++++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f5e4280964..bf638b421a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2952,7 +2952,6 @@ dependencies = [ "cpp", "cpp_build", "libc", - "uucore", ] [[package]] diff --git a/src/uu/stdbuf/src/libstdbuf/Cargo.toml b/src/uu/stdbuf/src/libstdbuf/Cargo.toml index be97c47aeaf..eaa82e6e73d 100644 --- a/src/uu/stdbuf/src/libstdbuf/Cargo.toml +++ b/src/uu/stdbuf/src/libstdbuf/Cargo.toml @@ -22,7 +22,6 @@ crate-type = [ [dependencies] cpp = "0.5" libc = { workspace = true } -uucore = { version = ">=0.0.19", package = "uucore", path = "../../../../uucore" } [build-dependencies] cpp_build = "0.5" diff --git a/src/uu/stdbuf/src/libstdbuf/src/libstdbuf.rs b/src/uu/stdbuf/src/libstdbuf/src/libstdbuf.rs index a29d01b78f3..d744ca4c545 100644 --- a/src/uu/stdbuf/src/libstdbuf/src/libstdbuf.rs +++ b/src/uu/stdbuf/src/libstdbuf/src/libstdbuf.rs @@ -5,10 +5,9 @@ // spell-checker:ignore (ToDO) IOFBF IOLBF IONBF cstdio setvbuf use cpp::cpp; -use libc::{c_char, c_int, size_t, FILE, _IOFBF, _IOLBF, _IONBF}; +use libc::{c_char, c_int, fileno, size_t, FILE, _IOFBF, _IOLBF, _IONBF}; use std::env; use std::ptr; -use uucore::crash; cpp! {{ #include @@ -40,7 +39,10 @@ fn set_buffer(stream: *mut FILE, value: &str) { input => { let buff_size: usize = match input.parse() { Ok(num) => num, - Err(e) => crash!(1, "incorrect size of buffer!: {}", e), + Err(_) => { + eprintln!("failed to allocate a {} byte stdio buffer", value); + std::process::exit(1); + } }; (_IOFBF, buff_size as size_t) } @@ -52,7 +54,11 @@ fn set_buffer(stream: *mut FILE, value: &str) { res = libc::setvbuf(stream, buffer, mode, size); } if res != 0 { - crash!(res, "error while calling setvbuf!"); + eprintln!( + "could not set buffering of {} to mode {}", + unsafe { fileno(stream) }, + mode + ); } } From 0ec6802459bf79d6f36c0289aa1cb262c40537ba Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 30 Nov 2023 16:13:54 +0100 Subject: [PATCH 096/429] ls: fix padding of size column when using -l --- src/uu/ls/src/ls.rs | 2 +- tests/by-util/test_ls.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 88af56bb186..cba9cdf5375 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -3228,7 +3228,7 @@ fn calculate_padding_collection( padding_collections.minor = minor_len.max(padding_collections.minor); padding_collections.size = size_len .max(padding_collections.size) - .max(padding_collections.major + padding_collections.minor + 2usize); + .max(padding_collections.major); } } } diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 07ea8c9cd63..19a3f5578fa 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1023,6 +1023,21 @@ fn test_ls_long_format() { ).unwrap()); } +#[test] +fn test_ls_long_padding_of_size_column_with_multiple_files() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("dir"); + at.touch("dir/a"); + at.touch("dir/b"); + + ucmd.arg("-l") + .arg("dir") + .succeeds() + .stdout_contains(" 0 ") + .stdout_does_not_contain(" 0 "); +} + /// This test tests `ls -laR --color`. /// This test is mainly about coloring, but, the recursion, symlink `->` processing, /// and `.` and `..` being present in `-a` all need to work for the test to pass. From 8d591a7acce618ab3a9290f50487e9c0de22c5ed Mon Sep 17 00:00:00 2001 From: Piotr Kwiecinski Date: Thu, 30 Nov 2023 17:59:48 +0100 Subject: [PATCH 097/429] Bump freebsd-vm action to v1.0.2 & use ubuntu --- .github/workflows/freebsd.yml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 5af3da320a7..b932f9aa8f5 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: job: - - { os: macos-12 , features: unix } ## GHA MacOS-11.0 VM won't have VirtualBox; refs: , + - { os: ubuntu-22.04 , features: unix } env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" @@ -35,9 +35,11 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v0.3.1 + uses: vmactions/freebsd-vm@v1.0.2 with: usesh: true + sync: rsync + copyback: false # We need jq to run show-utils.sh and bash to use inline shell string replacement prepare: pkg install -y curl sudo jq bash run: | @@ -48,11 +50,11 @@ jobs: # TEST_USER=tester REPO_NAME=${GITHUB_WORKSPACE##*/} - WORKSPACE_PARENT="/Users/runner/work/${REPO_NAME}" + WORKSPACE_PARENT="/home/runner/work/${REPO_NAME}" WORKSPACE="${WORKSPACE_PARENT}/${REPO_NAME}" # pw adduser -n ${TEST_USER} -d /root/ -g wheel -c "Coreutils user to build" -w random - chown -R ${TEST_USER}:wheel /root/ "/Users/runner/work/${REPO_NAME}"/ + chown -R ${TEST_USER}:wheel /root/ "${WORKSPACE_PARENT}"/ whoami # # Further work needs to be done in a sudo as we are changing users @@ -114,7 +116,7 @@ jobs: fail-fast: false matrix: job: - - { os: macos-12 , features: unix } ## GHA MacOS-11.0 VM won't have VirtualBox; refs: , + - { os: ubuntu-22.04 , features: unix } env: mem: 4096 SCCACHE_GHA_ENABLED: "true" @@ -125,10 +127,11 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v0.3.1 + uses: vmactions/freebsd-vm@v1.0.2 with: usesh: true - # sync: sshfs + sync: rsync + copyback: false prepare: pkg install -y curl gmake sudo run: | ## Prepare, build, and test @@ -141,12 +144,12 @@ jobs: # TEST_USER=tester REPO_NAME=${GITHUB_WORKSPACE##*/} - WORKSPACE_PARENT="/Users/runner/work/${REPO_NAME}" + WORKSPACE_PARENT="/home/runner/work/${REPO_NAME}" WORKSPACE="${WORKSPACE_PARENT}/${REPO_NAME}" # pw adduser -n ${TEST_USER} -d /root/ -g wheel -c "Coreutils user to build" -w random # chown -R ${TEST_USER}:wheel /root/ "${WORKSPACE_PARENT}"/ - chown -R ${TEST_USER}:wheel /root/ "/Users/runner/work/${REPO_NAME}"/ + chown -R ${TEST_USER}:wheel /root/ "${WORKSPACE_PARENT}"/ whoami # # Further work needs to be done in a sudo as we are changing users From c6e7fdcabe46e9bcd903567900c11d7e230f9c1f Mon Sep 17 00:00:00 2001 From: Piotr Kwiecinski Date: Thu, 30 Nov 2023 18:45:06 +0100 Subject: [PATCH 098/429] skip test_cp_arg_update_interactive on FreeBSD --- tests/by-util/test_cp.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index 14b68da3718..36bca68b0d0 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -257,6 +257,8 @@ fn test_cp_target_directory_is_file() { } #[test] +// FixMe: for FreeBSD, flaky test; track repair progress at GH:uutils/coreutils/issue/4725 +#[cfg(not(target_os = "freebsd"))] fn test_cp_arg_update_interactive() { new_ucmd!() .arg(TEST_HELLO_WORLD_SOURCE) From 6b3f00cc3192624f2e0f763fef668de3cc9ab25d Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 1 Dec 2023 10:03:44 +0100 Subject: [PATCH 099/429] dd: skip two tests without "printf" feature --- tests/by-util/test_dd.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index a4c70097c8b..bd43ad077d3 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -15,7 +15,12 @@ use regex::Regex; use std::fs::{File, OpenOptions}; use std::io::{BufReader, Read, Write}; use std::path::PathBuf; -#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +#[cfg(all( + unix, + not(target_os = "macos"), + not(target_os = "freebsd"), + feature = "printf" +))] use std::process::{Command, Stdio}; #[cfg(not(windows))] use std::thread::sleep; @@ -1586,7 +1591,12 @@ fn test_seek_past_dev() { } #[test] -#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +#[cfg(all( + unix, + not(target_os = "macos"), + not(target_os = "freebsd"), + feature = "printf" +))] fn test_reading_partial_blocks_from_fifo() { // Create the FIFO. let ts = TestScenario::new(util_name!()); @@ -1622,7 +1632,12 @@ fn test_reading_partial_blocks_from_fifo() { } #[test] -#[cfg(all(unix, not(target_os = "macos"), not(target_os = "freebsd")))] +#[cfg(all( + unix, + not(target_os = "macos"), + not(target_os = "freebsd"), + feature = "printf" +))] fn test_reading_partial_blocks_from_fifo_unbuffered() { // Create the FIFO. let ts = TestScenario::new(util_name!()); From 4d2bdf497ab030871a7a84191010637a355ffd7e Mon Sep 17 00:00:00 2001 From: Piotr Kwiecinski Date: Fri, 1 Dec 2023 11:52:23 +0100 Subject: [PATCH 100/429] prevent CI creating 2 events on each pull request push --- .github/workflows/CICD.yml | 6 +++++- .github/workflows/GnuTests.yml | 6 +++++- .github/workflows/android.yml | 7 ++++++- .github/workflows/code-quality.yml | 6 +++++- .github/workflows/freebsd.yml | 6 +++++- .github/workflows/fuzzing.yml | 6 +++++- 6 files changed, 31 insertions(+), 6 deletions(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 8dfa0b1d103..8c96ce693ee 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -15,7 +15,11 @@ env: # * style job configuration STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index 61f30eba4c1..87e4373eddc 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -9,7 +9,11 @@ name: GnuTests # * note: to run a single test => `REPO/util/run-gnu-test.sh PATH/TO/TEST/SCRIPT` -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main permissions: contents: read diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index 5834aceffe8..69ca970c0fa 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -2,7 +2,12 @@ name: Android # spell-checker:ignore TERMUX reactivecircus Swatinem noaudio pkill swiftshader dtolnay juliangruber -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main + permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 98691f34bc8..289830f8171 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -2,7 +2,11 @@ name: Code Quality # spell-checker:ignore TERMUX reactivecircus Swatinem noaudio pkill swiftshader dtolnay juliangruber -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main env: # * style job configuration diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index b932f9aa8f5..02c0137e7b2 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -6,7 +6,11 @@ env: # * style job configuration STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main permissions: contents: read # to fetch code (actions/checkout) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 2274f6905c1..4e47ea0e281 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -2,7 +2,11 @@ name: Fuzzing # spell-checker:ignore fuzzer -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main permissions: contents: read # to fetch code (actions/checkout) From 13a4c9114c4574ee8d14ffe0bb6b326ad203375d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 2 Dec 2023 10:28:45 +0100 Subject: [PATCH 101/429] create datastructures for colors --- src/uucore/src/lib/features.rs | 2 + src/uucore/src/lib/features/colors.rs | 225 ++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 src/uucore/src/lib/features/colors.rs diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 1d0d437824d..a28e8a7bfc9 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -6,6 +6,8 @@ #[cfg(feature = "backup-control")] pub mod backup_control; +#[cfg(feature = "colors")] +pub mod colors; #[cfg(feature = "encoding")] pub mod encoding; #[cfg(feature = "format")] diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs new file mode 100644 index 00000000000..58b0b757084 --- /dev/null +++ b/src/uucore/src/lib/features/colors.rs @@ -0,0 +1,225 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +/* The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the + * slackware version of dircolors) are recognized but ignored. + * Global config options can be specified before TERM or COLORTERM entries + * below are TERM or COLORTERM entries, which can be glob patterns, which + * restrict following config to systems with matching environment variables. + * COLORTERM ?* +*/ + +static TERMS: &[&str] = &[ + "Eterm", + "ansi", + "*color*", + "con[0-9]*x[0-9]*", + "cons25", + "console", + "cygwin", + "*direct*", + "dtterm", + "gnome", + "hurd", + "jfbterm", + "konsole", + "kterm", + "linux", + "linux-c", + "mlterm", + "putty", + "rxvt*", + "screen*", + "st", + "terminator", + "tmux*", + "vt100", + "xterm*", +]; + +/* +# Below are the color init strings for the basic file types. +# One can use codes for 256 or more colors supported by modern terminals. +# The default color codes use the capabilities of an 8 color terminal +# with some additional attributes as per the following codes: +# Attribute codes: +# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed +# Text color codes: +# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white +# Background color codes: +# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white +#NORMAL 00 # no color code at all +#FILE 00 # regular file: use no color at all +*/ +static FILE_TYPES: &[(&str, &str)] = &[ + ("RESET", "0"), // reset to "normal" color + ("DIR", "01;34"), // directory + ("LINK", "01;36"), // symbolic link + ("MULTIHARDLINK", "00"), // regular file with more than one link + ("FIFO", "40;33"), // pipe + ("SOCK", "01;35"), // socket + ("DOOR", "01;35"), // door + ("BLK", "40;33;01"), // block device driver + ("CHR", "40;33;01"), // character device driver + ("ORPHAN", "40;31;01"), // symlink to nonexistent file, or non-stat'able file + ("MISSING", "00"), // ... and the files they point to + ("SETUID", "37;41"), // file that is setuid (u+s) + ("SETGID", "30;43"), // file that is setgid (g+s) + ("CAPABILITY", "00"), // file with capability + ("STICKY_OTHER_WRITABLE", "30;42"), // dir that is sticky and other-writable (+t,o+w) + ("OTHER_WRITABLE", "34;42"), // dir that is other-writable (o+w) and not sticky + ("STICKY", "37;44"), // dir with the sticky bit set (+t) and not other-writable + ("EXEC", "01;32"), // files with execute permission +]; + +/* +# List any file extensions like '.gz' or '.tar' that you would like ls +# to color below. Put the extension, a space, and the color init string. +# (and any comments you want to add after a '#') +*/ +static FILE_COLORS: &[(&str, &str)] = &[ + // Executables (Windows) + (".cmd", "01;32"), + (".exe", "01;32"), + (".com", "01;32"), + (".btm", "01;32"), + (".bat", "01;32"), + (".sh", "01;32"), + (".csh", "01;32"), + // Archives or compressed + (".tar", "01;31"), + (".tgz", "01;31"), + (".arc", "01;31"), + (".arj", "01;31"), + (".taz", "01;31"), + (".lha", "01;31"), + (".lz4", "01;31"), + (".lzh", "01;31"), + (".lzma", "01;31"), + (".tlz", "01;31"), + (".txz", "01;31"), + (".tzo", "01;31"), + (".t7z", "01;31"), + (".zip", "01;31"), + (".z", "01;31"), + (".dz", "01;31"), + (".gz", "01;31"), + (".lrz", "01;31"), + (".lz", "01;31"), + (".lzo", "01;31"), + (".xz", "01;31"), + (".zst", "01;31"), + (".tzst", "01;31"), + (".bz2", "01;31"), + (".bz", "01;31"), + (".tbz", "01;31"), + (".tbz2", "01;31"), + (".tz", "01;31"), + (".deb", "01;31"), + (".rpm", "01;31"), + (".jar", "01;31"), + (".war", "01;31"), + (".ear", "01;31"), + (".sar", "01;31"), + (".rar", "01;31"), + (".alz", "01;31"), + (".ace", "01;31"), + (".zoo", "01;31"), + (".cpio", "01;31"), + (".7z", "01;31"), + (".rz", "01;31"), + (".cab", "01;31"), + (".wim", "01;31"), + (".swm", "01;31"), + (".dwm", "01;31"), + (".esd", "01;31"), + // Image formats + (".avif", "01;35"), + (".jpg", "01;35"), + (".jpeg", "01;35"), + (".mjpg", "01;35"), + (".mjpeg", "01;35"), + (".gif", "01;35"), + (".bmp", "01;35"), + (".pbm", "01;35"), + (".pgm", "01;35"), + (".ppm", "01;35"), + (".tga", "01;35"), + (".xbm", "01;35"), + (".xpm", "01;35"), + (".tif", "01;35"), + (".tiff", "01;35"), + (".png", "01;35"), + (".svg", "01;35"), + (".svgz", "01;35"), + (".mng", "01;35"), + (".pcx", "01;35"), + (".mov", "01;35"), + (".mpg", "01;35"), + (".mpeg", "01;35"), + (".m2v", "01;35"), + (".mkv", "01;35"), + (".webm", "01;35"), + (".webp", "01;35"), + (".ogm", "01;35"), + (".mp4", "01;35"), + (".m4v", "01;35"), + (".mp4v", "01;35"), + (".vob", "01;35"), + (".qt", "01;35"), + (".nuv", "01;35"), + (".wmv", "01;35"), + (".asf", "01;35"), + (".rm", "01;35"), + (".rmvb", "01;35"), + (".flc", "01;35"), + (".avi", "01;35"), + (".fli", "01;35"), + (".flv", "01;35"), + (".gl", "01;35"), + (".dl", "01;35"), + (".xcf", "01;35"), + (".xwd", "01;35"), + (".yuv", "01;35"), + (".cgm", "01;35"), + (".emf", "01;35"), + (".ogv", "01;35"), + (".ogx", "01;35"), + // Audio formats + (".aac", "00;36"), + (".au", "00;36"), + (".flac", "00;36"), + (".m4a", "00;36"), + (".mid", "00;36"), + (".midi", "00;36"), + (".mka", "00;36"), + (".mp3", "00;36"), + (".mpc", "00;36"), + (".ogg", "00;36"), + (".ra", "00;36"), + (".wav", "00;36"), + (".oga", "00;36"), + (".opus", "00;36"), + (".spx", "00;36"), + (".xspf", "00;36"), + // Backup files + ("*~", "00;90"), + ("*#", "00;90"), + (".bak", "00;90"), + (".old", "00;90"), + (".orig", "00;90"), + (".part", "00;90"), + (".rej", "00;90"), + (".swp", "00;90"), + (".tmp", "00;90"), + (".dpkg-dist", "00;90"), + (".dpkg-old", "00;90"), + (".ucf-dist", "00;90"), + (".ucf-new", "00;90"), + (".ucf-old", "00;90"), + (".rpmnew", "00;90"), + (".rpmorig", "00;90"), + (".rpmsave", "00;90"), +]; From 0e8c171c80ab35f4d16a2613700648ec059ebaa5 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 2 Dec 2023 11:00:55 +0100 Subject: [PATCH 102/429] dircolors: move the FILE_ATTRIBUTE_CODES datastructures and use it --- src/uu/dircolors/Cargo.toml | 2 +- src/uu/dircolors/src/dircolors.rs | 43 +-- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features/colors.rs | 458 +++++++++++++++----------- src/uucore/src/lib/lib.rs | 2 + 5 files changed, 273 insertions(+), 233 deletions(-) diff --git a/src/uu/dircolors/Cargo.toml b/src/uu/dircolors/Cargo.toml index 6099b5a8428..66ee792f839 100644 --- a/src/uu/dircolors/Cargo.toml +++ b/src/uu/dircolors/Cargo.toml @@ -16,7 +16,7 @@ path = "src/dircolors.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true } +uucore = { workspace = true, features = ["colors"] } [[bin]] name = "dircolors" diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 2e3087d810b..58228ddeb57 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -12,6 +12,7 @@ use std::io::{BufRead, BufReader}; use std::path::Path; use clap::{crate_version, Arg, ArgAction, Command}; +use uucore::colors::FILE_ATTRIBUTE_CODES; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::{help_about, help_section, help_usage}; @@ -276,7 +277,6 @@ enum ParseState { Pass, } -use std::collections::HashMap; use uucore::{format_usage, parse_glob}; #[allow(clippy::cognitive_complexity)] @@ -294,45 +294,6 @@ where OutputFmt::Unknown => unreachable!(), } - let mut table: HashMap<&str, &str> = HashMap::with_capacity(48); - table.insert("normal", "no"); - table.insert("norm", "no"); - table.insert("file", "fi"); - table.insert("reset", "rs"); - table.insert("dir", "di"); - table.insert("lnk", "ln"); - table.insert("link", "ln"); - table.insert("symlink", "ln"); - table.insert("orphan", "or"); - table.insert("missing", "mi"); - table.insert("fifo", "pi"); - table.insert("pipe", "pi"); - table.insert("sock", "so"); - table.insert("blk", "bd"); - table.insert("block", "bd"); - table.insert("chr", "cd"); - table.insert("char", "cd"); - table.insert("door", "do"); - table.insert("exec", "ex"); - table.insert("left", "lc"); - table.insert("leftcode", "lc"); - table.insert("right", "rc"); - table.insert("rightcode", "rc"); - table.insert("end", "ec"); - table.insert("endcode", "ec"); - table.insert("suid", "su"); - table.insert("setuid", "su"); - table.insert("sgid", "sg"); - table.insert("setgid", "sg"); - table.insert("sticky", "st"); - table.insert("other_writable", "ow"); - table.insert("owr", "ow"); - table.insert("sticky_other_writable", "tw"); - table.insert("owt", "tw"); - table.insert("capability", "ca"); - table.insert("multihardlink", "mh"); - table.insert("clrtoeol", "cl"); - let term = env::var("TERM").unwrap_or_else(|_| "none".to_owned()); let term = term.as_str(); @@ -384,7 +345,7 @@ where } } else if lower == "options" || lower == "color" || lower == "eightbit" { // Slackware only. Ignore - } else if let Some(s) = table.get(lower.as_str()) { + } else if let Some(s) = FILE_ATTRIBUTE_CODES.get(lower.as_str()) { if *fmt == OutputFmt::Display { result.push_str(format!("\x1b[{val}m{s}\t{val}\x1b[0m\n").as_str()); } else { diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index b43445b4a92..44f8bb2d13f 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -72,6 +72,7 @@ windows-sys = { workspace = true, optional = true, default-features = false, fea default = [] # * non-default features backup-control = [] +colors = [] encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"] entries = ["libc"] fs = ["dunce", "libc", "winapi-util", "windows-sys"] diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 58b0b757084..69be16ba291 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -3,6 +3,9 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use once_cell::sync::Lazy; +use std::collections::HashMap; + /* The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the * slackware version of dircolors) are recognized but ignored. * Global config options can be specified before TERM or COLORTERM entries @@ -11,33 +14,41 @@ * COLORTERM ?* */ -static TERMS: &[&str] = &[ - "Eterm", - "ansi", - "*color*", - "con[0-9]*x[0-9]*", - "cons25", - "console", - "cygwin", - "*direct*", - "dtterm", - "gnome", - "hurd", - "jfbterm", - "konsole", - "kterm", - "linux", - "linux-c", - "mlterm", - "putty", - "rxvt*", - "screen*", - "st", - "terminator", - "tmux*", - "vt100", - "xterm*", -]; +pub static TERMS: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + [ + "Eterm", + "ansi", + "*color*", + "con[0-9]*x[0-9]*", + "cons25", + "console", + "cygwin", + "*direct*", + "dtterm", + "gnome", + "hurd", + "jfbterm", + "konsole", + "kterm", + "linux", + "linux-c", + "mlterm", + "putty", + "rxvt*", + "screen*", + "st", + "terminator", + "tmux*", + "vt100", + "xterm*", + ] + .iter() + .for_each(|&term| { + m.insert(term, ""); + }); + m +}); /* # Below are the color init strings for the basic file types. @@ -53,173 +64,238 @@ static TERMS: &[&str] = &[ #NORMAL 00 # no color code at all #FILE 00 # regular file: use no color at all */ -static FILE_TYPES: &[(&str, &str)] = &[ - ("RESET", "0"), // reset to "normal" color - ("DIR", "01;34"), // directory - ("LINK", "01;36"), // symbolic link - ("MULTIHARDLINK", "00"), // regular file with more than one link - ("FIFO", "40;33"), // pipe - ("SOCK", "01;35"), // socket - ("DOOR", "01;35"), // door - ("BLK", "40;33;01"), // block device driver - ("CHR", "40;33;01"), // character device driver - ("ORPHAN", "40;31;01"), // symlink to nonexistent file, or non-stat'able file - ("MISSING", "00"), // ... and the files they point to - ("SETUID", "37;41"), // file that is setuid (u+s) - ("SETGID", "30;43"), // file that is setgid (g+s) - ("CAPABILITY", "00"), // file with capability - ("STICKY_OTHER_WRITABLE", "30;42"), // dir that is sticky and other-writable (+t,o+w) - ("OTHER_WRITABLE", "34;42"), // dir that is other-writable (o+w) and not sticky - ("STICKY", "37;44"), // dir with the sticky bit set (+t) and not other-writable - ("EXEC", "01;32"), // files with execute permission -]; +// FILE_TYPES with Lazy initialization +pub static FILE_TYPES: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + [ + ("RESET", "0"), // reset to "normal" color + ("DIR", "01;34"), // directory + ("LINK", "01;36"), // symbolic link + ("MULTIHARDLINK", "00"), // regular file with more than one link + ("FIFO", "40;33"), // pipe + ("SOCK", "01;35"), // socket + ("DOOR", "01;35"), // door + ("BLK", "40;33;01"), // block device driver + ("CHR", "40;33;01"), // character device driver + ("ORPHAN", "40;31;01"), // symlink to nonexistent file, or non-stat'able file + ("MISSING", "00"), // ... and the files they point to + ("SETUID", "37;41"), // file that is setuid (u+s) + ("SETGID", "30;43"), // file that is setgid (g+s) + ("CAPABILITY", "00"), // file with capability + ("STICKY_OTHER_WRITABLE", "30;42"), // dir that is sticky and other-writable (+t,o+w) + ("OTHER_WRITABLE", "34;42"), // dir that is other-writable (o+w) and not sticky + ("STICKY", "37;44"), // dir with the sticky bit set (+t) and not other-writable + ("EXEC", "01;32"), // files with execute permission + ] + .iter() + .for_each(|&(k, v)| { + m.insert(k, v); + }); + m +}); /* # List any file extensions like '.gz' or '.tar' that you would like ls # to color below. Put the extension, a space, and the color init string. # (and any comments you want to add after a '#') */ -static FILE_COLORS: &[(&str, &str)] = &[ - // Executables (Windows) - (".cmd", "01;32"), - (".exe", "01;32"), - (".com", "01;32"), - (".btm", "01;32"), - (".bat", "01;32"), - (".sh", "01;32"), - (".csh", "01;32"), - // Archives or compressed - (".tar", "01;31"), - (".tgz", "01;31"), - (".arc", "01;31"), - (".arj", "01;31"), - (".taz", "01;31"), - (".lha", "01;31"), - (".lz4", "01;31"), - (".lzh", "01;31"), - (".lzma", "01;31"), - (".tlz", "01;31"), - (".txz", "01;31"), - (".tzo", "01;31"), - (".t7z", "01;31"), - (".zip", "01;31"), - (".z", "01;31"), - (".dz", "01;31"), - (".gz", "01;31"), - (".lrz", "01;31"), - (".lz", "01;31"), - (".lzo", "01;31"), - (".xz", "01;31"), - (".zst", "01;31"), - (".tzst", "01;31"), - (".bz2", "01;31"), - (".bz", "01;31"), - (".tbz", "01;31"), - (".tbz2", "01;31"), - (".tz", "01;31"), - (".deb", "01;31"), - (".rpm", "01;31"), - (".jar", "01;31"), - (".war", "01;31"), - (".ear", "01;31"), - (".sar", "01;31"), - (".rar", "01;31"), - (".alz", "01;31"), - (".ace", "01;31"), - (".zoo", "01;31"), - (".cpio", "01;31"), - (".7z", "01;31"), - (".rz", "01;31"), - (".cab", "01;31"), - (".wim", "01;31"), - (".swm", "01;31"), - (".dwm", "01;31"), - (".esd", "01;31"), - // Image formats - (".avif", "01;35"), - (".jpg", "01;35"), - (".jpeg", "01;35"), - (".mjpg", "01;35"), - (".mjpeg", "01;35"), - (".gif", "01;35"), - (".bmp", "01;35"), - (".pbm", "01;35"), - (".pgm", "01;35"), - (".ppm", "01;35"), - (".tga", "01;35"), - (".xbm", "01;35"), - (".xpm", "01;35"), - (".tif", "01;35"), - (".tiff", "01;35"), - (".png", "01;35"), - (".svg", "01;35"), - (".svgz", "01;35"), - (".mng", "01;35"), - (".pcx", "01;35"), - (".mov", "01;35"), - (".mpg", "01;35"), - (".mpeg", "01;35"), - (".m2v", "01;35"), - (".mkv", "01;35"), - (".webm", "01;35"), - (".webp", "01;35"), - (".ogm", "01;35"), - (".mp4", "01;35"), - (".m4v", "01;35"), - (".mp4v", "01;35"), - (".vob", "01;35"), - (".qt", "01;35"), - (".nuv", "01;35"), - (".wmv", "01;35"), - (".asf", "01;35"), - (".rm", "01;35"), - (".rmvb", "01;35"), - (".flc", "01;35"), - (".avi", "01;35"), - (".fli", "01;35"), - (".flv", "01;35"), - (".gl", "01;35"), - (".dl", "01;35"), - (".xcf", "01;35"), - (".xwd", "01;35"), - (".yuv", "01;35"), - (".cgm", "01;35"), - (".emf", "01;35"), - (".ogv", "01;35"), - (".ogx", "01;35"), - // Audio formats - (".aac", "00;36"), - (".au", "00;36"), - (".flac", "00;36"), - (".m4a", "00;36"), - (".mid", "00;36"), - (".midi", "00;36"), - (".mka", "00;36"), - (".mp3", "00;36"), - (".mpc", "00;36"), - (".ogg", "00;36"), - (".ra", "00;36"), - (".wav", "00;36"), - (".oga", "00;36"), - (".opus", "00;36"), - (".spx", "00;36"), - (".xspf", "00;36"), - // Backup files - ("*~", "00;90"), - ("*#", "00;90"), - (".bak", "00;90"), - (".old", "00;90"), - (".orig", "00;90"), - (".part", "00;90"), - (".rej", "00;90"), - (".swp", "00;90"), - (".tmp", "00;90"), - (".dpkg-dist", "00;90"), - (".dpkg-old", "00;90"), - (".ucf-dist", "00;90"), - (".ucf-new", "00;90"), - (".ucf-old", "00;90"), - (".rpmnew", "00;90"), - (".rpmorig", "00;90"), - (".rpmsave", "00;90"), -]; +pub static FILE_COLORS: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + [ + // Executables (Windows) + (".cmd", "01;32"), + (".exe", "01;32"), + (".com", "01;32"), + (".btm", "01;32"), + (".bat", "01;32"), + (".sh", "01;32"), + (".csh", "01;32"), + // Archives or compressed + (".tar", "01;31"), + (".tgz", "01;31"), + (".arc", "01;31"), + (".arj", "01;31"), + (".taz", "01;31"), + (".lha", "01;31"), + (".lz4", "01;31"), + (".lzh", "01;31"), + (".lzma", "01;31"), + (".tlz", "01;31"), + (".txz", "01;31"), + (".tzo", "01;31"), + (".t7z", "01;31"), + (".zip", "01;31"), + (".z", "01;31"), + (".dz", "01;31"), + (".gz", "01;31"), + (".lrz", "01;31"), + (".lz", "01;31"), + (".lzo", "01;31"), + (".xz", "01;31"), + (".zst", "01;31"), + (".tzst", "01;31"), + (".bz2", "01;31"), + (".bz", "01;31"), + (".tbz", "01;31"), + (".tbz2", "01;31"), + (".tz", "01;31"), + (".deb", "01;31"), + (".rpm", "01;31"), + (".jar", "01;31"), + (".war", "01;31"), + (".ear", "01;31"), + (".sar", "01;31"), + (".rar", "01;31"), + (".alz", "01;31"), + (".ace", "01;31"), + (".zoo", "01;31"), + (".cpio", "01;31"), + (".7z", "01;31"), + (".rz", "01;31"), + (".cab", "01;31"), + (".wim", "01;31"), + (".swm", "01;31"), + (".dwm", "01;31"), + (".esd", "01;31"), + // Image formats + (".avif", "01;35"), + (".jpg", "01;35"), + (".jpeg", "01;35"), + (".mjpg", "01;35"), + (".mjpeg", "01;35"), + (".gif", "01;35"), + (".bmp", "01;35"), + (".pbm", "01;35"), + (".pgm", "01;35"), + (".ppm", "01;35"), + (".tga", "01;35"), + (".xbm", "01;35"), + (".xpm", "01;35"), + (".tif", "01;35"), + (".tiff", "01;35"), + (".png", "01;35"), + (".svg", "01;35"), + (".svgz", "01;35"), + (".mng", "01;35"), + (".pcx", "01;35"), + (".mov", "01;35"), + (".mpg", "01;35"), + (".mpeg", "01;35"), + (".m2v", "01;35"), + (".mkv", "01;35"), + (".webm", "01;35"), + (".webp", "01;35"), + (".ogm", "01;35"), + (".mp4", "01;35"), + (".m4v", "01;35"), + (".mp4v", "01;35"), + (".vob", "01;35"), + (".qt", "01;35"), + (".nuv", "01;35"), + (".wmv", "01;35"), + (".asf", "01;35"), + (".rm", "01;35"), + (".rmvb", "01;35"), + (".flc", "01;35"), + (".avi", "01;35"), + (".fli", "01;35"), + (".flv", "01;35"), + (".gl", "01;35"), + (".dl", "01;35"), + (".xcf", "01;35"), + (".xwd", "01;35"), + (".yuv", "01;35"), + (".cgm", "01;35"), + (".emf", "01;35"), + (".ogv", "01;35"), + (".ogx", "01;35"), + // Audio formats + (".aac", "00;36"), + (".au", "00;36"), + (".flac", "00;36"), + (".m4a", "00;36"), + (".mid", "00;36"), + (".midi", "00;36"), + (".mka", "00;36"), + (".mp3", "00;36"), + (".mpc", "00;36"), + (".ogg", "00;36"), + (".ra", "00;36"), + (".wav", "00;36"), + (".oga", "00;36"), + (".opus", "00;36"), + (".spx", "00;36"), + (".xspf", "00;36"), + // Backup files + ("*~", "00;90"), + ("*#", "00;90"), + (".bak", "00;90"), + (".old", "00;90"), + (".orig", "00;90"), + (".part", "00;90"), + (".rej", "00;90"), + (".swp", "00;90"), + (".tmp", "00;90"), + (".dpkg-dist", "00;90"), + (".dpkg-old", "00;90"), + (".ucf-dist", "00;90"), + (".ucf-new", "00;90"), + (".ucf-old", "00;90"), + (".rpmnew", "00;90"), + (".rpmorig", "00;90"), + (".rpmsave", "00;90"), + ] + .iter() + .for_each(|&(k, v)| { + m.insert(k, v); + }); + m +}); + +pub static FILE_ATTRIBUTE_CODES: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + [ + ("normal", "no"), + ("norm", "no"), + ("file", "fi"), + ("reset", "rs"), + ("dir", "di"), + ("lnk", "ln"), + ("link", "ln"), + ("symlink", "ln"), + ("orphan", "or"), + ("missing", "mi"), + ("fifo", "pi"), + ("pipe", "pi"), + ("sock", "so"), + ("blk", "bd"), + ("block", "bd"), + ("chr", "cd"), + ("char", "cd"), + ("door", "do"), + ("exec", "ex"), + ("left", "lc"), + ("leftcode", "lc"), + ("right", "rc"), + ("rightcode", "rc"), + ("end", "ec"), + ("endcode", "ec"), + ("suid", "su"), + ("setuid", "su"), + ("sgid", "sg"), + ("setgid", "sg"), + ("sticky", "st"), + ("other_writable", "ow"), + ("owr", "ow"), + ("sticky_other_writable", "tw"), + ("owt", "tw"), + ("capability", "ca"), + ("multihardlink", "mh"), + ("clrtoeol", "cl"), + ] + .iter() + .for_each(|&(k, v)| { + m.insert(k, v); + }); + m +}); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index af8668ef02f..426b4216ca2 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -35,6 +35,8 @@ pub use crate::parser::shortcut_value_parser; // * feature-gated modules #[cfg(feature = "backup-control")] pub use crate::features::backup_control; +#[cfg(feature = "colors")] +pub use crate::features::colors; #[cfg(feature = "encoding")] pub use crate::features::encoding; #[cfg(feature = "format")] From 5d19f79cd0b791bf28c9c8d9d8f4fd61817f6026 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 2 Dec 2023 12:57:38 +0100 Subject: [PATCH 103/429] dircolors should use the datastructures when printing --- src/uu/dircolors/src/dircolors.rs | 119 ++++++++++++++++++++++---- src/uucore/src/lib/features/colors.rs | 89 +++++++------------ 2 files changed, 134 insertions(+), 74 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 58228ddeb57..43e35c3d2a9 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -12,7 +12,7 @@ use std::io::{BufRead, BufReader}; use std::path::Path; use clap::{crate_version, Arg, ArgAction, Command}; -use uucore::colors::FILE_ATTRIBUTE_CODES; +use uucore::colors::{FILE_ATTRIBUTE_CODES, FILE_COLORS, FILE_TYPES}; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::{help_about, help_section, help_usage}; @@ -58,6 +58,89 @@ pub fn guess_syntax() -> OutputFmt { } } +fn get_colors_format_strings(fmt: &OutputFmt) -> (String, String) { + let prefix = match fmt { + OutputFmt::Shell => "LS_COLORS='".to_string(), + OutputFmt::CShell => "setenv LS_COLORS '".to_string(), + OutputFmt::Display => String::new(), + OutputFmt::Unknown => unreachable!(), + }; + + let suffix = match fmt { + OutputFmt::Shell => "';\nexport LS_COLORS".to_string(), + OutputFmt::CShell => "'".to_string(), + OutputFmt::Display => String::new(), + OutputFmt::Unknown => unreachable!(), + }; + + (prefix, suffix) +} + +pub fn generate_type_output(fmt: &OutputFmt) -> String { + match fmt { + OutputFmt::Display => FILE_TYPES + .iter() + .map(|&(_, key, val)| format!("\x1b[{}m{}\t{}\x1b[0m", val, key, val)) + .collect::>() + .join("\n"), + _ => { + // Existing logic for other formats + FILE_TYPES + .iter() + .map(|&(_, v1, v2)| format!("{}={}", v1, v2)) + .collect::>() + .join(":") + } + } +} + +enum ExtensionFormat { + StarDot, // Format as ".*ext" + Dot, // Format as ".ext" + NoDot, // Format as "ext" +} + +fn generate_ls_colors(fmt: &OutputFmt, format: ExtensionFormat, sep: &str) -> String { + match fmt { + OutputFmt::Display => { + let mut display_parts = vec![]; + let type_output = generate_type_output(fmt); + display_parts.push(type_output); + for &(extension, code) in FILE_COLORS.iter() { + display_parts.push(format!("\x1b[{}m*{}\t{}\x1b[0m", code, extension, code)); + } + display_parts.join("\n") + } + _ => { + // existing logic for other formats + let mut parts = vec![]; + for &(extension, code) in FILE_COLORS.iter() { + let formatted_extension = match format { + ExtensionFormat::StarDot => format!("*{}", extension), + ExtensionFormat::Dot => extension.to_string(), + ExtensionFormat::NoDot => { + if extension.starts_with('.') { + extension[1..].to_string() + } else { + extension.to_string() + } + } + }; + parts.push(format!("{}={}", formatted_extension, code)); + } + let (prefix, suffix) = get_colors_format_strings(&fmt); + let ls_colors = parts.join(sep); + format!( + "{}{}:{}:{}", + prefix, + generate_type_output(&fmt), + ls_colors, + suffix + ) + } + } +} + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let args = args.collect_ignore(); @@ -126,7 +209,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result; if files.is_empty() { - result = parse(INTERNAL_DB.lines(), &out_format, ""); + println!( + "{}", + generate_ls_colors(&out_format, ExtensionFormat::StarDot, ":") + ); + + return Ok(()); } else if files.len() > 1 { return Err(UUsageError::new( 1, @@ -287,12 +375,9 @@ where { // 1790 > $(dircolors | wc -m) let mut result = String::with_capacity(1790); - match fmt { - OutputFmt::Shell => result.push_str("LS_COLORS='"), - OutputFmt::CShell => result.push_str("setenv LS_COLORS '"), - OutputFmt::Display => (), - OutputFmt::Unknown => unreachable!(), - } + let (prefix, suffix) = get_colors_format_strings(&fmt); + + result.push_str(&prefix); let term = env::var("TERM").unwrap_or_else(|_| "none".to_owned()); let term = term.as_str(); @@ -331,6 +416,7 @@ where state = ParseState::Continue; } if state != ParseState::Pass { + let search_key = lower.as_str(); if key.starts_with('.') { if *fmt == OutputFmt::Display { result.push_str(format!("\x1b[{val}m*{key}\t{val}\x1b[0m\n").as_str()); @@ -345,7 +431,10 @@ where } } else if lower == "options" || lower == "color" || lower == "eightbit" { // Slackware only. Ignore - } else if let Some(s) = FILE_ATTRIBUTE_CODES.get(lower.as_str()) { + } else if let Some((_, s)) = FILE_ATTRIBUTE_CODES + .iter() + .find(|&&(key, _)| key == search_key) + { if *fmt == OutputFmt::Display { result.push_str(format!("\x1b[{val}m{s}\t{val}\x1b[0m\n").as_str()); } else { @@ -363,15 +452,11 @@ where } } - match fmt { - OutputFmt::Shell => result.push_str("';\nexport LS_COLORS"), - OutputFmt::CShell => result.push('\''), - OutputFmt::Display => { - // remove latest "\n" - result.pop(); - } - OutputFmt::Unknown => unreachable!(), + if fmt == &OutputFmt::Display { + // remove latest "\n" + result.pop(); } + result.push_str(&suffix); Ok(result) } diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 69be16ba291..96ca6d45672 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -4,19 +4,15 @@ // file that was distributed with this source code. use once_cell::sync::Lazy; -use std::collections::HashMap; /* The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the * slackware version of dircolors) are recognized but ignored. * Global config options can be specified before TERM or COLORTERM entries * below are TERM or COLORTERM entries, which can be glob patterns, which * restrict following config to systems with matching environment variables. - * COLORTERM ?* */ - -pub static TERMS: Lazy> = Lazy::new(|| { - let mut m = HashMap::new(); - [ +pub static TERMS: Lazy> = Lazy::new(|| { + vec![ "Eterm", "ansi", "*color*", @@ -43,11 +39,6 @@ pub static TERMS: Lazy> = Lazy::new(|| { "vt100", "xterm*", ] - .iter() - .for_each(|&term| { - m.insert(term, ""); - }); - m }); /* @@ -64,34 +55,27 @@ pub static TERMS: Lazy> = Lazy::new(|| { #NORMAL 00 # no color code at all #FILE 00 # regular file: use no color at all */ -// FILE_TYPES with Lazy initialization -pub static FILE_TYPES: Lazy> = Lazy::new(|| { - let mut m = HashMap::new(); - [ - ("RESET", "0"), // reset to "normal" color - ("DIR", "01;34"), // directory - ("LINK", "01;36"), // symbolic link - ("MULTIHARDLINK", "00"), // regular file with more than one link - ("FIFO", "40;33"), // pipe - ("SOCK", "01;35"), // socket - ("DOOR", "01;35"), // door - ("BLK", "40;33;01"), // block device driver - ("CHR", "40;33;01"), // character device driver - ("ORPHAN", "40;31;01"), // symlink to nonexistent file, or non-stat'able file - ("MISSING", "00"), // ... and the files they point to - ("SETUID", "37;41"), // file that is setuid (u+s) - ("SETGID", "30;43"), // file that is setgid (g+s) - ("CAPABILITY", "00"), // file with capability - ("STICKY_OTHER_WRITABLE", "30;42"), // dir that is sticky and other-writable (+t,o+w) - ("OTHER_WRITABLE", "34;42"), // dir that is other-writable (o+w) and not sticky - ("STICKY", "37;44"), // dir with the sticky bit set (+t) and not other-writable - ("EXEC", "01;32"), // files with execute permission +pub static FILE_TYPES: Lazy> = Lazy::new(|| { + vec![ + ("RESET", "rs", "0"), // reset to "normal" color + ("DIR", "di", "01;34"), // directory + ("LINK", "ln", "01;36"), // symbolic link + ("MULTIHARDLINK", "mh", "00"), // regular file with more than one link + ("FIFO", "pi", "40;33"), // pipe + ("SOCK", "so", "01;35"), // socket + ("DOOR", "do", "01;35"), // door + ("BLK", "bd", "40;33;01"), // block device driver + ("CHR", "cd", "40;33;01"), // character device driver + ("ORPHAN", "or", "40;31;01"), // symlink to nonexistent file, or non-stat'able file + ("MISSING", "mi", "00"), // ... and the files they point to + ("SETUID", "su", "37;41"), // file that is setuid (u+s) + ("SETGID", "sg", "30;43"), // file that is setgid (g+s) + ("CAPABILITY", "ca", "00"), // file with capability + ("STICKY_OTHER_WRITABLE", "tw", "30;42"), // dir that is sticky and other-writable (+t,o+w) + ("OTHER_WRITABLE", "ow", "34;42"), // dir that is other-writable (o+w) and not sticky + ("STICKY", "st", "37;44"), // dir with the sticky bit set (+t) and not other-writable + ("EXEC", "ex", "01;32"), // files with execute permission ] - .iter() - .for_each(|&(k, v)| { - m.insert(k, v); - }); - m }); /* @@ -99,9 +83,9 @@ pub static FILE_TYPES: Lazy> = Lazy::new(|| { # to color below. Put the extension, a space, and the color init string. # (and any comments you want to add after a '#') */ -pub static FILE_COLORS: Lazy> = Lazy::new(|| { - let mut m = HashMap::new(); - [ +pub static FILE_COLORS: Lazy> = Lazy::new(|| { + vec![ + /* // Executables (Windows) (".cmd", "01;32"), (".exe", "01;32"), @@ -109,7 +93,7 @@ pub static FILE_COLORS: Lazy> = Lazy::new(|| { (".btm", "01;32"), (".bat", "01;32"), (".sh", "01;32"), - (".csh", "01;32"), + (".csh", "01;32"),*/ // Archives or compressed (".tar", "01;31"), (".tgz", "01;31"), @@ -207,6 +191,7 @@ pub static FILE_COLORS: Lazy> = Lazy::new(|| { (".yuv", "01;35"), (".cgm", "01;35"), (".emf", "01;35"), + // https://wiki.xiph.org/MIME_Types_and_File_Extensions (".ogv", "01;35"), (".ogx", "01;35"), // Audio formats @@ -222,13 +207,14 @@ pub static FILE_COLORS: Lazy> = Lazy::new(|| { (".ogg", "00;36"), (".ra", "00;36"), (".wav", "00;36"), + // https://wiki.xiph.org/MIME_Types_and_File_Extensions (".oga", "00;36"), (".opus", "00;36"), (".spx", "00;36"), (".xspf", "00;36"), // Backup files - ("*~", "00;90"), - ("*#", "00;90"), + ("~", "00;90"), + ("#", "00;90"), (".bak", "00;90"), (".old", "00;90"), (".orig", "00;90"), @@ -245,16 +231,10 @@ pub static FILE_COLORS: Lazy> = Lazy::new(|| { (".rpmorig", "00;90"), (".rpmsave", "00;90"), ] - .iter() - .for_each(|&(k, v)| { - m.insert(k, v); - }); - m }); -pub static FILE_ATTRIBUTE_CODES: Lazy> = Lazy::new(|| { - let mut m = HashMap::new(); - [ +pub static FILE_ATTRIBUTE_CODES: Lazy> = Lazy::new(|| { + vec![ ("normal", "no"), ("norm", "no"), ("file", "fi"), @@ -293,9 +273,4 @@ pub static FILE_ATTRIBUTE_CODES: Lazy> = Lazy::new(|| { ("multihardlink", "mh"), ("clrtoeol", "cl"), ] - .iter() - .for_each(|&(k, v)| { - m.insert(k, v); - }); - m }); From e4b875043429ac78f4ae4b2824caea3e8d180244 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 2 Dec 2023 15:06:09 +0100 Subject: [PATCH 104/429] dircolors -p: generate it dynamically --- src/uu/dircolors/src/colors.rs | 225 --------------------- src/uu/dircolors/src/dircolors.rs | 65 +++++- tests/fixtures/dircolors/internal.expected | 60 ++---- 3 files changed, 75 insertions(+), 275 deletions(-) delete mode 100644 src/uu/dircolors/src/colors.rs diff --git a/src/uu/dircolors/src/colors.rs b/src/uu/dircolors/src/colors.rs deleted file mode 100644 index c0a981db89c..00000000000 --- a/src/uu/dircolors/src/colors.rs +++ /dev/null @@ -1,225 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. -// spell-checker:ignore (ToDO) EIGHTBIT ETERM MULTIHARDLINK cpio dtterm jfbterm konsole kterm mlterm rmvb rxvt stat'able svgz tmux webm xspf COLORTERM tzst avif tzst mjpg mjpeg webp dpkg rpmnew rpmorig rpmsave - -pub const INTERNAL_DB: &str = r#"# Configuration file for dircolors, a utility to help you set the -# LS_COLORS environment variable used by GNU ls with the --color option. -# Copyright (C) 1996-2022 Free Software Foundation, Inc. -# Copying and distribution of this file, with or without modification, -# are permitted provided the copyright notice and this notice are preserved. -# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the -# slackware version of dircolors) are recognized but ignored. -# Global config options can be specified before TERM or COLORTERM entries -# Below are TERM or COLORTERM entries, which can be glob patterns, which -# restrict following config to systems with matching environment variables. -COLORTERM ?* -TERM Eterm -TERM ansi -TERM *color* -TERM con[0-9]*x[0-9]* -TERM cons25 -TERM console -TERM cygwin -TERM *direct* -TERM dtterm -TERM gnome -TERM hurd -TERM jfbterm -TERM konsole -TERM kterm -TERM linux -TERM linux-c -TERM mlterm -TERM putty -TERM rxvt* -TERM screen* -TERM st -TERM terminator -TERM tmux* -TERM vt100 -TERM xterm* -# Below are the color init strings for the basic file types. -# One can use codes for 256 or more colors supported by modern terminals. -# The default color codes use the capabilities of an 8 color terminal -# with some additional attributes as per the following codes: -# Attribute codes: -# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed -# Text color codes: -# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white -# Background color codes: -# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white -#NORMAL 00 # no color code at all -#FILE 00 # regular file: use no color at all -RESET 0 # reset to "normal" color -DIR 01;34 # directory -LINK 01;36 # symbolic link. (If you set this to 'target' instead of a - # numerical value, the color is as for the file pointed to.) -MULTIHARDLINK 00 # regular file with more than one link -FIFO 40;33 # pipe -SOCK 01;35 # socket -DOOR 01;35 # door -BLK 40;33;01 # block device driver -CHR 40;33;01 # character device driver -ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... and the files they point to -SETUID 37;41 # file that is setuid (u+s) -SETGID 30;43 # file that is setgid (g+s) -CAPABILITY 00 # file with capability (very expensive to lookup) -STICKY_OTHER_WRITABLE 30;42 # dir that is sticky and other-writable (+t,o+w) -OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky -STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable -# This is for files with execute permission: -EXEC 01;32 -# List any file extensions like '.gz' or '.tar' that you would like ls -# to color below. Put the extension, a space, and the color init string. -# (and any comments you want to add after a '#') -# If you use DOS-style suffixes, you may want to uncomment the following: -#.cmd 01;32 # executables (bright green) -#.exe 01;32 -#.com 01;32 -#.btm 01;32 -#.bat 01;32 -# Or if you want to color scripts even if they do not have the -# executable bit actually set. -#.sh 01;32 -#.csh 01;32 - # archives or compressed (bright red) -.tar 01;31 -.tgz 01;31 -.arc 01;31 -.arj 01;31 -.taz 01;31 -.lha 01;31 -.lz4 01;31 -.lzh 01;31 -.lzma 01;31 -.tlz 01;31 -.txz 01;31 -.tzo 01;31 -.t7z 01;31 -.zip 01;31 -.z 01;31 -.dz 01;31 -.gz 01;31 -.lrz 01;31 -.lz 01;31 -.lzo 01;31 -.xz 01;31 -.zst 01;31 -.tzst 01;31 -.bz2 01;31 -.bz 01;31 -.tbz 01;31 -.tbz2 01;31 -.tz 01;31 -.deb 01;31 -.rpm 01;31 -.jar 01;31 -.war 01;31 -.ear 01;31 -.sar 01;31 -.rar 01;31 -.alz 01;31 -.ace 01;31 -.zoo 01;31 -.cpio 01;31 -.7z 01;31 -.rz 01;31 -.cab 01;31 -.wim 01;31 -.swm 01;31 -.dwm 01;31 -.esd 01;31 -# image formats -.avif 01;35 -.jpg 01;35 -.jpeg 01;35 -.mjpg 01;35 -.mjpeg 01;35 -.gif 01;35 -.bmp 01;35 -.pbm 01;35 -.pgm 01;35 -.ppm 01;35 -.tga 01;35 -.xbm 01;35 -.xpm 01;35 -.tif 01;35 -.tiff 01;35 -.png 01;35 -.svg 01;35 -.svgz 01;35 -.mng 01;35 -.pcx 01;35 -.mov 01;35 -.mpg 01;35 -.mpeg 01;35 -.m2v 01;35 -.mkv 01;35 -.webm 01;35 -.webp 01;35 -.ogm 01;35 -.mp4 01;35 -.m4v 01;35 -.mp4v 01;35 -.vob 01;35 -.qt 01;35 -.nuv 01;35 -.wmv 01;35 -.asf 01;35 -.rm 01;35 -.rmvb 01;35 -.flc 01;35 -.avi 01;35 -.fli 01;35 -.flv 01;35 -.gl 01;35 -.dl 01;35 -.xcf 01;35 -.xwd 01;35 -.yuv 01;35 -.cgm 01;35 -.emf 01;35 -# https://wiki.xiph.org/MIME_Types_and_File_Extensions -.ogv 01;35 -.ogx 01;35 -# audio formats -.aac 00;36 -.au 00;36 -.flac 00;36 -.m4a 00;36 -.mid 00;36 -.midi 00;36 -.mka 00;36 -.mp3 00;36 -.mpc 00;36 -.ogg 00;36 -.ra 00;36 -.wav 00;36 -# https://wiki.xiph.org/MIME_Types_and_File_Extensions -.oga 00;36 -.opus 00;36 -.spx 00;36 -.xspf 00;36 -# backup files -*~ 00;90 -*# 00;90 -.bak 00;90 -.old 00;90 -.orig 00;90 -.part 00;90 -.rej 00;90 -.swp 00;90 -.tmp 00;90 -.dpkg-dist 00;90 -.dpkg-old 00;90 -.ucf-dist 00;90 -.ucf-new 00;90 -.ucf-old 00;90 -.rpmnew 00;90 -.rpmorig 00;90 -.rpmsave 00;90 -# Subsequent TERM or COLORTERM entries, can be used to add / override -# config specific to those matching environment variables."#; diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 43e35c3d2a9..338bf83e514 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -10,9 +10,10 @@ use std::env; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::Path; +use std::fmt::Write; use clap::{crate_version, Arg, ArgAction, Command}; -use uucore::colors::{FILE_ATTRIBUTE_CODES, FILE_COLORS, FILE_TYPES}; +use uucore::colors::{FILE_ATTRIBUTE_CODES, FILE_COLORS, FILE_TYPES, TERMS}; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::{help_about, help_section, help_usage}; @@ -29,9 +30,6 @@ const USAGE: &str = help_usage!("dircolors.md"); const ABOUT: &str = help_about!("dircolors.md"); const AFTER_HELP: &str = help_section!("after help", "dircolors.md"); -mod colors; -use self::colors::INTERNAL_DB; - #[derive(PartialEq, Eq, Debug)] pub enum OutputFmt { Shell, @@ -181,7 +179,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { ), )); } - println!("{INTERNAL_DB}"); + + println!("{}", generate_dircolors_config()); return Ok(()); } @@ -222,6 +221,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { )); } else if files[0].eq("-") { let fin = BufReader::new(std::io::stdin()); + // For example, for echo "owt 40;33"|dircolors -b - result = parse(fin.lines().map_while(Result::ok), &out_format, files[0]); } else { let path = Path::new(files[0]); @@ -368,7 +368,7 @@ enum ParseState { use uucore::{format_usage, parse_glob}; #[allow(clippy::cognitive_complexity)] -fn parse(lines: T, fmt: &OutputFmt, fp: &str) -> Result +fn parse(user_input: T, fmt: &OutputFmt, fp: &str) -> Result where T: IntoIterator, T::Item: Borrow, @@ -384,7 +384,7 @@ where let mut state = ParseState::Global; - for (num, line) in lines.into_iter().enumerate() { + for (num, line) in user_input.into_iter().enumerate() { let num = num + 1; let line = line.borrow().purify(); if line.is_empty() { @@ -396,13 +396,12 @@ where let (key, val) = line.split_two(); if val.is_empty() { return Err(format!( - "{}:{}: invalid line; missing second token", + "{}:{}: invalid line; missing second token", fp.maybe_quote(), num )); } let lower = key.to_lowercase(); - if lower == "term" || lower == "colorterm" { if term.fnmatch(val) { state = ParseState::Matched; @@ -417,6 +416,7 @@ where } if state != ParseState::Pass { let search_key = lower.as_str(); + if key.starts_with('.') { if *fmt == OutputFmt::Display { result.push_str(format!("\x1b[{val}m*{key}\t{val}\x1b[0m\n").as_str()); @@ -482,6 +482,53 @@ fn escape(s: &str) -> String { result } + +pub fn generate_dircolors_config() -> String { + let mut config = String::new(); + + // Adding the complete header comments as in the original file + writeln!(config, "# Configuration file for dircolors, a utility to help you set the").unwrap(); + writeln!(config, "# LS_COLORS environment variable used by GNU ls with the --color option.").unwrap(); + writeln!(config, "# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the").unwrap(); + writeln!(config, "# slackware version of dircolors) are recognized but ignored.").unwrap(); + writeln!(config, "# Global config options can be specified before TERM or COLORTERM entries").unwrap(); + writeln!(config, "# Below are TERM or COLORTERM entries, which can be glob patterns, which").unwrap(); + writeln!(config, "# restrict following config to systems with matching environment variables.").unwrap(); + writeln!(config, "COLORTERM ?*").unwrap(); + for term in TERMS.iter() { + writeln!(config, "TERM {}", term).unwrap(); + } + + // Adding file types and their color codes with header + writeln!(config, "# Below are the color init strings for the basic file types.").unwrap(); + writeln!(config, "# One can use codes for 256 or more colors supported by modern terminals.").unwrap(); + writeln!(config, "# The default color codes use the capabilities of an 8 color terminal").unwrap(); + writeln!(config, "# with some additional attributes as per the following codes:").unwrap(); + writeln!(config, "# Attribute codes:").unwrap(); + writeln!(config, "# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed").unwrap(); + writeln!(config, "# Text color codes:").unwrap(); + writeln!(config, "# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white").unwrap(); + writeln!(config, "# Background color codes:").unwrap(); + writeln!(config, "# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white").unwrap(); + writeln!(config, "#NORMAL 00 # no color code at all").unwrap(); + writeln!(config, "#FILE 00 # regular file: use no color at all").unwrap(); + + for (name, _, code) in FILE_TYPES.iter() { + writeln!(config, "{} {}", name, code).unwrap(); + } + + writeln!(config, "# List any file extensions like '.gz' or '.tar' that you would like ls").unwrap(); + writeln!(config, "# to color below. Put the extension, a space, and the color init string.").unwrap(); + + for (ext, color) in FILE_COLORS.iter() { + writeln!(config, "{} {}", ext, color).unwrap(); + } + writeln!(config, "# Subsequent TERM or COLORTERM entries, can be used to add / override").unwrap(); + write!(config, "# config specific to those matching environment variables.").unwrap(); + + config +} + #[cfg(test)] mod tests { use super::escape; diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected index 7bc91ef470c..933e70bc470 100644 --- a/tests/fixtures/dircolors/internal.expected +++ b/tests/fixtures/dircolors/internal.expected @@ -1,8 +1,5 @@ # Configuration file for dircolors, a utility to help you set the # LS_COLORS environment variable used by GNU ls with the --color option. -# Copyright (C) 1996-2022 Free Software Foundation, Inc. -# Copying and distribution of this file, with or without modification, -# are permitted provided the copyright notice and this notice are preserved. # The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the # slackware version of dircolors) are recognized but ignored. # Global config options can be specified before TERM or COLORTERM entries @@ -46,40 +43,26 @@ TERM xterm* # 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white #NORMAL 00 # no color code at all #FILE 00 # regular file: use no color at all -RESET 0 # reset to "normal" color -DIR 01;34 # directory -LINK 01;36 # symbolic link. (If you set this to 'target' instead of a - # numerical value, the color is as for the file pointed to.) -MULTIHARDLINK 00 # regular file with more than one link -FIFO 40;33 # pipe -SOCK 01;35 # socket -DOOR 01;35 # door -BLK 40;33;01 # block device driver -CHR 40;33;01 # character device driver -ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... and the files they point to -SETUID 37;41 # file that is setuid (u+s) -SETGID 30;43 # file that is setgid (g+s) -CAPABILITY 00 # file with capability (very expensive to lookup) -STICKY_OTHER_WRITABLE 30;42 # dir that is sticky and other-writable (+t,o+w) -OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky -STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable -# This is for files with execute permission: +RESET 0 +DIR 01;34 +LINK 01;36 +MULTIHARDLINK 00 +FIFO 40;33 +SOCK 01;35 +DOOR 01;35 +BLK 40;33;01 +CHR 40;33;01 +ORPHAN 40;31;01 +MISSING 00 +SETUID 37;41 +SETGID 30;43 +CAPABILITY 00 +STICKY_OTHER_WRITABLE 30;42 +OTHER_WRITABLE 34;42 +STICKY 37;44 EXEC 01;32 # List any file extensions like '.gz' or '.tar' that you would like ls # to color below. Put the extension, a space, and the color init string. -# (and any comments you want to add after a '#') -# If you use DOS-style suffixes, you may want to uncomment the following: -#.cmd 01;32 # executables (bright green) -#.exe 01;32 -#.com 01;32 -#.btm 01;32 -#.bat 01;32 -# Or if you want to color scripts even if they do not have the -# executable bit actually set. -#.sh 01;32 -#.csh 01;32 - # archives or compressed (bright red) .tar 01;31 .tgz 01;31 .arc 01;31 @@ -126,7 +109,6 @@ EXEC 01;32 .swm 01;31 .dwm 01;31 .esd 01;31 -# image formats .avif 01;35 .jpg 01;35 .jpeg 01;35 @@ -176,10 +158,8 @@ EXEC 01;32 .yuv 01;35 .cgm 01;35 .emf 01;35 -# https://wiki.xiph.org/MIME_Types_and_File_Extensions .ogv 01;35 .ogx 01;35 -# audio formats .aac 00;36 .au 00;36 .flac 00;36 @@ -192,14 +172,12 @@ EXEC 01;32 .ogg 00;36 .ra 00;36 .wav 00;36 -# https://wiki.xiph.org/MIME_Types_and_File_Extensions .oga 00;36 .opus 00;36 .spx 00;36 .xspf 00;36 -# backup files -*~ 00;90 -*# 00;90 +~ 00;90 +# 00;90 .bak 00;90 .old 00;90 .orig 00;90 From 1a4ca7e65dc6991f8584c6f03d493bace427ff2d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 2 Dec 2023 15:18:20 +0100 Subject: [PATCH 105/429] fix clippy warnings --- src/uu/dircolors/src/dircolors.rs | 143 +++++++++++++++++--------- src/uucore/src/lib/features/colors.rs | 2 +- 2 files changed, 98 insertions(+), 47 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 338bf83e514..28d74775db8 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -7,10 +7,10 @@ use std::borrow::Borrow; use std::env; +use std::fmt::Write; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::Path; -use std::fmt::Write; use clap::{crate_version, Arg, ArgAction, Command}; use uucore::colors::{FILE_ATTRIBUTE_CODES, FILE_COLORS, FILE_TYPES, TERMS}; @@ -92,13 +92,7 @@ pub fn generate_type_output(fmt: &OutputFmt) -> String { } } -enum ExtensionFormat { - StarDot, // Format as ".*ext" - Dot, // Format as ".ext" - NoDot, // Format as "ext" -} - -fn generate_ls_colors(fmt: &OutputFmt, format: ExtensionFormat, sep: &str) -> String { +fn generate_ls_colors(fmt: &OutputFmt, sep: &str) -> String { match fmt { OutputFmt::Display => { let mut display_parts = vec![]; @@ -113,25 +107,15 @@ fn generate_ls_colors(fmt: &OutputFmt, format: ExtensionFormat, sep: &str) -> St // existing logic for other formats let mut parts = vec![]; for &(extension, code) in FILE_COLORS.iter() { - let formatted_extension = match format { - ExtensionFormat::StarDot => format!("*{}", extension), - ExtensionFormat::Dot => extension.to_string(), - ExtensionFormat::NoDot => { - if extension.starts_with('.') { - extension[1..].to_string() - } else { - extension.to_string() - } - } - }; + let formatted_extension = format!("*{}", extension); parts.push(format!("{}={}", formatted_extension, code)); } - let (prefix, suffix) = get_colors_format_strings(&fmt); + let (prefix, suffix) = get_colors_format_strings(fmt); let ls_colors = parts.join(sep); format!( "{}{}:{}:{}", prefix, - generate_type_output(&fmt), + generate_type_output(fmt), ls_colors, suffix ) @@ -208,10 +192,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result; if files.is_empty() { - println!( - "{}", - generate_ls_colors(&out_format, ExtensionFormat::StarDot, ":") - ); + println!("{}", generate_ls_colors(&out_format, ":")); return Ok(()); } else if files.len() > 1 { @@ -373,9 +354,8 @@ where T: IntoIterator, T::Item: Borrow, { - // 1790 > $(dircolors | wc -m) let mut result = String::with_capacity(1790); - let (prefix, suffix) = get_colors_format_strings(&fmt); + let (prefix, suffix) = get_colors_format_strings(fmt); result.push_str(&prefix); @@ -482,34 +462,89 @@ fn escape(s: &str) -> String { result } - pub fn generate_dircolors_config() -> String { let mut config = String::new(); // Adding the complete header comments as in the original file - writeln!(config, "# Configuration file for dircolors, a utility to help you set the").unwrap(); - writeln!(config, "# LS_COLORS environment variable used by GNU ls with the --color option.").unwrap(); - writeln!(config, "# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the").unwrap(); - writeln!(config, "# slackware version of dircolors) are recognized but ignored.").unwrap(); - writeln!(config, "# Global config options can be specified before TERM or COLORTERM entries").unwrap(); - writeln!(config, "# Below are TERM or COLORTERM entries, which can be glob patterns, which").unwrap(); - writeln!(config, "# restrict following config to systems with matching environment variables.").unwrap(); + writeln!( + config, + "# Configuration file for dircolors, a utility to help you set the" + ) + .unwrap(); + writeln!( + config, + "# LS_COLORS environment variable used by GNU ls with the --color option." + ) + .unwrap(); + writeln!( + config, + "# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the" + ) + .unwrap(); + writeln!( + config, + "# slackware version of dircolors) are recognized but ignored." + ) + .unwrap(); + writeln!( + config, + "# Global config options can be specified before TERM or COLORTERM entries" + ) + .unwrap(); + writeln!( + config, + "# Below are TERM or COLORTERM entries, which can be glob patterns, which" + ) + .unwrap(); + writeln!( + config, + "# restrict following config to systems with matching environment variables." + ) + .unwrap(); writeln!(config, "COLORTERM ?*").unwrap(); for term in TERMS.iter() { writeln!(config, "TERM {}", term).unwrap(); } // Adding file types and their color codes with header - writeln!(config, "# Below are the color init strings for the basic file types.").unwrap(); - writeln!(config, "# One can use codes for 256 or more colors supported by modern terminals.").unwrap(); - writeln!(config, "# The default color codes use the capabilities of an 8 color terminal").unwrap(); - writeln!(config, "# with some additional attributes as per the following codes:").unwrap(); + writeln!( + config, + "# Below are the color init strings for the basic file types." + ) + .unwrap(); + writeln!( + config, + "# One can use codes for 256 or more colors supported by modern terminals." + ) + .unwrap(); + writeln!( + config, + "# The default color codes use the capabilities of an 8 color terminal" + ) + .unwrap(); + writeln!( + config, + "# with some additional attributes as per the following codes:" + ) + .unwrap(); writeln!(config, "# Attribute codes:").unwrap(); - writeln!(config, "# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed").unwrap(); + writeln!( + config, + "# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed" + ) + .unwrap(); writeln!(config, "# Text color codes:").unwrap(); - writeln!(config, "# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white").unwrap(); + writeln!( + config, + "# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white" + ) + .unwrap(); writeln!(config, "# Background color codes:").unwrap(); - writeln!(config, "# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white").unwrap(); + writeln!( + config, + "# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white" + ) + .unwrap(); writeln!(config, "#NORMAL 00 # no color code at all").unwrap(); writeln!(config, "#FILE 00 # regular file: use no color at all").unwrap(); @@ -517,14 +552,30 @@ pub fn generate_dircolors_config() -> String { writeln!(config, "{} {}", name, code).unwrap(); } - writeln!(config, "# List any file extensions like '.gz' or '.tar' that you would like ls").unwrap(); - writeln!(config, "# to color below. Put the extension, a space, and the color init string.").unwrap(); + writeln!( + config, + "# List any file extensions like '.gz' or '.tar' that you would like ls" + ) + .unwrap(); + writeln!( + config, + "# to color below. Put the extension, a space, and the color init string." + ) + .unwrap(); for (ext, color) in FILE_COLORS.iter() { writeln!(config, "{} {}", ext, color).unwrap(); } - writeln!(config, "# Subsequent TERM or COLORTERM entries, can be used to add / override").unwrap(); - write!(config, "# config specific to those matching environment variables.").unwrap(); + writeln!( + config, + "# Subsequent TERM or COLORTERM entries, can be used to add / override" + ) + .unwrap(); + write!( + config, + "# config specific to those matching environment variables." + ) + .unwrap(); config } diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 96ca6d45672..81d117a50b3 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. - +// cSpell:disable use once_cell::sync::Lazy; /* The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the From 4903b91973005f039f468d0f7740daf24b1ce282 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 6 Nov 2023 09:25:54 +0100 Subject: [PATCH 106/429] du: call unused _du_basics() in test_du_basics() --- tests/by-util/test_du.rs | 54 +++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index c07de2851ee..8ed7ce8c0c1 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -22,24 +22,54 @@ const SUB_LINK: &str = "subdir/links/sublink.txt"; #[test] fn test_du_basics() { - new_ucmd!().succeeds().no_stderr(); + let ts = TestScenario::new(util_name!()); + + let result = ts.ucmd().succeeds(); + + #[cfg(any(target_os = "linux", target_os = "android"))] + { + let result_reference = unwrap_or_return!(expected_result(&ts, &[])); + if result_reference.succeeded() { + assert_eq!(result.stdout_str(), result_reference.stdout_str()); + return; + } + } + _du_basics(result.stdout_str()); } + #[cfg(target_vendor = "apple")] fn _du_basics(s: &str) { - let answer = "32\t./subdir -8\t./subdir/deeper -24\t./subdir/links -40\t. -"; + let answer = concat!( + "4\t./subdir/deeper/deeper_dir\n", + "8\t./subdir/deeper\n", + "12\t./subdir/links\n", + "20\t./subdir\n", + "24\t.\n" + ); + assert_eq!(s, answer); +} + +#[cfg(target_os = "windows")] +fn _du_basics(s: &str) { + let answer = concat!( + "0\t.\\subdir\\deeper\\deeper_dir\n", + "0\t.\\subdir\\deeper\n", + "8\t.\\subdir\\links\n", + "8\t.\\subdir\n", + "8\t.\n" + ); assert_eq!(s, answer); } -#[cfg(not(target_vendor = "apple"))] + +#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"),))] fn _du_basics(s: &str) { - let answer = "28\t./subdir -8\t./subdir/deeper -16\t./subdir/links -36\t. -"; + let answer = concat!( + "8\t./subdir/deeper/deeper_dir\n", + "16\t./subdir/deeper\n", + "16\t./subdir/links\n", + "36\t./subdir\n", + "44\t.\n" + ); assert_eq!(s, answer); } From 0ea1a7cd88c2a6d02ff4c1e69f5a9fa70c9bc135 Mon Sep 17 00:00:00 2001 From: Nathan Houghton Date: Sat, 21 Oct 2023 20:22:25 -0700 Subject: [PATCH 107/429] tests/cp, tests/test: Don't attempt to set sticky file bit on FreeBSD On FreeBSD (and OpenBSD), only the superuser can set the file sticky bit. --- tests/by-util/test_cp.rs | 20 +++++++++++++++----- tests/by-util/test_test.rs | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index 36bca68b0d0..37bec522238 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -13,7 +13,7 @@ use std::os::unix::fs; #[cfg(unix)] use std::os::unix::fs::MetadataExt; -#[cfg(all(unix, not(target_os = "freebsd")))] +#[cfg(unix)] use std::os::unix::fs::PermissionsExt; #[cfg(windows)] use std::os::windows::fs::symlink_file; @@ -2381,13 +2381,18 @@ fn test_copy_symlink_force() { } #[test] -#[cfg(all(unix, not(target_os = "freebsd")))] +#[cfg(unix)] fn test_no_preserve_mode() { use std::os::unix::prelude::MetadataExt; use uucore::mode::get_umask; - const PERMS_ALL: u32 = 0o7777; + const PERMS_ALL: u32 = if cfg!(target_os = "freebsd") { + // Only the superuser can set the sticky bit on a file. + 0o6777 + } else { + 0o7777 + }; let (at, mut ucmd) = at_and_ucmd!(); at.touch("file"); @@ -2407,11 +2412,16 @@ fn test_no_preserve_mode() { } #[test] -#[cfg(all(unix, not(target_os = "freebsd")))] +#[cfg(unix)] fn test_preserve_mode() { use std::os::unix::prelude::MetadataExt; - const PERMS_ALL: u32 = 0o7777; + const PERMS_ALL: u32 = if cfg!(target_os = "freebsd") { + // Only the superuser can set the sticky bit on a file. + 0o6777 + } else { + 0o7777 + }; let (at, mut ucmd) = at_and_ucmd!(); at.touch("file"); diff --git a/tests/by-util/test_test.rs b/tests/by-util/test_test.rs index 922d854c640..b91bc727d40 100644 --- a/tests/by-util/test_test.rs +++ b/tests/by-util/test_test.rs @@ -553,7 +553,7 @@ fn test_nonexistent_file_is_not_symlink() { } #[test] -// FixME: freebsd fails with 'chmod: sticky_file: Inappropriate file type or format' +// Only the superuser is allowed to set the sticky bit on files on FreeBSD. // Windows has no concept of sticky bit #[cfg(not(any(windows, target_os = "freebsd")))] fn test_file_is_sticky() { From 117ab7737ac1aa61ef4759837ee7718ba016f860 Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sat, 2 Dec 2023 17:25:57 +0000 Subject: [PATCH 108/429] Optimize expr for numerical values --- src/uu/expr/src/expr.rs | 2 +- src/uu/expr/src/syntax_tree.rs | 106 +++++++++++++++++++++------------ 2 files changed, 69 insertions(+), 39 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index c271f0935fd..91d7a878820 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -108,7 +108,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(|v| v.into_iter().map(|s| s.as_ref()).collect::>()) .unwrap_or_default(); - let res = AstNode::parse(&token_strings)?.eval()?; + let res = AstNode::parse(&token_strings)?.eval()?.to_string(); println!("{res}"); if !is_truthy(&res) { return Err(1.into()); diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index f81f1da1ec4..705864f3a6c 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -6,6 +6,7 @@ // spell-checker:ignore (ToDO) ints paren prec multibytes use num_bigint::BigInt; +use num_traits::ToPrimitive; use onig::{Regex, RegexOptions, Syntax}; use crate::{ExprError, ExprResult}; @@ -45,7 +46,7 @@ pub enum StringOp { } impl BinOp { - fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { match self { Self::Relation(op) => op.eval(left, right), Self::Numeric(op) => op.eval(left, right), @@ -55,10 +56,10 @@ impl BinOp { } impl RelationOp { - fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { + fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { let a = a.eval()?; let b = b.eval()?; - let b = if let (Ok(a), Ok(b)) = (a.parse::(), b.parse::()) { + let b = if let (NumOrStr::Num(a), NumOrStr::Num(b)) = (&a, &b) { match self { Self::Lt => a < b, Self::Leq => a <= b, @@ -79,24 +80,22 @@ impl RelationOp { } }; if b { - Ok("1".into()) + Ok(NumOrStr::Num(BigInt::from(1))) } else { - Ok("0".into()) + Ok(NumOrStr::Num(BigInt::from(0))) } } } impl NumericOp { - fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { let a: BigInt = left .eval()? - .parse() - .map_err(|_| ExprError::NonIntegerArgument)?; + .to_bigint()?; let b: BigInt = right .eval()? - .parse() - .map_err(|_| ExprError::NonIntegerArgument)?; - Ok(match self { + .to_bigint()?; + Ok(NumOrStr::Num(match self { Self::Add => a + b, Self::Sub => a - b, Self::Mul => a * b, @@ -110,67 +109,66 @@ impl NumericOp { }; a % b } - } - .to_string()) + })) } } impl StringOp { - fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { match self { Self::Or => { let left = left.eval()?; - if is_truthy(&left) { + if is_truthy(&left.to_string()) { return Ok(left); } let right = right.eval()?; - if is_truthy(&right) { + if is_truthy(&right.to_string()) { return Ok(right); } - Ok("0".into()) + Ok(NumOrStr::Num(BigInt::from(0))) } Self::And => { let left = left.eval()?; - if !is_truthy(&left) { - return Ok("0".into()); + if !is_truthy(&left.to_string()) { + return Ok(NumOrStr::Num(BigInt::from(0))); } let right = right.eval()?; - if !is_truthy(&right) { - return Ok("0".into()); + if !is_truthy(&right.to_string()) { + return Ok(NumOrStr::Num(BigInt::from(0))); } Ok(left) } Self::Match => { let left = left.eval()?; let right = right.eval()?; - let re_string = format!("^{}", &right); + let re_string = format!("^{}", right.to_string()); let re = Regex::with_options( &re_string, RegexOptions::REGEX_OPTION_NONE, Syntax::grep(), ) .map_err(|_| ExprError::InvalidRegexExpression)?; - Ok(if re.captures_len() > 0 { - re.captures(&left) + Ok(NumOrStr::Str(if re.captures_len() > 0 { + re.captures(&left.to_string()) .map(|captures| captures.at(1).unwrap()) .unwrap_or("") .to_string() } else { - re.find(&left) + re.find(&left.to_string()) .map_or("0".to_string(), |(start, end)| (end - start).to_string()) - }) + })) } Self::Index => { let left = left.eval()?; let right = right.eval()?; - for (current_idx, ch_h) in left.chars().enumerate() { - for ch_n in right.chars() { + for (current_idx, ch_h) in left.to_string().chars().enumerate() { + for ch_n in right.to_string().chars() { if ch_n == ch_h { - return Ok((current_idx + 1).to_string()); + return Ok(NumOrStr::Num(BigInt::from(current_idx + 1))); } } } - Ok("0".to_string()) + Ok(NumOrStr::Num(BigInt::from(0))) } } } @@ -200,6 +198,38 @@ const PRECEDENCE: &[&[(&str, BinOp)]] = &[ &[(":", BinOp::String(StringOp::Match))], ]; +#[derive(Debug, PartialEq, Eq, Ord, PartialOrd)] +pub enum NumOrStr { + Num(BigInt), + Str(String), +} + +impl NumOrStr { + pub fn to_usize(self: NumOrStr) -> Option { + match self.to_bigint() { + Ok(num) => {num.to_usize()} + Err(_) => {None}, + } + } + + pub fn to_string(self: &NumOrStr) -> String { + match self { + NumOrStr::Num(num) => {num.to_string()} + NumOrStr::Str(str) => {str.to_string()}, + } + } + + pub fn to_bigint(self: NumOrStr) -> ExprResult { + match self { + NumOrStr::Num(num) => {Ok(num)} + NumOrStr::Str(str) => { match str.parse::() { + Ok(val) => {Ok(val)}, + Err(_) => {Err(ExprError::NonIntegerArgument)} + }}, + } + } +} + #[derive(Debug, PartialEq, Eq)] pub enum AstNode { Leaf { @@ -225,9 +255,9 @@ impl AstNode { Parser::new(input).parse() } - pub fn eval(&self) -> ExprResult { + pub fn eval(&self) -> ExprResult { match self { - Self::Leaf { value } => Ok(value.into()), + Self::Leaf { value } => Ok(NumOrStr::Str(value.to_string())), Self::BinOp { op_type, left, @@ -238,7 +268,7 @@ impl AstNode { pos, length, } => { - let string = string.eval()?; + let string = string.eval()?.to_string(); // The GNU docs say: // @@ -247,16 +277,16 @@ impl AstNode { // // So we coerce errors into 0 to make that the only case we // have to care about. - let pos: usize = pos.eval()?.parse().unwrap_or(0); - let length: usize = length.eval()?.parse().unwrap_or(0); + let pos: usize = pos.eval()?.to_usize().unwrap_or(0); + let length: usize = length.eval()?.to_usize().unwrap_or(0); let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { - return Ok(String::new()); + return Ok(NumOrStr::Str(String::new())); }; - Ok(string.chars().skip(pos).take(length).collect()) + Ok(NumOrStr::Str(string.chars().skip(pos).take(length).collect())) } - Self::Length { string } => Ok(string.eval()?.chars().count().to_string()), + Self::Length { string } => Ok(NumOrStr::Num(BigInt::from(string.eval()?.to_string().chars().count()))), } } } From f4141100836ff382b1e217b96c278e1d47e01b74 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 3 Dec 2023 12:49:28 +0000 Subject: [PATCH 109/429] chore(deps): update vmactions/freebsd-vm action to v1.0.3 --- .github/workflows/freebsd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 02c0137e7b2..5261d103c59 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -39,7 +39,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.0.2 + uses: vmactions/freebsd-vm@v1.0.3 with: usesh: true sync: rsync @@ -131,7 +131,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.0.2 + uses: vmactions/freebsd-vm@v1.0.3 with: usesh: true sync: rsync From 49fb72ed21ddcb709426159b1336edded3778164 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 3 Dec 2023 14:07:03 +0100 Subject: [PATCH 110/429] du: disable test on Android --- tests/by-util/test_du.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index c07de2851ee..10f32caf52e 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -441,6 +441,7 @@ fn test_du_inodes() { } } +#[cfg(not(target_os = "android"))] #[test] fn test_du_inodes_with_count_links() { let ts = TestScenario::new(util_name!()); From a6d15d67caf13c86aa2973ea45a7ed7758c8f50b Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 1 Dec 2023 15:15:44 +0100 Subject: [PATCH 111/429] ls: cleanup "spell-checker:ignore" entries --- src/uu/ls/src/ls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index cba9cdf5375..6e7fe405b16 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) cpio svgz webm somegroup nlink rmvb xspf tabsize dired subdired dtype +// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype use clap::{ builder::{NonEmptyStringValueParser, ValueParser}, From c77d389f5b93399bdeb6007f24d745c94e64f454 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 1 Dec 2023 16:19:13 +0100 Subject: [PATCH 112/429] ls: improve some var names related to block sizes --- src/uu/ls/src/ls.rs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 6e7fe405b16..3745cfc9df3 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -741,14 +741,14 @@ impl Config { let mut needs_color = extract_color(options); - let cmd_line_bs = options.get_one::(options::size::BLOCK_SIZE); - let opt_si = cmd_line_bs.is_some() + let opt_block_size = options.get_one::(options::size::BLOCK_SIZE); + let opt_si = opt_block_size.is_some() && options .get_one::(options::size::BLOCK_SIZE) .unwrap() .eq("si") || options.get_flag(options::size::SI); - let opt_hr = (cmd_line_bs.is_some() + let opt_hr = (opt_block_size.is_some() && options .get_one::(options::size::BLOCK_SIZE) .unwrap() @@ -756,9 +756,9 @@ impl Config { || options.get_flag(options::size::HUMAN_READABLE); let opt_kb = options.get_flag(options::size::KIBIBYTES); - let bs_env_var = std::env::var_os("BLOCK_SIZE"); - let ls_bs_env_var = std::env::var_os("LS_BLOCK_SIZE"); - let pc_env_var = std::env::var_os("POSIXLY_CORRECT"); + let env_var_block_size = std::env::var_os("BLOCK_SIZE"); + let env_var_ls_block_size = std::env::var_os("LS_BLOCK_SIZE"); + let env_var_posixly_correct = std::env::var_os("POSIXLY_CORRECT"); let size_format = if opt_si { SizeFormat::Decimal @@ -768,13 +768,13 @@ impl Config { SizeFormat::Bytes }; - let raw_bs = if let Some(cmd_line_bs) = cmd_line_bs { - OsString::from(cmd_line_bs) + let raw_block_size = if let Some(opt_block_size) = opt_block_size { + OsString::from(opt_block_size) } else if !opt_kb { - if let Some(ls_bs_env_var) = ls_bs_env_var { - ls_bs_env_var - } else if let Some(bs_env_var) = bs_env_var { - bs_env_var + if let Some(env_var_ls_block_size) = env_var_ls_block_size { + env_var_ls_block_size + } else if let Some(env_var_block_size) = env_var_block_size { + env_var_block_size } else { OsString::from("") } @@ -782,15 +782,17 @@ impl Config { OsString::from("") }; - let block_size: Option = if !opt_si && !opt_hr && !raw_bs.is_empty() { - match parse_size_u64(&raw_bs.to_string_lossy()) { + let block_size: Option = if !opt_si && !opt_hr && !raw_block_size.is_empty() { + match parse_size_u64(&raw_block_size.to_string_lossy()) { Ok(size) => Some(size), Err(_) => { - show!(LsError::BlockSizeParseError(cmd_line_bs.unwrap().clone())); + show!(LsError::BlockSizeParseError( + opt_block_size.unwrap().clone() + )); None } } - } else if let Some(pc) = pc_env_var { + } else if let Some(pc) = env_var_posixly_correct { if pc.as_os_str() == OsStr::new("true") || pc == OsStr::new("1") { Some(POSIXLY_CORRECT_BLOCK_SIZE) } else { From 51fc2d7564b29de22936e8ca4a73883aeca77662 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 1 Dec 2023 16:27:05 +0100 Subject: [PATCH 113/429] ls: ignore value of POSIXLY_CORRECT --- src/uu/ls/src/ls.rs | 10 +++------- tests/by-util/test_ls.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 3745cfc9df3..f645e31d3c2 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -20,7 +20,7 @@ use std::os::windows::fs::MetadataExt; use std::{ cmp::Reverse, error::Error, - ffi::{OsStr, OsString}, + ffi::OsString, fmt::{Display, Write as FmtWrite}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{stdout, BufWriter, ErrorKind, Stdout, Write}, @@ -792,12 +792,8 @@ impl Config { None } } - } else if let Some(pc) = env_var_posixly_correct { - if pc.as_os_str() == OsStr::new("true") || pc == OsStr::new("1") { - Some(POSIXLY_CORRECT_BLOCK_SIZE) - } else { - None - } + } else if env_var_posixly_correct.is_some() { + Some(POSIXLY_CORRECT_BLOCK_SIZE) } else { None }; diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 19a3f5578fa..fcd57170d48 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -3828,3 +3828,30 @@ fn test_ls_cf_output_should_be_delimited_by_tab() { .succeeds() .stdout_is("a2345/\tb/\n"); } + +#[cfg(all(unix, feature = "dd"))] +#[test] +fn test_posixly_correct() { + let scene = TestScenario::new(util_name!()); + + scene + .ccmd("dd") + .arg("if=/dev/zero") + .arg("of=file") + .arg("bs=1024") + .arg("count=1") + .succeeds(); + + scene + .ucmd() + .arg("-s") + .succeeds() + .stdout_contains_line("total 4"); + + scene + .ucmd() + .arg("-s") + .env("POSIXLY_CORRECT", "some_value") + .succeeds() + .stdout_contains_line("total 8"); +} From d8a64a90ece80fc029860f66d7bf8858c79f9e91 Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sun, 3 Dec 2023 15:09:12 +0000 Subject: [PATCH 114/429] Formatting fixes in expr --- src/uu/expr/src/syntax_tree.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 705864f3a6c..1c74b97103b 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -89,12 +89,8 @@ impl RelationOp { impl NumericOp { fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { - let a: BigInt = left - .eval()? - .to_bigint()?; - let b: BigInt = right - .eval()? - .to_bigint()?; + let a: BigInt = left.eval()?.to_bigint()?; + let b: BigInt = right.eval()?.to_bigint()?; Ok(NumOrStr::Num(match self { Self::Add => a + b, Self::Sub => a - b, @@ -207,25 +203,25 @@ pub enum NumOrStr { impl NumOrStr { pub fn to_usize(self: NumOrStr) -> Option { match self.to_bigint() { - Ok(num) => {num.to_usize()} - Err(_) => {None}, + Ok(num) => num.to_usize(), + Err(_) => None, } } pub fn to_string(self: &NumOrStr) -> String { match self { - NumOrStr::Num(num) => {num.to_string()} - NumOrStr::Str(str) => {str.to_string()}, + NumOrStr::Num(num) => num.to_string(), + NumOrStr::Str(str) => str.to_string(), } } pub fn to_bigint(self: NumOrStr) -> ExprResult { match self { - NumOrStr::Num(num) => {Ok(num)} - NumOrStr::Str(str) => { match str.parse::() { - Ok(val) => {Ok(val)}, - Err(_) => {Err(ExprError::NonIntegerArgument)} - }}, + NumOrStr::Num(num) => Ok(num), + NumOrStr::Str(str) => match str.parse::() { + Ok(val) => Ok(val), + Err(_) => Err(ExprError::NonIntegerArgument), + }, } } } @@ -284,9 +280,13 @@ impl AstNode { return Ok(NumOrStr::Str(String::new())); }; - Ok(NumOrStr::Str(string.chars().skip(pos).take(length).collect())) + Ok(NumOrStr::Str( + string.chars().skip(pos).take(length).collect(), + )) } - Self::Length { string } => Ok(NumOrStr::Num(BigInt::from(string.eval()?.to_string().chars().count()))), + Self::Length { string } => Ok(NumOrStr::Num(BigInt::from( + string.eval()?.to_string().chars().count(), + ))), } } } From f8573d555133f22cfdfbfa198a265138277d3f36 Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sun, 3 Dec 2023 20:03:50 +0000 Subject: [PATCH 115/429] code and styling fixes in expr --- src/uu/expr/src/expr.rs | 4 +- src/uu/expr/src/syntax_tree.rs | 126 +++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 49 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 91d7a878820..b46034f845d 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -13,7 +13,7 @@ use uucore::{ format_usage, help_about, help_section, help_usage, }; -use crate::syntax_tree::is_truthy; +use crate::syntax_tree::{is_truthy, NumOrStr}; mod syntax_tree; @@ -110,7 +110,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let res = AstNode::parse(&token_strings)?.eval()?.to_string(); println!("{res}"); - if !is_truthy(&res) { + if !is_truthy(&NumOrStr::from(res)) { return Err(1.into()); } Ok(()) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 1c74b97103b..79ba8d9ae10 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -57,8 +57,8 @@ impl BinOp { impl RelationOp { fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { - let a = a.eval()?; - let b = b.eval()?; + let a = a.eval()?.coerce_num(); + let b = b.eval()?.coerce_num(); let b = if let (NumOrStr::Num(a), NumOrStr::Num(b)) = (&a, &b) { match self { Self::Lt => a < b, @@ -80,17 +80,17 @@ impl RelationOp { } }; if b { - Ok(NumOrStr::Num(BigInt::from(1))) + Ok(NumOrStr::from(1)) } else { - Ok(NumOrStr::Num(BigInt::from(0))) + Ok(NumOrStr::from(0)) } } } impl NumericOp { fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { - let a: BigInt = left.eval()?.to_bigint()?; - let b: BigInt = right.eval()?.to_bigint()?; + let a = left.eval()?.to_bigint()?; + let b = right.eval()?.to_bigint()?; Ok(NumOrStr::Num(match self { Self::Add => a + b, Self::Sub => a - b, @@ -114,23 +114,23 @@ impl StringOp { match self { Self::Or => { let left = left.eval()?; - if is_truthy(&left.to_string()) { + if is_truthy(&left) { return Ok(left); } let right = right.eval()?; - if is_truthy(&right.to_string()) { + if is_truthy(&right) { return Ok(right); } - Ok(NumOrStr::Num(BigInt::from(0))) + Ok(NumOrStr::from(0)) } Self::And => { let left = left.eval()?; - if !is_truthy(&left.to_string()) { - return Ok(NumOrStr::Num(BigInt::from(0))); + if !is_truthy(&left) { + return Ok(NumOrStr::from(0)); } let right = right.eval()?; - if !is_truthy(&right.to_string()) { - return Ok(NumOrStr::Num(BigInt::from(0))); + if !is_truthy(&right) { + return Ok(NumOrStr::from(0)); } Ok(left) } @@ -144,7 +144,7 @@ impl StringOp { Syntax::grep(), ) .map_err(|_| ExprError::InvalidRegexExpression)?; - Ok(NumOrStr::Str(if re.captures_len() > 0 { + Ok(NumOrStr::from(if re.captures_len() > 0 { re.captures(&left.to_string()) .map(|captures| captures.at(1).unwrap()) .unwrap_or("") @@ -155,16 +155,16 @@ impl StringOp { })) } Self::Index => { - let left = left.eval()?; - let right = right.eval()?; - for (current_idx, ch_h) in left.to_string().chars().enumerate() { + let left = left.eval()?.to_string(); + let right = right.eval()?.to_string(); + for (current_idx, ch_h) in left.chars().enumerate() { for ch_n in right.to_string().chars() { if ch_n == ch_h { - return Ok(NumOrStr::Num(BigInt::from(current_idx + 1))); + return Ok(NumOrStr::from(current_idx + 1)); } } } - Ok(NumOrStr::Num(BigInt::from(0))) + Ok(NumOrStr::from(0)) } } } @@ -200,27 +200,54 @@ pub enum NumOrStr { Str(String), } +impl From for NumOrStr { + fn from(num: usize) -> NumOrStr { + NumOrStr::Num(BigInt::from(num)) + } +} + +impl From for NumOrStr { + fn from(num: BigInt) -> NumOrStr { + NumOrStr::Num(num) + } +} + +impl From for NumOrStr { + fn from(str: String) -> NumOrStr { + NumOrStr::Str(str) + } +} + impl NumOrStr { - pub fn to_usize(self: NumOrStr) -> Option { + pub fn to_usize(self: Self) -> Option { match self.to_bigint() { Ok(num) => num.to_usize(), Err(_) => None, } } - pub fn to_string(self: &NumOrStr) -> String { + pub fn to_string(self: Self) -> String { match self { - NumOrStr::Num(num) => num.to_string(), - NumOrStr::Str(str) => str.to_string(), + Self::Num(num) => num.to_string(), + Self::Str(str) => str.to_string(), } } - pub fn to_bigint(self: NumOrStr) -> ExprResult { + pub fn to_bigint(self: Self) -> ExprResult { match self { - NumOrStr::Num(num) => Ok(num), - NumOrStr::Str(str) => match str.parse::() { - Ok(val) => Ok(val), - Err(_) => Err(ExprError::NonIntegerArgument), + Self::Num(num) => Ok(num), + Self::Str(str) => str + .parse::() + .map_err(|_| ExprError::NonIntegerArgument), + } + } + + pub fn coerce_num(self: Self) -> NumOrStr { + match self { + Self::Num(num) => Self::from(num), + Self::Str(str) => match str.parse::() { + Ok(num) => Self::from(num), + Err(_) => Self::from(str), }, } } @@ -253,7 +280,7 @@ impl AstNode { pub fn eval(&self) -> ExprResult { match self { - Self::Leaf { value } => Ok(NumOrStr::Str(value.to_string())), + Self::Leaf { value } => Ok(NumOrStr::from(value.to_string())), Self::BinOp { op_type, left, @@ -277,16 +304,16 @@ impl AstNode { let length: usize = length.eval()?.to_usize().unwrap_or(0); let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { - return Ok(NumOrStr::Str(String::new())); + return Ok(NumOrStr::from(String::new())); }; - Ok(NumOrStr::Str( - string.chars().skip(pos).take(length).collect(), + Ok(NumOrStr::from( + string.chars().skip(pos).take(length).collect::(), )) } - Self::Length { string } => Ok(NumOrStr::Num(BigInt::from( - string.eval()?.to_string().chars().count(), - ))), + Self::Length { string } => { + Ok(NumOrStr::from(string.eval()?.to_string().chars().count())) + } } } } @@ -429,21 +456,26 @@ impl<'a> Parser<'a> { /// Determine whether `expr` should evaluate the string as "truthy" /// /// Truthy strings are either empty or match the regex "-?0+". -pub fn is_truthy(s: &str) -> bool { - // Edge case: `-` followed by nothing is truthy - if s == "-" { - return true; - } +pub fn is_truthy(s: &NumOrStr) -> bool { + match s { + NumOrStr::Num(num) => num == &BigInt::from(0), + NumOrStr::Str(str) => { + // Edge case: `-` followed by nothing is truthy + if str == "-" { + return true; + } - let mut bytes = s.bytes(); + let mut bytes = str.bytes(); - // Empty string is falsy - let Some(first) = bytes.next() else { - return false; - }; + // Empty string is falsy + let Some(first) = bytes.next() else { + return false; + }; - let is_zero = (first == b'-' || first == b'0') && bytes.all(|b| b == b'0'); - !is_zero + let is_zero = (first == b'-' || first == b'0') && bytes.all(|b| b == b'0'); + !is_zero + } + } } #[cfg(test)] From 5672e3d9bdec3acc1d1ab22b0217b5fac17ab10b Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sun, 3 Dec 2023 22:07:56 +0000 Subject: [PATCH 116/429] Fix errors --- src/uu/expr/src/syntax_tree.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 79ba8d9ae10..a4cb99a8365 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (ToDO) ints paren prec multibytes -use num_bigint::BigInt; +use num_bigint::{BigInt, ParseBigIntError}; use num_traits::ToPrimitive; use onig::{Regex, RegexOptions, Syntax}; @@ -57,9 +57,9 @@ impl BinOp { impl RelationOp { fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { - let a = a.eval()?.coerce_num(); - let b = b.eval()?.coerce_num(); - let b = if let (NumOrStr::Num(a), NumOrStr::Num(b)) = (&a, &b) { + let a = a.eval()?; + let b = b.eval()?; + let b = if let (Ok(a), Ok(b)) = (&a.coerce_bigint(), &b.coerce_bigint()) { match self { Self::Lt => a < b, Self::Leq => a <= b, @@ -242,13 +242,10 @@ impl NumOrStr { } } - pub fn coerce_num(self: Self) -> NumOrStr { + pub fn coerce_bigint(self: &Self) -> Result { match self { - Self::Num(num) => Self::from(num), - Self::Str(str) => match str.parse::() { - Ok(num) => Self::from(num), - Err(_) => Self::from(str), - }, + Self::Num(num) => Ok(num.clone()), + Self::Str(str) => str.parse::(), } } } @@ -458,7 +455,7 @@ impl<'a> Parser<'a> { /// Truthy strings are either empty or match the regex "-?0+". pub fn is_truthy(s: &NumOrStr) -> bool { match s { - NumOrStr::Num(num) => num == &BigInt::from(0), + NumOrStr::Num(num) => num != &BigInt::from(0), NumOrStr::Str(str) => { // Edge case: `-` followed by nothing is truthy if str == "-" { From 21c041fa79b64d9f55b8672c4a74e22dd941fd96 Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sun, 3 Dec 2023 22:27:13 +0000 Subject: [PATCH 117/429] Fix lint issues in expr --- src/uu/expr/src/syntax_tree.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index a4cb99a8365..4f447e60f54 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -201,39 +201,39 @@ pub enum NumOrStr { } impl From for NumOrStr { - fn from(num: usize) -> NumOrStr { - NumOrStr::Num(BigInt::from(num)) + fn from(num: usize) -> Self { + Self::Num(BigInt::from(num)) } } impl From for NumOrStr { - fn from(num: BigInt) -> NumOrStr { - NumOrStr::Num(num) + fn from(num: BigInt) -> Self { + Self::Num(num) } } impl From for NumOrStr { - fn from(str: String) -> NumOrStr { - NumOrStr::Str(str) + fn from(str: String) -> Self { + Self::Str(str) } } impl NumOrStr { - pub fn to_usize(self: Self) -> Option { + pub fn to_usize(self) -> Option { match self.to_bigint() { Ok(num) => num.to_usize(), Err(_) => None, } } - pub fn to_string(self: Self) -> String { + pub fn to_string(self) -> String { match self { Self::Num(num) => num.to_string(), Self::Str(str) => str.to_string(), } } - pub fn to_bigint(self: Self) -> ExprResult { + pub fn to_bigint(self) -> ExprResult { match self { Self::Num(num) => Ok(num), Self::Str(str) => str @@ -242,7 +242,7 @@ impl NumOrStr { } } - pub fn coerce_bigint(self: &Self) -> Result { + pub fn coerce_bigint(&self) -> Result { match self { Self::Num(num) => Ok(num.clone()), Self::Str(str) => str.parse::(), From 9ecd6a296e06b6f20a5bc29f876a546cdafd020d Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Sun, 3 Dec 2023 23:32:51 +0000 Subject: [PATCH 118/429] Refactoring for lint issues --- src/uu/expr/src/expr.rs | 2 +- src/uu/expr/src/syntax_tree.rs | 53 ++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index b46034f845d..1a9bb07de4a 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -108,7 +108,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(|v| v.into_iter().map(|s| s.as_ref()).collect::>()) .unwrap_or_default(); - let res = AstNode::parse(&token_strings)?.eval()?.to_string(); + let res: String = AstNode::parse(&token_strings)?.eval()?.into(); println!("{res}"); if !is_truthy(&NumOrStr::from(res)) { return Err(1.into()); diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 4f447e60f54..7677b5e7e54 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -59,7 +59,7 @@ impl RelationOp { fn eval(&self, a: &AstNode, b: &AstNode) -> ExprResult { let a = a.eval()?; let b = b.eval()?; - let b = if let (Ok(a), Ok(b)) = (&a.coerce_bigint(), &b.coerce_bigint()) { + let b = if let (Ok(a), Ok(b)) = (&a.to_bigint(), &b.to_bigint()) { match self { Self::Lt => a < b, Self::Leq => a <= b, @@ -89,8 +89,8 @@ impl RelationOp { impl NumericOp { fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { - let a = left.eval()?.to_bigint()?; - let b = right.eval()?.to_bigint()?; + let a = >>::into(left.eval()?)?; + let b = >>::into(right.eval()?)?; Ok(NumOrStr::Num(match self { Self::Add => a + b, Self::Sub => a - b, @@ -135,9 +135,9 @@ impl StringOp { Ok(left) } Self::Match => { - let left = left.eval()?; - let right = right.eval()?; - let re_string = format!("^{}", right.to_string()); + let left: String = left.eval()?.into(); + let right: String = right.eval()?.into(); + let re_string = format!("^{}", right); let re = Regex::with_options( &re_string, RegexOptions::REGEX_OPTION_NONE, @@ -145,18 +145,18 @@ impl StringOp { ) .map_err(|_| ExprError::InvalidRegexExpression)?; Ok(NumOrStr::from(if re.captures_len() > 0 { - re.captures(&left.to_string()) + re.captures(&left) .map(|captures| captures.at(1).unwrap()) .unwrap_or("") .to_string() } else { - re.find(&left.to_string()) + re.find(&left) .map_or("0".to_string(), |(start, end)| (end - start).to_string()) })) } Self::Index => { - let left = left.eval()?.to_string(); - let right = right.eval()?.to_string(); + let left: String = left.eval()?.into(); + let right: String = right.eval()?.into(); for (current_idx, ch_h) in left.chars().enumerate() { for ch_n in right.to_string().chars() { if ch_n == ch_h { @@ -218,22 +218,26 @@ impl From for NumOrStr { } } -impl NumOrStr { - pub fn to_usize(self) -> Option { - match self.to_bigint() { +impl Into> for NumOrStr { + fn into(self) -> Option { + match self.into() { Ok(num) => num.to_usize(), Err(_) => None, } } +} - pub fn to_string(self) -> String { +impl Into for NumOrStr { + fn into(self) -> String { match self { Self::Num(num) => num.to_string(), Self::Str(str) => str.to_string(), } } +} - pub fn to_bigint(self) -> ExprResult { +impl Into> for NumOrStr { + fn into(self) -> ExprResult { match self { Self::Num(num) => Ok(num), Self::Str(str) => str @@ -241,8 +245,10 @@ impl NumOrStr { .map_err(|_| ExprError::NonIntegerArgument), } } +} - pub fn coerce_bigint(&self) -> Result { +impl NumOrStr { + pub fn to_bigint(&self) -> Result { match self { Self::Num(num) => Ok(num.clone()), Self::Str(str) => str.parse::(), @@ -288,7 +294,7 @@ impl AstNode { pos, length, } => { - let string = string.eval()?.to_string(); + let string: String = string.eval()?.into(); // The GNU docs say: // @@ -297,8 +303,9 @@ impl AstNode { // // So we coerce errors into 0 to make that the only case we // have to care about. - let pos: usize = pos.eval()?.to_usize().unwrap_or(0); - let length: usize = length.eval()?.to_usize().unwrap_or(0); + let pos: usize = >>::into(pos.eval()?).unwrap_or(0); + let length: usize = + >>::into(length.eval()?).unwrap_or(0); let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { return Ok(NumOrStr::from(String::new())); @@ -308,9 +315,11 @@ impl AstNode { string.chars().skip(pos).take(length).collect::(), )) } - Self::Length { string } => { - Ok(NumOrStr::from(string.eval()?.to_string().chars().count())) - } + Self::Length { string } => Ok(NumOrStr::from( + >::into(string.eval()?) + .chars() + .count(), + )), } } } From cf853df2dcf4309049ad3a0ff3ab531aabcb86ec Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 4 Dec 2023 14:48:25 +0100 Subject: [PATCH 119/429] ls: remove "#[allow(unused_variables)]" --- src/uu/ls/src/ls.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index cba9cdf5375..626e17ca14d 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -2946,7 +2946,6 @@ fn classify_file(path: &PathData, out: &mut BufWriter) -> Option { /// /// Note that non-unicode sequences in symlink targets are dealt with using /// [`std::path::Path::to_string_lossy`]. -#[allow(unused_variables)] #[allow(clippy::cognitive_complexity)] fn display_file_name( path: &PathData, From d4b22a192d9c4af1ed2b92c0b0197a790536e981 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 4 Dec 2023 19:09:30 +0000 Subject: [PATCH 120/429] chore(deps): update vmactions/freebsd-vm action to v1.0.4 --- .github/workflows/freebsd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 5261d103c59..27537cf6dab 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -39,7 +39,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.0.3 + uses: vmactions/freebsd-vm@v1.0.4 with: usesh: true sync: rsync @@ -131,7 +131,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.0.3 + uses: vmactions/freebsd-vm@v1.0.4 with: usesh: true sync: rsync From 9ef43191ff28af359989848dc65548aa253e4337 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 4 Dec 2023 21:20:47 +0100 Subject: [PATCH 121/429] Document that \0NNN and \xHH need more work --- fuzz/fuzz_targets/fuzz_echo.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/fuzz/fuzz_targets/fuzz_echo.rs b/fuzz/fuzz_targets/fuzz_echo.rs index 826fd6da3eb..fda7fd7276f 100644 --- a/fuzz/fuzz_targets/fuzz_echo.rs +++ b/fuzz/fuzz_targets/fuzz_echo.rs @@ -51,6 +51,7 @@ fn generate_escape_sequence(rng: &mut impl Rng) -> String { let escape_sequences = [ "\\\\", "\\a", "\\b", "\\c", "\\e", "\\f", "\\n", "\\r", "\\t", "\\v", "\\0NNN", "\\xHH", ]; + // \0NNN and \xHH need more work escape_sequences.choose(rng).unwrap().to_string() } From f5776bc511ec0fbe5dac93af190ef8a3c8d610be Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 4 Dec 2023 22:40:18 +0100 Subject: [PATCH 122/429] fix comment Co-authored-by: Terts Diepraam --- src/uucore/src/lib/features/colors.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 81d117a50b3..1f55db5a7d2 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -78,11 +78,11 @@ pub static FILE_TYPES: Lazy> = L ] }); -/* -# List any file extensions like '.gz' or '.tar' that you would like ls -# to color below. Put the extension, a space, and the color init string. -# (and any comments you want to add after a '#') -*/ +/// Colors for file types +/// +/// List any file extensions like '.gz' or '.tar' that you would like ls +/// to color below. Put the extension, a space, and the color init string. +/// (and any comments you want to add after a '#') pub static FILE_COLORS: Lazy> = Lazy::new(|| { vec![ /* From dabbcff9fbbbce7c0ec692431f73fb17657c8199 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 3 Dec 2023 10:34:33 +0100 Subject: [PATCH 123/429] dircolors: manage the --print-ls-colors pipe option --- src/uu/dircolors/src/dircolors.rs | 17 +++++++++++++---- tests/by-util/test_dircolors.rs | 10 ++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 28d74775db8..cf8ed62922c 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -9,6 +9,7 @@ use std::borrow::Borrow; use std::env; use std::fmt::Write; use std::fs::File; +use std::io::IsTerminal; use std::io::{BufRead, BufReader}; use std::path::Path; @@ -192,9 +193,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result; if files.is_empty() { - println!("{}", generate_ls_colors(&out_format, ":")); - - return Ok(()); + // Check if data is being piped into the program + if std::io::stdin().is_terminal() { + // No data piped, use default behavior + println!("{}", generate_ls_colors(&out_format, ":")); + return Ok(()); + } else { + // Data is piped, process the input from stdin + let fin = BufReader::new(std::io::stdin()); + result = parse(fin.lines().map_while(Result::ok), &out_format, "-"); + } } else if files.len() > 1 { return Err(UUsageError::new( 1, @@ -376,7 +384,8 @@ where let (key, val) = line.split_two(); if val.is_empty() { return Err(format!( - "{}:{}: invalid line; missing second token", + // The double space is what GNU is doing + "{}:{}: invalid line; missing second token", fp.maybe_quote(), num )); diff --git a/tests/by-util/test_dircolors.rs b/tests/by-util/test_dircolors.rs index d4fa0a3b0a3..e3752fcde98 100644 --- a/tests/by-util/test_dircolors.rs +++ b/tests/by-util/test_dircolors.rs @@ -159,6 +159,16 @@ fn test_quoting() { .no_stderr(); } +#[test] +fn test_print_ls_colors() { + new_ucmd!() + .pipe_in("OWT 40;33\n") + .args(&["--print-ls-colors"]) + .succeeds() + .stdout_is("\x1B[40;33mtw\t40;33\x1B[0m\n") + .no_stderr(); +} + #[test] fn test_extra_operand() { new_ucmd!() From 3e354109076eb1f806715e0d5c89006fd2a1a12a Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 4 Dec 2023 22:47:42 +0100 Subject: [PATCH 124/429] dircolors: fix comments --- src/uucore/src/lib/features/colors.rs | 512 +++++++++++++------------- 1 file changed, 250 insertions(+), 262 deletions(-) diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 1f55db5a7d2..81d2fd3f336 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -3,274 +3,262 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // cSpell:disable -use once_cell::sync::Lazy; -/* The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the - * slackware version of dircolors) are recognized but ignored. - * Global config options can be specified before TERM or COLORTERM entries - * below are TERM or COLORTERM entries, which can be glob patterns, which - * restrict following config to systems with matching environment variables. -*/ -pub static TERMS: Lazy> = Lazy::new(|| { - vec![ - "Eterm", - "ansi", - "*color*", - "con[0-9]*x[0-9]*", - "cons25", - "console", - "cygwin", - "*direct*", - "dtterm", - "gnome", - "hurd", - "jfbterm", - "konsole", - "kterm", - "linux", - "linux-c", - "mlterm", - "putty", - "rxvt*", - "screen*", - "st", - "terminator", - "tmux*", - "vt100", - "xterm*", - ] -}); +/// The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the +/// slackware version of dircolors) are recognized but ignored. +/// Global config options can be specified before TERM or COLORTERM entries +/// below are TERM or COLORTERM entries, which can be glob patterns, which +/// restrict following config to systems with matching environment variables. +pub static TERMS: &[&str] = &[ + "Eterm", + "ansi", + "*color*", + "con[0-9]*x[0-9]*", + "cons25", + "console", + "cygwin", + "*direct*", + "dtterm", + "gnome", + "hurd", + "jfbterm", + "konsole", + "kterm", + "linux", + "linux-c", + "mlterm", + "putty", + "rxvt*", + "screen*", + "st", + "terminator", + "tmux*", + "vt100", + "xterm*", +]; -/* -# Below are the color init strings for the basic file types. -# One can use codes for 256 or more colors supported by modern terminals. -# The default color codes use the capabilities of an 8 color terminal -# with some additional attributes as per the following codes: -# Attribute codes: -# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed -# Text color codes: -# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white -# Background color codes: -# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white -#NORMAL 00 # no color code at all -#FILE 00 # regular file: use no color at all -*/ -pub static FILE_TYPES: Lazy> = Lazy::new(|| { - vec![ - ("RESET", "rs", "0"), // reset to "normal" color - ("DIR", "di", "01;34"), // directory - ("LINK", "ln", "01;36"), // symbolic link - ("MULTIHARDLINK", "mh", "00"), // regular file with more than one link - ("FIFO", "pi", "40;33"), // pipe - ("SOCK", "so", "01;35"), // socket - ("DOOR", "do", "01;35"), // door - ("BLK", "bd", "40;33;01"), // block device driver - ("CHR", "cd", "40;33;01"), // character device driver - ("ORPHAN", "or", "40;31;01"), // symlink to nonexistent file, or non-stat'able file - ("MISSING", "mi", "00"), // ... and the files they point to - ("SETUID", "su", "37;41"), // file that is setuid (u+s) - ("SETGID", "sg", "30;43"), // file that is setgid (g+s) - ("CAPABILITY", "ca", "00"), // file with capability - ("STICKY_OTHER_WRITABLE", "tw", "30;42"), // dir that is sticky and other-writable (+t,o+w) - ("OTHER_WRITABLE", "ow", "34;42"), // dir that is other-writable (o+w) and not sticky - ("STICKY", "st", "37;44"), // dir with the sticky bit set (+t) and not other-writable - ("EXEC", "ex", "01;32"), // files with execute permission - ] -}); +/// Below are the color init strings for the basic file types. +/// One can use codes for 256 or more colors supported by modern terminals. +/// The default color codes use the capabilities of an 8 color terminal +/// with some additional attributes as per the following codes: +/// Attribute codes: +/// 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed +/// Text color codes: +/// 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white +/// Background color codes: +/// 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white +/// #NORMAL 00 /// no color code at all +/// #FILE 00 /// regular file: use no color at all +pub static FILE_TYPES: &[(&str, &str, &str)] = &[ + ("RESET", "rs", "0"), // reset to "normal" color + ("DIR", "di", "01;34"), // directory + ("LINK", "ln", "01;36"), // symbolic link + ("MULTIHARDLINK", "mh", "00"), // regular file with more than one link + ("FIFO", "pi", "40;33"), // pipe + ("SOCK", "so", "01;35"), // socket + ("DOOR", "do", "01;35"), // door + ("BLK", "bd", "40;33;01"), // block device driver + ("CHR", "cd", "40;33;01"), // character device driver + ("ORPHAN", "or", "40;31;01"), // symlink to nonexistent file, or non-stat'able file + ("MISSING", "mi", "00"), // ... and the files they point to + ("SETUID", "su", "37;41"), // file that is setuid (u+s) + ("SETGID", "sg", "30;43"), // file that is setgid (g+s) + ("CAPABILITY", "ca", "00"), // file with capability + ("STICKY_OTHER_WRITABLE", "tw", "30;42"), // dir that is sticky and other-writable (+t,o+w) + ("OTHER_WRITABLE", "ow", "34;42"), // dir that is other-writable (o+w) and not sticky + ("STICKY", "st", "37;44"), // dir with the sticky bit set (+t) and not other-writable + ("EXEC", "ex", "01;32"), // files with execute permission +]; /// Colors for file types /// /// List any file extensions like '.gz' or '.tar' that you would like ls /// to color below. Put the extension, a space, and the color init string. /// (and any comments you want to add after a '#') -pub static FILE_COLORS: Lazy> = Lazy::new(|| { - vec![ - /* - // Executables (Windows) - (".cmd", "01;32"), - (".exe", "01;32"), - (".com", "01;32"), - (".btm", "01;32"), - (".bat", "01;32"), - (".sh", "01;32"), - (".csh", "01;32"),*/ - // Archives or compressed - (".tar", "01;31"), - (".tgz", "01;31"), - (".arc", "01;31"), - (".arj", "01;31"), - (".taz", "01;31"), - (".lha", "01;31"), - (".lz4", "01;31"), - (".lzh", "01;31"), - (".lzma", "01;31"), - (".tlz", "01;31"), - (".txz", "01;31"), - (".tzo", "01;31"), - (".t7z", "01;31"), - (".zip", "01;31"), - (".z", "01;31"), - (".dz", "01;31"), - (".gz", "01;31"), - (".lrz", "01;31"), - (".lz", "01;31"), - (".lzo", "01;31"), - (".xz", "01;31"), - (".zst", "01;31"), - (".tzst", "01;31"), - (".bz2", "01;31"), - (".bz", "01;31"), - (".tbz", "01;31"), - (".tbz2", "01;31"), - (".tz", "01;31"), - (".deb", "01;31"), - (".rpm", "01;31"), - (".jar", "01;31"), - (".war", "01;31"), - (".ear", "01;31"), - (".sar", "01;31"), - (".rar", "01;31"), - (".alz", "01;31"), - (".ace", "01;31"), - (".zoo", "01;31"), - (".cpio", "01;31"), - (".7z", "01;31"), - (".rz", "01;31"), - (".cab", "01;31"), - (".wim", "01;31"), - (".swm", "01;31"), - (".dwm", "01;31"), - (".esd", "01;31"), - // Image formats - (".avif", "01;35"), - (".jpg", "01;35"), - (".jpeg", "01;35"), - (".mjpg", "01;35"), - (".mjpeg", "01;35"), - (".gif", "01;35"), - (".bmp", "01;35"), - (".pbm", "01;35"), - (".pgm", "01;35"), - (".ppm", "01;35"), - (".tga", "01;35"), - (".xbm", "01;35"), - (".xpm", "01;35"), - (".tif", "01;35"), - (".tiff", "01;35"), - (".png", "01;35"), - (".svg", "01;35"), - (".svgz", "01;35"), - (".mng", "01;35"), - (".pcx", "01;35"), - (".mov", "01;35"), - (".mpg", "01;35"), - (".mpeg", "01;35"), - (".m2v", "01;35"), - (".mkv", "01;35"), - (".webm", "01;35"), - (".webp", "01;35"), - (".ogm", "01;35"), - (".mp4", "01;35"), - (".m4v", "01;35"), - (".mp4v", "01;35"), - (".vob", "01;35"), - (".qt", "01;35"), - (".nuv", "01;35"), - (".wmv", "01;35"), - (".asf", "01;35"), - (".rm", "01;35"), - (".rmvb", "01;35"), - (".flc", "01;35"), - (".avi", "01;35"), - (".fli", "01;35"), - (".flv", "01;35"), - (".gl", "01;35"), - (".dl", "01;35"), - (".xcf", "01;35"), - (".xwd", "01;35"), - (".yuv", "01;35"), - (".cgm", "01;35"), - (".emf", "01;35"), - // https://wiki.xiph.org/MIME_Types_and_File_Extensions - (".ogv", "01;35"), - (".ogx", "01;35"), - // Audio formats - (".aac", "00;36"), - (".au", "00;36"), - (".flac", "00;36"), - (".m4a", "00;36"), - (".mid", "00;36"), - (".midi", "00;36"), - (".mka", "00;36"), - (".mp3", "00;36"), - (".mpc", "00;36"), - (".ogg", "00;36"), - (".ra", "00;36"), - (".wav", "00;36"), - // https://wiki.xiph.org/MIME_Types_and_File_Extensions - (".oga", "00;36"), - (".opus", "00;36"), - (".spx", "00;36"), - (".xspf", "00;36"), - // Backup files - ("~", "00;90"), - ("#", "00;90"), - (".bak", "00;90"), - (".old", "00;90"), - (".orig", "00;90"), - (".part", "00;90"), - (".rej", "00;90"), - (".swp", "00;90"), - (".tmp", "00;90"), - (".dpkg-dist", "00;90"), - (".dpkg-old", "00;90"), - (".ucf-dist", "00;90"), - (".ucf-new", "00;90"), - (".ucf-old", "00;90"), - (".rpmnew", "00;90"), - (".rpmorig", "00;90"), - (".rpmsave", "00;90"), - ] -}); +pub static FILE_COLORS: &[(&str, &str)] = &[ + /* + // Executables (Windows) + (".cmd", "01;32"), + (".exe", "01;32"), + (".com", "01;32"), + (".btm", "01;32"), + (".bat", "01;32"), + (".sh", "01;32"), + (".csh", "01;32"),*/ + // Archives or compressed + (".tar", "01;31"), + (".tgz", "01;31"), + (".arc", "01;31"), + (".arj", "01;31"), + (".taz", "01;31"), + (".lha", "01;31"), + (".lz4", "01;31"), + (".lzh", "01;31"), + (".lzma", "01;31"), + (".tlz", "01;31"), + (".txz", "01;31"), + (".tzo", "01;31"), + (".t7z", "01;31"), + (".zip", "01;31"), + (".z", "01;31"), + (".dz", "01;31"), + (".gz", "01;31"), + (".lrz", "01;31"), + (".lz", "01;31"), + (".lzo", "01;31"), + (".xz", "01;31"), + (".zst", "01;31"), + (".tzst", "01;31"), + (".bz2", "01;31"), + (".bz", "01;31"), + (".tbz", "01;31"), + (".tbz2", "01;31"), + (".tz", "01;31"), + (".deb", "01;31"), + (".rpm", "01;31"), + (".jar", "01;31"), + (".war", "01;31"), + (".ear", "01;31"), + (".sar", "01;31"), + (".rar", "01;31"), + (".alz", "01;31"), + (".ace", "01;31"), + (".zoo", "01;31"), + (".cpio", "01;31"), + (".7z", "01;31"), + (".rz", "01;31"), + (".cab", "01;31"), + (".wim", "01;31"), + (".swm", "01;31"), + (".dwm", "01;31"), + (".esd", "01;31"), + // Image formats + (".avif", "01;35"), + (".jpg", "01;35"), + (".jpeg", "01;35"), + (".mjpg", "01;35"), + (".mjpeg", "01;35"), + (".gif", "01;35"), + (".bmp", "01;35"), + (".pbm", "01;35"), + (".pgm", "01;35"), + (".ppm", "01;35"), + (".tga", "01;35"), + (".xbm", "01;35"), + (".xpm", "01;35"), + (".tif", "01;35"), + (".tiff", "01;35"), + (".png", "01;35"), + (".svg", "01;35"), + (".svgz", "01;35"), + (".mng", "01;35"), + (".pcx", "01;35"), + (".mov", "01;35"), + (".mpg", "01;35"), + (".mpeg", "01;35"), + (".m2v", "01;35"), + (".mkv", "01;35"), + (".webm", "01;35"), + (".webp", "01;35"), + (".ogm", "01;35"), + (".mp4", "01;35"), + (".m4v", "01;35"), + (".mp4v", "01;35"), + (".vob", "01;35"), + (".qt", "01;35"), + (".nuv", "01;35"), + (".wmv", "01;35"), + (".asf", "01;35"), + (".rm", "01;35"), + (".rmvb", "01;35"), + (".flc", "01;35"), + (".avi", "01;35"), + (".fli", "01;35"), + (".flv", "01;35"), + (".gl", "01;35"), + (".dl", "01;35"), + (".xcf", "01;35"), + (".xwd", "01;35"), + (".yuv", "01;35"), + (".cgm", "01;35"), + (".emf", "01;35"), + // https://wiki.xiph.org/MIME_Types_and_File_Extensions + (".ogv", "01;35"), + (".ogx", "01;35"), + // Audio formats + (".aac", "00;36"), + (".au", "00;36"), + (".flac", "00;36"), + (".m4a", "00;36"), + (".mid", "00;36"), + (".midi", "00;36"), + (".mka", "00;36"), + (".mp3", "00;36"), + (".mpc", "00;36"), + (".ogg", "00;36"), + (".ra", "00;36"), + (".wav", "00;36"), + // https://wiki.xiph.org/MIME_Types_and_File_Extensions + (".oga", "00;36"), + (".opus", "00;36"), + (".spx", "00;36"), + (".xspf", "00;36"), + // Backup files + ("~", "00;90"), + ("#", "00;90"), + (".bak", "00;90"), + (".old", "00;90"), + (".orig", "00;90"), + (".part", "00;90"), + (".rej", "00;90"), + (".swp", "00;90"), + (".tmp", "00;90"), + (".dpkg-dist", "00;90"), + (".dpkg-old", "00;90"), + (".ucf-dist", "00;90"), + (".ucf-new", "00;90"), + (".ucf-old", "00;90"), + (".rpmnew", "00;90"), + (".rpmorig", "00;90"), + (".rpmsave", "00;90"), +]; -pub static FILE_ATTRIBUTE_CODES: Lazy> = Lazy::new(|| { - vec![ - ("normal", "no"), - ("norm", "no"), - ("file", "fi"), - ("reset", "rs"), - ("dir", "di"), - ("lnk", "ln"), - ("link", "ln"), - ("symlink", "ln"), - ("orphan", "or"), - ("missing", "mi"), - ("fifo", "pi"), - ("pipe", "pi"), - ("sock", "so"), - ("blk", "bd"), - ("block", "bd"), - ("chr", "cd"), - ("char", "cd"), - ("door", "do"), - ("exec", "ex"), - ("left", "lc"), - ("leftcode", "lc"), - ("right", "rc"), - ("rightcode", "rc"), - ("end", "ec"), - ("endcode", "ec"), - ("suid", "su"), - ("setuid", "su"), - ("sgid", "sg"), - ("setgid", "sg"), - ("sticky", "st"), - ("other_writable", "ow"), - ("owr", "ow"), - ("sticky_other_writable", "tw"), - ("owt", "tw"), - ("capability", "ca"), - ("multihardlink", "mh"), - ("clrtoeol", "cl"), - ] -}); +pub static FILE_ATTRIBUTE_CODES: &[(&str, &str)] = &[ + ("normal", "no"), + ("norm", "no"), + ("file", "fi"), + ("reset", "rs"), + ("dir", "di"), + ("lnk", "ln"), + ("link", "ln"), + ("symlink", "ln"), + ("orphan", "or"), + ("missing", "mi"), + ("fifo", "pi"), + ("pipe", "pi"), + ("sock", "so"), + ("blk", "bd"), + ("block", "bd"), + ("chr", "cd"), + ("char", "cd"), + ("door", "do"), + ("exec", "ex"), + ("left", "lc"), + ("leftcode", "lc"), + ("right", "rc"), + ("rightcode", "rc"), + ("end", "ec"), + ("endcode", "ec"), + ("suid", "su"), + ("setuid", "su"), + ("sgid", "sg"), + ("setgid", "sg"), + ("sticky", "st"), + ("other_writable", "ow"), + ("owr", "ow"), + ("sticky_other_writable", "tw"), + ("owt", "tw"), + ("capability", "ca"), + ("multihardlink", "mh"), + ("clrtoeol", "cl"), +]; From 1c9413e185c8eb16175761b929a373578f0607e3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 4 Dec 2023 22:51:08 +0100 Subject: [PATCH 125/429] bring back the old format --- src/uucore/src/lib/features/colors.rs | 4 ++-- tests/fixtures/dircolors/internal.expected | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index 81d2fd3f336..e0de8b1e3e3 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -204,8 +204,8 @@ pub static FILE_COLORS: &[(&str, &str)] = &[ (".spx", "00;36"), (".xspf", "00;36"), // Backup files - ("~", "00;90"), - ("#", "00;90"), + ("*~", "00;90"), + ("*#", "00;90"), (".bak", "00;90"), (".old", "00;90"), (".orig", "00;90"), diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected index 933e70bc470..e151973f200 100644 --- a/tests/fixtures/dircolors/internal.expected +++ b/tests/fixtures/dircolors/internal.expected @@ -176,8 +176,8 @@ EXEC 01;32 .opus 00;36 .spx 00;36 .xspf 00;36 -~ 00;90 -# 00;90 +*~ 00;90 +*# 00;90 .bak 00;90 .old 00;90 .orig 00;90 From b0fdb1edef1a197c95faf83ffbb325190dfee9c3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 4 Dec 2023 23:25:13 +0100 Subject: [PATCH 126/429] Rest of the comments --- src/uu/dircolors/src/dircolors.rs | 158 +++++++++--------------------- tests/by-util/test_dircolors.rs | 2 + 2 files changed, 48 insertions(+), 112 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index cf8ed62922c..41e640b2a5f 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -7,9 +7,8 @@ use std::borrow::Borrow; use std::env; -use std::fmt::Write; use std::fs::File; -use std::io::IsTerminal; +//use std::io::IsTerminal; use std::io::{BufRead, BufReader}; use std::path::Path; @@ -99,16 +98,20 @@ fn generate_ls_colors(fmt: &OutputFmt, sep: &str) -> String { let mut display_parts = vec![]; let type_output = generate_type_output(fmt); display_parts.push(type_output); - for &(extension, code) in FILE_COLORS.iter() { - display_parts.push(format!("\x1b[{}m*{}\t{}\x1b[0m", code, extension, code)); + for &(extension, code) in FILE_COLORS { + let prefix = if extension.starts_with('*') { "" } else { "*" }; + let formatted_extension = + format!("\x1b[{}m{}{}\t{}\x1b[0m", code, prefix, extension, code); + display_parts.push(formatted_extension); } display_parts.join("\n") } _ => { // existing logic for other formats let mut parts = vec![]; - for &(extension, code) in FILE_COLORS.iter() { - let formatted_extension = format!("*{}", extension); + for &(extension, code) in FILE_COLORS { + let prefix = if extension.starts_with('*') { "" } else { "*" }; + let formatted_extension = format!("{}{}", prefix, extension); parts.push(format!("{}={}", formatted_extension, code)); } let (prefix, suffix) = get_colors_format_strings(fmt); @@ -193,6 +196,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result; if files.is_empty() { + println!("{}", generate_ls_colors(&out_format, ":")); + return Ok(()); + /* // Check if data is being piped into the program if std::io::stdin().is_terminal() { // No data piped, use default behavior @@ -203,6 +209,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let fin = BufReader::new(std::io::stdin()); result = parse(fin.lines().map_while(Result::ok), &out_format, "-"); } + */ } else if files.len() > 1 { return Err(UUsageError::new( 1, @@ -474,117 +481,44 @@ fn escape(s: &str) -> String { pub fn generate_dircolors_config() -> String { let mut config = String::new(); - // Adding the complete header comments as in the original file - writeln!( - config, - "# Configuration file for dircolors, a utility to help you set the" - ) - .unwrap(); - writeln!( - config, - "# LS_COLORS environment variable used by GNU ls with the --color option." - ) - .unwrap(); - writeln!( - config, - "# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the" - ) - .unwrap(); - writeln!( - config, - "# slackware version of dircolors) are recognized but ignored." - ) - .unwrap(); - writeln!( - config, - "# Global config options can be specified before TERM or COLORTERM entries" - ) - .unwrap(); - writeln!( - config, - "# Below are TERM or COLORTERM entries, which can be glob patterns, which" - ) - .unwrap(); - writeln!( - config, - "# restrict following config to systems with matching environment variables." - ) - .unwrap(); - writeln!(config, "COLORTERM ?*").unwrap(); - for term in TERMS.iter() { - writeln!(config, "TERM {}", term).unwrap(); + config.push_str("# Configuration file for dircolors, a utility to help you set the\n"); + config.push_str("# LS_COLORS environment variable used by GNU ls with the --color option.\n"); + config.push_str("# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the\n"); + config.push_str("# slackware version of dircolors) are recognized but ignored.\n"); + config.push_str("# Global config options can be specified before TERM or COLORTERM entries\n"); + config.push_str("# Below are TERM or COLORTERM entries, which can be glob patterns, which\n"); + config + .push_str("# restrict following config to systems with matching environment variables.\n"); + config.push_str("COLORTERM ?*\n"); + for term in TERMS { + config.push_str(&format!("TERM {}\n", term)); } - // Adding file types and their color codes with header - writeln!( - config, - "# Below are the color init strings for the basic file types." - ) - .unwrap(); - writeln!( - config, - "# One can use codes for 256 or more colors supported by modern terminals." - ) - .unwrap(); - writeln!( - config, - "# The default color codes use the capabilities of an 8 color terminal" - ) - .unwrap(); - writeln!( - config, - "# with some additional attributes as per the following codes:" - ) - .unwrap(); - writeln!(config, "# Attribute codes:").unwrap(); - writeln!( - config, - "# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed" - ) - .unwrap(); - writeln!(config, "# Text color codes:").unwrap(); - writeln!( - config, - "# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white" - ) - .unwrap(); - writeln!(config, "# Background color codes:").unwrap(); - writeln!( - config, - "# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white" - ) - .unwrap(); - writeln!(config, "#NORMAL 00 # no color code at all").unwrap(); - writeln!(config, "#FILE 00 # regular file: use no color at all").unwrap(); - - for (name, _, code) in FILE_TYPES.iter() { - writeln!(config, "{} {}", name, code).unwrap(); + config.push_str("# Below are the color init strings for the basic file types.\n"); + config.push_str("# One can use codes for 256 or more colors supported by modern terminals.\n"); + config.push_str("# The default color codes use the capabilities of an 8 color terminal\n"); + config.push_str("# with some additional attributes as per the following codes:\n"); + config.push_str("# Attribute codes:\n"); + config.push_str("# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed\n"); + config.push_str("# Text color codes:\n"); + config.push_str("# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white\n"); + config.push_str("# Background color codes:\n"); + config.push_str("# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white\n"); + config.push_str("#NORMAL 00 # no color code at all\n"); + config.push_str("#FILE 00 # regular file: use no color at all\n"); + + for (name, _, code) in FILE_TYPES { + config.push_str(&format!("{} {}\n", name, code)); } - writeln!( - config, - "# List any file extensions like '.gz' or '.tar' that you would like ls" - ) - .unwrap(); - writeln!( - config, - "# to color below. Put the extension, a space, and the color init string." - ) - .unwrap(); - - for (ext, color) in FILE_COLORS.iter() { - writeln!(config, "{} {}", ext, color).unwrap(); + config.push_str("# List any file extensions like '.gz' or '.tar' that you would like ls\n"); + config.push_str("# to color below. Put the extension, a space, and the color init string.\n"); + + for (ext, color) in FILE_COLORS { + config.push_str(&format!("{} {}\n", ext, color)); } - writeln!( - config, - "# Subsequent TERM or COLORTERM entries, can be used to add / override" - ) - .unwrap(); - write!( - config, - "# config specific to those matching environment variables." - ) - .unwrap(); + config.push_str("# Subsequent TERM or COLORTERM entries, can be used to add / override\n"); + config.push_str("# config specific to those matching environment variables."); config } diff --git a/tests/by-util/test_dircolors.rs b/tests/by-util/test_dircolors.rs index e3752fcde98..4a256352c76 100644 --- a/tests/by-util/test_dircolors.rs +++ b/tests/by-util/test_dircolors.rs @@ -159,6 +159,7 @@ fn test_quoting() { .no_stderr(); } +/* #[test] fn test_print_ls_colors() { new_ucmd!() @@ -168,6 +169,7 @@ fn test_print_ls_colors() { .stdout_is("\x1B[40;33mtw\t40;33\x1B[0m\n") .no_stderr(); } +*/ #[test] fn test_extra_operand() { From 4d2ae8485cd65429e64606119acac3156158ea2b Mon Sep 17 00:00:00 2001 From: Arpit Bhadauria Date: Mon, 4 Dec 2023 22:44:18 +0000 Subject: [PATCH 127/429] impl from trait instead of into --- src/uu/expr/src/syntax_tree.rs | 43 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 7677b5e7e54..ae2a44e5205 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -89,8 +89,8 @@ impl RelationOp { impl NumericOp { fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { - let a = >>::into(left.eval()?)?; - let b = >>::into(right.eval()?)?; + let a = ExprResult::::from(left.eval()?)?; + let b = ExprResult::::from(right.eval()?)?; Ok(NumOrStr::Num(match self { Self::Add => a + b, Self::Sub => a - b, @@ -218,29 +218,29 @@ impl From for NumOrStr { } } -impl Into> for NumOrStr { - fn into(self) -> Option { - match self.into() { +impl From for Option { + fn from(s: NumOrStr) -> Self { + match s.into() { Ok(num) => num.to_usize(), Err(_) => None, } } } -impl Into for NumOrStr { - fn into(self) -> String { - match self { - Self::Num(num) => num.to_string(), - Self::Str(str) => str.to_string(), +impl From for String { + fn from(s: NumOrStr) -> Self { + match s { + NumOrStr::Num(num) => num.to_string(), + NumOrStr::Str(str) => str.to_string(), } } } -impl Into> for NumOrStr { - fn into(self) -> ExprResult { - match self { - Self::Num(num) => Ok(num), - Self::Str(str) => str +impl From for ExprResult { + fn from(s: NumOrStr) -> Self { + match s { + NumOrStr::Num(num) => Ok(num), + NumOrStr::Str(str) => str .parse::() .map_err(|_| ExprError::NonIntegerArgument), } @@ -303,9 +303,8 @@ impl AstNode { // // So we coerce errors into 0 to make that the only case we // have to care about. - let pos: usize = >>::into(pos.eval()?).unwrap_or(0); - let length: usize = - >>::into(length.eval()?).unwrap_or(0); + let pos: usize = Option::::from(pos.eval()?).unwrap_or(0); + let length: usize = Option::::from(length.eval()?).unwrap_or(0); let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { return Ok(NumOrStr::from(String::new())); @@ -315,11 +314,9 @@ impl AstNode { string.chars().skip(pos).take(length).collect::(), )) } - Self::Length { string } => Ok(NumOrStr::from( - >::into(string.eval()?) - .chars() - .count(), - )), + Self::Length { string } => { + Ok(NumOrStr::from(String::from(string.eval()?).chars().count())) + } } } } From 2fcfec6490f87af117a03ccd77ebf487192d548c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 5 Dec 2023 21:50:38 +0100 Subject: [PATCH 128/429] fuzz seq --- .github/workflows/fuzzing.yml | 1 + fuzz/Cargo.toml | 8 +++- fuzz/fuzz_targets/fuzz_seq.rs | 78 +++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 fuzz/fuzz_targets/fuzz_seq.rs diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 623a03f65c7..eed2ae2fc99 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -47,6 +47,7 @@ jobs: - { name: fuzz_expr, should_pass: true } - { name: fuzz_printf, should_pass: false } - { name: fuzz_echo, should_pass: false } + - { name: fuzz_seq, should_pass: false } - { name: fuzz_parse_glob, should_pass: true } - { name: fuzz_parse_size, should_pass: true } - { name: fuzz_parse_time, should_pass: true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index b27f5b58677..c98f105ad19 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -18,7 +18,7 @@ uu_test = { path = "../src/uu/test/" } uu_expr = { path = "../src/uu/expr/" } uu_printf = { path = "../src/uu/printf/" } uu_echo = { path = "../src/uu/echo/" } - +uu_seq = { path = "../src/uu/seq/" } # Prevent this from interfering with workspaces [workspace] @@ -42,6 +42,12 @@ path = "fuzz_targets/fuzz_echo.rs" test = false doc = false +[[bin]] +name = "fuzz_seq" +path = "fuzz_targets/fuzz_seq.rs" +test = false +doc = false + [[bin]] name = "fuzz_expr" path = "fuzz_targets/fuzz_expr.rs" diff --git a/fuzz/fuzz_targets/fuzz_seq.rs b/fuzz/fuzz_targets/fuzz_seq.rs new file mode 100644 index 00000000000..2e4ff1a4662 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_seq.rs @@ -0,0 +1,78 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore parens + +#![no_main] +use libfuzzer_sys::fuzz_target; +use uu_seq::uumain; + +use rand::Rng; +use std::ffi::OsString; + +mod fuzz_common; +use crate::fuzz_common::CommandResult; +use crate::fuzz_common::{ + compare_result, generate_and_run_uumain, generate_random_string, run_gnu_cmd, +}; +static CMD_PATH: &str = "seq"; + +fn generate_seq() -> String { + let mut rng = rand::thread_rng(); + + // Generate 1 to 3 numbers for seq arguments + let arg_count = rng.gen_range(1..=3); + let mut args = Vec::new(); + + for _ in 0..arg_count { + if rng.gen_ratio(1, 100) { + // 1% chance to add a random string + args.push(generate_random_string(rng.gen_range(1..=10))); + } else { + // 99% chance to add a numeric value + match rng.gen_range(0..=3) { + 0 => args.push(rng.gen_range(-10000..=10000).to_string()), // Large or small integers + 1 => args.push(rng.gen_range(-100.0..100.0).to_string()), // Floating-point numbers + 2 => args.push(rng.gen_range(-100..0).to_string()), // Negative integers + _ => args.push(rng.gen_range(1..=100).to_string()), // Regular integers + } + } + } + + args.join(" ") +} + +fuzz_target!(|_data: &[u8]| { + let seq = generate_seq(); + let mut args = vec![OsString::from("seq")]; + args.extend(seq.split_whitespace().map(OsString::from)); + + let rust_result = generate_and_run_uumain(&args, uumain); + + let gnu_result = match run_gnu_cmd(CMD_PATH, &args[1..], false) { + Ok(result) => result, + Err(error_result) => { + eprintln!("Failed to run GNU command:"); + eprintln!("Stderr: {}", error_result.stderr); + eprintln!("Exit Code: {}", error_result.exit_code); + CommandResult { + stdout: String::new(), + stderr: error_result.stderr, + exit_code: error_result.exit_code, + } + } + }; + + compare_result( + "seq", + &format!("{:?}", &args[1..]), + &rust_result.stdout, + &gnu_result.stdout, + &rust_result.stderr, + &gnu_result.stderr, + rust_result.exit_code, + gnu_result.exit_code, + false, // Set to true if you want to fail on stderr diff + ); +}); From 97ec99cf28d7b699e7d89793bad93f7ca0976053 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 6 Dec 2023 21:14:59 +0100 Subject: [PATCH 129/429] use a single push_str Co-authored-by: Terts Diepraam --- src/uu/dircolors/src/dircolors.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 41e640b2a5f..91544dc7353 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -481,14 +481,17 @@ fn escape(s: &str) -> String { pub fn generate_dircolors_config() -> String { let mut config = String::new(); - config.push_str("# Configuration file for dircolors, a utility to help you set the\n"); - config.push_str("# LS_COLORS environment variable used by GNU ls with the --color option.\n"); - config.push_str("# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the\n"); - config.push_str("# slackware version of dircolors) are recognized but ignored.\n"); - config.push_str("# Global config options can be specified before TERM or COLORTERM entries\n"); - config.push_str("# Below are TERM or COLORTERM entries, which can be glob patterns, which\n"); - config - .push_str("# restrict following config to systems with matching environment variables.\n"); + config.push_str( + "\ + # Configuration file for dircolors, a utility to help you set the\n\ + # LS_COLORS environment variable used by GNU ls with the --color option.\n\ + # The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the\n\ + # slackware version of dircolors) are recognized but ignored.\n\ + # Global config options can be specified before TERM or COLORTERM entries\n\ + # Below are TERM or COLORTERM entries, which can be glob patterns, which\n\ + # restrict following config to systems with matching environment variables.\n\ + ", + ); config.push_str("COLORTERM ?*\n"); for term in TERMS { config.push_str(&format!("TERM {}\n", term)); From bd667efa7b1daf9a38edc5272c3bdab8e5743f56 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 6 Dec 2023 21:15:43 +0100 Subject: [PATCH 130/429] simplify the declaration Co-authored-by: Terts Diepraam --- src/uu/dircolors/src/dircolors.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 91544dc7353..fa523451b29 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -497,18 +497,22 @@ pub fn generate_dircolors_config() -> String { config.push_str(&format!("TERM {}\n", term)); } - config.push_str("# Below are the color init strings for the basic file types.\n"); - config.push_str("# One can use codes for 256 or more colors supported by modern terminals.\n"); - config.push_str("# The default color codes use the capabilities of an 8 color terminal\n"); - config.push_str("# with some additional attributes as per the following codes:\n"); - config.push_str("# Attribute codes:\n"); - config.push_str("# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed\n"); - config.push_str("# Text color codes:\n"); - config.push_str("# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white\n"); - config.push_str("# Background color codes:\n"); - config.push_str("# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white\n"); - config.push_str("#NORMAL 00 # no color code at all\n"); - config.push_str("#FILE 00 # regular file: use no color at all\n"); + config.push_str( + "\ + # Below are the color init strings for the basic file types.\n\ + # One can use codes for 256 or more colors supported by modern terminals.\n\ + # The default color codes use the capabilities of an 8 color terminal\n\ + # with some additional attributes as per the following codes:\n\ + # Attribute codes:\n\ + # 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed\n\ + # Text color codes:\n\ + # 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white\n\ + # Background color codes:\n\ + # 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white\n\ + #NORMAL 00 # no color code at all\n\ + #FILE 00 # regular file: use no color at all\n\ + ", + ); for (name, _, code) in FILE_TYPES { config.push_str(&format!("{} {}\n", name, code)); From f99987bb35aade972e187ad3b7df6b039f5b72c2 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 6 Dec 2023 21:19:59 +0100 Subject: [PATCH 131/429] fix rustfmt --- src/uu/dircolors/src/dircolors.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index fa523451b29..ecca9d16033 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -481,7 +481,7 @@ fn escape(s: &str) -> String { pub fn generate_dircolors_config() -> String { let mut config = String::new(); - config.push_str( + config.push_str( "\ # Configuration file for dircolors, a utility to help you set the\n\ # LS_COLORS environment variable used by GNU ls with the --color option.\n\ From 5a32ab8004304fe5434bf42e1faf066a58b95736 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 6 Dec 2023 15:35:38 +0100 Subject: [PATCH 132/429] ls: implement --hyperlink --- Cargo.lock | 1 + Cargo.toml | 1 + src/uu/hostname/Cargo.toml | 2 +- src/uu/ls/Cargo.toml | 1 + src/uu/ls/src/ls.rs | 50 ++++++++++++++++++++++++++++++++++++-- tests/by-util/test_ls.rs | 30 +++++++++++++++++++++++ 6 files changed, 82 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf638b421a1..5060da44116 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2618,6 +2618,7 @@ dependencies = [ "chrono", "clap", "glob", + "hostname", "lscolors", "number_prefix", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 14e700ee0a6..13b99700851 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -284,6 +284,7 @@ fundu = "2.0.0" gcd = "2.3" glob = "0.3.1" half = "2.3" +hostname = "0.3" indicatif = "0.17" itertools = "0.12.0" libc = "0.2.150" diff --git a/src/uu/hostname/Cargo.toml b/src/uu/hostname/Cargo.toml index a9b033d123b..1fe10170964 100644 --- a/src/uu/hostname/Cargo.toml +++ b/src/uu/hostname/Cargo.toml @@ -16,7 +16,7 @@ path = "src/hostname.rs" [dependencies] clap = { workspace = true } -hostname = { version = "0.3", features = ["set"] } +hostname = { workspace = true, features = ["set"] } uucore = { workspace = true, features = ["wide"] } [target.'cfg(target_os = "windows")'.dependencies] diff --git a/src/uu/ls/Cargo.toml b/src/uu/ls/Cargo.toml index 96cf7df1a0d..a82a1f37e07 100644 --- a/src/uu/ls/Cargo.toml +++ b/src/uu/ls/Cargo.toml @@ -31,6 +31,7 @@ uucore = { workspace = true, features = [ ] } once_cell = { workspace = true } selinux = { workspace = true, optional = true } +hostname = { workspace = true } [[bin]] name = "ls" diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index c6b10677c6e..deb8aac3dfe 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -155,6 +155,7 @@ pub mod options { pub static GROUP_DIRECTORIES_FIRST: &str = "group-directories-first"; pub static ZERO: &str = "zero"; pub static DIRED: &str = "dired"; + pub static HYPERLINK: &str = "hyperlink"; } const DEFAULT_TERM_WIDTH: u16 = 80; @@ -418,6 +419,7 @@ pub struct Config { group_directories_first: bool, line_ending: LineEnding, dired: bool, + hyperlink: bool, } // Fields that can be removed or added to the long format @@ -566,6 +568,25 @@ fn extract_color(options: &clap::ArgMatches) -> bool { } } +/// Extracts the hyperlink option to use based on the options provided. +/// +/// # Returns +/// +/// A boolean representing whether to hyperlink files. +fn extract_hyperlink(options: &clap::ArgMatches) -> bool { + let hyperlink = options + .get_one::(options::HYPERLINK) + .unwrap() + .as_str(); + + match hyperlink { + "always" | "yes" | "force" => true, + "auto" | "tty" | "if-tty" => std::io::stdout().is_terminal(), + "never" | "no" | "none" => false, + _ => unreachable!("should be handled by clap"), + } +} + /// Extracts the quoting style to use based on the options provided. /// /// # Arguments @@ -736,10 +757,9 @@ impl Config { } let sort = extract_sort(options); - let time = extract_time(options); - let mut needs_color = extract_color(options); + let hyperlink = extract_hyperlink(options); let opt_block_size = options.get_one::(options::size::BLOCK_SIZE); let opt_si = opt_block_size.is_some() @@ -1020,6 +1040,7 @@ impl Config { group_directories_first: options.get_flag(options::GROUP_DIRECTORIES_FIRST), line_ending: LineEnding::from_zero_flag(options.get_flag(options::ZERO)), dired, + hyperlink, }) } } @@ -1154,6 +1175,19 @@ pub fn uu_app() -> Command { .help("generate output designed for Emacs' dired (Directory Editor) mode") .action(ArgAction::SetTrue), ) + .arg( + Arg::new(options::HYPERLINK) + .long(options::HYPERLINK) + .help("hyperlink file names WHEN") + .value_parser([ + "always", "yes", "force", "auto", "tty", "if-tty", "never", "no", "none", + ]) + .require_equals(true) + .num_args(0..=1) + .default_missing_value("always") + .default_value("never") + .value_name("WHEN"), + ) // The next four arguments do not override with the other format // options, see the comment in Config::from for the reason. // Ideally, they would use Arg::override_with, with their own name @@ -2959,6 +2993,18 @@ fn display_file_name( // infer it because the color codes mess up term_grid's width calculation. let mut width = name.width(); + if config.hyperlink { + let hostname = hostname::get().unwrap_or(OsString::from("")); + let hostname = hostname.to_string_lossy(); + + let absolute_path = fs::canonicalize(&path.p_buf).unwrap_or_default(); + let absolute_path = absolute_path.to_string_lossy(); + + // TODO encode path + // \x1b = ESC, \x07 = BEL + name = format!("\x1b]8;;file://{hostname}{absolute_path}\x07{name}\x1b]8;;\x07"); + } + if let Some(ls_colors) = &config.color { let md = path.md(out); name = if md.is_some() { diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index fcd57170d48..8bc2b75ac77 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -3855,3 +3855,33 @@ fn test_posixly_correct() { .succeeds() .stdout_contains_line("total 8"); } + +#[test] +fn test_ls_hyperlink() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let file = "a.txt"; + + at.touch(file); + + let path = at.root_dir_resolved(); + let separator = std::path::MAIN_SEPARATOR_STR; + + let result = scene.ucmd().arg("--hyperlink").succeeds(); + assert!(result.stdout_str().contains("\x1b]8;;file://")); + assert!(result + .stdout_str() + .contains(&format!("{path}{separator}{file}\x07{file}\x1b]8;;\x07"))); + + let result = scene.ucmd().arg("--hyperlink=always").succeeds(); + assert!(result.stdout_str().contains("\x1b]8;;file://")); + assert!(result + .stdout_str() + .contains(&format!("{path}{separator}{file}\x07{file}\x1b]8;;\x07"))); + + scene + .ucmd() + .arg("--hyperlink=never") + .succeeds() + .stdout_is(format!("{file}\n")); +} From 09999427ccfb822918fc6969e16d0fc656087ff1 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 7 Dec 2023 10:02:29 +0100 Subject: [PATCH 133/429] du: merge imports --- src/uu/du/src/du.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index dc03a64f218..989e663e46c 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -3,35 +3,30 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use chrono::prelude::DateTime; -use chrono::Local; -use clap::ArgAction; -use clap::{crate_version, Arg, ArgMatches, Command}; +use chrono::{DateTime, Local}; +use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use glob::Pattern; use std::collections::HashSet; use std::env; -use std::fs; -use std::fs::File; +use std::error::Error; +use std::fmt::Display; #[cfg(not(windows))] use std::fs::Metadata; -use std::io::BufRead; -use std::io::BufReader; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; #[cfg(not(windows))] use std::os::unix::fs::MetadataExt; #[cfg(windows)] use std::os::windows::fs::MetadataExt; #[cfg(windows)] use std::os::windows::io::AsRawHandle; -use std::path::Path; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::mpsc; use std::thread; use std::time::{Duration, UNIX_EPOCH}; -use std::{error::Error, fmt::Display}; use uucore::display::{print_verbatim, Quotable}; -use uucore::error::FromIo; -use uucore::error::{UError, UResult, USimpleError}; +use uucore::error::{FromIo, UError, UResult, USimpleError}; use uucore::line_ending::LineEnding; use uucore::parse_glob; use uucore::parse_size::{parse_size_u64, ParseSizeError}; From 6cae19156926ef0ca68e1e3657dd51e2066d4e8b Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 7 Dec 2023 11:06:04 +0100 Subject: [PATCH 134/429] du: remove ArgMatches from StatPrinter --- src/uu/du/src/du.rs | 165 ++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 81 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 989e663e46c..26e697abb46 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -87,6 +87,18 @@ struct Options { count_links: bool, inodes: bool, verbose: bool, + threshold: Option, + apparent_size: bool, + // TODO: the size conversion fields should be unified + si: bool, + bytes: bool, + human_readable: bool, + block_size_1k: bool, + block_size_1m: bool, + block_size: u64, + time: Option