Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 4 additions & 26 deletions src/uu/cut/src/cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Comm
use std::ffi::OsString;
use std::fs::File;
use std::io::{stdin, stdout, BufReader, BufWriter, IsTerminal, Read, Write};
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use uucore::display::Quotable;
use uucore::error::{set_exit_code, FromIo, UResult, USimpleError};
use uucore::line_ending::LineEnding;
use uucore::os_str_as_bytes;

use self::searcher::Searcher;
use matcher::{ExactMatcher, Matcher, WhitespaceMatcher};
Expand Down Expand Up @@ -59,7 +58,7 @@ impl Default for Delimiter<'_> {

// Builds a `Delimiter::Slice` that borrows the bytes of the given `OsString`.
impl<'a> From<&'a OsString> for Delimiter<'a> {
fn from(s: &'a OsString) -> Self {
// NOTE(review): the byte conversion returns an error on non-unix targets when the
// value is not valid UTF-8, so this `unwrap()` would panic there; `From` has no way
// to report the failure to the caller.
Self::Slice(os_string_as_bytes(s).unwrap())
Self::Slice(os_str_as_bytes(s).unwrap())
}
}

Expand Down Expand Up @@ -347,27 +346,6 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
}
}

// Helper function for processing delimiter values (which could be non UTF-8).
// It borrows the contents of an `OsString` as `&[u8]`; the conversion can only fail on
// non-unix targets.
// On unix the raw bytes are returned as they are, without any UTF-8 validation.
// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
fn os_string_as_bytes(os_string: &OsString) -> UResult<&[u8]> {
// unix: `OsStrExt::as_bytes` exposes the underlying bytes directly
#[cfg(unix)]
let bytes = os_string.as_bytes();

// non-unix: go through `&str`, turning a failed UTF-8 conversion into a usage error
// (exit code 1)
#[cfg(not(unix))]
let bytes = os_string
.to_str()
.ok_or_else(|| {
uucore::error::UUsageError::new(
1,
"invalid UTF-8 was detected in one or more arguments",
)
})?
.as_bytes();

Ok(bytes)
}

// Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively
// Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior
fn get_delimiters(
Expand Down Expand Up @@ -395,7 +373,7 @@ fn get_delimiters(
} else {
// For delimiter `-d` option value - allow both UTF-8 (possibly multi-byte) characters
// and Non UTF-8 (and not ASCII) single byte "characters", like `b"\xAD"` to align with GNU behavior
let bytes = os_string_as_bytes(os_string)?;
let bytes = os_str_as_bytes(os_string)?;
if os_string.to_str().is_some_and(|s| s.chars().count() > 1)
|| os_string.to_str().is_none() && bytes.len() > 1
{
Expand All @@ -422,7 +400,7 @@ fn get_delimiters(
if os_string.is_empty() || os_string == "''" {
b"\0"
} else {
os_string_as_bytes(os_string).unwrap()
os_str_as_bytes(os_string).unwrap()
}
});
Ok((delim, out_delim))
Expand Down
21 changes: 13 additions & 8 deletions src/uu/tr/src/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
mod operation;
mod unicode_table;

use clap::{crate_version, Arg, ArgAction, Command};
use clap::{crate_version, value_parser, Arg, ArgAction, Command};
use operation::{
translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
};
use std::ffi::OsString;
use std::io::{stdin, stdout, BufWriter};
use uucore::{format_usage, help_about, help_section, help_usage, show};
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show};

use crate::operation::DeleteOperation;
use uucore::display::Quotable;
Expand Down Expand Up @@ -43,7 +44,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// The sets are read as OsString so that arguments which are not valid UTF-8 can be
// accepted; they are converted to byte slices with `os_str_as_bytes` further below
let sets: Vec<_> = matches
.get_many::<String>(options::SETS)
.get_many::<OsString>(options::SETS)
.into_iter()
.flatten()
.map(ToOwned::to_owned)
Expand Down Expand Up @@ -97,7 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
}

if let Some(first) = sets.first() {
if first.ends_with('\\') {
if let Some(b'\\') = os_str_as_bytes(first)?.last() {
show!(USimpleError::new(
0,
"warning: an unescaped backslash at end of string is not portable"
Expand All @@ -113,10 +114,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {

// According to the man page: translating only happens if deleting or if a second set is given
let translating = !delete_flag && sets.len() > 1;
let mut sets_iter = sets.iter().map(|c| c.as_str());
let mut sets_iter = sets.iter().map(|c| c.as_os_str());
let (set1, set2) = Sequence::solve_set_characters(
sets_iter.next().unwrap_or_default().as_bytes(),
sets_iter.next().unwrap_or_default().as_bytes(),
os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
complement_flag,
// if we are not translating then we don't truncate set1
truncate_set1_flag && translating,
Expand Down Expand Up @@ -195,5 +196,9 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue)
.overrides_with(options::TRUNCATE_SET1),
)
.arg(Arg::new(options::SETS).num_args(1..))
.arg(
Arg::new(options::SETS)
.num_args(1..)
.value_parser(value_parser!(OsString)),
)
}
21 changes: 21 additions & 0 deletions src/uucore/src/lib/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ pub use crate::features::fsxattr;

//## core functions

use std::ffi::OsStr;
use std::ffi::OsString;
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::sync::atomic::Ordering;

use once_cell::sync::Lazy;
Expand Down Expand Up @@ -219,6 +222,24 @@ pub fn read_yes() -> bool {
}
}

/// Borrows the contents of an `OsStr` as a byte slice, for argument values that may not be
/// valid UTF-8.
///
/// On unix targets the underlying bytes are returned as they are, without any UTF-8
/// validation, so the call always succeeds there.
///
/// # Errors
///
/// On non-unix targets (i.e. Windows) a `UUsageError` with exit code 1 is returned if the
/// value is not valid UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
// unix: `OsStrExt::as_bytes` exposes the underlying bytes directly
#[cfg(unix)]
let bytes = os_string.as_bytes();

// non-unix: go through `&str`; a value that is not valid UTF-8 becomes a usage error
#[cfg(not(unix))]
let bytes = os_string
.to_str()
.ok_or_else(|| {
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
})?
.as_bytes();

Ok(bytes)
}

/// Prompt the user with a formatted string and returns `true` if they reply `'y'` or `'Y'`
///
/// This macro functions accepts the same syntax as `format!`. The prompt is written to
Expand Down
18 changes: 18 additions & 0 deletions tests/by-util/test_tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer
use crate::common::util::TestScenario;

#[cfg(unix)]
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

#[test]
fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
Expand Down Expand Up @@ -1427,3 +1430,18 @@ fn check_complement_set2_too_big() {
.fails()
.stderr_contains("when translating with complemented character classes,\nstring2 must map all characters in the domain to one");
}

// Checks that `tr` accepts a set containing bytes that are not valid UTF-8 (unix only,
// because the arguments are built with `OsStr::from_bytes`).
// With SET1 = `a`, 0xfe, 0xff, `z` and SET2 = `01234`, the input bytes `a`, 0xfe and 0xff
// are expected to come out as `0`, `1` and `2`, while `\x01`, `m` and `p` (which are not
// in SET1) are expected to pass through unchanged.
#[test]
#[cfg(unix)]
fn test_truncate_non_utf8_set() {
let stdin = &[b'\x01', b'a', b'm', b'p', 0xfe_u8, 0xff_u8];
let set1 = OsStr::from_bytes(&[b'a', 0xfe_u8, 0xff_u8, b'z']);
let set2 = OsStr::from_bytes(b"01234");

new_ucmd!()
.arg(set1)
.arg(set2)
.pipe_in(*stdin)
.succeeds()
.stdout_is_bytes(b"\x010mp12");
}
Loading