Skip to content

Commit 9ab7fa9

Browse files
RenjiSann authored and BenWiederhake committed
tr: accept non utf8 arguments for sets
1 parent e7f965d commit 9ab7fa9

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

src/uu/tr/src/tr.rs

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@
88
mod operation;
99
mod unicode_table;
1010

11-
use clap::{crate_version, Arg, ArgAction, Command};
11+
use clap::{crate_version, value_parser, Arg, ArgAction, Command};
1212
use operation::{
1313
translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
1414
};
15+
use std::ffi::OsString;
1516
use std::io::{stdin, stdout, BufWriter};
16-
use uucore::{format_usage, help_about, help_section, help_usage, show};
17+
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show};
1718

1819
use crate::operation::DeleteOperation;
1920
use uucore::display::Quotable;
@@ -43,7 +44,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
4344
// Ultimately this should be OsString, but we might want to wait for the
4445
// pattern API on OsStr
4546
let sets: Vec<_> = matches
46-
.get_many::<String>(options::SETS)
47+
.get_many::<OsString>(options::SETS)
4748
.into_iter()
4849
.flatten()
4950
.map(ToOwned::to_owned)
@@ -97,7 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
9798
}
9899

99100
if let Some(first) = sets.first() {
100-
if first.ends_with('\\') {
101+
if let Some(b'\\') = os_str_as_bytes(first)?.last() {
101102
show!(USimpleError::new(
102103
0,
103104
"warning: an unescaped backslash at end of string is not portable"
@@ -113,10 +114,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
113114

114115
// According to the man page: translating only happens if deleting or if a second set is given
115116
let translating = !delete_flag && sets.len() > 1;
116-
let mut sets_iter = sets.iter().map(|c| c.as_str());
117+
let mut sets_iter = sets.iter().map(|c| c.as_os_str());
117118
let (set1, set2) = Sequence::solve_set_characters(
118-
sets_iter.next().unwrap_or_default().as_bytes(),
119-
sets_iter.next().unwrap_or_default().as_bytes(),
119+
os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
120+
os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
120121
complement_flag,
121122
// if we are not translating then we don't truncate set1
122123
truncate_set1_flag && translating,
@@ -195,5 +196,9 @@ pub fn uu_app() -> Command {
195196
.action(ArgAction::SetTrue)
196197
.overrides_with(options::TRUNCATE_SET1),
197198
)
198-
.arg(Arg::new(options::SETS).num_args(1..))
199+
.arg(
200+
Arg::new(options::SETS)
201+
.num_args(1..)
202+
.value_parser(value_parser!(OsString)),
203+
)
199204
}

tests/by-util/test_tr.rs

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55
// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer
66
use crate::common::util::TestScenario;
77

8+
#[cfg(unix)]
9+
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
10+
811
#[test]
912
fn test_invalid_arg() {
1013
new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
@@ -1427,3 +1430,18 @@ fn check_complement_set2_too_big() {
14271430
.fails()
14281431
.stderr_contains("when translating with complemented character classes,\nstring2 must map all characters in the domain to one");
14291432
}
1433+
1434+
#[test]
1435+
#[cfg(unix)]
1436+
fn test_truncate_non_utf8_set() {
1437+
let stdin = &[b'\x01', b'a', b'm', b'p', 0xfe_u8, 0xff_u8];
1438+
let set1 = OsStr::from_bytes(&[b'a', 0xfe_u8, 0xff_u8, b'z']);
1439+
let set2 = OsStr::from_bytes(b"01234");
1440+
1441+
new_ucmd!()
1442+
.arg(set1)
1443+
.arg(set2)
1444+
.pipe_in(*stdin)
1445+
.succeeds()
1446+
.stdout_is_bytes(b"\x010mp12");
1447+
}

0 commit comments

Comments
 (0)