Skip to content

Commit f096a21

Browse files
Fix additional paste bugs
1 parent 884b7ef commit f096a21

File tree

2 files changed

+110
-33
lines changed

2 files changed

+110
-33
lines changed

src/uu/paste/src/paste.rs

Lines changed: 62 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,9 @@ fn paste(
9898
delimiters: &str,
9999
line_ending: LineEnding,
100100
) -> UResult<()> {
101+
let line_ending_byte = u8::from(line_ending);
102+
let line_ending_byte_array_ref = &[line_ending_byte];
103+
101104
let mut files = Vec::with_capacity(filenames.len());
102105
for name in filenames {
103106
let file = if name == "-" {
@@ -112,7 +115,13 @@ fn paste(
112115
files.push(file);
113116
}
114117

115-
if delimiters.ends_with('\\') && !delimiters.ends_with("\\\\") {
118+
let trailing_backslashes_count = delimiters
119+
.chars()
120+
.rev()
121+
.take_while(|&ch| ch == '\\')
122+
.count();
123+
124+
if trailing_backslashes_count % 2 != 0 {
116125
return Err(USimpleError::new(
117126
1,
118127
format!("delimiter list ends with an unescaped backslash: {delimiters}"),
@@ -129,7 +138,7 @@ fn paste(
129138
[first_delimiter, ..] => DelimiterState::MultipleDelimiters {
130139
current_delimiter: first_delimiter,
131140
delimiters: &unescaped_and_encoded_delimiters,
132-
delimiters_iter: unescaped_and_encoded_delimiters.iter().cycle(),
141+
delimiters_iterator: unescaped_and_encoded_delimiters.iter().cycle(),
133142
},
134143
};
135144

@@ -144,10 +153,10 @@ fn paste(
144153
loop {
145154
delimiter_state.advance_to_next_delimiter();
146155

147-
match read_until(file.as_mut(), line_ending as u8, &mut output) {
156+
match read_until(file.as_mut(), line_ending_byte, &mut output) {
148157
Ok(0) => break,
149158
Ok(_) => {
150-
if output.ends_with(&[line_ending as u8]) {
159+
if output.ends_with(line_ending_byte_array_ref) {
151160
output.pop();
152161
}
153162

@@ -161,14 +170,8 @@ fn paste(
161170

162171
delimiter_state.remove_trailing_delimiter(&mut output);
163172

164-
// TODO
165-
// Should the output be converted to UTF-8?
166-
write!(
167-
stdout,
168-
"{}{}",
169-
String::from_utf8_lossy(&output),
170-
line_ending
171-
)?;
173+
stdout.write_all(&output)?;
174+
stdout.write_all(line_ending_byte_array_ref)?;
172175
}
173176
} else {
174177
let mut eof = vec![false; files.len()];
@@ -184,13 +187,13 @@ fn paste(
184187
if eof[i] {
185188
eof_count += 1;
186189
} else {
187-
match read_until(file.as_mut(), line_ending as u8, &mut output) {
190+
match read_until(file.as_mut(), line_ending_byte, &mut output) {
188191
Ok(0) => {
189192
eof[i] = true;
190193
eof_count += 1;
191194
}
192195
Ok(_) => {
193-
if output.ends_with(&[line_ending as u8]) {
196+
if output.ends_with(line_ending_byte_array_ref) {
194197
output.pop();
195198
}
196199
}
@@ -216,25 +219,51 @@ fn paste(
216219

217220
delimiter_state.remove_trailing_delimiter(&mut output);
218221

219-
// TODO
220-
// Should the output be converted to UTF-8?
221-
write!(
222-
stdout,
223-
"{}{}",
224-
String::from_utf8_lossy(&output),
225-
line_ending
226-
)?;
222+
stdout.write_all(&output)?;
223+
stdout.write_all(line_ending_byte_array_ref)?;
227224
}
228225
}
229226

230227
Ok(())
231228
}
232229

233230
/// Unescape all special characters
234-
fn unescape(s: &str) -> String {
235-
s.replace("\\n", "\n")
236-
.replace("\\t", "\t")
237-
.replace("\\\\", "\\")
231+
fn unescape(input: &str) -> String {
232+
/// A single backslash char
233+
const BACKSLASH: char = '\\';
234+
235+
let mut string = String::with_capacity(input.len());
236+
237+
let mut chars = input.chars();
238+
239+
while let Some(char) = chars.next() {
240+
match char {
241+
BACKSLASH => match chars.next() {
242+
// Keep "\" if it is the last char
243+
// "\\" to "\"
244+
None | Some(BACKSLASH) => {
245+
string.push(BACKSLASH);
246+
}
247+
// "\n" to U+000A
248+
Some('n') => {
249+
string.push('\n');
250+
}
251+
// "\t" to U+0009
252+
Some('t') => {
253+
string.push('\t');
254+
}
255+
Some(other_char) => {
256+
string.push(BACKSLASH);
257+
string.push(other_char);
258+
}
259+
},
260+
non_backslash_char => {
261+
string.push(non_backslash_char);
262+
}
263+
}
264+
}
265+
266+
string
238267
}
239268

240269
fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
@@ -268,7 +297,7 @@ enum DelimiterState<'a> {
268297
MultipleDelimiters {
269298
current_delimiter: &'a [u8],
270299
delimiters: &'a [Box<[u8]>],
271-
delimiters_iter: Cycle<Iter<'a, Box<[u8]>>>,
300+
delimiters_iterator: Cycle<Iter<'a, Box<[u8]>>>,
272301
},
273302
}
274303

@@ -278,12 +307,12 @@ impl<'a> DelimiterState<'a> {
278307
fn advance_to_next_delimiter(&mut self) {
279308
if let DelimiterState::MultipleDelimiters {
280309
current_delimiter,
281-
delimiters_iter,
310+
delimiters_iterator,
282311
..
283312
} = self
284313
{
285-
// Unwrap because "delimiters_encoded_iter" is a cycle iter and was created from a non-empty slice
286-
*current_delimiter = delimiters_iter.next().unwrap();
314+
// Unwrap because `delimiters_iterator` is a cycle iter and was created from a non-empty slice
315+
*current_delimiter = delimiters_iterator.next().unwrap();
287316
}
288317
}
289318

@@ -292,12 +321,12 @@ impl<'a> DelimiterState<'a> {
292321
/// This is a no-op unless there are multiple delimiters.
293322
fn reset_to_first_delimiter(&mut self) {
294323
if let DelimiterState::MultipleDelimiters {
295-
delimiters_iter,
324+
delimiters_iterator,
296325
delimiters,
297326
..
298327
} = self
299328
{
300-
*delimiters_iter = delimiters.iter().cycle();
329+
*delimiters_iterator = delimiters.iter().cycle();
301330
}
302331
}
303332

@@ -320,7 +349,7 @@ impl<'a> DelimiterState<'a> {
320349
output.truncate(output_without_delimiter_length);
321350
} else {
322351
// This branch is NOT unreachable, must be skipped
323-
// "output" should be empty in this case
352+
// `output` should be empty in this case
324353
assert!(output_len == 0);
325354
}
326355
}

tests/by-util/test_paste.rs

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,54 @@ FIRST!SECOND@THIRD#FOURTH!ABCDEFG
241241
}
242242
}
243243

244+
#[test]
245+
fn test_non_utf8_input() {
246+
const PREFIX_LEN: usize = 16;
247+
const MIDDLE_LEN: usize = 3;
248+
const SUFFIX_LEN: usize = 2;
249+
250+
const TOTAL_LEN: usize = PREFIX_LEN + MIDDLE_LEN + SUFFIX_LEN;
251+
252+
const PREFIX: &[u8; PREFIX_LEN] = b"Non-UTF-8 test: ";
253+
// 0xC0 is not valid UTF-8
254+
const MIDDLE: &[u8; MIDDLE_LEN] = &[0xC0, 0x00, 0xC0];
255+
const SUFFIX: &[u8; SUFFIX_LEN] = b".\n";
256+
257+
let mut input = Vec::<u8>::with_capacity(TOTAL_LEN);
258+
259+
input.extend_from_slice(PREFIX);
260+
261+
input.extend_from_slice(MIDDLE);
262+
263+
input.extend_from_slice(SUFFIX);
264+
265+
let input_clone = input.clone();
266+
267+
new_ucmd!()
268+
.pipe_in(input_clone)
269+
.succeeds()
270+
.stdout_only_bytes(input);
271+
}
272+
273+
#[test]
274+
fn test_three_trailing_backslashes_delimiter() {
275+
const ONE_BACKSLASH_STR: &str = "\\";
276+
277+
let three_backslashes_string = ONE_BACKSLASH_STR.repeat(3);
278+
279+
for option_style in ["-d", "--delimiters"] {
280+
new_ucmd!()
281+
.args(&[option_style, &three_backslashes_string])
282+
.fails()
283+
.no_stdout()
284+
.stderr_str_check(|st| {
285+
st.ends_with(&format!(
286+
": delimiter list ends with an unescaped backslash: {three_backslashes_string}\n"
287+
))
288+
});
289+
}
290+
}
291+
244292
#[test]
245293
fn test_data() {
246294
for example in EXAMPLE_DATA {

0 commit comments

Comments
 (0)