Skip to content

Commit d782cc4

Browse files
committed
csplit: don't add a newline if the file doesn't end with one
1 parent de32281 commit d782cc4

File tree

2 files changed

+75
-19
lines changed

2 files changed

+75
-19
lines changed

src/uu/csplit/src/csplit.rs

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#![allow(rustdoc::private_intra_doc_links)]
77

88
use std::cmp::Ordering;
9-
use std::io::{self, BufReader};
9+
use std::io::{self, BufReader, ErrorKind};
1010
use std::{
1111
fs::{File, remove_file},
1212
io::{BufRead, BufWriter, Write},
@@ -71,6 +71,35 @@ impl CsplitOptions {
7171
}
7272
}
7373

74+
/// Iterator over the lines of a [`BufRead`] source that keeps line terminators.
///
/// Unlike [`BufRead::lines`], every yielded `String` still carries its trailing
/// `\n` (and a preceding `\r`, if present). A final line that is not terminated
/// by a newline is yielded unchanged, so the consumer can reproduce the input
/// byte-for-byte without inventing a trailing newline.
pub struct LinesWithNewlines<T: BufRead> {
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s` so it can be iterated line by line.
    fn new(s: T) -> Self {
        Self { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads the next line, including its terminator when there is one.
    ///
    /// Returns `None` at end of input. Read failures are yielded as
    /// `Some(Err(_))`; input that is not valid UTF-8 is reported by
    /// [`BufRead::read_line`] as an [`io::ErrorKind::InvalidData`] error.
    fn next(&mut self) -> Option<Self::Item> {
        let mut line = String::new();
        // `read_line` reads up to and including `\n` and validates UTF-8,
        // so no manual `read_until` + `String::from_utf8` step is needed.
        match self.inner.read_line(&mut line) {
            Ok(0) => None,
            Ok(_) => Some(Ok(line)),
            Err(e) => Some(Err(e)),
        }
    }
}
102+
74103
/// Splits a file into several files according to the command line patterns.
75104
///
76105
/// # Errors
@@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
87116
where
88117
T: BufRead,
89118
{
90-
let enumerated_input_lines = input
91-
.lines()
119+
let enumerated_input_lines = LinesWithNewlines::new(input)
92120
.map(|line| line.map_err_context(|| "read error".to_string()))
93121
.enumerate();
94122
let mut input_iter = InputSplitter::new(enumerated_input_lines);
@@ -243,7 +271,7 @@ impl SplitWriter<'_> {
243271
self.dev_null = true;
244272
}
245273

246-
/// Writes the line to the current split, appending a newline character.
274+
/// Writes the line to the current split.
247275
/// If [`self.dev_null`] is true, then the line is discarded.
248276
///
249277
/// # Errors
@@ -255,8 +283,7 @@ impl SplitWriter<'_> {
255283
Some(ref mut current_writer) => {
256284
let bytes = line.as_bytes();
257285
current_writer.write_all(bytes)?;
258-
current_writer.write_all(b"\n")?;
259-
self.size += bytes.len() + 1;
286+
self.size += bytes.len();
260287
}
261288
None => panic!("trying to write to a split that was not created"),
262289
}
@@ -321,11 +348,11 @@ impl SplitWriter<'_> {
321348

322349
let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string()));
323350
while let Some((ln, line)) = input_iter.next() {
324-
let l = line?;
351+
let line = line?;
325352
match n.cmp(&(&ln + 1)) {
326353
Ordering::Less => {
327354
assert!(
328-
input_iter.add_line_to_buffer(ln, l).is_none(),
355+
input_iter.add_line_to_buffer(ln, line).is_none(),
329356
"the buffer is big enough to contain 1 line"
330357
);
331358
ret = Ok(());
@@ -334,15 +361,15 @@ impl SplitWriter<'_> {
334361
Ordering::Equal => {
335362
assert!(
336363
self.options.suppress_matched
337-
|| input_iter.add_line_to_buffer(ln, l).is_none(),
364+
|| input_iter.add_line_to_buffer(ln, line).is_none(),
338365
"the buffer is big enough to contain 1 line"
339366
);
340367
ret = Ok(());
341368
break;
342369
}
343370
Ordering::Greater => (),
344371
}
345-
self.writeln(&l)?;
372+
self.writeln(&line)?;
346373
}
347374
self.finish_split();
348375
ret
@@ -379,23 +406,33 @@ impl SplitWriter<'_> {
379406
input_iter.set_size_of_buffer(1);
380407

381408
while let Some((ln, line)) = input_iter.next() {
382-
let l = line?;
409+
let line = line?;
410+
let l = if let Some(l) = line.strip_suffix('\n') {
411+
if let Some(l) = l.strip_suffix('\r') {
412+
l
413+
} else {
414+
l
415+
}
416+
} else {
417+
&line
418+
}
419+
.to_string();
383420
if regex.is_match(&l) {
384421
let mut next_line_suppress_matched = false;
385422
match (self.options.suppress_matched, offset) {
386423
// no offset, add the line to the next split
387424
(false, 0) => {
388425
assert!(
389-
input_iter.add_line_to_buffer(ln, l).is_none(),
426+
input_iter.add_line_to_buffer(ln, line).is_none(),
390427
"the buffer is big enough to contain 1 line"
391428
);
392429
}
393430
// a positive offset, some more lines need to be added to the current split
394-
(false, _) => self.writeln(&l)?,
431+
(false, _) => self.writeln(&line)?,
395432
// suppress matched option true, but there is a positive offset, so the line is printed
396433
(true, 1..) => {
397434
next_line_suppress_matched = true;
398-
self.writeln(&l)?;
435+
self.writeln(&line)?;
399436
}
400437
_ => (),
401438
};
@@ -424,7 +461,7 @@ impl SplitWriter<'_> {
424461
}
425462
return Ok(());
426463
}
427-
self.writeln(&l)?;
464+
self.writeln(&line)?;
428465
}
429466
} else {
430467
// With a negative offset we use a buffer to keep the lines within the offset.
@@ -435,7 +472,17 @@ impl SplitWriter<'_> {
435472
let offset_usize = -offset as usize;
436473
input_iter.set_size_of_buffer(offset_usize);
437474
while let Some((ln, line)) = input_iter.next() {
438-
let l = line?;
475+
let line = line?;
476+
let l = if let Some(l) = line.strip_suffix('\n') {
477+
if let Some(l) = l.strip_suffix('\r') {
478+
l
479+
} else {
480+
l
481+
}
482+
} else {
483+
&line
484+
}
485+
.to_string();
439486
if regex.is_match(&l) {
440487
for line in input_iter.shrink_buffer_to_size() {
441488
self.writeln(&line)?;
@@ -444,12 +491,12 @@ impl SplitWriter<'_> {
444491
// since offset_usize is for sure greater than 0
445492
// the first element of the buffer should be removed and this
446493
// line inserted to be coherent with GNU implementation
447-
input_iter.add_line_to_buffer(ln, l);
494+
input_iter.add_line_to_buffer(ln, line);
448495
} else {
449496
// add 1 to the buffer size to make place for the matched line
450497
input_iter.set_size_of_buffer(offset_usize + 1);
451498
assert!(
452-
input_iter.add_line_to_buffer(ln, l).is_none(),
499+
input_iter.add_line_to_buffer(ln, line).is_none(),
453500
"should be big enough to hold every lines"
454501
);
455502
}
@@ -460,7 +507,7 @@ impl SplitWriter<'_> {
460507
}
461508
return Ok(());
462509
}
463-
if let Some(line) = input_iter.add_line_to_buffer(ln, l) {
510+
if let Some(line) = input_iter.add_line_to_buffer(ln, line) {
464511
self.writeln(&line)?;
465512
}
466513
}

tests/by-util/test_csplit.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,3 +1476,12 @@ fn test_directory_input_file() {
14761476
.fails_with_code(1)
14771477
.stderr_only("csplit: cannot open 'test_directory' for reading: Permission denied\n");
14781478
}
1479+
1480+
/// Input that does not end with a newline must be split without one being appended.
#[test]
fn test_stdin_no_trailing_newline() {
    // Splitting before line 2 gives "a\n" (2 bytes) and "b\nc\nd" (5 bytes);
    // the second size would be 6 if a newline were added after "d".
    new_ucmd!()
        .args(&["-", "2"])
        .pipe_in("a\nb\nc\nd")
        .succeeds()
        .stdout_only("2\n5\n");
}

0 commit comments

Comments
 (0)