From d412f582cba2f6f035380431fdd08c7c1d67c980 Mon Sep 17 00:00:00 2001 From: yuankunzhang Date: Sat, 3 May 2025 23:02:41 +0800 Subject: [PATCH] split: fix a racing condition that causes issue #7869 --- src/uu/split/src/split.rs | 46 ++++++++++++++++++++++--------------- tests/by-util/test_split.rs | 31 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 79aea3e1552..64548ea387d 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -834,13 +834,7 @@ struct LineChunkWriter<'a> { impl<'a> LineChunkWriter<'a> { fn new(chunk_size: u64, settings: &'a Settings) -> UResult { let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?; - let filename = filename_iterator - .next() - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - if settings.verbose { - println!("creating file {}", filename.quote()); - } - let inner = settings.instantiate_current_writer(&filename, true)?; + let inner = Self::start_new_chunk(settings, &mut filename_iterator)?; Ok(LineChunkWriter { settings, chunk_size, @@ -850,6 +844,19 @@ impl<'a> LineChunkWriter<'a> { filename_iterator, }) } + + fn start_new_chunk( + settings: &Settings, + filename_iterator: &mut FilenameIterator, + ) -> io::Result>> { + let filename = filename_iterator + .next() + .ok_or_else(|| io::Error::other("output file suffixes exhausted"))?; + if settings.verbose { + println!("creating file {}", filename.quote()); + } + settings.instantiate_current_writer(&filename, true) + } } impl Write for LineChunkWriter<'_> { @@ -869,14 +876,7 @@ impl Write for LineChunkWriter<'_> { // corresponding writer. if self.num_lines_remaining_in_current_chunk == 0 { self.num_chunks_written += 1; - let filename = self - .filename_iterator - .next() - .ok_or_else(|| io::Error::other("output file suffixes exhausted"))?; - if self.settings.verbose { - println!("creating file {}", filename.quote()); - } - self.inner = self.settings.instantiate_current_writer(&filename, true)?; + self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?; self.num_lines_remaining_in_current_chunk = self.chunk_size; } @@ -889,9 +889,19 @@ impl Write for LineChunkWriter<'_> { self.num_lines_remaining_in_current_chunk -= 1; } - let num_bytes_written = - custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?; - total_bytes_written += num_bytes_written; + // There might be bytes remaining in the buffer, and we write + // them to the current chunk. But first, we may need to rotate + // the current chunk in case it has already reached its line + // limit. + if prev < buf.len() { + if self.num_lines_remaining_in_current_chunk == 0 { + self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?; + self.num_lines_remaining_in_current_chunk = self.chunk_size; + } + let num_bytes_written = + custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?; + total_bytes_written += num_bytes_written; + } Ok(total_bytes_written) } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 7013207aea5..59d70a31ff0 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -120,6 +120,15 @@ impl RandomFile { n -= 1; } } + + /// Add n lines each of the given size. + fn add_lines_with_line_size(&mut self, lines: usize, line_size: usize) { + let mut n = lines; + while n > 0 { + writeln!(self.inner, "{}", random_chars(line_size)).unwrap(); + n -= 1; + } + } } #[test] @@ -430,6 +439,28 @@ fn test_split_lines_number() { .stderr_only("split: invalid number of lines: 'file'\n"); } +/// Test interference between split line size and IO buffer capacity. +/// See issue #7869. +#[test] +fn test_split_lines_interfere_with_io_buf_capacity() { + let buf_capacity = BufWriter::new(Vec::new()).capacity(); + // We intentionally set the line size to be less than the IO write buffer + // capacity. This is to trigger the condition where after the first split + // file is written, there are still bytes left in the buffer. We then + // test that those bytes are written to the next split file. + let line_size = buf_capacity - 2; + + let (at, mut ucmd) = at_and_ucmd!(); + let name = "split_lines_interfere_with_io_buf_capacity"; + RandomFile::new(&at, name).add_lines_with_line_size(2, line_size); + ucmd.args(&["-l", "1", name]).succeeds(); + + // Note that `lines_size` doesn't take the trailing newline into account, + // we add 1 for adjustment. + assert_eq!(at.read("xaa").len(), line_size + 1); + assert_eq!(at.read("xab").len(), line_size + 1); +} + /// Test short lines option with value concatenated #[test] fn test_split_lines_short_concatenated_with_value() {