Skip to content

Commit 32155e1

Browse files
jfinkels authored and sylvestre committed
split: implement round-robin arg to --number
Implement distributing lines of a file in a round-robin manner to a specified number of chunks. For example:

    $ (seq 1 10 | split -n r/3) && head -v xa[abc]
    ==> xaa <==
    1
    4
    7
    10

    ==> xab <==
    2
    5
    8

    ==> xac <==
    3
    6
    9
1 parent 493a262 commit 32155e1

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

src/uu/split/src/split.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,46 @@ where
11951195
Ok(())
11961196
}
11971197

1198+
fn split_into_n_chunks_by_line_round_robin<R>(
1199+
settings: &Settings,
1200+
reader: &mut R,
1201+
num_chunks: u64,
1202+
) -> UResult<()>
1203+
where
1204+
R: BufRead,
1205+
{
1206+
// This object is responsible for creating the filename for each chunk.
1207+
let mut filename_iterator = FilenameIterator::new(
1208+
&settings.prefix,
1209+
&settings.additional_suffix,
1210+
settings.suffix_length,
1211+
settings.suffix_type,
1212+
);
1213+
1214+
// Create one writer for each chunk. This will create each
1215+
// of the underlying files (if not in `--filter` mode).
1216+
let mut writers = vec![];
1217+
for _ in 0..num_chunks {
1218+
let filename = filename_iterator
1219+
.next()
1220+
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
1221+
let writer = settings.instantiate_current_writer(filename.as_str())?;
1222+
writers.push(writer);
1223+
}
1224+
1225+
let num_chunks: usize = num_chunks.try_into().unwrap();
1226+
for (i, line_result) in reader.lines().enumerate() {
1227+
let line = line_result.unwrap();
1228+
let maybe_writer = writers.get_mut(i % num_chunks);
1229+
let writer = maybe_writer.unwrap();
1230+
let bytes = line.as_bytes();
1231+
writer.write_all(bytes)?;
1232+
writer.write_all(b"\n")?;
1233+
}
1234+
1235+
Ok(())
1236+
}
1237+
11981238
fn split(settings: &Settings) -> UResult<()> {
11991239
let mut reader = BufReader::new(if settings.input == "-" {
12001240
Box::new(stdin()) as Box<dyn Read>
@@ -1221,6 +1261,9 @@ fn split(settings: &Settings) -> UResult<()> {
12211261
let chunk_number = chunk_number - 1;
12221262
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
12231263
}
1264+
Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
1265+
split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
1266+
}
12241267
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
12251268
Strategy::Lines(chunk_size) => {
12261269
let mut writer = LineChunkWriter::new(chunk_size, settings)?;

tests/by-util/test_split.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,3 +701,19 @@ fn test_multiple_of_input_chunk() {
701701
}
702702
assert_eq!(glob.collate(), at.read_bytes(name));
703703
}
704+
705+
/// `split -n r/2` on `fivelines.txt` must produce `xaa` and `xab` holding
/// alternating lines of the input.
#[test]
fn test_round_robin() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Slurp one of the generated output files into a `String`.
    let contents_of = |name| {
        let mut text = String::new();
        at.open(name).read_to_string(&mut text).unwrap();
        text
    };

    ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();

    // First chunk receives lines 1, 3, 5; second chunk receives lines 2, 4.
    for (name, expected) in [("xaa", "1\n3\n5\n"), ("xab", "2\n4\n")] {
        assert_eq!(contents_of(name), expected);
    }
}

0 commit comments

Comments
 (0)