split: stream chunk data in the split-by-bytes chunk loop

Adds `COPY_BUFFER_SIZE` (128 KiB) and `copy_exact`, which wraps
`reader.take(num_bytes)` in a `BufReader` of that capacity and forwards it to
`io::copy`, returning the number of bytes copied. The per-chunk loop now calls
`copy_exact` to send each chunk to stdout (the selected k-th chunk), to
`io::sink()` (the other chunks when a k-th chunk is requested) or to the
chunk's output writer, instead of first reading the whole chunk into a `Vec`
with `read_to_end`. A Linux-only test splits a 100 MiB file into two chunks
under a 100 MiB `Resource::AS` limit and checks both output sizes.

NOTE(review): the removed lines mapped read failures to the
`split-error-cannot-read-from-input` message; the added lines propagate
whatever `copy_exact` returns with `?`. Confirm the changed error text is
intended.

NOTE(review): this patch was recovered from a copy whose line breaks and
angle-bracket generic lists had been lost. The `<R: Read, W: Write>` bounds,
the `io::Result<u64>` return type and the positions of blank context lines are
reconstructed from usage and from the hunk line counts - verify against the
upstream commit before applying.

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 6f290a7d5e4..da8dcd51b4a 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -48,6 +48,9 @@ static OPT_IO_BLKSIZE: &str = "-io-blksize";
 static ARG_INPUT: &str = "input";
 static ARG_PREFIX: &str = "prefix";
 
+// 128 KiB
+const COPY_BUFFER_SIZE: usize = 128 * 1024;
+
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     let (args, obs_lines) = handle_obsolete(args);
@@ -1054,6 +1057,15 @@ impl ManageOutFiles for OutFiles {
     }
 }
 
+fn copy_exact<R: Read, W: Write>(
+    reader: &mut R,
+    writer: &mut W,
+    num_bytes: u64,
+) -> io::Result<u64> {
+    let mut buf_reader = BufReader::with_capacity(COPY_BUFFER_SIZE, reader.take(num_bytes));
+    io::copy(&mut buf_reader, writer)
+}
+
 /// Split a file or STDIN into a specific number of chunks by byte.
 ///
 /// When file size cannot be evenly divided into the number of chunks of the same size,
@@ -1147,49 +1159,32 @@ where
 
     for i in 1_u64..=num_chunks {
         let chunk_size = chunk_size_base + (chunk_size_reminder > i - 1) as u64;
-        let buf = &mut Vec::new();
         if num_bytes > 0 {
-            // Read `chunk_size` bytes from the reader into `buf`
+            // Read `chunk_size` bytes from the reader
             // except the last.
             //
             // The last chunk gets all remaining bytes so that if the number
             // of bytes in the input file was not evenly divisible by
             // `num_chunks`, we don't leave any bytes behind.
-            let limit = {
-                if i == num_chunks {
-                    num_bytes
-                } else {
-                    chunk_size
-                }
+            let limit = if i == num_chunks {
+                num_bytes
+            } else {
+                chunk_size
             };
 
-            let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf);
-
-            match n_bytes_read {
-                Ok(n_bytes) => {
-                    num_bytes -= n_bytes as u64;
-                }
-                Err(error) => {
-                    return Err(USimpleError::new(
-                        1,
-                        translate!("split-error-cannot-read-from-input", "input" => settings.input.maybe_quote(), "error" => error),
-                    ));
-                }
-            }
-
-            match kth_chunk {
-                Some(chunk_number) => {
-                    if i == chunk_number {
-                        stdout_writer.write_all(buf)?;
-                        break;
-                    }
+            let n_bytes = match kth_chunk {
+                Some(chunk_number) if i == chunk_number => {
+                    copy_exact(&mut reader, &mut stdout_writer, limit)?;
+                    break;
                 }
+                Some(_) => copy_exact(&mut reader, &mut io::sink(), limit)?,
                 None => {
                     let idx = (i - 1) as usize;
                     let writer = out_files.get_writer(idx, settings)?;
-                    writer.write_all(buf)?;
+                    copy_exact(&mut reader, writer, limit)?
                 }
-            }
+            };
+            num_bytes -= n_bytes;
         } else {
             break;
         }
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 497559aca3e..de13f3eadb0 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -1988,6 +1988,23 @@ fn test_split_separator_same_multiple() {
         .fails();
 }
 
+#[test]
+#[cfg(target_os = "linux")]
+fn test_number_n_chunks_streaming() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    // 100MB file, 100MB memory limit, split into 2x50MB chunks
+    let mut f = std::fs::File::create(at.plus("hundred_mb.bin")).unwrap();
+    f.write_all(&vec![0u8; 100 * 1024 * 1024]).unwrap();
+
+    ucmd.args(&["--number=2", "hundred_mb.bin"])
+        .limit(Resource::AS, 100 * 1024 * 1024, 100 * 1024 * 1024)
+        .succeeds();
+
+    assert_eq!(at.metadata("xaa").len(), 50 * 1024 * 1024);
+    assert_eq!(at.metadata("xab").len(), 50 * 1024 * 1024);
+}
+
 #[test]
 fn test_long_lines() {
     let (at, mut ucmd) = at_and_ucmd!();