diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs
index 61424141a04..4e05678ef2d 100644
--- a/src/uu/comm/src/comm.rs
+++ b/src/uu/comm/src/comm.rs
@@ -8,7 +8,7 @@
 use std::cmp::Ordering;
 use std::ffi::OsString;
 use std::fs::{File, metadata};
-use std::io::{self, BufRead, BufReader, Read, StdinLock, stdin};
+use std::io::{self, BufRead, BufReader, BufWriter, Read, StdinLock, Write, stdin};
 use std::path::Path;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UResult, USimpleError};
@@ -184,6 +184,20 @@ pub fn are_files_identical(path1: &Path, path2: &Path) -> io::Result<bool> {
     }
 }
 
+/// Writes `delim` and then `line` to `writer` as raw bytes.
+///
+/// No UTF-8 conversion is applied, so input that is not valid UTF-8 is emitted
+/// unchanged. A failed write is returned with the `comm-error-write` context.
+fn write_line_with_delimiter<W: Write>(writer: &mut W, delim: &[u8], line: &[u8]) -> UResult<()> {
+    writer
+        .write_all(delim)
+        .map_err_context(|| translate!("comm-error-write"))?;
+    writer
+        .write_all(line)
+        .map_err_context(|| translate!("comm-error-write"))?;
+    Ok(())
+}
+
 fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
     let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
     let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
@@ -191,6 +205,8 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
     let delim_col_2 = delim.repeat(width_col_1);
     let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
 
+    let mut writer = BufWriter::new(io::stdout().lock());
+
     let ra = &mut Vec::new();
     let mut na = a.read_line(ra);
     let rb = &mut Vec::new();
@@ -239,7 +255,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
                     break;
                 }
                 if !opts.get_flag(options::COLUMN_1) {
-                    print!("{}", String::from_utf8_lossy(ra));
+                    writer
+                        .write_all(ra)
+                        .map_err_context(|| translate!("comm-error-write"))?;
                 }
                 ra.clear();
                 na = a.read_line(ra);
@@ -250,7 +268,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
                     break;
                 }
                 if !opts.get_flag(options::COLUMN_2) {
-                    print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
+                    write_line_with_delimiter(&mut writer, delim_col_2.as_bytes(), rb)?;
                 }
                 rb.clear();
                 nb = b.read_line(rb);
@@ -262,7 +280,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
                     break;
                 }
                 if !opts.get_flag(options::COLUMN_3) {
-                    print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
+                    write_line_with_delimiter(&mut writer, delim_col_3.as_bytes(), ra)?;
                 }
                 ra.clear();
                 rb.clear();
@@ -280,12 +298,20 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
 
     if opts.get_flag(options::TOTAL) {
         let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
-        print!(
+        write!(
+            writer,
             "{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}{}{line_ending}",
             translate!("comm-total")
-        );
+        )
+        .map_err_context(|| translate!("comm-error-write"))?;
     }
 
+    // Buffered output may only reach stdout here, so a write failure can first
+    // surface at this point; report it like every other write instead of dropping it.
+    writer
+        .flush()
+        .map_err_context(|| translate!("comm-error-write"))?;
+
     if should_check_order && (checker1.has_error || checker2.has_error) {
         // Print the input error message once at the end
         if input_error {
diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs
index 3194d270e94..dbcee059855 100644
--- a/tests/by-util/test_comm.rs
+++ b/tests/by-util/test_comm.rs
@@ -649,6 +649,30 @@ fn test_comm_eintr_handling() {
         .stdout_contains("line3");
 }
 
+#[test]
+fn test_output_lossy_utf8() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+
+    // Create files with invalid UTF-8
+    // A: \xfe\n\xff\n
+    // B: \xff\n\xfe\n
+    at.write_bytes("a", b"\xfe\n\xff\n");
+    at.write_bytes("b", b"\xff\n\xfe\n");
+
+    // GNU comm output (and uutils with fix):
+    // \xfe\n (col 1)
+    // \t\t\xff\n (col 3)
+    // \t\xfe\n (col 2)
+    // Hex: fe 0a 09 09 ff 0a 09 fe 0a
+
+    scene
+        .ucmd()
+        .args(&["a", "b"])
+        .fails() // Fails because of unsorted input
+        .stdout_is_bytes(b"\xfe\n\t\t\xff\n\t\xfe\n");
+}
+
 #[test]
 #[cfg(any(target_os = "linux", target_os = "android"))]
 fn test_comm_anonymous_pipes() {