From c3d3c23997875e5e8338f228aeb88f166f3965be Mon Sep 17 00:00:00 2001 From: Paol0B Date: Mon, 12 Jan 2026 21:15:45 +0100 Subject: [PATCH 1/5] fix(comm): improve stdout handling and add test for lossy UTF-8 output --- src/uu/comm/src/comm.rs | 18 ++++++++++++------ tests/by-util/test_comm.rs | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index 80b20b53f18..e822fb1683c 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -8,7 +8,7 @@ use std::cmp::Ordering; use std::ffi::OsString; use std::fs::{File, metadata}; -use std::io::{self, BufRead, BufReader, Read, StdinLock, stdin}; +use std::io::{self, BufRead, BufReader, Read, StdinLock, Write, stdin}; use std::path::Path; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; @@ -186,6 +186,8 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) let delim_col_2 = delim.repeat(width_col_1); let delim_col_3 = delim.repeat(width_col_1 + width_col_2); + let mut stdout = io::stdout().lock(); + let ra = &mut Vec::new(); let mut na = a.read_line(ra); let rb = &mut Vec::new(); @@ -234,7 +236,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_1) { - print!("{}", String::from_utf8_lossy(ra)); + stdout.write_all(ra).map_err_context(|| "write error".to_string())?; } ra.clear(); na = a.read_line(ra); @@ -245,7 +247,8 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_2) { - print!("{delim_col_2}{}", String::from_utf8_lossy(rb)); + stdout.write_all(delim_col_2.as_bytes()).map_err_context(|| "write error".to_string())?; + stdout.write_all(rb).map_err_context(|| "write error".to_string())?; } rb.clear(); nb = b.read_line(rb); @@ -257,7 +260,8 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_3) { - print!("{delim_col_3}{}", String::from_utf8_lossy(ra)); + stdout.write_all(delim_col_3.as_bytes()).map_err_context(|| "write error".to_string())?; + stdout.write_all(ra).map_err_context(|| "write error".to_string())?; } ra.clear(); rb.clear(); @@ -275,10 +279,12 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) if opts.get_flag(options::TOTAL) { let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED)); - print!( + write!( + stdout, "{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}{}{line_ending}", translate!("comm-total") - ); + ) + .map_err_context(|| "write error".to_string())?; } if should_check_order && (checker1.has_error || checker2.has_error) { diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs index bf719d7fbd1..8426e65b09e 100644 --- a/tests/by-util/test_comm.rs +++ b/tests/by-util/test_comm.rs @@ -648,3 +648,27 @@ fn test_comm_eintr_handling() { .stdout_contains("line2") .stdout_contains("line3"); } + +#[test] +fn test_output_lossy_utf8() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + // Create files with invalid UTF-8 + // A: \xfe\n\xff\n + // B: \xff\n\xfe\n + at.write_bytes("a", b"\xfe\n\xff\n"); + at.write_bytes("b", b"\xff\n\xfe\n"); + + // GNU comm output (and uutils with fix): + // \xfe\n (col 1) + // \t\t\xff\n (col 3) + // \t\xfe\n (col 2) + // Hex: fe 0a 09 09 ff 0a 09 fe 0a + + scene + .ucmd() + .args(&["a", "b"]) + .fails() // Fails because of unsorted input + .stdout_is_bytes(b"\xfe\n\t\t\xff\n\t\xfe\n"); +} From 03b8fa5ddc1ae97650968b3cc75d9b0c1de019d4 Mon Sep 17 00:00:00 2001 From: Paol0B Date: Tue, 13 Jan 2026 00:32:20 +0100 Subject: [PATCH 2/5] run cargo fmt --- src/uu/comm/src/comm.rs | 20 +++++++++++++++----- tests/by-util/test_comm.rs | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index e822fb1683c..bbda8f42600 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -236,7 +236,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_1) { - stdout.write_all(ra).map_err_context(|| "write error".to_string())?; + stdout + .write_all(ra) + .map_err_context(|| "write error".to_string())?; } ra.clear(); na = a.read_line(ra); @@ -247,8 +249,12 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_2) { - stdout.write_all(delim_col_2.as_bytes()).map_err_context(|| "write error".to_string())?; - stdout.write_all(rb).map_err_context(|| "write error".to_string())?; + stdout + .write_all(delim_col_2.as_bytes()) + .map_err_context(|| "write error".to_string())?; + stdout + .write_all(rb) + .map_err_context(|| "write error".to_string())?; } rb.clear(); nb = b.read_line(rb); @@ -260,8 +266,12 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_3) { - stdout.write_all(delim_col_3.as_bytes()).map_err_context(|| "write error".to_string())?; - stdout.write_all(ra).map_err_context(|| "write error".to_string())?; + stdout + .write_all(delim_col_3.as_bytes()) + .map_err_context(|| "write error".to_string())?; + stdout + .write_all(ra) + .map_err_context(|| "write error".to_string())?; } ra.clear(); rb.clear(); diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs index 8426e65b09e..0e3b3502808 100644 --- a/tests/by-util/test_comm.rs +++ b/tests/by-util/test_comm.rs @@ -653,7 +653,7 @@ fn test_comm_eintr_handling() { fn test_output_lossy_utf8() { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; - + // Create files with invalid UTF-8 // A: \xfe\n\xff\n // B: \xff\n\xfe\n @@ -665,7 +665,7 @@ fn test_output_lossy_utf8() { // \t\t\xff\n (col 3) // \t\xfe\n (col 2) // Hex: fe 0a 09 09 ff 0a 09 fe 0a - + scene .ucmd() .args(&["a", "b"]) From ea47ce40522bb717ec1c7c8baf7a5ed9fd0baaaf Mon Sep 17 00:00:00 2001 From: Paol0B Date: Fri, 16 Jan 2026 00:42:21 +0100 Subject: [PATCH 3/5] perf(comm): use BufWriter for buffered stdout output Wrap stdout in BufWriter to improve performance and avoid duplicate error messages, matching GNU comm behavior. --- src/uu/comm/src/comm.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index bbda8f42600..02dc335f428 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -8,7 +8,7 @@ use std::cmp::Ordering; use std::ffi::OsString; use std::fs::{File, metadata}; -use std::io::{self, BufRead, BufReader, Read, StdinLock, Write, stdin}; +use std::io::{self, BufRead, BufReader, BufWriter, Read, StdinLock, Write, stdin}; use std::path::Path; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; @@ -186,7 +186,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) let delim_col_2 = delim.repeat(width_col_1); let delim_col_3 = delim.repeat(width_col_1 + width_col_2); - let mut stdout = io::stdout().lock(); + let mut writer = BufWriter::new(io::stdout().lock()); let ra = &mut Vec::new(); let mut na = a.read_line(ra); @@ -236,7 +236,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_1) { - stdout + writer .write_all(ra) .map_err_context(|| "write error".to_string())?; } @@ -249,10 +249,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_2) { - stdout + writer .write_all(delim_col_2.as_bytes()) .map_err_context(|| "write error".to_string())?; - stdout + writer .write_all(rb) .map_err_context(|| "write error".to_string())?; } @@ -266,10 +266,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_3) { - stdout + writer .write_all(delim_col_3.as_bytes()) .map_err_context(|| "write error".to_string())?; - stdout + writer .write_all(ra) .map_err_context(|| "write error".to_string())?; } @@ -290,13 +290,15 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) if opts.get_flag(options::TOTAL) { let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED)); write!( - stdout, + writer, "{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}{}{line_ending}", translate!("comm-total") ) .map_err_context(|| "write error".to_string())?; } + writer.flush().ok(); + if should_check_order && (checker1.has_error || checker2.has_error) { // Print the input error message once at the end if input_error { From dc2eab51e95edff7016ad16f6d52c23ea5f4f395 Mon Sep 17 00:00:00 2001 From: Paol0B Date: Mon, 19 Jan 2026 20:50:25 +0100 Subject: [PATCH 4/5] fix: refactor write operations in comm to use a dedicated function --- src/uu/comm/src/comm.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index f348cef99fc..2f9ac9a86df 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -184,6 +184,16 @@ pub fn are_files_identical(path1: &Path, path2: &Path) -> io::Result { } } +fn write_line_with_delimiter(writer: &mut W, delim: &[u8], line: &[u8]) -> UResult<()> { + writer + .write_all(delim) + .map_err_context(|| "write error".to_string())?; + writer + .write_all(line) + .map_err_context(|| "write error".to_string())?; + Ok(()) +} + fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> { let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1)); let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2)); @@ -254,12 +264,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_2) { - writer - .write_all(delim_col_2.as_bytes()) - .map_err_context(|| "write error".to_string())?; - writer - .write_all(rb) - .map_err_context(|| "write error".to_string())?; + write_line_with_delimiter(&mut writer, delim_col_2.as_bytes(), rb)?; } rb.clear(); nb = b.read_line(rb); @@ -271,12 +276,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) break; } if !opts.get_flag(options::COLUMN_3) { - writer - .write_all(delim_col_3.as_bytes()) - .map_err_context(|| "write error".to_string())?; - writer - .write_all(ra) - .map_err_context(|| "write error".to_string())?; + write_line_with_delimiter(&mut writer, delim_col_3.as_bytes(), ra)?; } ra.clear(); rb.clear(); From 03ec9e1034fd4954c5e4f338dd50bd70f030bb5f Mon Sep 17 00:00:00 2001 From: Paol0B <52996310+Paol0B@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:10:24 +0000 Subject: [PATCH 5/5] comm: use translate! --- src/uu/comm/src/comm.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index 2b420a6972b..4e05678ef2d 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -187,10 +187,10 @@ pub fn are_files_identical(path1: &Path, path2: &Path) -> io::Result { fn write_line_with_delimiter(writer: &mut W, delim: &[u8], line: &[u8]) -> UResult<()> { writer .write_all(delim) - .map_err_context(|| "write error".to_string())?; + .map_err_context(|| translate!("comm-error-write"))?; writer .write_all(line) - .map_err_context(|| "write error".to_string())?; + .map_err_context(|| translate!("comm-error-write"))?; Ok(()) } @@ -253,7 +253,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) if !opts.get_flag(options::COLUMN_1) { writer .write_all(ra) - .map_err_context(|| "write error".to_string())?; + .map_err_context(|| translate!("comm-error-write"))?; } ra.clear(); na = a.read_line(ra); @@ -299,7 +299,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) "{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}{}{line_ending}", translate!("comm-total") ) - .map_err_context(|| "write error".to_string())?; + .map_err_context(|| translate!("comm-error-write"))?; } writer.flush().ok();