From 208d6615c6e48a3b7f91e5687fae8a7ccfb94c8b Mon Sep 17 00:00:00 2001 From: Jane Illarionova Date: Wed, 31 Dec 2025 11:12:08 -0500 Subject: [PATCH 1/2] Remove Unicode width calculation for characters --- src/uu/unexpand/src/unexpand.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index b3990ac596f..896318484dd 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -13,7 +13,6 @@ use std::num::IntErrorKind; use std::path::Path; use std::str::from_utf8; use thiserror::Error; -use unicode_width::UnicodeWidthChar; use uucore::display::Quotable; use uucore::error::{FromIo, UError, UResult, USimpleError}; use uucore::translate; @@ -279,11 +278,7 @@ fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usi Some(' ') => (CharType::Space, 0, 1), Some('\t') => (CharType::Tab, 0, 1), Some('\x08') => (CharType::Backspace, 0, 1), - Some(c) => ( - CharType::Other, - UnicodeWidthChar::width(c).unwrap_or(0), - nbytes, - ), + Some(_) => (CharType::Other, nbytes, nbytes), None => { // invalid char snuck past the utf8_validation_iterator somehow??? (CharType::Other, 1, 1) From 2ded1fe210fbe0c73dfdccd40636ef66e4ed8cf4 Mon Sep 17 00:00:00 2001 From: Jane Illarionova Date: Wed, 31 Dec 2025 11:16:33 -0500 Subject: [PATCH 2/2] Add test for unexpand with multibyte UTF-8 input --- tests/by-util/test_unexpand.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/by-util/test_unexpand.rs b/tests/by-util/test_unexpand.rs index 0f2a6d464fe..0720dabb043 100644 --- a/tests/by-util/test_unexpand.rs +++ b/tests/by-util/test_unexpand.rs @@ -295,3 +295,15 @@ fn test_non_utf8_filename() { ucmd.arg(&filename).succeeds().stdout_is("\ta\n"); } + +#[test] +fn unexpand_multibyte_utf8_gnu_compat() { + // Verifies GNU-compatible behavior: column position uses byte count, not display width + // "1ΔΔΔ5" is 8 bytes (1 + 2*3 + 1), already at tab stop 8 + // So 3 spaces should NOT convert to tab (would need 8 more to reach tab stop 16) + new_ucmd!() + .args(&["-a"]) + .pipe_in("1ΔΔΔ5 99999\n") + .succeeds() + .stdout_is("1ΔΔΔ5 99999\n"); +}