diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs
index b3990ac596f..896318484dd 100644
--- a/src/uu/unexpand/src/unexpand.rs
+++ b/src/uu/unexpand/src/unexpand.rs
@@ -13,7 +13,6 @@ use std::num::IntErrorKind;
 use std::path::Path;
 use std::str::from_utf8;
 use thiserror::Error;
-use unicode_width::UnicodeWidthChar;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UError, UResult, USimpleError};
 use uucore::translate;
@@ -279,11 +278,7 @@ fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usi
                 Some(' ') => (CharType::Space, 0, 1),
                 Some('\t') => (CharType::Tab, 0, 1),
                 Some('\x08') => (CharType::Backspace, 0, 1),
-                Some(c) => (
-                    CharType::Other,
-                    UnicodeWidthChar::width(c).unwrap_or(0),
-                    nbytes,
-                ),
+                Some(_) => (CharType::Other, nbytes, nbytes),
                 None => {
                     // invalid char snuck past the utf8_validation_iterator somehow???
                     (CharType::Other, 1, 1)
diff --git a/tests/by-util/test_unexpand.rs b/tests/by-util/test_unexpand.rs
index 0f2a6d464fe..0720dabb043 100644
--- a/tests/by-util/test_unexpand.rs
+++ b/tests/by-util/test_unexpand.rs
@@ -295,3 +295,15 @@ fn test_non_utf8_filename() {
 
     ucmd.arg(&filename).succeeds().stdout_is("\ta\n");
 }
+
+#[test]
+fn unexpand_multibyte_utf8_gnu_compat() {
+    // Verifies GNU-compatible behavior: column position uses byte count, not display width
+    // "1ΔΔΔ5" is 8 bytes (1 + 2*3 + 1), already at tab stop 8
+    // So 3 spaces should NOT convert to tab (would need 8 more to reach tab stop 16)
+    new_ucmd!()
+        .args(&["-a"])
+        .pipe_in("1ΔΔΔ5   99999\n")
+        .succeeds()
+        .stdout_is("1ΔΔΔ5   99999\n");
+}