uutils · sylvestre · Dec 30, 2025 · cakebaker · Dec 31, 2025 · cakebaker
diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs
@@ -349,7 +349,62 @@ enum CharType {
     Other,
 }
 
-#[allow(clippy::cognitive_complexity)]
+/// Classify a character and determine its width and byte length.
+///
+/// Returns `(CharType, display_width, byte_length)`.
+#[inline]
+fn classify_char(buf: &[u8], byte: usize, uflag: bool) -> (CharType, usize, usize) {
+    use self::CharType::{Backspace, Other, Tab};
+
+    if uflag {
+        let nbytes = char::from(buf[byte]).len_utf8();
+
+        if byte + nbytes > buf.len() {
+            // don't overrun buffer because of invalid UTF-8
+            return (Other, 1, 1);
+        }
+
+        if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
+            match t.chars().next() {
+                Some('\t') => (Tab, 0, nbytes),
+                Some('\x08') => (Backspace, 0, nbytes),
-                Some('\t') => (Tab, 0, nbytes),
-                Some('\x08') => (Backspace, 0, nbytes),
+                Some('\t') => (Tab, 0, 1),
+                Some('\x08') => (Backspace, 0, 1),
-                Some('\t') => (Tab, 0, nbytes),
-                Some('\x08') => (Backspace, 0, nbytes),
+                Some('\t') => (Tab, 0, 1),
+                Some('\x08') => (Backspace, 0, 1),
+                Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
+                None => {
+                    // no valid char at start of t, so take 1 byte
+                    (Other, 1, 1)
+                }
+            }
+        } else {
+            (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide
+        }
+    } else {
+        (
+            match buf.get(byte) {
+                // always take exactly 1 byte in strict ASCII mode
+                Some(0x09) => Tab,
+                Some(0x08) => Backspace,
+                _ => Other,
+            },
+            1,
+            1,
+        )
+    }
+}
+
+/// Write spaces for a tab expansion.
+#[inline]
+fn write_tab_spaces(
+    output: &mut BufWriter<std::io::Stdout>,
+    nts: usize,
+    tspaces: &str,
+) -> std::io::Result<()> {
+    if nts <= tspaces.len() {
+        output.write_all(&tspaces.as_bytes()[..nts])
+    } else {
+        output.write_all(" ".repeat(nts).as_bytes())
+    }
+}
+
 fn expand_line(
     buf: &mut Vec<u8>,
     output: &mut BufWriter<std::io::Stdout>,
@@ -372,37 +427,7 @@ fn expand_line(
     let mut init = true;
 
     while byte < buf.len() {
-        let (ctype, cwidth, nbytes) = if options.uflag {
-            let nbytes = char::from(buf[byte]).len_utf8();
-
-            if byte + nbytes > buf.len() {
-                // don't overrun buffer because of invalid UTF-8
-                (Other, 1, 1)
-            } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
-                match t.chars().next() {
-                    Some('\t') => (Tab, 0, nbytes),
-                    Some('\x08') => (Backspace, 0, nbytes),
-                    Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
-                    None => {
-                        // no valid char at start of t, so take 1 byte
-                        (Other, 1, 1)
-                    }
-                }
-            } else {
-                (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide
-            }
-        } else {
-            (
-                match buf.get(byte) {
-                    // always take exactly 1 byte in strict ASCII mode
-                    Some(0x09) => Tab,
-                    Some(0x08) => Backspace,
-                    _ => Other,
-                },
-                1,
-                1,
-            )
-        };
+        let (ctype, cwidth, nbytes) = classify_char(buf, byte, options.uflag);
 
         // figure out how many columns this char takes up
         match ctype {
@@ -413,23 +438,24 @@ fn expand_line(
 
                 // now dump out either spaces if we're expanding, or a literal tab if we're not
                 if init || !options.iflag {
-                    if nts <= options.tspaces.len() {
-                        output.write_all(&options.tspaces.as_bytes()[..nts])?;
-                    } else {
-                        output.write_all(" ".repeat(nts).as_bytes())?;
-                    }
+                    write_tab_spaces(output, nts, &options.tspaces)?;
                 } else {
                     output.write_all(&buf[byte..byte + nbytes])?;
                 }
             }
-            _ => {
-                col = if ctype == Other {
-                    col + cwidth
-                } else if col > 0 {
-                    col - 1
-                } else {
-                    0
-                };
+            Backspace => {
+                col = col.saturating_sub(1);
+
+                // if we're writing anything other than a space, then we're
+                // done with the line's leading spaces
+                if buf[byte] != 0x20 {
+                    init = false;
+                }
+
+                output.write_all(&buf[byte..byte + nbytes])?;
+            }
+            Other => {
+                col += cwidth;
 
                 // if we're writing anything other than a space, then we're
                 // done with the line's leading spaces