From 14e2b563beedab5eba08bf7aa3db63c3e2340ccb Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 30 Dec 2025 22:12:57 +0100 Subject: [PATCH] expand: address a cognitive_complexity warning --- src/uu/expand/src/expand.rs | 116 ++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 294b3bc884c..4d8e0c01d4c 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -349,7 +349,62 @@ enum CharType { Other, } -#[allow(clippy::cognitive_complexity)] +/// Classify a character and determine its width and byte length. +/// +/// Returns `(CharType, display_width, byte_length)`. +#[inline] +fn classify_char(buf: &[u8], byte: usize, uflag: bool) -> (CharType, usize, usize) { + use self::CharType::{Backspace, Other, Tab}; + + if uflag { + let nbytes = char::from(buf[byte]).len_utf8(); + + if byte + nbytes > buf.len() { + // don't overrun buffer because of invalid UTF-8 + return (Other, 1, 1); + } + + if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { + match t.chars().next() { + Some('\t') => (Tab, 0, nbytes), + Some('\x08') => (Backspace, 0, nbytes), + Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), + None => { + // no valid char at start of t, so take 1 byte + (Other, 1, 1) + } + } + } else { + (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide + } + } else { + ( + match buf.get(byte) { + // always take exactly 1 byte in strict ASCII mode + Some(0x09) => Tab, + Some(0x08) => Backspace, + _ => Other, + }, + 1, + 1, + ) + } +} + +/// Write spaces for a tab expansion.
+#[inline] +fn write_tab_spaces( + output: &mut BufWriter, + nts: usize, + tspaces: &str, +) -> std::io::Result<()> { + if nts <= tspaces.len() { + output.write_all(&tspaces.as_bytes()[..nts]) + } else { + output.write_all(" ".repeat(nts).as_bytes()) + } +} + fn expand_line( buf: &mut Vec, output: &mut BufWriter, @@ -372,37 +427,7 @@ fn expand_line( let mut init = true; while byte < buf.len() { - let (ctype, cwidth, nbytes) = if options.uflag { - let nbytes = char::from(buf[byte]).len_utf8(); - - if byte + nbytes > buf.len() { - // don't overrun buffer because of invalid UTF-8 - (Other, 1, 1) - } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { - match t.chars().next() { - Some('\t') => (Tab, 0, nbytes), - Some('\x08') => (Backspace, 0, nbytes), - Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), - None => { - // no valid char at start of t, so take 1 byte - (Other, 1, 1) - } - } - } else { - (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide - } - } else { - ( - match buf.get(byte) { - // always take exactly 1 byte in strict ASCII mode - Some(0x09) => Tab, - Some(0x08) => Backspace, - _ => Other, - }, - 1, - 1, - ) - }; + let (ctype, cwidth, nbytes) = classify_char(buf, byte, options.uflag); // figure out how many columns this char takes up match ctype { @@ -413,23 +438,24 @@ fn expand_line( // now dump out either spaces if we're expanding, or a literal tab if we're not if init || !options.iflag { - if nts <= options.tspaces.len() { - output.write_all(&options.tspaces.as_bytes()[..nts])?; - } else { - output.write_all(" ".repeat(nts).as_bytes())?; - } + write_tab_spaces(output, nts, &options.tspaces)?; } else { output.write_all(&buf[byte..byte + nbytes])?; } } - _ => { - col = if ctype == Other { - col + cwidth - } else if col > 0 { - col - 1 - } else { - 0 - }; + Backspace => { + col = col.saturating_sub(1); + + // if we're writing anything other than a space, then we're + // done with the line's leading spaces + if 
buf[byte] != 0x20 { + init = false; + } + + output.write_all(&buf[byte..byte + nbytes])?; + } + Other => { + col += cwidth; // if we're writing anything other than a space, then we're // done with the line's leading spaces