From 3e15738d5b4b961e805f80a1909ec03f2d886d88 Mon Sep 17 00:00:00 2001 From: Ruiyang Wang Date: Sat, 3 Jan 2026 13:28:32 -0800 Subject: [PATCH 1/3] cut: fix -s flag ignored when delimiter is newline When using newline as the delimiter (-d $'\n') with -s (only-delimited), cut should suppress lines that do not contain the delimiter. However, cut_fields_newline_char_delim() was not checking the only_delimited flag, causing it to always output even when -s was specified. The fix uses read_until() instead of split() to read segments. Unlike split(), read_until() includes the delimiter in the buffer when found, allowing us to detect whether a delimiter was actually present or if we just hit EOF. This commit: - Adds only_delimited parameter to cut_fields_newline_char_delim() - Uses read_until() to detect delimiter presence while reading - If no delimiter found and only_delimited is true, returns early - Adds test case for newline delimiter with -s flag Fixes #10012 --- src/uu/cut/src/cut.rs | 35 ++++++++++++++++++++++++++++++++--- tests/by-util/test_cut.rs | 19 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index cc48edfab71..51dc362f319 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -265,12 +265,34 @@ fn cut_fields_newline_char_delim( reader: R, out: &mut W, ranges: &[Range], + only_delimited: bool, newline_char: u8, out_delim: &[u8], ) -> UResult<()> { - let buf_in = BufReader::new(reader); + let mut buf_in = BufReader::new(reader); + let mut segments: Vec> = Vec::new(); + let mut found_delimiter = false; + + // Read segments using read_until, which includes the delimiter in the buffer + // This lets us detect whether a delimiter was actually found + loop { + let mut segment = Vec::new(); + if buf_in.read_until(newline_char, &mut segment)? 
== 0 { + break; + } + // If segment ends with delimiter, we found one - remove it from segment + if segment.last() == Some(&newline_char) { + found_delimiter = true; + segment.pop(); + } + segments.push(segment); + } + + // With -s (only_delimited), suppress output if no delimiter found + if only_delimited && !found_delimiter { + return Ok(()); + } - let segments: Vec<_> = buf_in.split(newline_char).filter_map(|x| x.ok()).collect(); let mut print_delim = false; for &Range { low, high } in ranges { @@ -303,7 +325,14 @@ fn cut_fields( match field_opts.delimiter { Delimiter::Slice(delim) if delim == [newline_char] => { let out_delim = opts.out_delimiter.unwrap_or(delim); - cut_fields_newline_char_delim(reader, out, ranges, newline_char, out_delim) + cut_fields_newline_char_delim( + reader, + out, + ranges, + field_opts.only_delimited, + newline_char, + out_delim, + ) } Delimiter::Slice(delim) => { let matcher = ExactMatcher::new(delim); diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 79fb1488ab8..919f6364475 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -301,6 +301,25 @@ fn test_newline_as_delimiter_with_output_delimiter() { .stdout_only_bytes("a:b\n"); } +#[test] +fn test_newline_as_delimiter_with_only_delimited() { + // When input has no newline delimiter and -s is specified, + // the line should be suppressed (no output) + new_ucmd!() + .args(&["-f1", "-d", "\n", "-s"]) + .pipe_in("abc") + .succeeds() + .stdout_only_bytes(""); + + // When input has newline delimiter and -s is specified, + // it should output the selected field + new_ucmd!() + .args(&["-f1", "-d", "\n", "-s"]) + .pipe_in("line1\nline2") + .succeeds() + .stdout_only_bytes("line1\n"); +} + #[test] fn test_multiple_delimiters() { new_ucmd!() From 526b5d867e7f3ba24407204953e860c4770ec841 Mon Sep 17 00:00:00 2001 From: Ruiyang Wang Date: Sun, 4 Jan 2026 11:29:53 -0800 Subject: [PATCH 2/3] cut: simplify newline delimiter handling - Use read_to_end + 
split instead of read_until loop - Use Vec<&[u8]> instead of Vec<Vec<u8>> (no extra allocations) - Rename found_delimiter to has_delimiter - Check has_delimiter before removing trailing empty segment - Only write trailing newline if we output something - Remove unused BufRead import - Add tests for edge cases: just newline, empty input --- src/uu/cut/src/cut.rs | 44 +++++++++++++++++++-------------------- tests/by-util/test_cut.rs | 15 +++++++++++++ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 51dc362f319..78b0b565181 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -9,7 +9,7 @@ use bstr::io::BufReadExt; use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser}; use std::ffi::OsString; use std::fs::File; -use std::io::{BufRead, BufReader, BufWriter, IsTerminal, Read, Write, stdin, stdout}; +use std::io::{BufReader, BufWriter, IsTerminal, Read, Write, stdin, stdout}; use std::path::Path; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, set_exit_code}; @@ -262,34 +262,31 @@ fn cut_fields_implicit_out_delim( /// The input delimiter is identical to `newline_char` fn cut_fields_newline_char_delim( - reader: R, + mut reader: R, out: &mut W, ranges: &[Range], only_delimited: bool, newline_char: u8, out_delim: &[u8], ) -> UResult<()> { - let mut buf_in = BufReader::new(reader); - let mut segments: Vec> = Vec::new(); - let mut found_delimiter = false; - - // Read segments using read_until, which includes the delimiter in the buffer - // This lets us detect whether a delimiter was actually found - loop { - let mut segment = Vec::new(); - if buf_in.read_until(newline_char, &mut segment)? 
== 0 { - break; - } - // If segment ends with delimiter, we found one - remove it from segment - if segment.last() == Some(&newline_char) { - found_delimiter = true; - segment.pop(); - } - segments.push(segment); + // Read entire input - we need all of it since fields are lines + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer)?; + + // Split by newline to get fields + let mut segments: Vec<&[u8]> = buffer.split(|&b| b == newline_char).collect(); + + // Check for delimiter BEFORE removing trailing empty (split always gives at least 1 element) + let has_delimiter = segments.len() > 1; + + // Remove trailing empty segment if present + // (artifact of split when input ends with delimiter - GNU cut doesn't count it as a field) + if segments.last() == Some(&b"".as_slice()) { + segments.pop(); } // With -s (only_delimited), suppress output if no delimiter found - if only_delimited && !found_delimiter { + if only_delimited && !has_delimiter { return Ok(()); } @@ -304,13 +301,16 @@ fn cut_fields_newline_char_delim( } else { print_delim = true; } - out.write_all(segment.as_slice())?; + out.write_all(segment)?; } else { break; } } } - out.write_all(&[newline_char])?; + // Only write newline if we output something + if print_delim { + out.write_all(&[newline_char])?; + } Ok(()) } diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 919f6364475..92018692943 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -318,6 +318,21 @@ fn test_newline_as_delimiter_with_only_delimited() { .pipe_in("line1\nline2") .succeeds() .stdout_only_bytes("line1\n"); + + // Edge case: input is just a newline (delimiter exists, fields are empty) + // GNU cut outputs the newline because the delimiter is present + new_ucmd!() + .args(&["-f1-", "-d", "\n", "-s"]) + .pipe_in("\n") + .succeeds() + .stdout_only_bytes("\n"); + + // Edge case: empty input should produce no output + new_ucmd!() + .args(&["-f1-", "-d", "\n"]) + .pipe_in("") + .succeeds() 
+ .stdout_only_bytes(""); } #[test] From 43986aa2cd77e6a6ef379b848cfab9cf74841866 Mon Sep 17 00:00:00 2001 From: Ruiyang Wang Date: Tue, 6 Jan 2026 14:41:47 -0800 Subject: [PATCH 3/3] cut: use streaming DelimReader for newline delimiter Address PR review comments: - Replace read_to_end() with streaming DelimReader iterator - DelimReader uses read_until() and tracks delimiter per segment - Only collect selected fields, not entire input - Remove b"".as_slice() check (DelimReader doesn't create trailing empties) - Simplify output with split_first() join pattern --- src/uu/cut/src/cut.rs | 97 +++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 78b0b565181..ac16b259dbd 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -9,7 +9,7 @@ use bstr::io::BufReadExt; use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser}; use std::ffi::OsString; use std::fs::File; -use std::io::{BufReader, BufWriter, IsTerminal, Read, Write, stdin, stdout}; +use std::io::{BufRead, BufReader, BufWriter, IsTerminal, Read, Write, stdin, stdout}; use std::path::Path; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, set_exit_code}; @@ -260,57 +260,82 @@ fn cut_fields_implicit_out_delim( Ok(()) } +/// Iterator that yields (segment, was_followed_by_delimiter) pairs. +/// Unlike `BufRead::split`, this correctly tracks whether each segment ended with delimiter. 
+struct DelimReader { + inner: BufReader, + delim: u8, +} + +impl DelimReader { + fn new(reader: R, delim: u8) -> Self { + Self { + inner: BufReader::new(reader), + delim, + } + } +} + +impl Iterator for DelimReader { + type Item = std::io::Result<(Vec, bool)>; + + fn next(&mut self) -> Option { + let mut buf = Vec::new(); + match self.inner.read_until(self.delim, &mut buf) { + Ok(0) => None, + Ok(_) => { + let was_delimited = buf.last() == Some(&self.delim); + if was_delimited { + buf.pop(); + } + Some(Ok((buf, was_delimited))) + } + Err(e) => Some(Err(e)), + } + } +} + +/// Check if a field number is within any of the given ranges +fn is_field_in_ranges(field: usize, ranges: &[Range]) -> bool { + ranges.iter().any(|r| field >= r.low && field <= r.high) +} + /// The input delimiter is identical to `newline_char` fn cut_fields_newline_char_delim( - mut reader: R, + reader: R, out: &mut W, ranges: &[Range], only_delimited: bool, newline_char: u8, out_delim: &[u8], ) -> UResult<()> { - // Read entire input - we need all of it since fields are lines - let mut buffer = Vec::new(); - reader.read_to_end(&mut buffer)?; - - // Split by newline to get fields - let mut segments: Vec<&[u8]> = buffer.split(|&b| b == newline_char).collect(); - - // Check for delimiter BEFORE removing trailing empty (split always gives at least 1 element) - let has_delimiter = segments.len() > 1; - - // Remove trailing empty segment if present - // (artifact of split when input ends with delimiter - GNU cut doesn't count it as a field) - if segments.last() == Some(&b"".as_slice()) { - segments.pop(); + // Stream through input, collecting only selected fields + let mut has_delim = false; + let mut selected: Vec> = Vec::new(); + + for (idx, result) in DelimReader::new(reader, newline_char).enumerate() { + let (segment, was_delimited) = result?; + has_delim = has_delim || was_delimited; + if is_field_in_ranges(idx + 1, ranges) { + selected.push(segment); + } } - // With -s (only_delimited), 
suppress output if no delimiter found - if only_delimited && !has_delimiter { + // With -s and no delimiter, suppress output + if only_delimited && !has_delim { return Ok(()); } - let mut print_delim = false; - - for &Range { low, high } in ranges { - for i in low..=high { - // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0 - if let Some(segment) = segments.get(i - 1) { - if print_delim { - out.write_all(out_delim)?; - } else { - print_delim = true; - } - out.write_all(segment)?; - } else { - break; - } + // Output selected fields joined by delimiter + if let Some((first, rest)) = selected.split_first() { + out.write_all(first)?; + for seg in rest { + out.write_all(out_delim)?; + out.write_all(seg)?; } - } - // Only write newline if we output something - if print_delim { out.write_all(&[newline_char])?; } + Ok(()) }