diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs
index cc48edfab71..ac16b259dbd 100644
--- a/src/uu/cut/src/cut.rs
+++ b/src/uu/cut/src/cut.rs
@@ -260,35 +260,82 @@ fn cut_fields_implicit_out_delim(
     Ok(())
 }
 
+/// Iterator that yields (segment, was_followed_by_delimiter) pairs.
+/// Unlike `BufRead::split`, this correctly tracks whether each segment ended with delimiter.
+struct DelimReader<R: Read> {
+    inner: BufReader<R>,
+    delim: u8,
+}
+
+impl<R: Read> DelimReader<R> {
+    fn new(reader: R, delim: u8) -> Self {
+        Self {
+            inner: BufReader::new(reader),
+            delim,
+        }
+    }
+}
+
+impl<R: Read> Iterator for DelimReader<R> {
+    type Item = std::io::Result<(Vec<u8>, bool)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut buf = Vec::new();
+        match self.inner.read_until(self.delim, &mut buf) {
+            Ok(0) => None,
+            Ok(_) => {
+                let was_delimited = buf.last() == Some(&self.delim);
+                if was_delimited {
+                    buf.pop();
+                }
+                Some(Ok((buf, was_delimited)))
+            }
+            Err(e) => Some(Err(e)),
+        }
+    }
+}
+
+/// Check if a field number is within any of the given ranges
+fn is_field_in_ranges(field: usize, ranges: &[Range]) -> bool {
+    ranges.iter().any(|r| field >= r.low && field <= r.high)
+}
+
 /// The input delimiter is identical to `newline_char`
 fn cut_fields_newline_char_delim<R: Read, W: Write>(
     reader: R,
     out: &mut W,
     ranges: &[Range],
+    only_delimited: bool,
     newline_char: u8,
     out_delim: &[u8],
 ) -> UResult<()> {
-    let buf_in = BufReader::new(reader);
+    // Stream through input, collecting only selected fields
+    let mut has_delim = false;
+    let mut selected: Vec<Vec<u8>> = Vec::new();
+
+    for (idx, result) in DelimReader::new(reader, newline_char).enumerate() {
+        let (segment, was_delimited) = result?;
+        has_delim = has_delim || was_delimited;
+        if is_field_in_ranges(idx + 1, ranges) {
+            selected.push(segment);
+        }
+    }
 
-    let segments: Vec<_> = buf_in.split(newline_char).filter_map(|x| x.ok()).collect();
-    let mut print_delim = false;
+    // With -s and no delimiter, suppress output
+    if only_delimited && !has_delim {
+        return Ok(());
+    }
 
-    for &Range { low, high } in ranges {
-        for i in low..=high {
-            // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
-            if let Some(segment) = segments.get(i - 1) {
-                if print_delim {
-                    out.write_all(out_delim)?;
-                } else {
-                    print_delim = true;
-                }
-                out.write_all(segment.as_slice())?;
-            } else {
-                break;
-            }
+    // Output selected fields joined by delimiter
+    if let Some((first, rest)) = selected.split_first() {
+        out.write_all(first)?;
+        for seg in rest {
+            out.write_all(out_delim)?;
+            out.write_all(seg)?;
         }
+        out.write_all(&[newline_char])?;
     }
-    out.write_all(&[newline_char])?;
+
     Ok(())
 }
 
@@ -303,7 +350,14 @@ fn cut_fields(
     match field_opts.delimiter {
         Delimiter::Slice(delim) if delim == [newline_char] => {
             let out_delim = opts.out_delimiter.unwrap_or(delim);
-            cut_fields_newline_char_delim(reader, out, ranges, newline_char, out_delim)
+            cut_fields_newline_char_delim(
+                reader,
+                out,
+                ranges,
+                field_opts.only_delimited,
+                newline_char,
+                out_delim,
+            )
         }
         Delimiter::Slice(delim) => {
             let matcher = ExactMatcher::new(delim);
diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs
index 79fb1488ab8..92018692943 100644
--- a/tests/by-util/test_cut.rs
+++ b/tests/by-util/test_cut.rs
@@ -301,6 +301,40 @@ fn test_newline_as_delimiter_with_output_delimiter() {
         .stdout_only_bytes("a:b\n");
 }
 
+#[test]
+fn test_newline_as_delimiter_with_only_delimited() {
+    // When input has no newline delimiter and -s is specified,
+    // the line should be suppressed (no output)
+    new_ucmd!()
+        .args(&["-f1", "-d", "\n", "-s"])
+        .pipe_in("abc")
+        .succeeds()
+        .stdout_only_bytes("");
+
+    // When input has newline delimiter and -s is specified,
+    // it should output the selected field
+    new_ucmd!()
+        .args(&["-f1", "-d", "\n", "-s"])
+        .pipe_in("line1\nline2")
+        .succeeds()
+        .stdout_only_bytes("line1\n");
+
+    // Edge case: input is just a newline (delimiter exists, fields are empty)
+    // GNU cut outputs the newline because the delimiter is present
+    new_ucmd!()
+        .args(&["-f1-", "-d", "\n", "-s"])
+        .pipe_in("\n")
+        .succeeds()
+        .stdout_only_bytes("\n");
+
+    // Edge case: empty input should produce no output
+    new_ucmd!()
+        .args(&["-f1-", "-d", "\n"])
+        .pipe_in("")
+        .succeeds()
+        .stdout_only_bytes("");
+}
+
 #[test]
 fn test_multiple_delimiters() {
     new_ucmd!()