Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 72 additions & 18 deletions src/uu/cut/src/cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,35 +260,82 @@ fn cut_fields_implicit_out_delim<R: Read, W: Write, M: Matcher>(
Ok(())
}

/// Splits a byte stream on a single delimiter byte, yielding
/// `(segment, was_followed_by_delimiter)` pairs.
///
/// Unlike `BufRead::split`, each item also reports whether the segment was
/// terminated by the delimiter, so a final unterminated segment can be told
/// apart from a terminated one.
struct DelimReader<R> {
    inner: BufReader<R>,
    delim: u8,
}

impl<R: Read> DelimReader<R> {
    /// Wraps `reader` in a `BufReader` and splits its bytes on `delim`.
    fn new(reader: R, delim: u8) -> Self {
        let inner = BufReader::new(reader);
        Self { inner, delim }
    }
}

impl<R: Read> Iterator for DelimReader<R> {
    type Item = std::io::Result<(Vec<u8>, bool)>;

    /// Reads up to and including the next delimiter.
    ///
    /// Returns `None` once no more bytes can be read, `Some(Err(_))` when the
    /// underlying read fails, and otherwise the segment with its trailing
    /// delimiter removed plus a flag telling whether that delimiter was present.
    fn next(&mut self) -> Option<Self::Item> {
        let mut segment = Vec::new();
        let bytes_read = match self.inner.read_until(self.delim, &mut segment) {
            Ok(count) => count,
            Err(err) => return Some(Err(err)),
        };
        if bytes_read == 0 {
            return None;
        }

        let ended_with_delim = segment.ends_with(&[self.delim]);
        if ended_with_delim {
            // Drop the delimiter itself; callers only want the field content.
            segment.truncate(segment.len() - 1);
        }
        Some(Ok((segment, ended_with_delim)))
    }
}

/// Returns `true` when the field number `field` lies inside at least one of
/// `ranges`; both the `low` and `high` bound of a range are inclusive.
fn is_field_in_ranges(field: usize, ranges: &[Range]) -> bool {
    ranges
        .iter()
        .any(|range| (range.low..=range.high).contains(&field))
}

/// The input delimiter is identical to `newline_char`
fn cut_fields_newline_char_delim<R: Read, W: Write>(
reader: R,
out: &mut W,
ranges: &[Range],
only_delimited: bool,
newline_char: u8,
out_delim: &[u8],
) -> UResult<()> {
let buf_in = BufReader::new(reader);
// Stream through input, collecting only selected fields
let mut has_delim = false;
let mut selected: Vec<Vec<u8>> = Vec::new();

for (idx, result) in DelimReader::new(reader, newline_char).enumerate() {
let (segment, was_delimited) = result?;
has_delim = has_delim || was_delimited;
if is_field_in_ranges(idx + 1, ranges) {
selected.push(segment);
}
}

let segments: Vec<_> = buf_in.split(newline_char).filter_map(|x| x.ok()).collect();
let mut print_delim = false;
// With -s and no delimiter, suppress output
if only_delimited && !has_delim {
return Ok(());
}

for &Range { low, high } in ranges {
for i in low..=high {
// "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
if let Some(segment) = segments.get(i - 1) {
if print_delim {
out.write_all(out_delim)?;
} else {
print_delim = true;
}
out.write_all(segment.as_slice())?;
} else {
break;
}
// Output selected fields joined by delimiter
if let Some((first, rest)) = selected.split_first() {
out.write_all(first)?;
for seg in rest {
out.write_all(out_delim)?;
out.write_all(seg)?;
}
out.write_all(&[newline_char])?;
}
out.write_all(&[newline_char])?;

Ok(())
}

Expand All @@ -303,7 +350,14 @@ fn cut_fields<R: Read, W: Write>(
match field_opts.delimiter {
Delimiter::Slice(delim) if delim == [newline_char] => {
let out_delim = opts.out_delimiter.unwrap_or(delim);
cut_fields_newline_char_delim(reader, out, ranges, newline_char, out_delim)
cut_fields_newline_char_delim(
reader,
out,
ranges,
field_opts.only_delimited,
newline_char,
out_delim,
)
}
Delimiter::Slice(delim) => {
let matcher = ExactMatcher::new(delim);
Expand Down
34 changes: 34 additions & 0 deletions tests/by-util/test_cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,40 @@ fn test_newline_as_delimiter_with_output_delimiter() {
.stdout_only_bytes("a:b\n");
}

#[test]
fn test_newline_as_delimiter_with_only_delimited() {
    // Each case is (command-line arguments, piped input, expected stdout).
    let cases: [(&[&str], &str, &str); 4] = [
        // When input has no newline delimiter and -s is specified,
        // the line should be suppressed (no output)
        (&["-f1", "-d", "\n", "-s"], "abc", ""),
        // When input has newline delimiter and -s is specified,
        // it should output the selected field
        (&["-f1", "-d", "\n", "-s"], "line1\nline2", "line1\n"),
        // Edge case: input is just a newline (delimiter exists, fields are empty)
        // GNU cut outputs the newline because the delimiter is present
        (&["-f1-", "-d", "\n", "-s"], "\n", "\n"),
        // Edge case: empty input should produce no output
        (&["-f1-", "-d", "\n"], "", ""),
    ];

    for &(args, input, expected) in &cases {
        new_ucmd!()
            .args(args)
            .pipe_in(input)
            .succeeds()
            .stdout_only_bytes(expected);
    }
}

#[test]
fn test_multiple_delimiters() {
new_ucmd!()
Expand Down
Loading