diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index d7746b9150e..b6a850c378e 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -167,6 +167,7 @@ impl Options { ), ParseSizeError::ParseFailure(s) => OptionsError::InvalidBlockSize(s), ParseSizeError::PhysicalMem(s) => OptionsError::InvalidBlockSize(s), + ParseSizeError::BuilderConfig(e) => OptionsError::InvalidBlockSize(e.to_string()), })?, header_mode: { if matches.get_flag(OPT_HUMAN_READABLE_BINARY) diff --git a/src/uu/du/locales/en-US.ftl b/src/uu/du/locales/en-US.ftl index d7141c7c079..c18e55115fe 100644 --- a/src/uu/du/locales/en-US.ftl +++ b/src/uu/du/locales/en-US.ftl @@ -69,6 +69,7 @@ du-error-printing-thread-panicked = Printing thread panicked. du-error-invalid-suffix = invalid suffix in --{ $option } argument { $value } du-error-invalid-argument = invalid --{ $option } argument { $value } du-error-argument-too-large = --{ $option } argument { $value } too large +du-error-invalid-configuration = invalid configuration for --{ $option }: { $error } du-error-hyphen-file-name-not-allowed = when reading file names from standard input, no file name of '-' allowed # Verbose/status messages diff --git a/src/uu/du/locales/fr-FR.ftl b/src/uu/du/locales/fr-FR.ftl index 52b89145973..f49d5a5f3de 100644 --- a/src/uu/du/locales/fr-FR.ftl +++ b/src/uu/du/locales/fr-FR.ftl @@ -69,6 +69,7 @@ du-error-printing-thread-panicked = Le thread d'affichage a paniqué. du-error-invalid-suffix = suffixe invalide dans l'argument --{ $option } { $value } du-error-invalid-argument = argument --{ $option } invalide { $value } du-error-argument-too-large = argument --{ $option } { $value } trop grand +du-error-invalid-configuration = configuration invalide pour --{ $option }: { $error } du-error-hyphen-file-name-not-allowed = le nom de fichier '-' n'est pas autorisé lors de la lecture de l'entrée standard # Messages verbeux/de statut diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 1b8084e2ebb..2ea5e771e57 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -1490,6 +1490,9 @@ fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String ParseSizeError::SizeTooBig(_) => { translate!("du-error-argument-too-large", "option" => option, "value" => s.quote()) } + ParseSizeError::BuilderConfig(e) => { + translate!("du-error-invalid-configuration", "option" => option, "error" => e.to_string()) + } } } diff --git a/src/uu/od/locales/en-US.ftl b/src/uu/od/locales/en-US.ftl index bcafe1fe2c1..a72f88545ae 100644 --- a/src/uu/od/locales/en-US.ftl +++ b/src/uu/od/locales/en-US.ftl @@ -59,6 +59,7 @@ od-error-overflow = Numerical result out of range od-error-invalid-suffix = invalid suffix in {$option} argument {$value} od-error-invalid-argument = invalid {$option} argument {$value} od-error-argument-too-large = {$option} argument {$value} too large +od-error-invalid-configuration = invalid configuration for {$option}: {$error} od-error-skip-past-end = tried to skip past end of input # Help messages diff --git a/src/uu/od/locales/fr-FR.ftl b/src/uu/od/locales/fr-FR.ftl index df07eebe61e..9a0efc57d25 100644 --- a/src/uu/od/locales/fr-FR.ftl +++ b/src/uu/od/locales/fr-FR.ftl @@ -59,6 +59,7 @@ od-error-parse-failed = échec de l'analyse od-error-invalid-suffix = suffixe invalide dans l'argument {$option} {$value} od-error-invalid-argument = argument {$option} invalide {$value} od-error-argument-too-large = argument {$option} {$value} trop grand +od-error-invalid-configuration = configuration invalide pour {$option}: {$error} od-error-skip-past-end = tentative d'ignorer au-delà de la fin de l'entrée # Messages d'aide diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index e8f9841b162..57399092188 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -804,5 +804,8 @@ fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String ParseSizeError::SizeTooBig(_) => { translate!("od-error-argument-too-large", "option" => option, "value" => s.quote()) } + ParseSizeError::BuilderConfig(e) => { + translate!("od-error-invalid-configuration", "option" => option, "error" => e.to_string()) + } } } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 071163c5aee..6399de07488 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -317,12 +317,13 @@ impl GlobalSettings { fn parse_byte_count(input: &str) -> Result { // GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y // GNU sort (8.32) invalid: b, B, 1B, p, e, z, y + // Note: "%" is also valid for percentage-based memory allocation on Unix-like systems let size = Parser::default() .with_allow_list(&[ "b", "k", "K", "m", "M", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q", "%", - ]) - .with_default_unit("K") - .with_b_byte_count(true) + ])? + .with_default_unit("K")? + .with_b_byte_count(true)? .parse(input.trim())?; usize::try_from(size).map_err(|_| { @@ -2268,6 +2269,9 @@ fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String ParseSizeError::SizeTooBig(_) => { translate!("sort-option-arg-too-large", "option" => option, "arg" => s.quote()) } + ParseSizeError::BuilderConfig(e) => { + format!("invalid configuration for {option}: {e}") + } } } diff --git a/src/uucore/src/lib/features/parser/parse_size.rs b/src/uucore/src/lib/features/parser/parse_size.rs index 05c270e4cf8..9bd8475b364 100644 --- a/src/uucore/src/lib/features/parser/parse_size.rs +++ b/src/uucore/src/lib/features/parser/parse_size.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) hdsf ghead gtail ACDBK hexdigit +// spell-checker:ignore (ToDO) hdsf ghead gtail ACDBK hexdigit memsize physmem //! Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). @@ -18,7 +18,7 @@ use procfs::{Current, Meminfo}; enum SystemError { IOError, ParseError, - #[cfg(not(target_os = "linux"))] + #[allow(dead_code)] NotFound, } @@ -77,10 +77,117 @@ pub fn available_memory_bytes() -> Option { None } -/// Get the total number of bytes of physical memory. +/// Helper function for BSD-based systems to query physical memory via sysctl. /// -/// TODO Implement this for non-Linux systems. -#[cfg(not(target_os = "linux"))] +/// # Arguments +/// +/// * `hw_constant` - The platform-specific HW constant (e.g., `HW_MEMSIZE`, `HW_PHYSMEM`) +/// * `sysctl_name` - The sysctl parameter name for subprocess fallback (e.g., "hw.memsize") +/// +/// # Returns +/// +/// The total physical memory in bytes, or an error if the query fails. +#[cfg(any( + target_os = "macos", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" +))] +use nix::libc; + +#[cfg(any( + target_os = "macos", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" +))] +fn bsd_sysctl_memory(hw_constant: libc::c_int, sysctl_name: &str) -> Result { + use std::mem; + use std::ptr; + + // Try native sysctl syscall first (more reliable in restricted environments) + let mut size: u64 = 0; + let mut len = mem::size_of::(); + let mut mib = [libc::CTL_HW, hw_constant]; + + let result = unsafe { + libc::sysctl( + mib.as_mut_ptr(), + 2, + ptr::from_mut(&mut size).cast::(), + &raw mut len, + ptr::null_mut(), + 0, + ) + }; + + if result == 0 { + return Ok(size as u128); + } + + // Fallback to subprocess if native call fails + // Configure stdin/stdout/stderr to prevent file descriptor inheritance issues + let output = std::process::Command::new("sysctl") + .arg("-n") + .arg(sysctl_name) + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output()?; + + let mem_str = String::from_utf8_lossy(&output.stdout); + let mem_bytes: u128 = mem_str.trim().parse()?; + Ok(mem_bytes) +} + +/// Get the total number of bytes of physical memory on macOS. +/// +/// Uses native `sysctl` syscall to query `hw.memsize`, with subprocess fallback. +#[cfg(target_os = "macos")] +fn total_physical_memory() -> Result { + bsd_sysctl_memory(libc::HW_MEMSIZE, "hw.memsize") +} + +/// Get the total number of bytes of physical memory on FreeBSD. +/// +/// Uses native `sysctl` syscall to query `hw.physmem`, with subprocess fallback. +#[cfg(target_os = "freebsd")] +fn total_physical_memory() -> Result { + // HW_PHYSMEM constant (not always exported by libc) + const HW_PHYSMEM: libc::c_int = 5; + bsd_sysctl_memory(HW_PHYSMEM, "hw.physmem") +} + +/// Get the total number of bytes of physical memory on OpenBSD. +/// +/// Uses native `sysctl` syscall to query `hw.physmem`, with subprocess fallback. +#[cfg(target_os = "openbsd")] +fn total_physical_memory() -> Result { + // HW_PHYSMEM constant (not always exported by libc) + const HW_PHYSMEM: libc::c_int = 19; + bsd_sysctl_memory(HW_PHYSMEM, "hw.physmem") +} + +/// Get the total number of bytes of physical memory on NetBSD. +/// +/// Uses native `sysctl` syscall to query `hw.physmem64`, with subprocess fallback. +#[cfg(target_os = "netbsd")] +fn total_physical_memory() -> Result { + // HW_PHYSMEM64 constant (not always exported by libc) + const HW_PHYSMEM64: libc::c_int = 9; + bsd_sysctl_memory(HW_PHYSMEM64, "hw.physmem64") +} + +/// Get the total number of bytes of physical memory (fallback for unsupported platforms). +/// +/// Returns an error for platforms where we haven't implemented memory detection yet. +#[cfg(not(any( + target_os = "linux", + target_os = "macos", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" +)))] fn total_physical_memory() -> Result { Err(SystemError::NotFound) } @@ -111,27 +218,196 @@ enum NumberSystem { impl<'parser> Parser<'parser> { /// Change allow_list of the parser - whitelist for the suffix - pub fn with_allow_list(&mut self, allow_list: &'parser [&str]) -> &mut Self { + /// + /// # Errors + /// + /// Returns `ParserBuilderError` if: + /// - Any entry in the allow_list is not a valid unit + /// - The allow_list contains duplicates + /// - The allow_list contains "%" mixed with size units + /// - The current default_unit (if set) is not in the new allow_list + pub fn with_allow_list( + &mut self, + allow_list: &'parser [&str], + ) -> Result<&mut Self, ParserBuilderError> { + // Validate the allow_list itself + Self::validate_allow_list(allow_list)?; + + // Validate compatibility with current state + self.validate_allow_list_compat(allow_list)?; + self.allow_list = Some(allow_list); - self + Ok(self) } /// Change default_unit of the parser - when no suffix is provided - pub fn with_default_unit(&mut self, default_unit: &'parser str) -> &mut Self { + /// + /// # Errors + /// + /// Returns `ParserBuilderError` if: + /// - The unit string is empty or contains whitespace + /// - The unit is not a valid unit string + /// - The unit is not in the allow_list (if allow_list is set) + /// - The unit is "b" and b_byte_count is true (ambiguous) + pub fn with_default_unit( + &mut self, + default_unit: &'parser str, + ) -> Result<&mut Self, ParserBuilderError> { + // Validate unit string + Self::is_valid_unit_string(default_unit).map_err(|reason| { + ParserBuilderError::InvalidUnit { + unit: default_unit.to_string(), + reason, + } + })?; + + // Validate compatibility with current state + self.validate_default_unit_compat(default_unit)?; + self.default_unit = Some(default_unit); - self + Ok(self) } /// Change b_byte_count of the parser - to treat "b" as a "byte count" instead of "block" - pub fn with_b_byte_count(&mut self, value: bool) -> &mut Self { + /// + /// # Errors + /// + /// Returns `ParserBuilderError` if: + /// - The default_unit is "b" (would create ambiguity) + pub fn with_b_byte_count(&mut self, value: bool) -> Result<&mut Self, ParserBuilderError> { + // Check for conflict with default_unit="b" + if value && self.default_unit == Some("b") { + return Err(ParserBuilderError::BByteCountConflict { + default_unit: "b".to_string(), + b_byte_count: value, + }); + } + self.b_byte_count = value; - self + Ok(self) } /// Change no_empty_numeric of the parser - to allow empty numeric strings - pub fn with_allow_empty_numeric(&mut self, value: bool) -> &mut Self { + /// + /// This method always succeeds as there are no validation constraints. + pub fn with_allow_empty_numeric( + &mut self, + value: bool, + ) -> Result<&mut Self, ParserBuilderError> { self.no_empty_numeric = value; - self + Ok(self) + } + + /// Validate that a unit string is valid + fn is_valid_unit_string(unit: &str) -> Result<(), String> { + // Check empty + if unit.is_empty() { + return Err("empty string".to_string()); + } + + // Check whitespace + if unit.trim() != unit || unit.chars().any(|c| c.is_whitespace()) { + return Err("contains whitespace".to_string()); + } + + // Valid units based on test analysis (lines 540-580) + const VALID_UNITS: &[&str] = &[ + // Single char uppercase (1024 powers) + "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q", + // Single char lowercase (1024 powers) - GNU sort compatibility + "k", "m", "g", "t", "p", "e", "z", "y", "r", "q", + // Two char decimal (1000 powers) + "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "RB", "QB", + // Binary IEC (1024 powers) + "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", "RiB", "QiB", + // Lowercase binary + "kiB", "miB", "giB", "tiB", "piB", "eiB", "ziB", "yiB", "riB", "qiB", + // Special + "b", "B", "%", + ]; + + if !VALID_UNITS.contains(&unit) { + return Err(format!("'{unit}' is not a valid unit")); + } + + Ok(()) + } + + /// Validate an allow_list + fn validate_allow_list(allow_list: &[&str]) -> Result<(), ParserBuilderError> { + use std::collections::HashMap; + + let mut invalid = Vec::new(); + let mut seen = HashMap::new(); + + for (idx, &unit) in allow_list.iter().enumerate() { + // Check validity + if let Err(_reason) = Self::is_valid_unit_string(unit) { + invalid.push(unit.to_string()); + continue; + } + + // Check duplicates + if let Some(&prev_idx) = seen.get(unit) { + return Err(ParserBuilderError::DuplicateInAllowList { + unit: unit.to_string(), + indices: vec![prev_idx, idx], + }); + } + seen.insert(unit, idx); + } + + if !invalid.is_empty() { + return Err(ParserBuilderError::InvalidAllowList { + reason: "contains invalid units".to_string(), + invalid_entries: invalid, + }); + } + + // Check for % mixing with size units + // Note: "%" can be mixed with size units - it's handled specially in parse() + // when the suffix is "%" (percentage of physical memory), but other units + // are still valid in the same allow_list for normal size parsing + + Ok(()) + } + + /// Validate that default_unit is compatible with current state + fn validate_default_unit_compat(&self, unit: &str) -> Result<(), ParserBuilderError> { + // Check against allow_list if set + if let Some(allow_list) = self.allow_list { + if !allow_list.contains(&unit) { + return Err(ParserBuilderError::UnitNotAllowed { + unit: unit.to_string(), + allowed: allow_list.iter().map(|s| (*s).to_string()).collect(), + }); + } + } + + // Check b_byte_count conflict + if unit == "b" && self.b_byte_count { + return Err(ParserBuilderError::BByteCountConflict { + default_unit: unit.to_string(), + b_byte_count: self.b_byte_count, + }); + } + + Ok(()) + } + + /// Validate that allow_list is compatible with current state + fn validate_allow_list_compat(&self, allow_list: &[&str]) -> Result<(), ParserBuilderError> { + // Check that default_unit (if set) is in new allow_list + if let Some(default_unit) = self.default_unit { + if !allow_list.contains(&default_unit) { + return Err(ParserBuilderError::UnitNotAllowed { + unit: default_unit.to_string(), + allowed: allow_list.iter().map(|s| (*s).to_string()).collect(), + }); + } + } + + Ok(()) } /// Parse a size string into a number of bytes. /// @@ -424,6 +700,133 @@ pub fn parse_size_u128_max(size: &str) -> Result { } /// Error type for parse_size +/// Error type for Parser builder configuration validation. +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ParserBuilderError { + /// Invalid unit string provided + InvalidUnit { unit: String, reason: String }, + + /// Unit not in allow_list + UnitNotAllowed { unit: String, allowed: Vec }, + + /// Conflicting configuration detected + ConflictingConfig { + setting: String, + previous: String, + current: String, + }, + + /// Empty or invalid allow_list + InvalidAllowList { + reason: String, + invalid_entries: Vec, + }, + + /// Duplicate unit in allow_list + DuplicateInAllowList { unit: String, indices: Vec }, + + /// Case-sensitive conflict in allow_list + CaseSensitiveConflict { + units: Vec, + explanation: String, + }, + + /// Invalid combination of settings + InvalidCombination { + settings: Vec, + reason: String, + }, + + /// Default unit conflicts with b_byte_count setting + BByteCountConflict { + default_unit: String, + b_byte_count: bool, + }, +} + +impl fmt::Display for ParserBuilderError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidUnit { unit, reason } => { + write!(f, "invalid unit {}: {}", unit.quote(), reason) + } + Self::UnitNotAllowed { unit, allowed } => { + write!( + f, + "unit {} is not in allow list: allowed units are [{}]", + unit.quote(), + allowed.join(", ") + ) + } + Self::ConflictingConfig { + setting, + previous, + current, + } => { + write!( + f, + "conflicting configuration for {}: previously set to {}, now {}", + setting, + previous.quote(), + current.quote() + ) + } + Self::InvalidAllowList { + reason, + invalid_entries, + } => { + write!( + f, + "invalid allow_list ({}): invalid entries: [{}]", + reason, + invalid_entries + .iter() + .map(|s| s.quote().to_string()) + .collect::>() + .join(", ") + ) + } + Self::DuplicateInAllowList { unit, indices } => { + write!( + f, + "duplicate unit {} in allow_list at positions: {:?}", + unit.quote(), + indices + ) + } + Self::CaseSensitiveConflict { units, explanation } => { + write!( + f, + "case-sensitive conflict in units [{}]: {}", + units.join(", "), + explanation + ) + } + Self::InvalidCombination { settings, reason } => { + write!( + f, + "invalid combination of settings [{}]: {}", + settings.join(", "), + reason + ) + } + Self::BByteCountConflict { + default_unit, + b_byte_count, + } => { + write!( + f, + "default_unit {} conflicts with b_byte_count={}", + default_unit.quote(), + b_byte_count + ) + } + } + } +} + +impl Error for ParserBuilderError {} + #[derive(Debug, PartialEq, Eq)] pub enum ParseSizeError { /// Suffix @@ -437,6 +840,9 @@ pub enum ParseSizeError { /// Could not determine total physical memory size. PhysicalMem(String), + + /// Builder configuration error + BuilderConfig(ParserBuilderError), } impl Error for ParseSizeError { @@ -446,19 +852,26 @@ impl Error for ParseSizeError { Self::ParseFailure(ref s) => s, Self::SizeTooBig(ref s) => s, Self::PhysicalMem(ref s) => s, + Self::BuilderConfig(_) => "builder configuration error", } } } impl fmt::Display for ParseSizeError { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let s = match self { + match self { Self::InvalidSuffix(s) | Self::ParseFailure(s) | Self::SizeTooBig(s) - | Self::PhysicalMem(s) => s, - }; - write!(f, "{s}") + | Self::PhysicalMem(s) => write!(f, "{s}"), + Self::BuilderConfig(e) => write!(f, "{e}"), + } + } +} + +impl From for ParseSizeError { + fn from(e: ParserBuilderError) -> Self { + Self::BuilderConfig(e) } } @@ -742,7 +1155,9 @@ mod tests { parser .with_allow_list(&["k", "K", "G", "MB", "M"]) - .with_default_unit("K"); + .unwrap() + .with_default_unit("K") + .unwrap(); assert_eq!(Ok(1024), parser.parse("1")); assert_eq!(Ok(2 * 1024), parser.parse("2")); @@ -758,8 +1173,11 @@ mod tests { .with_allow_list(&[ "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q", ]) + .unwrap() .with_default_unit("K") - .with_b_byte_count(true); + .unwrap() + .with_b_byte_count(true) + .unwrap(); assert_eq!(Ok(1024), parser.parse("1")); assert_eq!(Ok(2 * 1024), parser.parse("2")); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 6330f759df0..ee40ae76a36 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -33,10 +33,21 @@ fn test_helper(file_name: &str, possible_args: &[&str]) { #[test] fn test_buffer_sizes() { - #[cfg(target_os = "linux")] - let buffer_sizes = ["0", "50K", "50k", "1M", "100M", "0%", "10%"]; - // TODO Percentage sizes are not yet supported beyond Linux. - #[cfg(not(target_os = "linux"))] + #[cfg(any( + target_os = "linux", + target_os = "macos", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" + ))] + let buffer_sizes = ["0", "50K", "50k", "1M", "100M", "10%", "50%"]; + #[cfg(not(any( + target_os = "linux", + target_os = "macos", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" + )))] let buffer_sizes = ["0", "50K", "50k", "1M", "100M"]; for buffer_size in &buffer_sizes { new_ucmd!() diff --git a/tests/by-util/test_uptime.rs b/tests/by-util/test_uptime.rs index e475999128c..bdbd60f2789 100644 --- a/tests/by-util/test_uptime.rs +++ b/tests/by-util/test_uptime.rs @@ -338,8 +338,8 @@ fn test_uptime_macos_output_format() { "Output should contain 'up': {stdout}" ); - // Verify load average is present - let load_re = Regex::new(r"load average: \d+\.\d+, \d+\.\d+, \d+\.\d+").unwrap(); + // Verify load average is present (decimal point is optional, e.g., "10" or "10.00") + let load_re = Regex::new(r"load average: \d+(?:\.\d+)?, \d+(?:\.\d+)?, \d+(?:\.\d+)?").unwrap(); assert!( load_re.is_match(stdout), "Output should contain load average: {stdout}"