diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 91e72747a36..b2430e94cbb 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -22,7 +22,9 @@ use uucore::display::Quotable; use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; #[cfg(feature = "i18n-datetime")] -use uucore::i18n::datetime::{localize_format_string, should_use_icu_locale}; +use uucore::i18n::datetime::{ + localize_format_string, localize_format_string_with_modifiers, should_use_icu_locale, +}; use uucore::translate; use uucore::{format_usage, show}; #[cfg(windows)] @@ -705,7 +707,29 @@ fn format_date_with_locale_aware_months( config: &Config, skip_localization: bool, ) -> Result { - // First check if format string has GNU modifiers (width/flags) and format if present + let broken_down = BrokenDownTime::from(date); + + // First check if format string has E/O locale modifiers + // These require ICU-based localization for proper handling + #[cfg(feature = "i18n-datetime")] + if format_modifiers::has_locale_modifiers(format_string) && !skip_localization { + // For E/O modifiers, process them using ICU, then apply other modifiers + let (fmt, has_eo) = localize_format_string_with_modifiers(format_string, date.date()); + if has_eo { + // Check if there are remaining GNU modifiers after E/O processing + if let Some(result) = + format_modifiers::format_with_modifiers_if_present(date, &fmt, config) + { + return result.map_err(|e| e.to_string()); + } + // No remaining modifiers, just format + return broken_down + .to_string_with_config(config, &fmt) + .map_err(|e| e.to_string()); + } + } + + // Check if format string has GNU modifiers (width/flags) and format if present // This optimization combines detection and formatting in a single pass if let Some(result) = format_modifiers::format_with_modifiers_if_present(date, format_string, config) @@ -713,8 +737,6 @@ fn format_date_with_locale_aware_months( return result.map_err(|e| e.to_string()); } - let broken_down = BrokenDownTime::from(date); - let result = if !should_use_icu_locale() || skip_localization { broken_down.to_string_with_config(config, format_string) } else { diff --git a/src/uu/date/src/format_modifiers.rs b/src/uu/date/src/format_modifiers.rs index c6a3d01c857..a385aa1216c 100644 --- a/src/uu/date/src/format_modifiers.rs +++ b/src/uu/date/src/format_modifiers.rs @@ -21,6 +21,8 @@ //! - `^`: Convert to uppercase //! - `#`: Use opposite case (uppercase becomes lowercase and vice versa) //! - `+`: Force display of sign (+ for positive, - for negative) +//! - `E`: Use locale's alternative representation (e.g., alternative date format, era names) +//! - `O`: Use locale's alternative numeric symbols (e.g., Arabic-Indic digits) //! //! ### Width //! - One or more digits specifying minimum field width @@ -39,6 +41,33 @@ use regex::Regex; use std::fmt; use std::sync::OnceLock; +/// Check if format string contains E or O locale modifiers. +/// +/// E modifiers request alternative representations (e.g., era names, alternative date formats). +/// O modifiers request alternative numeric symbols (e.g., Arabic-Indic digits). +pub fn has_locale_modifiers(format_string: &str) -> bool { + // Simple check for %E or %O patterns + format_string.contains("%E") || format_string.contains("%O") +} + +/// Check if a specifier supports E modifier (alternative representation). +fn supports_e_modifier(specifier: &str) -> bool { + // E modifier is supported for: c, C, x, X, y, Y, B + matches!( + specifier.chars().last(), + Some('c' | 'C' | 'x' | 'X' | 'y' | 'Y' | 'B') + ) +} + +/// Check if a specifier supports O modifier (alternative numeric symbols). +fn supports_o_modifier(specifier: &str) -> bool { + // O modifier is supported for numeric specifiers: d, e, H, I, m, M, S, u, U, V, w, W, y + matches!( + specifier.chars().last(), + Some('d' | 'e' | 'H' | 'I' | 'm' | 'M' | 'S' | 'u' | 'U' | 'V' | 'w' | 'W' | 'y') + ) +} + /// Error type for format modifier operations #[derive(Debug)] pub enum FormatError { @@ -66,12 +95,12 @@ impl From for FormatError { /// Regex to match format specifiers with optional modifiers /// Pattern: % \[flags\] \[width\] specifier -/// Flags: -, _, 0, ^, #, + +/// Flags: -, _, 0, ^, #, +, E (alternative representation), O (alternative numeric symbols) /// Width: one or more digits /// Specifier: any letter or special sequence like :z, ::z, :::z fn format_spec_regex() -> &'static Regex { static RE: OnceLock = OnceLock::new(); - RE.get_or_init(|| Regex::new(r"%([_0^#+-]*)(\d*)(:*[a-zA-Z])").unwrap()) + RE.get_or_init(|| Regex::new(r"%([_0^#+EO-]*)(\d*)(:*[a-zA-Z])").unwrap()) } /// Check if format string contains any GNU modifiers and format if present. @@ -138,12 +167,20 @@ fn format_with_modifiers( // Add text before this match result.push_str(&temp_format[last_end..whole_match.start()]); - // Format the base specifier first + // Check if this specifier has E/O locale modifiers + // Note: E/O modifiers are handled by ICU in localize_format_string_with_modifiers + let _has_e_modifier = flags.contains('E') && supports_e_modifier(spec); + let _has_o_modifier = flags.contains('O') && supports_o_modifier(spec); + + // Format using jiff - note: jiff doesn't natively support E/O modifiers, + // so we pass the base specifier and handle E/O via ICU in the caller let base_format = format!("%{spec}"); let formatted = broken_down.to_string_with_config(config, &base_format)?; - // Check if this specifier has modifiers - if !flags.is_empty() || !width_str.is_empty() { + // Check if this specifier has modifiers (width, case, padding, E, O) + let has_modifiers = !flags.is_empty() || !width_str.is_empty(); + + if has_modifiers { // Apply modifiers to the formatted value let width: usize = width_str.parse().unwrap_or(0); let modified = apply_modifiers(&formatted, flags, width, spec); @@ -248,6 +285,10 @@ fn apply_modifiers(value: &str, flags: &str, width: usize, specifier: &str) -> S no_pad = false; pad_char = '0'; } + 'E' | 'O' => { + // E and O modifiers are handled in format_with_modifiers, + // skip them here as they don't affect padding/case + } _ => {} } } diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index 88816d9daed..bf8c1097014 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -134,9 +134,145 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { } } + // Handle E and O modifiers (POSIX locale extensions) + // These request alternative representations (e.g., era names, alternative numerals) + fmt = handle_eo_modifiers(&fmt, iso_date, locale); + fmt.replace(PERCENT_PLACEHOLDER, "%%") } +/// Handle E and O modifiers for alternative locale-specific representations. +/// +/// E modifiers request alternative representations (e.g., era names, alternative date formats). +/// O modifiers request alternative numeric symbols (e.g., Arabic-Indic digits, Eastern Arabic numerals). +fn handle_eo_modifiers(fmt: &str, iso_date: Date, locale: &Locale) -> String { + let mut result = fmt.to_string(); + let locale_prefs = locale.clone().into(); + + // Handle %OB - Alternative month names (standalone format) + // This is used when the month name appears without a day (e.g., "June" vs "June 1st") + if result.contains("%OB") { + // For now, treat %OB the same as %B since ICU doesn't have a direct standalone variant + if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { + result = result.replace("%OB", &f.format(&iso_date).to_string()); + } + } + + // Handle simple E modifiers without other flags: %EY, %Ey, %EC, %EB + // Process these first before the more complex patterns + for (pattern, _replacement) in [ + ("%EY", iso_date.extended_year().to_string()), + ("%Ey", format!("{:02}", iso_date.extended_year() % 100)), + ("%EC", format!("{:02}", iso_date.extended_year() / 100)), + ] { + if result.contains(pattern) { + // For non-Gregorian calendars, use the extended year as alternative representation + let calendar_type = get_locale_calendar_type(locale); + let alt_year = if calendar_type == CalendarType::Gregorian { + iso_date.extended_year() + } else { + match calendar_type { + CalendarType::Buddhist => { + let d = iso_date.to_calendar(Buddhist); + d.extended_year() + } + CalendarType::Persian => { + let d = iso_date.to_calendar(Persian); + d.extended_year() + } + CalendarType::Ethiopian => { + let d = iso_date.to_calendar(Ethiopian::new()); + d.extended_year() + } + CalendarType::Gregorian => unreachable!(), + } + }; + + let value = match pattern { + "%EY" => alt_year.to_string(), + "%Ey" => format!("{:02}", alt_year % 100), + "%EC" => format!("{:02}", alt_year / 100), + _ => unreachable!(), + }; + result = result.replace(pattern, &value); + } + } + + // Handle O modifiers for alternative numeric symbols + // These are locale-specific and typically use native numeral systems + // For now, we fall back to standard formatting since full O modifier support + // requires ICU's FixedDecimalFormatter with locale-specific numeral systems + let o_specifiers = [ + ("%Od", "d"), + ("%Oe", "e"), + ("%OH", "H"), + ("%OI", "I"), + ("%Om", "m"), + ("%OM", "M"), + ("%OS", "S"), + ("%Ou", "u"), + ("%OU", "U"), + ("%OV", "V"), + ("%Ow", "w"), + ("%OW", "W"), + ("%Oy", "y"), + ]; + + for (o_spec, base_spec) in o_specifiers { + if result.contains(o_spec) { + // Convert O modifier to base specifier for jiff to handle + // Full O modifier support would use ICU's FixedDecimalFormatter + // with the locale's default numeral system + result = result.replace(o_spec, &format!("%{base_spec}")); + } + } + + result +} + +/// Check if format string contains E or O locale modifiers. +/// +/// This is a simple check that looks for the presence of %E or %O patterns. +/// It handles both simple modifiers (%EY, %Od) and modifiers with flags/width (%_10EY). +pub fn has_locale_modifiers(format: &str) -> bool { + // Simple check for %E or %O patterns + // Note: This is a quick check that may have false positives for %%E or similar, + // but that's acceptable for our use case + format.contains("%E") || format.contains("%O") +} + +/// Transform a strftime format string with E/O modifiers to use locale-specific values. +/// +/// This function processes E/O modifiers and returns a tuple of: +/// - The transformed format string with E/O modifiers replaced by their values +/// - A flag indicating whether E/O modifiers were found and processed +/// +/// This is used by the date command to handle POSIX locale extensions. +pub fn localize_format_string_with_modifiers(format: &str, date: JiffDate) -> (String, bool) { + const PERCENT_PLACEHOLDER: &str = "\x00\x00"; + + let (locale, _) = get_time_locale(); + + // Check if format contains E or O modifiers + let has_eo_modifiers = has_locale_modifiers(format); + + if !has_eo_modifiers { + // No E/O modifiers, use standard localization + return (localize_format_string(format, date), false); + } + + let iso_date = Date::::convert_from(date); + let mut fmt = format.replace("%%", PERCENT_PLACEHOLDER); + + // Process E and O modifiers + fmt = handle_eo_modifiers(&fmt, iso_date, locale); + + // Apply standard localization for remaining specifiers + fmt = localize_format_string(&fmt, date); + + (fmt.replace(PERCENT_PLACEHOLDER, "%%"), true) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/uucore/src/lib/features/uptime.rs b/src/uucore/src/lib/features/uptime.rs index 1330c3d33c9..050107642c2 100644 --- a/src/uucore/src/lib/features/uptime.rs +++ b/src/uucore/src/lib/features/uptime.rs @@ -511,7 +511,10 @@ mod tests { assert!(boot_time > 0, "Boot time should be positive"); // Boot time should be after 2000-01-01 (946684800 seconds since epoch) - assert!(boot_time > 946684800, "Boot time should be after year 2000"); + assert!( + boot_time > 946_684_800, + "Boot time should be after year 2000" + ); // Boot time should be before current time let now = Timestamp::now().as_second(); diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 1471634df37..b24d7a8387b 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2334,6 +2334,78 @@ fn test_date_format_modifier_percent_escape() { .stdout_is("%Y=0000001999\n"); } +// Tests for E and O locale modifiers (POSIX extension) +#[test] +fn test_date_format_modifier_e_alternative_representation() { + // Test E modifier for alternative representation + // %EY should provide locale's alternative year representation (e.g., era names in Japanese) + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%EY"]) + .succeeds(); + + // Test %EC for alternative century representation + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%EC"]) + .succeeds(); +} + +#[test] +fn test_date_format_modifier_o_alternative_numerals() { + // Test O modifier for alternative numeric symbols + // %Od should provide locale's alternative day representation + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%Od"]) + .succeeds(); + + // Test %Om for alternative month representation + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%Om"]) + .succeeds(); + + // Test %OH for alternative hour representation + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01 12:00:00", "+%OH"]) + .succeeds(); +} + +#[test] +fn test_date_format_modifier_eo_combined_with_other_modifiers() { + // Test that E/O modifiers can be combined with other modifiers + // %_10EY should use alternative year with space padding + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%_10EY"]) + .succeeds(); + + // Test %010Od with zero padding and alternative numerals + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%010Od"]) + .succeeds(); +} + +#[test] +fn test_date_format_modifier_ob_alternative_month_name() { + // Test %OB for alternative month names (standalone format) + new_ucmd!() + .env("TZ", "UTC") + .env("LC_ALL", "C") + .args(&["-d", "1999-06-01", "+%OB"]) + .succeeds(); +} + // Tests for --debug flag #[test] fn test_date_debug_basic() {