Skip to content

Commit 65c88f3

Browse files
authored
Merge pull request #115 from jfinkels/patterns-with-length
Greedy parsing of datetime before time delta
2 parents d0dceb6 + 2c55db2 commit 65c88f3

File tree

1 file changed

+126
-44
lines changed

1 file changed

+126
-44
lines changed

src/lib.rs

Lines changed: 126 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ mod parse_time_only_str;
2121
mod parse_weekday;
2222

2323
use chrono::{
24-
DateTime, Datelike, Duration, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone,
25-
Timelike,
24+
DateTime, Datelike, Duration, FixedOffset, Local, LocalResult, MappedLocalTime, NaiveDateTime,
25+
TimeZone, Timelike,
2626
};
2727

2828
use parse_relative_time::parse_relative_time_at_date;
@@ -78,11 +78,88 @@ mod format {
7878
pub const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z";
7979
pub const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z";
8080
pub const YYYYMMDDHHMMSS_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M:%S %#z";
81+
pub const YYYYMMDDHHMMSS_HYPHENATED_ZULU: &str = "%Y-%m-%d %H:%M:%SZ";
8182
pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET: &str = "%Y-%m-%dT%H:%M:%S%#z";
83+
pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_SPACE_OFFSET: &str = "%Y-%m-%dT%H:%M:%S %#z";
8284
pub const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S";
8385
pub const UTC_OFFSET: &str = "UTC%#z";
8486
pub const ZULU_OFFSET: &str = "Z%#z";
8587
pub const NAKED_OFFSET: &str = "%#z";
88+
89+
/// Whether the pattern is the one datetime pattern that ends in a literal `Z` (Zulu time).
90+
pub(crate) fn is_zulu(pattern: &str) -> bool {
91+
pattern == YYYYMMDDHHMMSS_HYPHENATED_ZULU
92+
}
93+
94+
/// Patterns for datetimes with timezones.
95+
///
96+
/// These are in decreasing order of length. The same pattern may
97+
/// appear multiple times with different lengths if the pattern
98+
/// accepts input strings of different lengths. For example, the
99+
/// specifier `%#z` accepts two-digit time zone offsets (`+00`)
100+
/// and four-digit time zone offsets (`+0000`).
101+
pub(crate) const PATTERNS_TZ: [(&str, usize); 9] = [
102+
(YYYYMMDDHHMMSS_HYPHENATED_OFFSET, 25),
103+
(YYYYMMDDHHMMSS_T_SEP_HYPHENATED_SPACE_OFFSET, 25),
104+
(YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET, 24),
105+
(YYYYMMDDHHMMSS_HYPHENATED_OFFSET, 23),
106+
(YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET, 22),
107+
(YYYYMMDDHHMM_HYPHENATED_OFFSET, 22),
108+
(YYYYMMDDHHMM_UTC_OFFSET, 20),
109+
(YYYYMMDDHHMM_OFFSET, 18),
110+
(YYYYMMDDHHMM_ZULU_OFFSET, 18),
111+
];
112+
113+
/// Patterns for datetimes without timezones.
114+
///
115+
/// These are in decreasing order of length.
116+
pub(crate) const PATTERNS_NO_TZ: [(&str, usize); 8] = [
117+
(YYYYMMDDHHMMSS, 29),
118+
(POSIX_LOCALE, 24),
119+
(YYYYMMDDHHMMSS_HYPHENATED_ZULU, 20),
120+
(YYYYMMDDHHMMS_T_SEP, 19),
121+
(YYYYMMDDHHMMS, 19),
122+
(YYYY_MM_DD_HH_MM, 16),
123+
(YYYYMMDDHHMM_DOT_SS, 15),
124+
(YYYYMMDDHHMM, 12),
125+
];
126+
127+
/// Patterns for dates with neither times nor timezones.
128+
///
129+
/// These are in decreasing order of length. The same pattern may
130+
/// appear multiple times with different lengths if the pattern
131+
/// accepts input strings of different lengths. For example, the
132+
/// specifier `%m` accepts one-digit month numbers (like `2`) and
133+
/// two-digit month numbers (like `02` or `12`).
134+
pub(crate) const PATTERNS_DATE_NO_TZ: [(&str, usize); 8] = [
135+
(ISO_8601, 10),
136+
(MMDDYYYY_SLASH, 10),
137+
(ISO_8601, 9),
138+
(MMDDYYYY_SLASH, 9),
139+
(ISO_8601, 8),
140+
(MMDDYY_SLASH, 8),
141+
(MMDDYYYY_SLASH, 8),
142+
(ISO_8601_NO_SEP, 8),
143+
];
144+
145+
/// Patterns for lone timezone offsets.
146+
///
147+
/// These are in decreasing order of length. The same pattern may
148+
/// appear multiple times with different lengths if the pattern
149+
/// accepts input strings of different lengths. For example, the
150+
/// specifier `%#z` accepts two-digit time zone offsets (`+00`)
151+
/// and four-digit time zone offsets (`+0000`).
152+
pub(crate) const PATTERNS_OFFSET: [(&str, usize); 9] = [
153+
(UTC_OFFSET, 9),
154+
(UTC_OFFSET, 8),
155+
(ZULU_OFFSET, 7),
156+
(UTC_OFFSET, 6),
157+
(ZULU_OFFSET, 6),
158+
(NAKED_OFFSET, 6),
159+
(NAKED_OFFSET, 5),
160+
(ZULU_OFFSET, 4),
161+
(NAKED_OFFSET, 3),
162+
];
86163
}
87164

88165
/// Parses a time string and returns a `DateTime` representing the
@@ -161,33 +238,33 @@ pub fn parse_datetime_at_date<S: AsRef<str> + Clone>(
161238
// sign, then insert a 0 between the sign and the digit to make it
162239
// possible for `chrono` to parse it.
163240
let pattern = Regex::new(r"([\+-])(\d)$").unwrap();
164-
let s = pattern.replace(s.as_ref(), "${1}0${2}");
165-
for fmt in [
166-
format::YYYYMMDDHHMM_OFFSET,
167-
format::YYYYMMDDHHMM_HYPHENATED_OFFSET,
168-
format::YYYYMMDDHHMMSS_HYPHENATED_OFFSET,
169-
format::YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET,
170-
format::YYYYMMDDHHMM_UTC_OFFSET,
171-
format::YYYYMMDDHHMM_ZULU_OFFSET,
172-
] {
173-
if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) {
174-
return Ok(parsed);
241+
let tmp_s = pattern.replace(s.as_ref(), "${1}0${2}");
242+
for (fmt, n) in format::PATTERNS_TZ {
243+
if tmp_s.len() >= n {
244+
if let Ok(parsed) = DateTime::parse_from_str(&tmp_s[0..n], fmt) {
245+
return Ok(parsed);
246+
}
175247
}
176248
}
177249

178250
// Parse formats with no offset, assume local time
179-
for fmt in [
180-
format::YYYYMMDDHHMMS_T_SEP,
181-
format::YYYYMMDDHHMM,
182-
format::YYYYMMDDHHMMS,
183-
format::YYYYMMDDHHMMSS,
184-
format::YYYY_MM_DD_HH_MM,
185-
format::YYYYMMDDHHMM_DOT_SS,
186-
format::POSIX_LOCALE,
187-
] {
188-
if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) {
189-
if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) {
190-
return Ok(dt);
251+
for (fmt, n) in format::PATTERNS_NO_TZ {
252+
if s.as_ref().len() >= n {
253+
if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[0..n], fmt) {
254+
// Special case: `chrono` can only parse a datetime like
255+
// `2000-01-01 01:23:45Z` as a naive datetime, so we
256+
// manually force it to be in UTC.
257+
if format::is_zulu(fmt) {
258+
match FixedOffset::east_opt(0)
259+
.unwrap()
260+
.from_local_datetime(&parsed)
261+
{
262+
MappedLocalTime::Single(datetime) => return Ok(datetime),
263+
_ => return Err(ParseDateTimeError::InvalidInput),
264+
}
265+
} else if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) {
266+
return Ok(dt);
267+
}
191268
}
192269
}
193270
}
@@ -222,16 +299,13 @@ pub fn parse_datetime_at_date<S: AsRef<str> + Clone>(
222299

223300
let ts = s.as_ref().to_owned() + " 0000";
224301
// Parse date only formats - assume midnight local timezone
225-
for fmt in [
226-
format::ISO_8601,
227-
format::ISO_8601_NO_SEP,
228-
format::MMDDYYYY_SLASH,
229-
format::MMDDYY_SLASH,
230-
] {
231-
let f = fmt.to_owned() + " %H%M";
232-
if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) {
233-
if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) {
234-
return Ok(dt);
302+
for (fmt, n) in format::PATTERNS_DATE_NO_TZ {
303+
if ts.len() >= n + 5 {
304+
let f = fmt.to_owned() + " %H%M";
305+
if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts[0..n + 5], &f) {
306+
if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) {
307+
return Ok(dt);
308+
}
235309
}
236310
}
237311
}
@@ -240,15 +314,13 @@ pub fn parse_datetime_at_date<S: AsRef<str> + Clone>(
240314
// offsets, so instead we replicate parse_date behaviour by getting
241315
// the current local date, and creating a datetime string at midnight,
242316
// before trying offset suffixes
243-
let ts = format!("{}0000{}", date.format("%Y%m%d"), s);
244-
for fmt in [
245-
format::UTC_OFFSET,
246-
format::ZULU_OFFSET,
247-
format::NAKED_OFFSET,
248-
] {
249-
let f = format::YYYYMMDDHHMM.to_owned() + fmt;
250-
if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) {
251-
return Ok(parsed);
317+
let ts = format!("{}0000{}", date.format("%Y%m%d"), tmp_s.as_ref());
318+
for (fmt, n) in format::PATTERNS_OFFSET {
319+
if ts.len() == n + 12 {
320+
let f = format::YYYYMMDDHHMM.to_owned() + fmt;
321+
if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) {
322+
return Ok(parsed);
323+
}
252324
}
253325
}
254326

@@ -575,4 +647,14 @@ mod tests {
575647
assert_eq!(result, Err(ParseDateTimeError::InvalidInput));
576648
}
577649
}
650+
651+
#[test]
652+
fn test_datetime_ending_in_z() {
653+
use crate::parse_datetime;
654+
use chrono::{TimeZone, Utc};
655+
656+
let actual = parse_datetime("2023-06-03 12:00:01Z").unwrap();
657+
let expected = Utc.with_ymd_and_hms(2023, 6, 3, 12, 0, 1).unwrap();
658+
assert_eq!(actual, expected);
659+
}
578660
}

0 commit comments

Comments
 (0)