Skip to content

Commit e53aabd

Browse files
committed
add datetime parser
Add a relaxed datetime parser. This parser works by using `chrono`'s own parsing utilities with a try/succeed approach: each supported format is attempted in turn until one of them parses. This implementation has some positives and some drawbacks.

On the positive side:
- It was easy to implement.
- It is easy to add more datetime formats.

To add a newly supported format, a developer adds the required format string to the `format` mod in `parse_datetime.rs`, and then adds it as a potential format to the relevant `fmts` vec.

On the negative side:
- It is not easily customisable beyond the parsing formats that `chrono` supports. E.g., `chrono` does not currently support parsing offsets without trailing zeros: `from_str("UTC+1")` should return a valid response, but `chrono` fails to parse it.
- Because it is an attempt-driven parser, it is likely not that performant. I have not done any performance testing as part of this change, but I would expect a custom parser to perform much better.
1 parent e5d7fbc commit e53aabd

File tree

2 files changed

+219
-0
lines changed

2 files changed

+219
-0
lines changed

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
// For the full copyright and license information, please view the LICENSE
22
// file that was distributed with this source code.
33

4+
// Expose parse_datetime
5+
pub mod parse_datetime;
6+
47
use chrono::{Duration, Local, NaiveDate, Utc};
58
use regex::{Error as RegexError, Regex};
69
use std::error::Error;

src/parse_datetime.rs

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone};
2+
3+
use crate::ParseDurationError;
4+
5+
/// `strftime`-style patterns that the parser tries against its input.
///
/// Taken from the `touch` core util. The constant names are historical and do
/// not always describe the pattern exactly; the pattern string is authoritative.
mod format {
    // --- Date only -------------------------------------------------------

    /// Date with hyphens, e.g. `2021-02-15`.
    pub(crate) const ISO_8601: &str = "%Y-%m-%d";
    /// Date without separators, e.g. `20210215`.
    pub(crate) const ISO_8601_NO_SEP: &str = "%Y%m%d";

    // --- Date and time, no offset ----------------------------------------

    /// Weekday, month name, day, time and year, e.g. `Mon Feb 15 06:37:47 2021`.
    pub(crate) const POSIX_LOCALE: &str = "%a %b %e %H:%M:%S %Y";
    /// Compact date and time with `.SS` seconds, e.g. `202102150637.47`.
    pub(crate) const YYYYMMDDHHMM_DOT_SS: &str = "%Y%m%d%H%M.%S";
    /// Hyphenated date and time with a `.%f` fractional-seconds suffix.
    pub(crate) const YYYYMMDDHHMMSS: &str = "%Y-%m-%d %H:%M:%S.%f";
    /// Hyphenated date and time to the second, e.g. `2021-02-15 06:37:47`.
    pub(crate) const YYYYMMDDHHMMS: &str = "%Y-%m-%d %H:%M:%S";
    /// Hyphenated date and time to the minute, e.g. `2021-02-15 06:37`.
    pub(crate) const YYYY_MM_DD_HH_MM: &str = "%Y-%m-%d %H:%M";
    /// Compact date and time to the minute, e.g. `202102150637`.
    pub(crate) const YYYYMMDDHHMM: &str = "%Y%m%d%H%M";
    /// Same as `YYYYMMDDHHMMS` but with a `T` between date and time.
    pub(crate) const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S";

    // --- Date and time with an explicit offset ---------------------------

    /// Compact date and time followed by a space and a `%z` offset.
    pub(crate) const YYYYMMDDHHMM_OFFSET: &str = "%Y%m%d%H%M %z";
    /// Compact date and time followed by `UTC` and a `%z` offset.
    pub(crate) const YYYYMMDDHHMM_UTC_OFFSET: &str = "%Y%m%d%H%MUTC%z";
    /// Compact date and time followed by `Z` and a `%z` offset.
    pub(crate) const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z";
    /// Hyphenated date and time (to the minute) followed by a `%z` offset.
    pub(crate) const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z";

    // --- Offset-only suffixes (appended to a date/time pattern) ----------

    /// `UTC` followed by a `%#z` offset, e.g. `UTC+07:00`, `UTC+0700`, `UTC+07`.
    pub(crate) const UTC_OFFSET: &str = "UTC%#z";
    /// `Z` followed by a `%#z` offset, e.g. `Z+07:00`, `Z+0700`, `Z+07`.
    pub(crate) const ZULU_OFFSET: &str = "Z%#z";
}
24+
25+
/// Loosely parses a time string and returns a `DateTime` representing the
26+
/// absolute time of the string.
27+
///
28+
/// # Arguments
29+
///
30+
/// * `s` - A string slice representing the time.
31+
///
32+
/// # Examples
33+
///
34+
/// ```
35+
/// use chrono::{DateTime, Utc};
36+
/// let time = humantime_to_duration::parse_datetime::from_str("2023-06-03 12:00:01Z");
37+
/// assert_eq!(time.unwrap(), Utc.with_ymd_and_hms(2023, 06, 03, 12, 00, 01).unwrap());
38+
/// ```
39+
///
40+
/// # Supported formats
41+
///
42+
/// The function supports the following formats for time:
43+
///
44+
/// * ISO formats
45+
/// * timezone offsets, e.g., "UTC-0100"
46+
///
47+
/// # Returns
48+
///
49+
/// * `Ok(DateTime<FixedOffset>)` - If the input string can be parsed as a time
50+
/// * `Err(ParseDurationError)` - If the input string cannot be parsed as a relative time
51+
///
52+
/// # Errors
53+
///
54+
/// This function will return `Err(ParseDurationError::InvalidInput)` if the input string
55+
/// cannot be parsed as a relative time.
56+
///
57+
pub fn from_str<S: AsRef<str> + Clone>(s: S) -> Result<DateTime<FixedOffset>, ParseDurationError> {
58+
// TODO: Replace with a proper customiseable parsing solution using `nom`, `grmtools`, or
59+
// similar
60+
61+
// Formats with offsets don't require NaiveDateTime workaround
62+
for fmt in [
63+
format::YYYYMMDDHHMM_OFFSET,
64+
format::YYYYMMDDHHMM_HYPHENATED_OFFSET,
65+
format::YYYYMMDDHHMM_UTC_OFFSET,
66+
format::YYYYMMDDHHMM_ZULU_OFFSET,
67+
] {
68+
if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) {
69+
return Ok(parsed);
70+
}
71+
}
72+
73+
// Parse formats with no offset, assume local time
74+
for fmt in [
75+
format::YYYYMMDDHHMMS_T_SEP,
76+
format::YYYYMMDDHHMM,
77+
format::YYYYMMDDHHMMS,
78+
format::YYYYMMDDHHMMSS,
79+
format::YYYY_MM_DD_HH_MM,
80+
format::YYYYMMDDHHMM_DOT_SS,
81+
format::POSIX_LOCALE,
82+
] {
83+
if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) {
84+
if let Ok(dt) = naive_dt_to_fixed_offset(parsed) {
85+
return Ok(dt);
86+
}
87+
}
88+
}
89+
90+
// Parse epoch seconds
91+
if s.as_ref().bytes().next() == Some(b'@') {
92+
if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[1..], "%s") {
93+
if let Ok(dt) = naive_dt_to_fixed_offset(parsed) {
94+
return Ok(dt);
95+
}
96+
}
97+
}
98+
99+
let ts = s.as_ref().to_owned() + "0000";
100+
// Parse date only formats - assume midnight local timezone
101+
for fmt in [format::ISO_8601, format::ISO_8601_NO_SEP] {
102+
let f = fmt.to_owned() + "%H%M";
103+
if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) {
104+
if let Ok(dt) = naive_dt_to_fixed_offset(parsed) {
105+
return Ok(dt);
106+
}
107+
}
108+
}
109+
110+
// Parse offsets. chrono doesn't provide any functionality to parse
111+
// offsets, so instead we replicate parse_date behaviour by getting
112+
// the current date with local, and create a date time string at midnight,
113+
// before trying offset suffixes
114+
let local = Local::now();
115+
let ts = format!("{}", local.format("%Y%m%d")) + "0000" + s.as_ref();
116+
for fmt in [format::UTC_OFFSET, format::ZULU_OFFSET] {
117+
let f = format::YYYYMMDDHHMM.to_owned() + fmt;
118+
if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) {
119+
return Ok(parsed);
120+
}
121+
}
122+
123+
// Default parse and failure
124+
s.as_ref()
125+
.parse()
126+
.map_err(|_| (ParseDurationError::InvalidInput))
127+
}
128+
129+
// Convert NaiveDateTime to DateTime<FixedOffset> by assuming the offset
130+
// is local time
131+
fn naive_dt_to_fixed_offset(dt: NaiveDateTime) -> Result<DateTime<FixedOffset>, ()> {
132+
let now = Local::now();
133+
match now.offset().from_local_datetime(&dt) {
134+
LocalResult::Single(dt) => Ok(dt),
135+
_ => Err(()),
136+
}
137+
}
138+
139+
#[cfg(test)]
140+
mod tests {
141+
static TEST_TIME: i64 = 1613371067;
142+
143+
#[cfg(test)]
144+
mod iso_8601 {
145+
use std::env;
146+
147+
use crate::{parse_datetime::from_str, parse_datetime::tests::TEST_TIME};
148+
149+
#[test]
150+
fn test_t_sep() {
151+
env::set_var("TZ", "UTC");
152+
let dt = "2021-02-15T06:37:47";
153+
let actual = from_str(dt);
154+
assert_eq!(actual.unwrap().timestamp(), TEST_TIME);
155+
}
156+
157+
#[test]
158+
fn test_space_sep() {
159+
env::set_var("TZ", "UTC");
160+
let dt = "2021-02-15 06:37:47";
161+
let actual = from_str(dt);
162+
assert_eq!(actual.unwrap().timestamp(), TEST_TIME);
163+
}
164+
165+
#[test]
166+
fn test_space_sep_offset() {
167+
env::set_var("TZ", "UTC");
168+
let dt = "2021-02-14 22:37:47 -0800";
169+
let actual = from_str(dt);
170+
assert_eq!(actual.unwrap().timestamp(), TEST_TIME);
171+
}
172+
173+
#[test]
174+
fn test_t_sep_offset() {
175+
env::set_var("TZ", "UTC");
176+
let dt = "2021-02-14T22:37:47 -0800";
177+
let actual = from_str(dt);
178+
assert_eq!(actual.unwrap().timestamp(), TEST_TIME);
179+
}
180+
}
181+
182+
#[cfg(test)]
183+
mod offsets {
184+
use chrono::Local;
185+
186+
use crate::parse_datetime::from_str;
187+
188+
#[test]
189+
fn test_positive_offsets() {
190+
let offsets = vec![
191+
"UTC+07:00",
192+
"UTC+0700",
193+
"UTC+07",
194+
"Z+07:00",
195+
"Z+0700",
196+
"Z+07",
197+
];
198+
199+
let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0700");
200+
for offset in offsets {
201+
let actual = from_str(offset).unwrap();
202+
assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z")));
203+
}
204+
}
205+
206+
#[test]
207+
fn test_partial_offset() {
208+
let offsets = vec!["UTC+00:15", "UTC+0015", "Z+00:15", "Z+0015"];
209+
let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0015");
210+
for offset in offsets {
211+
let actual = from_str(offset).unwrap();
212+
assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z")));
213+
}
214+
}
215+
}
216+
}

0 commit comments

Comments
 (0)