Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 135 additions & 96 deletions src/uu/tr/src/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub enum BadSequence {
InvalidRepeatCount(String),
EmptySet2WhenNotTruncatingSet1,
ClassExceptLowerUpperInSet2,
ClassInSet2NotMatchedBySet1,
}

impl Display for BadSequence {
Expand All @@ -58,6 +59,9 @@ impl Display for BadSequence {
Self::ClassExceptLowerUpperInSet2 => {
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
}
Self::ClassInSet2NotMatchedBySet1 => {
write!(f, "when translating, every 'upper'/'lower' in set2 must be matched by a 'upper'/'lower' in the same position in set1")
}
}
}
}
Expand Down Expand Up @@ -91,18 +95,22 @@ pub enum Sequence {
}

impl Sequence {
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> {
pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that you are duplicating a bunch of code from `flatten` here.
Maybe it could be deduplicated a bit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logic-wise, this code does not duplicate anything: `flatten_all` calls `flatten_non_lower_upper` for everything that is not `[:lower:]`/`[:upper:]`.

match self {
Self::Char(c) => Box::new(std::iter::once(*c)),
Self::CharRange(l, r) => Box::new(*l..=*r),
Self::CharStar(c) => Box::new(std::iter::repeat(*c)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
Self::Class(class) => match class {
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()),
Class::Control => Box::new((0..=31).chain(std::iter::once(127))),
Class::Digit => Box::new(b'0'..=b'9'),
Class::Alnum => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'Z')
.chain(b'a'..=b'z')
.map(Self::Char),
),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
Class::Graph => Box::new(
(48..=57) // digit
.chain(65..=90) // uppercase
Expand All @@ -112,9 +120,9 @@ impl Sequence {
.chain(58..=64)
.chain(91..=96)
.chain(123..=126)
.chain(std::iter::once(32)), // space
.chain(std::iter::once(32))
.map(Self::Char), // space
),
Class::Lower => Box::new(b'a'..=b'z'),
Class::Print => Box::new(
(48..=57) // digit
.chain(65..=90) // uppercase
Expand All @@ -123,13 +131,37 @@ impl Sequence {
.chain(33..=47)
.chain(58..=64)
.chain(91..=96)
.chain(123..=126),
.chain(123..=126)
.map(Self::Char),
),
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()),
Class::Upper => Box::new(b'A'..=b'Z'),
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')),
Class::Punct => Box::new(
(33..=47)
.chain(58..=64)
.chain(91..=96)
.chain(123..=126)
.map(Self::Char),
),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
Class::Xdigit => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'F')
.chain(b'a'..=b'f')
.map(Self::Char),
),
s => Box::new(std::iter::once(Self::Class(*s))),
},
s => Box::new(std::iter::once(*s)),
}
}

/// Expands this sequence element completely.
///
/// `[:lower:]` and `[:upper:]` are expanded here into one `Char` per byte of
/// `a..=z` / `A..=Z`; every other element is handed to
/// `flatten_non_lower_upper`, which leaves those two classes untouched.
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
    match self {
        Self::Class(Class::Lower) => Box::new((b'a'..=b'z').map(Self::Char)),
        Self::Class(Class::Upper) => Box::new((b'A'..=b'Z').map(Self::Char)),
        // Any other class, and every non-class element, takes the shared path.
        other => other.flatten_non_lower_upper(),
    }
}

Expand All @@ -141,90 +173,97 @@ impl Sequence {
truncate_set1_flag: bool,
translating: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
let set1_star_count = set1.iter().filter(is_char_star).count();
if set1_star_count == 0 {
let set2 = Self::from_str(set2_str)?;

if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
let to_u8 = |s: Self| -> Option<u8> {
match s {
Self::Char(c) => Some(c),
_ => None,
}
};

let set1 = Self::from_str(set1_str)?;
if set1.iter().filter(is_char_star).count() != 0 {
return Err(BadSequence::CharRepeatInSet1);
}

let mut set2 = Self::from_str(set2_str)?;
if set2.iter().filter(is_char_star).count() > 1 {
return Err(BadSequence::MultipleCharRepeatInSet2);
}

let set2_star_count = set2.iter().filter(is_char_star).count();
if set2_star_count < 2 {
let char_star = set2.iter().find_map(|s| match s {
Self::CharStar(c) => Some(c),
_ => None,
});
let mut partition = set2.as_slice().split(|s| matches!(s, Self::CharStar(_)));
let set1_len = set1.iter().flat_map(Self::flatten).count();
let set2_len = set2
if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
}

let mut set1_solved: Vec<u8> = set1
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
let set1_len = set1_solved.len();

let set2_len = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten_all)
.count();

let star_compensate_len = set1_len.saturating_sub(set2_len);

//Replace CharStar with CharRepeat
set2 = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(0) => None,
Self::CharStar(c) => Some(Self::CharRepeat(*c, star_compensate_len)),
r => Some(*r),
})
.collect();

//Flatten everything but upper/lower into Char
let set1_flattened: Vec<_> = set1
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();
set2 = set2
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();

if set2
.iter()
.zip(
set1_flattened
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
let (left, right) = (partition.next(), partition.next());
let set2_solved: Vec<_> = match (left, right) {
(None, None) => match char_star {
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
None => std::iter::empty().collect(),
},
(None, Some(set2_b)) => {
if let Some(c) = char_star {
std::iter::repeat(*c)
.take(star_compensate_len)
.chain(set2_b.iter().flat_map(Self::flatten))
.collect()
} else {
set2_b.iter().flat_map(Self::flatten).collect()
}
}
(Some(set2_a), None) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.collect(),
None => set2_a.iter().flat_map(Self::flatten).collect(),
},
(Some(set2_a), Some(set2_b)) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.chain(set2_b.iter().flat_map(Self::flatten))
.collect(),
None => set2_a
.iter()
.chain(set2_b.iter())
.flat_map(Self::flatten)
.collect(),
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
} else {
Err(BadSequence::MultipleCharRepeatInSet2)
}
} else {
Err(BadSequence::CharRepeatInSet1)
.chain(std::iter::repeat(&Self::Char(0))),
)
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
{
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
}

let set2_solved: Vec<_> = set2
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();

//Truncation is done dead last. It has no influence on the other conversion steps
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
}
}

Expand Down
5 changes: 5 additions & 0 deletions tests/by-util/test_tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1369,3 +1369,8 @@ fn check_ignore_truncate_when_squeezing() {
// Runs `tr -t 1234 '[:blank:]'` and expects the command to fail: set2 contains
// a character class other than `[:upper:]`/`[:lower:]` while translating.
fn check_disallow_blank_in_set2_when_translating() {
new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails();
}

// Runs `tr -t '1[:upper:]' '[:upper:]'` and expects the command to fail.
// set2 has `[:upper:]` at position 0, where set1 has the plain character `1`,
// so the class in set2 is not matched by a class at the same position in set1.
// NOTE(review): presumably this surfaces as `ClassInSet2NotMatchedBySet1`; the
// test itself only asserts that the command fails.
#[test]
fn check_class_in_set2_must_be_matched_in_set1() {
new_ucmd!().args(&["-t", "1[:upper:]", "[:upper:]"]).fails();
}