Skip to content

Commit f2d050e

Browse files
andrewliebenow authored and sylvestre committed
Restructure non-UTF-8 input error handling
1 parent 38a8625 commit f2d050e

File tree

7 files changed

+147
-114
lines changed

7 files changed

+147
-114
lines changed

src/uu/echo/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ path = "src/echo.rs"
1818

1919
[dependencies]
2020
clap = { workspace = true }
21-
uucore = { workspace = true, features = ["format"] }
21+
uucore = { workspace = true }
2222

2323
[[bin]]
2424
name = "echo"

src/uu/echo/src/echo.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::iter::Peekable;
1212
use std::ops::ControlFlow;
1313
use std::slice::Iter;
1414
use uucore::error::UResult;
15-
use uucore::{format_usage, help_about, help_section, help_usage};
15+
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose};
1616

1717
const ABOUT: &str = help_about!("echo.md");
1818
const USAGE: &str = help_usage!("echo.md");
@@ -355,8 +355,9 @@ fn execute(
355355
arguments_after_options: ValuesRef<'_, OsString>,
356356
) -> UResult<()> {
357357
for (i, input) in arguments_after_options.enumerate() {
358-
let bytes = uucore::format::try_get_bytes_from_os_str(input)?;
358+
let bytes = os_str_as_bytes_verbose(input)?;
359359

360+
// Don't print a space before the first argument
360361
if i > 0 {
361362
stdout_lock.write_all(b" ")?;
362363
}

src/uu/printf/src/printf.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,8 @@ use std::ffi::OsString;
1111
use std::io::stdout;
1212
use std::ops::ControlFlow;
1313
use uucore::error::{UResult, UUsageError};
14-
use uucore::format::{
15-
parse_spec_and_escape, try_get_bytes_from_os_str, FormatArgument, FormatItem,
16-
};
17-
use uucore::{format_usage, help_about, help_section, help_usage};
14+
use uucore::format::{parse_spec_and_escape, FormatArgument, FormatError, FormatItem};
15+
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose};
1816

1917
const VERSION: &str = "version";
2018
const HELP: &str = "help";
@@ -35,7 +33,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
3533
.get_one::<OsString>(options::FORMAT)
3634
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
3735

38-
let format_bytes = try_get_bytes_from_os_str(format)?;
36+
let format_bytes = os_str_as_bytes_verbose(format).map_err(FormatError::from)?;
3937

4038
let values = match matches.get_many::<OsString>(options::ARGUMENT) {
4139
Some(os_string) => os_string

src/uucore/src/lib/features/format/argument.rs

Lines changed: 7 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55

66
use super::FormatError;
77
use crate::{
8-
error::{set_exit_code, UError},
8+
error::set_exit_code,
99
features::format::num_parser::{ParseError, ParsedNumber},
10+
os_str_as_bytes_verbose, os_str_as_str_verbose,
1011
quoting_style::{escape_name, Quotes, QuotingStyle},
1112
show_error, show_warning,
1213
};
1314
use os_display::Quotable;
14-
use std::{
15-
error::Error,
16-
ffi::{OsStr, OsString},
17-
fmt::Display,
18-
};
15+
use std::ffi::{OsStr, OsString};
1916

2017
/// An argument for formatting
2118
///
@@ -50,7 +47,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
5047
};
5148
match next {
5249
FormatArgument::Char(c) => Ok(*c as u8),
53-
FormatArgument::Unparsed(os) => match try_get_bytes_from_os_str(os)?.first() {
50+
FormatArgument::Unparsed(os) => match os_str_as_bytes_verbose(os)?.first() {
5451
Some(&byte) => Ok(byte),
5552
None => Ok(b'\0'),
5653
},
@@ -65,7 +62,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
6562
match next {
6663
FormatArgument::UnsignedInt(n) => Ok(*n),
6764
FormatArgument::Unparsed(os) => {
68-
let str = try_get_str_from_os_str(os)?;
65+
let str = os_str_as_str_verbose(os)?;
6966

7067
Ok(extract_value(ParsedNumber::parse_u64(str), str))
7168
}
@@ -80,7 +77,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
8077
match next {
8178
FormatArgument::SignedInt(n) => Ok(*n),
8279
FormatArgument::Unparsed(os) => {
83-
let str = try_get_str_from_os_str(os)?;
80+
let str = os_str_as_str_verbose(os)?;
8481

8582
Ok(extract_value(ParsedNumber::parse_i64(str), str))
8683
}
@@ -95,7 +92,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
9592
match next {
9693
FormatArgument::Float(n) => Ok(*n),
9794
FormatArgument::Unparsed(os) => {
98-
let str = try_get_str_from_os_str(os)?;
95+
let str = os_str_as_str_verbose(os)?;
9996

10097
Ok(extract_value(ParsedNumber::parse_f64(str), str))
10198
}
@@ -147,56 +144,3 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
147144
}
148145
}
149146
}
150-
151-
#[derive(Debug)]
152-
pub struct NonUtf8OsStr(pub String);
153-
154-
impl Display for NonUtf8OsStr {
155-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156-
f.write_fmt(format_args!(
157-
"invalid (non-UTF-8) string like {} encountered",
158-
self.0.quote(),
159-
))
160-
}
161-
}
162-
163-
impl Error for NonUtf8OsStr {}
164-
impl UError for NonUtf8OsStr {}
165-
166-
pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> {
167-
match os_str.to_str() {
168-
Some(st) => Ok(st),
169-
None => {
170-
let cow = os_str.to_string_lossy();
171-
172-
Err(NonUtf8OsStr(cow.to_string()))
173-
}
174-
}
175-
}
176-
177-
pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> {
178-
let result = {
179-
#[cfg(target_family = "unix")]
180-
{
181-
use std::os::unix::ffi::OsStrExt;
182-
183-
Ok(input.as_bytes())
184-
}
185-
186-
#[cfg(not(target_family = "unix"))]
187-
{
188-
// TODO
189-
// Verify that this works correctly on these platforms
190-
match input.to_str().map(|st| st.as_bytes()) {
191-
Some(sl) => Ok(sl),
192-
None => {
193-
let cow = input.to_string_lossy();
194-
195-
Err(NonUtf8OsStr(cow.to_string()))
196-
}
197-
}
198-
}
199-
};
200-
201-
result
202-
}

src/uucore/src/lib/features/format/mod.rs

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,13 @@ pub mod num_format;
3737
pub mod num_parser;
3838
mod spec;
3939

40-
pub use argument::*;
41-
use os_display::Quotable;
40+
pub use argument::FormatArgument;
41+
42+
use self::{
43+
escape::{parse_escape_code, EscapedChar},
44+
num_format::Formatter,
45+
};
46+
use crate::{error::UError, NonUtf8OsStrError, OsStrConversionType};
4247
use spec::Spec;
4348
use std::{
4449
error::Error,
@@ -47,13 +52,6 @@ use std::{
4752
ops::ControlFlow,
4853
};
4954

50-
use crate::error::UError;
51-
52-
use self::{
53-
escape::{parse_escape_code, EscapedChar},
54-
num_format::Formatter,
55-
};
56-
5755
#[derive(Debug)]
5856
pub enum FormatError {
5957
SpecError(Vec<u8>),
@@ -64,7 +62,7 @@ pub enum FormatError {
6462
NeedAtLeastOneSpec(Vec<u8>),
6563
WrongSpecType,
6664
InvalidPrecision(String),
67-
InvalidEncoding(NonUtf8OsStr),
65+
InvalidEncoding(NonUtf8OsStrError),
6866
}
6967

7068
impl Error for FormatError {}
@@ -76,8 +74,8 @@ impl From<std::io::Error> for FormatError {
7674
}
7775
}
7876

79-
impl From<NonUtf8OsStr> for FormatError {
80-
fn from(value: NonUtf8OsStr) -> FormatError {
77+
impl From<NonUtf8OsStrError> for FormatError {
78+
fn from(value: NonUtf8OsStrError) -> FormatError {
8179
FormatError::InvalidEncoding(value)
8280
}
8381
}
@@ -107,11 +105,18 @@ impl Display for FormatError {
107105
Self::NoMoreArguments => write!(f, "no more arguments"),
108106
Self::InvalidArgument(_) => write!(f, "invalid argument"),
109107
Self::InvalidEncoding(no) => {
110-
write!(
111-
f,
112-
"invalid (non-UTF-8) argument like {} encountered",
113-
no.0.quote()
114-
)
108+
use os_display::Quotable;
109+
110+
let quoted = no.input_lossy_string.quote();
111+
112+
match no.conversion_type {
113+
OsStrConversionType::ToBytes => f.write_fmt(format_args!(
114+
"invalid (non-UTF-8) argument like {quoted} encountered when converting argument to bytes on a platform that doesn't use UTF-8",
115+
)),
116+
OsStrConversionType::ToString => f.write_fmt(format_args!(
117+
"invalid (non-UTF-8) argument like {quoted} encountered",
118+
)),
119+
}
115120
}
116121
}
117122
}

src/uucore/src/lib/features/format/spec.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@
66
// spell-checker:ignore (vars) intmax ptrdiff padlen
77

88
use super::{
9+
argument::ArgumentIter,
910
num_format::{
1011
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
1112
UnsignedIntVariant,
1213
},
13-
parse_escape_only, try_get_bytes_from_os_str, ArgumentIter, FormatChar, FormatError,
14+
parse_escape_only, FormatChar, FormatError,
15+
};
16+
use crate::{
17+
os_str_as_bytes_verbose,
18+
quoting_style::{escape_name, QuotingStyle},
1419
};
15-
use crate::quoting_style::{escape_name, QuotingStyle};
1620
use std::{io::Write, ops::ControlFlow};
1721

1822
/// A parsed specification for formatting a value
@@ -333,7 +337,7 @@ impl Spec {
333337

334338
let os_str = args.get_str();
335339

336-
let bytes = try_get_bytes_from_os_str(os_str).unwrap();
340+
let bytes = os_str_as_bytes_verbose(os_str)?;
337341

338342
let truncated = match precision {
339343
Some(p) if p < os_str.len() => &bytes[..p],
@@ -345,7 +349,7 @@ impl Spec {
345349
Self::EscapedString => {
346350
let os_str = args.get_str();
347351

348-
let bytes = try_get_bytes_from_os_str(os_str).unwrap();
352+
let bytes = os_str_as_bytes_verbose(os_str)?;
349353

350354
let mut parsed = Vec::<u8>::new();
351355

0 commit comments

Comments
 (0)