From b10924eb0b7d454d06cae17b331b2da8be24ad88 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Sun, 29 Oct 2023 21:25:58 +0800 Subject: [PATCH 01/12] printf: support %q --- src/uucore/src/lib/features/tokenize/sub.rs | 32 +++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs index c65a37a689b..cfe3deb42e0 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -91,7 +91,7 @@ impl Sub { // for more dry printing, field characters are grouped // in initialization of token. let field_type = match field_char { - 's' | 'b' => FieldType::Strf, + 's' | 'b' | 'q' => FieldType::Strf, 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, 'f' | 'F' => FieldType::Floatf, 'a' | 'A' => FieldType::CninetyNineHexFloatf, @@ -189,7 +189,7 @@ impl SubParser { let mut legal_fields = [ // 'a', 'A', //c99 hex float implementation not yet complete - 'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 's', 'u', 'x', 'X', + 'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 'q', 's', 'u', 'x', 'X', ]; let mut specifiers = ['h', 'j', 'l', 'L', 't', 'z']; legal_fields.sort_unstable(); @@ -391,6 +391,7 @@ impl Sub { // if %s just return arg // if %b use UnescapedText module's unescape-fn // if %c return first char of arg + // if %q return arg which non-printable characters are escaped FieldType::Strf | FieldType::Charf => { match pf_arg { Some(arg_string) => { @@ -404,11 +405,32 @@ impl Sub { UnescapedText::from_it_core(writer, &mut a_it, true); None } - // for 'c': get iter of string vals, + 'q' => { + let mut non_printable_char = [ + '`', '#', '$', '^', '&', '*', '(', ')', '[', ']', '\\', '{', + '}', '|', ';', '\'', '"', '<', '>', '?', + ]; + non_printable_char.sort_unstable(); + + let arg_string = match field.second_field { + Some(max) => String::from(&arg_string[..max as usize]), + None => arg_string.clone(), + 
}; + + let mut new_arg_string = String::new(); + for c in arg_string.chars() { + if non_printable_char.binary_search(&c).is_ok() { + new_arg_string.push('\\'); + } + new_arg_string.push(c); + } + + Some(new_arg_string) + } // get opt of first val // and map it to opt - /* 'c' | */ - _ => arg_string.chars().next().map(|x| x.to_string()), + 'c' => arg_string.chars().next().map(|x| x.to_string()), + _ => unreachable!(), } } None => None, From 5e2bad413f4a80c7ed834fff479aec635a4f03b3 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Sun, 29 Oct 2023 21:26:11 +0800 Subject: [PATCH 02/12] tests/printf: support %q --- tests/by-util/test_printf.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index d7ba5679ecf..fab17b0540b 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -120,6 +120,22 @@ fn sub_b_string_ignore_subs() { .stdout_only("hello world %% %i"); } +#[test] +fn sub_q_string_non_printable() { + new_ucmd!() + .args(&["non-printable: %q", "\"$test\""]) + .succeeds() + .stdout_only("non-printable: \\\"\\$test\\\""); +} + +#[test] +fn sub_q_string_more_non_printable() { + new_ucmd!() + .args(&["non-printable: %q", "[]{}<>"]) + .succeeds() + .stdout_only("non-printable: \\[\\]\\{\\}\\<\\>"); +} + #[test] fn sub_char() { new_ucmd!() From 3be960494211ed38212d156c04dd9534cc257713 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Sun, 29 Oct 2023 21:26:22 +0800 Subject: [PATCH 03/12] add %q --- src/uu/printf/printf.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/uu/printf/printf.md b/src/uu/printf/printf.md index 60b50354c6f..42f43fe9cf3 100644 --- a/src/uu/printf/printf.md +++ b/src/uu/printf/printf.md @@ -78,6 +78,9 @@ Fields second parameter is min-width, integer output below that width is padded with leading zeroes +* `%q`: ARGUMENT is printed in a format that can be reused as shell input, escaping non-printable + characters with the proposed 
POSIX $'' syntax. + * `%f` or `%F`: decimal floating point value * `%e` or `%E`: scientific notation floating point value * `%g` or `%G`: shorter of specially interpreted decimal or SciNote floating point value. @@ -181,6 +184,11 @@ All string fields have a 'max width' parameter still be interpreted and not throw a warning, you will have problems if you use this for a literal whose code begins with zero, as it will be viewed as in `\\0NNN` form.) +* `%q`: escaped string - the string in a format that can be reused as input by most shells. + Non-printable characters are escaped with the POSIX proposed ‘$''’ syntax, + and shell metacharacters are quoted appropriately. + This is an equivalent format to ls --quoting=shell-escape output. + #### CHAR SUBSTITUTIONS The character field does not have a secondary parameter. From d2d295347f1ca509ef2200a20c2db4a25eb16770 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Mon, 30 Oct 2023 16:26:16 +0800 Subject: [PATCH 04/12] printf: add " " in non-printable chars --- src/uucore/src/lib/features/tokenize/sub.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs index cfe3deb42e0..9aca7600450 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -406,11 +406,11 @@ impl Sub { None } 'q' => { - let mut non_printable_char = [ + let mut non_printable_chars = [ '`', '#', '$', '^', '&', '*', '(', ')', '[', ']', '\\', '{', - '}', '|', ';', '\'', '"', '<', '>', '?', + '}', '|', ';', '\'', '"', '<', '>', '?', ' ', ]; - non_printable_char.sort_unstable(); + non_printable_chars.sort_unstable(); let arg_string = match field.second_field { Some(max) => String::from(&arg_string[..max as usize]), @@ -419,7 +419,7 @@ impl Sub { let mut new_arg_string = String::new(); for c in arg_string.chars() { - if non_printable_char.binary_search(&c).is_ok() { + if 
non_printable_chars.binary_search(&c).is_ok() { new_arg_string.push('\\'); } new_arg_string.push(c); From c41dface0889fd1f06b583398830661086f7fba0 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Mon, 30 Oct 2023 16:26:56 +0800 Subject: [PATCH 05/12] printf: add check for "~" --- src/uucore/src/lib/features/tokenize/sub.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs index 9aca7600450..77126df94cb 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -418,6 +418,12 @@ impl Sub { }; let mut new_arg_string = String::new(); + // `~` is non-printable only when being the first char. + if let Some(first) = arg_string.chars().peekable().peek() { + if first == &'~' { + new_arg_string.push('\\'); + } + } for c in arg_string.chars() { if non_printable_chars.binary_search(&c).is_ok() { new_arg_string.push('\\'); From 33f2de14bfd883ec26fc32f067ec275be2f03fd7 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Mon, 30 Oct 2023 16:27:14 +0800 Subject: [PATCH 06/12] tests/printf: add test for "~" and " " --- tests/by-util/test_printf.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index fab17b0540b..8798a753072 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -136,6 +136,14 @@ fn sub_q_string_more_non_printable() { .stdout_only("non-printable: \\[\\]\\{\\}\\<\\>"); } +#[test] +fn sub_q_string_special_non_printable() { + new_ucmd!() + .args(&["non-printable: %q", "~ ~"]) + .succeeds() + .stdout_only("non-printable: \\~\\ ~"); +} + #[test] fn sub_char() { new_ucmd!() From 53395ede8134d7a0887f0b7949b3e6852069533f Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 15:27:10 +0800 Subject: [PATCH 07/12] printf: use escape_name --- src/uucore/src/lib/features/tokenize/sub.rs | 31 ++++++--------------- 1 file changed, 
9 insertions(+), 22 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs index 77126df94cb..93a14878867 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -10,6 +10,7 @@ //! Subs which have numeric field chars make use of the num_format //! submodule use crate::error::{UError, UResult}; +use crate::quoting_style::{escape_name, QuotingStyle}; use itertools::{put_back_n, PutBackN}; use std::error::Error; use std::fmt::Display; @@ -406,32 +407,18 @@ impl Sub { None } 'q' => { - let mut non_printable_chars = [ - '`', '#', '$', '^', '&', '*', '(', ')', '[', ']', '\\', '{', - '}', '|', ';', '\'', '"', '<', '>', '?', ' ', - ]; - non_printable_chars.sort_unstable(); - let arg_string = match field.second_field { Some(max) => String::from(&arg_string[..max as usize]), None => arg_string.clone(), }; - - let mut new_arg_string = String::new(); - // `~` is non-printable only when being the first char. 
- if let Some(first) = arg_string.chars().peekable().peek() { - if first == &'~' { - new_arg_string.push('\\'); - } - } - for c in arg_string.chars() { - if non_printable_chars.binary_search(&c).is_ok() { - new_arg_string.push('\\'); - } - new_arg_string.push(c); - } - - Some(new_arg_string) + Some(escape_name( + arg_string.as_ref(), + &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }, + )) } // get opt of first val // and map it to opt From fc0f5816e8b4d5445be997292eb67599eabde478 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 15:29:02 +0800 Subject: [PATCH 08/12] tests/printf: correct test in %q --- tests/by-util/test_printf.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 8798a753072..b2c148f7140 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -125,23 +125,15 @@ fn sub_q_string_non_printable() { new_ucmd!() .args(&["non-printable: %q", "\"$test\""]) .succeeds() - .stdout_only("non-printable: \\\"\\$test\\\""); -} - -#[test] -fn sub_q_string_more_non_printable() { - new_ucmd!() - .args(&["non-printable: %q", "[]{}<>"]) - .succeeds() - .stdout_only("non-printable: \\[\\]\\{\\}\\<\\>"); + .stdout_only("non-printable: '\"$test\"'"); } #[test] fn sub_q_string_special_non_printable() { new_ucmd!() - .args(&["non-printable: %q", "~ ~"]) + .args(&["non-printable: %q", "test~"]) .succeeds() - .stdout_only("non-printable: \\~\\ ~"); + .stdout_only("non-printable: test~"); } #[test] From 55e737005aa8516a6bea2ff856cd3d86f7e764af Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 15:30:53 +0800 Subject: [PATCH 09/12] fix typo --- src/uu/printf/printf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/printf/printf.md b/src/uu/printf/printf.md index 42f43fe9cf3..fb965ae3f7b 100644 --- a/src/uu/printf/printf.md +++ b/src/uu/printf/printf.md @@ 
-186,7 +186,7 @@ All string fields have a 'max width' parameter * `%q`: escaped string - the string in a format that can be reused as input by most shells. Non-printable characters are escaped with the POSIX proposed ‘$''’ syntax, - and shell metacharacters are quoted appropriately. + and shell meta-characters are quoted appropriately. This is an equivalent format to ls --quoting=shell-escape output. #### CHAR SUBSTITUTIONS From c3b4718cc388117c78f23410f91557170843013c Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 22:57:51 +0800 Subject: [PATCH 10/12] test/printf: validate field params for %b and %q --- tests/by-util/test_printf.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index b2c148f7140..a297dbf6833 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -112,6 +112,15 @@ fn sub_b_string_handle_escapes() { .stdout_only("hello \tworld"); } +#[test] +fn sub_b_string_validate_field_params() { + new_ucmd!() + .args(&["hello %7b", "world"]) + .run() + .stdout_is("hello ") + .stderr_is("printf: %7b: invalid conversion specification\n"); +} + #[test] fn sub_b_string_ignore_subs() { new_ucmd!() @@ -128,6 +137,15 @@ fn sub_q_string_non_printable() { .stdout_only("non-printable: '\"$test\"'"); } +#[test] +fn sub_q_string_validate_field_params() { + new_ucmd!() + .args(&["hello %7q", "world"]) + .run() + .stdout_is("hello ") + .stderr_is("printf: %7q: invalid conversion specification\n"); +} + #[test] fn sub_q_string_special_non_printable() { new_ucmd!() From 6c89a4e1db7f22816acca0921bb67adcd9292f45 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 23:00:44 +0800 Subject: [PATCH 11/12] printf: validate field params for %q --- src/uucore/src/lib/features/tokenize/sub.rs | 24 ++++++++------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs 
b/src/uucore/src/lib/features/tokenize/sub.rs index 93a14878867..c7cb4180c3c 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -332,7 +332,7 @@ impl SubParser { if (field_char == 's' && self.min_width_tmp == Some(String::from("0"))) || (field_char == 'c' && (self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) - || (field_char == 'b' + || ((field_char == 'b' || field_char == 'q') && (self.min_width_tmp.is_some() || self.past_decimal || self.second_field_tmp.is_some())) @@ -406,20 +406,14 @@ impl Sub { UnescapedText::from_it_core(writer, &mut a_it, true); None } - 'q' => { - let arg_string = match field.second_field { - Some(max) => String::from(&arg_string[..max as usize]), - None => arg_string.clone(), - }; - Some(escape_name( - arg_string.as_ref(), - &QuotingStyle::Shell { - escape: true, - always_quote: false, - show_control: false, - }, - )) - } + 'q' => Some(escape_name( + arg_string.as_ref(), + &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }, + )), // get opt of first val // and map it to opt 'c' => arg_string.chars().next().map(|x| x.to_string()), From 51966f5d72afab68286031ab49d3e28f1d46e879 Mon Sep 17 00:00:00 2001 From: Zhuoxun Yang Date: Thu, 2 Nov 2023 23:01:21 +0800 Subject: [PATCH 12/12] unnecessary push to text_so_far --- src/uucore/src/lib/features/tokenize/sub.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs index c7cb4180c3c..0ae966fc332 100644 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ b/src/uucore/src/lib/features/tokenize/sub.rs @@ -261,7 +261,6 @@ impl SubParser { } x if legal_fields.binary_search(&x).is_ok() => { self.field_char = Some(ch); - self.text_so_far.push(ch); break; } x if specifiers.binary_search(&x).is_ok() => {