-
Notifications
You must be signed in to change notification settings - Fork 15k
[libc] fixed signed char issues in strsep()/strtok()/strtok_r(). #156705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Also add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules.
@llvm/pr-subscribers-libc Author: None (enh-google) ChangesAlso add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules. Full diff: https://github.com/llvm/llvm-project/pull/156705.diff 9 Files Affected:
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 26e9adde0d66e..10803488b6cf5 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -212,28 +212,28 @@ LIBC_INLINE char *string_token(char *__restrict src,
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
cpp::bitset<256> delims;
for (; *delimiter_string != '\0'; ++delimiter_string)
- delims.set(static_cast<size_t>(*delimiter_string));
+ delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string));
- char *tok_start = src;
+ unsigned char *tok_start = reinterpret_cast<unsigned char *>(src);
if constexpr (SkipDelim)
- while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+ while (*tok_start != '\0' && delims.test(*tok_start))
++tok_start;
if (*tok_start == '\0' && SkipDelim) {
*context = nullptr;
return nullptr;
}
- char *tok_end = tok_start;
- while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+ unsigned char *tok_end = tok_start;
+ while (*tok_end != '\0' && !delims.test(*tok_end))
++tok_end;
if (*tok_end == '\0') {
*context = nullptr;
} else {
*tok_end = '\0';
- *context = tok_end + 1;
+ *context = reinterpret_cast<char *>(tok_end + 1);
}
- return tok_start;
+ return reinterpret_cast<char *>(tok_start);
}
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp
index d83b3cf4fdfe8..ec98f72e37113 100644
--- a/libc/test/src/string/strcspn_test.cpp
+++ b/libc/test/src/string/strcspn_test.cpp
@@ -48,3 +48,7 @@ TEST(LlvmLibcStrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) {
EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0});
EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0});
}
+
+TEST(LlvmLibcStrCSpnTest, TopBitSet) {
+ EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5});
+}
diff --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp
index fbe14da12ac10..cc802460d10be 100644
--- a/libc/test/src/string/strpbrk_test.cpp
+++ b/libc/test/src/string/strpbrk_test.cpp
@@ -60,3 +60,7 @@ TEST(LlvmLibcStrPBrkTest, FindsFirstOfRepeated) {
TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) {
EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345");
}
+
+TEST(LlvmLibcStrPBrkTest, TopBitSet) {
+ EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world");
+}
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index e2a5d52bbeddb..553edd99604ef 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -61,6 +61,14 @@ TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
}
+TEST(LlvmLibcStrsepTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ char *p = top_bit_set_str;
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr);
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp
index 82f9b2aef0dfd..813612f09fc16 100644
--- a/libc/test/src/string/strspn_test.cpp
+++ b/libc/test/src/string/strspn_test.cpp
@@ -85,6 +85,10 @@ TEST(LlvmLibcStrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4});
}
+TEST(LlvmLibcStrSpnTest, TopBitSet) {
+ EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6});
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index a19390d0b0c2d..8c4d3c362f778 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -131,3 +131,11 @@ TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
}
+
+TEST(LlvmLibcStrTokReentrantTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ char *p;
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index 76efeddda6f4a..3c097fdee0713 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -83,3 +83,10 @@ TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
}
+
+TEST(LlvmLibcStrTokTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b2cd3fdd468af..acfd0d96a28bf 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5251,6 +5251,16 @@ libc_function(
],
)
+libc_function(
+ name = "strtok_r",
+ srcs = ["src/string/strtok_r.cpp"],
+ hdrs = ["src/string/strtok_r.h"],
+ deps = [
+ ":__support_common",
+ ":string_utils",
+ ],
+)
+
################################ fcntl targets #################################
libc_function(
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index d90992417a721..1a95dece8bf20 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,14 @@ libc_test(
],
)
+libc_test(
+ name = "strpbrk_test",
+ srcs = ["strpbrk_test.cpp"],
+ deps = [
+ "//libc:strpbrk",
+ ],
+)
+
libc_test(
name = "strsep_test",
srcs = ["strsep_test.cpp"],
@@ -127,6 +135,14 @@ libc_test(
],
)
+libc_test(
+ name = "strtok_r_test",
+ srcs = ["strtok_r_test.cpp"],
+ deps = [
+ "//libc:strtok_r",
+ ],
+)
+
libc_test_library(
name = "memory_check_utils",
hdrs = ["memory_utils/memory_check_utils.h"],
|
lntue
approved these changes
Sep 3, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Also add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules.