Skip to content

Conversation

enh-google
Copy link
Contributor

Also add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules.

Also add the missing tests for all the related functions (even the ones
that were already right), and add the missing bazel build rules.
@llvmbot llvmbot added libc bazel "Peripheral" support tier build system: utils/bazel labels Sep 3, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 3, 2025

@llvm/pr-subscribers-libc

Author: None (enh-google)

Changes

Also add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules.


Full diff: https://github.com/llvm/llvm-project/pull/156705.diff

9 Files Affected:

  • (modified) libc/src/string/string_utils.h (+7-7)
  • (modified) libc/test/src/string/strcspn_test.cpp (+4)
  • (modified) libc/test/src/string/strpbrk_test.cpp (+4)
  • (modified) libc/test/src/string/strsep_test.cpp (+8)
  • (modified) libc/test/src/string/strspn_test.cpp (+4)
  • (modified) libc/test/src/string/strtok_r_test.cpp (+8)
  • (modified) libc/test/src/string/strtok_test.cpp (+7)
  • (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+10)
  • (modified) utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel (+16)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 26e9adde0d66e..10803488b6cf5 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -212,28 +212,28 @@ LIBC_INLINE char *string_token(char *__restrict src,
   static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
   cpp::bitset<256> delims;
   for (; *delimiter_string != '\0'; ++delimiter_string)
-    delims.set(static_cast<size_t>(*delimiter_string));
+    delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string));
 
-  char *tok_start = src;
+  unsigned char *tok_start = reinterpret_cast<unsigned char *>(src);
   if constexpr (SkipDelim)
-    while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+    while (*tok_start != '\0' && delims.test(*tok_start))
       ++tok_start;
   if (*tok_start == '\0' && SkipDelim) {
     *context = nullptr;
     return nullptr;
   }
 
-  char *tok_end = tok_start;
-  while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+  unsigned char *tok_end = tok_start;
+  while (*tok_end != '\0' && !delims.test(*tok_end))
     ++tok_end;
 
   if (*tok_end == '\0') {
     *context = nullptr;
   } else {
     *tok_end = '\0';
-    *context = tok_end + 1;
+    *context = reinterpret_cast<char *>(tok_end + 1);
   }
-  return tok_start;
+  return reinterpret_cast<char *>(tok_start);
 }
 
 LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp
index d83b3cf4fdfe8..ec98f72e37113 100644
--- a/libc/test/src/string/strcspn_test.cpp
+++ b/libc/test/src/string/strcspn_test.cpp
@@ -48,3 +48,7 @@ TEST(LlvmLibcStrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) {
   EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0});
   EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0});
 }
+
+TEST(LlvmLibcStrCSpnTest, TopBitSet) {
+  EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5});
+}
diff --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp
index fbe14da12ac10..cc802460d10be 100644
--- a/libc/test/src/string/strpbrk_test.cpp
+++ b/libc/test/src/string/strpbrk_test.cpp
@@ -60,3 +60,7 @@ TEST(LlvmLibcStrPBrkTest, FindsFirstOfRepeated) {
 TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) {
   EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345");
 }
+
+TEST(LlvmLibcStrPBrkTest, TopBitSet) {
+  EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world");
+}
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index e2a5d52bbeddb..553edd99604ef 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -61,6 +61,14 @@ TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
   ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
 }
 
+TEST(LlvmLibcStrsepTest, TopBitSet) {
+  char top_bit_set_str[] = "hello\x80world";
+  char *p = top_bit_set_str;
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr);
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp
index 82f9b2aef0dfd..813612f09fc16 100644
--- a/libc/test/src/string/strspn_test.cpp
+++ b/libc/test/src/string/strspn_test.cpp
@@ -85,6 +85,10 @@ TEST(LlvmLibcStrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
   EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4});
 }
 
+TEST(LlvmLibcStrSpnTest, TopBitSet) {
+  EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6});
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index a19390d0b0c2d..8c4d3c362f778 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -131,3 +131,11 @@ TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
   ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
   ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
 }
+
+TEST(LlvmLibcStrTokReentrantTest, TopBitSet) {
+  char top_bit_set_str[] = "hello\x80world";
+  char *p;
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index 76efeddda6f4a..3c097fdee0713 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -83,3 +83,10 @@ TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
   ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
   ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
 }
+
+TEST(LlvmLibcStrTokTest, TopBitSet) {
+  char top_bit_set_str[] = "hello\x80world";
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b2cd3fdd468af..acfd0d96a28bf 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5251,6 +5251,16 @@ libc_function(
     ],
 )
 
+libc_function(
+    name = "strtok_r",
+    srcs = ["src/string/strtok_r.cpp"],
+    hdrs = ["src/string/strtok_r.h"],
+    deps = [
+        ":__support_common",
+        ":string_utils",
+    ],
+)
+
 ################################ fcntl targets #################################
 
 libc_function(
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index d90992417a721..1a95dece8bf20 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,14 @@ libc_test(
     ],
 )
 
+libc_test(
+    name = "strpbrk_test",
+    srcs = ["strpbrk_test.cpp"],
+    deps = [
+        "//libc:strpbrk",
+    ],
+)
+
 libc_test(
     name = "strsep_test",
     srcs = ["strsep_test.cpp"],
@@ -127,6 +135,14 @@ libc_test(
     ],
 )
 
+libc_test(
+    name = "strtok_r_test",
+    srcs = ["strtok_r_test.cpp"],
+    deps = [
+        "//libc:strtok_r",
+    ],
+)
+
 libc_test_library(
     name = "memory_check_utils",
     hdrs = ["memory_utils/memory_check_utils.h"],

@enh-google enh-google merged commit ded5f43 into llvm:main Sep 3, 2025
23 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bazel "Peripheral" support tier build system: utils/bazel libc
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants