Skip to content

Commit 85e84fc

Browse files
DBJDBJ authored and lemire committed
improved string padded (simdjson#440)
* dirent portable latest version
* improved: std::string argument passed by const reference; ctor added with std::string_view argument; `allocate_padded_buffer()` moved here with **optional** check on `length < 1`
* allocate_padded_buffer moved to padded_string.h
1 parent 833e5d8 commit 85e84fc

File tree

2 files changed

+65
-24
lines changed

2 files changed

+65
-24
lines changed

include/simdjson/padded_string.h

Lines changed: 65 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,39 +5,84 @@
55
#include <memory>
66
#include <string>
77

8+
// padded buffer can be made with length < 1
9+
#define SIMDJSON_OK_EMPTY_PADDED_BUFFER 1
10+
811
namespace simdjson {
912
// low-level function to allocate memory with padding so we can read past the
1013
// "length" bytes safely. if you must provide a pointer to some data, create it
1114
// with this function: length is the max. size in bytes of the string; caller is
1215
// responsible to free the memory (free(...))
13-
char *allocate_padded_buffer(size_t length);
16+
inline char *allocate_padded_buffer(size_t length) noexcept {
17+
18+
#ifndef NDEBUG
19+
#ifndef SIMDJSON_OK_EMPTY_PADDED_BUFFER
20+
if (length < 1) {
21+
errno = EINVAL;
22+
perror("simdjson::allocate_padded_buffer() length argument is less than 1");
23+
return nullptr;
24+
}
25+
#endif // SIMDJSON_OK_EMPTY_PADDED_BUFFER
26+
#endif // NDEBUG
27+
28+
// we could do a simple malloc
29+
// return (char *) malloc(length + SIMDJSON_PADDING);
30+
// However, we might as well align to cache lines...
31+
size_t totalpaddedlength = length + SIMDJSON_PADDING;
32+
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
33+
34+
#ifndef NDEBUG
35+
if (padded_buffer == nullptr) {
36+
errno = EINVAL;
37+
perror("simdjson::allocate_padded_buffer() aligned_malloc_char() failed");
38+
return nullptr;
39+
}
40+
#endif // NDEBUG
41+
42+
memset(padded_buffer + length, 0, totalpaddedlength - length);
43+
return padded_buffer;
44+
} // allocate_padded_buffer
1445

1546
// Simple string with padded allocation.
1647
// We deliberately forbid copies, users should rely on swap or move
1748
// constructors.
18-
class padded_string {
19-
public:
49+
struct padded_string final {
50+
2051
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
52+
2153
explicit padded_string(size_t length) noexcept
2254
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
2355

2456
if (data_ptr != nullptr)
2557
data_ptr[length] = '\0'; // easier when you need a c_str
2658
}
59+
2760
explicit padded_string(char *data, size_t length) noexcept
2861
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
29-
if (data_ptr != nullptr) {
62+
if (data != nullptr) {
3063
memcpy(data_ptr, data, length);
3164
data_ptr[length] = '\0'; // easier when you need a c_str
3265
}
3366
}
34-
padded_string(std::string s) noexcept
35-
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) {
67+
68+
// note: do not pass std::string arguments by value
69+
padded_string(const std::string & str_ ) noexcept
70+
: viable_size(str_.size()), data_ptr(allocate_padded_buffer(str_.size())) {
71+
if (data_ptr != nullptr) {
72+
memcpy(data_ptr, str_.data(), str_.size());
73+
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
74+
}
75+
}
76+
77+
// note: do pass std::string_view arguments by value
78+
padded_string(std::string_view sv_) noexcept
79+
: viable_size(sv_.size()), data_ptr(allocate_padded_buffer(sv_.size())) {
3680
if (data_ptr != nullptr) {
37-
memcpy(data_ptr, s.data(), s.size());
38-
data_ptr[s.size()] = '\0'; // easier when you need a c_str
81+
memcpy(data_ptr, sv_.data(), sv_.size());
82+
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
3983
}
4084
}
85+
4186
padded_string(padded_string &&o) noexcept
4287
: viable_size(o.viable_size), data_ptr(o.data_ptr) {
4388
o.data_ptr = nullptr; // we take ownership
@@ -60,21 +105,26 @@ class padded_string {
60105
o.viable_size = tmp_viable_size;
61106
}
62107

63-
~padded_string() { aligned_free_char(data_ptr); }
108+
~padded_string() {
109+
aligned_free_char(data_ptr);
110+
this->data_ptr = nullptr;
111+
}
64112

65-
size_t size() const { return viable_size; }
113+
size_t size() const { return viable_size; }
66114

67-
size_t length() const { return viable_size; }
115+
size_t length() const { return viable_size; }
68116

69-
char *data() const { return data_ptr; }
117+
char *data() const { return data_ptr; }
70118

71119
private:
72120
padded_string &operator=(const padded_string &o) = delete;
73121
padded_string(const padded_string &o) = delete;
74122

75-
size_t viable_size;
76-
char *data_ptr;
77-
};
123+
size_t viable_size ;
124+
char *data_ptr{nullptr};
125+
126+
}; // padded_string
127+
78128
} // namespace simdjson
79129

80130
#endif

src/jsonioutil.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,6 @@
44
#include <climits>
55

66
namespace simdjson {
7-
char *allocate_padded_buffer(size_t length) {
8-
// we could do a simple malloc
9-
// return (char *) malloc(length + SIMDJSON_PADDING);
10-
// However, we might as well align to cache lines...
11-
size_t totalpaddedlength = length + SIMDJSON_PADDING;
12-
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
13-
memset(padded_buffer + length, 0, totalpaddedlength - length);
14-
return padded_buffer;
15-
}
167

178
padded_string get_corpus(const std::string &filename) {
189
std::FILE *fp = std::fopen(filename.c_str(), "rb");

0 commit comments

Comments
 (0)