Skip to content

Commit aaa12c8

Browse files
authored
more thread testing (simdjson#1324)
* Adding a few tests. * More tests. * Trailing. * More links/documentations.
1 parent 9b5150d commit aaa12c8

File tree

4 files changed

+113
-6
lines changed

4 files changed

+113
-6
lines changed

.circleci/config.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,16 @@ jobs:
181181
executor: clang10
182182
environment: { CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_SANITIZE=ON, CTEST_FLAGS: --output-on-failure -E checkperf }
183183
steps: [ cmake_test ]
184-
184+
threadsanitize-gcc10:
185+
description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build
186+
executor: gcc10
187+
environment: { CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_SANITIZE_THREADS=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -E checkperf }
188+
steps: [ cmake_test ]
189+
threadsanitize-clang10:
190+
description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build
191+
executor: clang10
192+
environment: { CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_SANITIZE_THREADS=ON, CTEST_FLAGS: --output-on-failure -E checkperf }
193+
steps: [ cmake_test ]
185194
# dynamic
186195
dynamic-gcc10:
187196
description: Build and run tests on GCC 10 and AVX 2 with a cmake dynamic build
@@ -260,6 +269,8 @@ workflows:
260269
# full single-implementation tests
261270
- sanitize-gcc10
262271
- sanitize-clang10
272+
- threadsanitize-gcc10
273+
- threadsanitize-clang10
263274
- dynamic-gcc10
264275
- dynamic-clang10
265276
- unthreaded-gcc10

.github/workflows/fix-trailing-whitespace.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ jobs:
1515
set -eu
1616
scripts/remove_trailing_whitespace.sh
1717
git diff >whitespace.patch
18+
cat whitespace.patch
1819
if [ $(wc -c <whitespace.patch) -ne 0 ] ; then
1920
echo " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! "
2021
echo "You have trailing whitespace, please download the artifact"

include/simdjson/dom/document_stream-inl.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,18 @@ namespace dom {
1010

1111
#ifdef SIMDJSON_THREADS_ENABLED
1212
inline void stage1_worker::finish() {
13+
// After calling "run" someone would call finish() to wait
14+
// for the end of the processing.
15+
// This function will wait until either the thread has done
16+
// the processing or, else, the destructor has been called.
1317
std::unique_lock<std::mutex> lock(locking_mutex);
1418
cond_var.wait(lock, [this]{return has_work == false;});
1519
}
1620

1721
inline stage1_worker::~stage1_worker() {
22+
// The thread may never outlive the stage1_worker instance
23+
// and will always be stopped/joined before the stage1_worker
24+
// instance is gone.
1825
stop_thread();
1926
}
2027

@@ -26,15 +33,22 @@ inline void stage1_worker::start_thread() {
2633
thread = std::thread([this]{
2734
while(true) {
2835
std::unique_lock<std::mutex> thread_lock(locking_mutex);
36+
// We wait for either "run" or "stop_thread" to be called.
2937
cond_var.wait(thread_lock, [this]{return has_work || !can_work;});
38+
// If, for some reason, the stop_thread() method was called (i.e., the
39+
// destructor of stage1_worker is called, then we want to immediately destroy
40+
// the thread (and not do any more processing).
3041
if(!can_work) {
3142
break;
3243
}
3344
this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser,
3445
this->_next_batch_start);
3546
this->has_work = false;
36-
thread_lock.unlock();
47+
// The condition variable call should be moved after thread_lock.unlock() for performance
48+
// reasons but thread sanitizers may report it as a data race if we do.
49+
// See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
3750
cond_var.notify_one(); // will notify "finish"
51+
thread_lock.unlock();
3852
}
3953
}
4054
);
@@ -46,8 +60,8 @@ inline void stage1_worker::stop_thread() {
4660
// We have to make sure that all locks can be released.
4761
can_work = false;
4862
has_work = false;
49-
lock.unlock();
5063
cond_var.notify_all();
64+
lock.unlock();
5165
if(thread.joinable()) {
5266
thread.join();
5367
}
@@ -59,8 +73,11 @@ inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_
5973
_next_batch_start = next_batch_start;
6074
stage1_thread_parser = stage1;
6175
has_work = true;
76+
// The condition variable call should be moved after thread_lock.unlock() for performance
77+
// reasons but thread sanitizers may report it as a data race if we do.
78+
// See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
79+
cond_var.notify_one(); // will notify the thread lock that we have work
6280
lock.unlock();
63-
cond_var.notify_one();// will notify the thread lock
6481
}
6582
#endif
6683

@@ -167,8 +184,13 @@ inline void document_stream::start() noexcept {
167184
// Always run the first stage 1 parse immediately
168185
batch_start = 0;
169186
error = run_stage1(*parser, batch_start);
187+
if(error == EMPTY) {
188+
// In exceptional cases, we may start with an empty block
189+
batch_start = next_batch_start();
190+
if (batch_start >= len) { return; }
191+
error = run_stage1(*parser, batch_start);
192+
}
170193
if (error) { return; }
171-
172194
#ifdef SIMDJSON_THREADS_ENABLED
173195
if (use_thread && next_batch_start() < len) {
174196
// Kick off the first thread if needed

tests/document_stream_tests.cpp

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,76 @@ namespace document_stream_tests {
8181
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
8282
}
8383

84+
bool stress_data_race() {
85+
std::cout << "Running " << __func__ << std::endl;
86+
// Correct JSON.
87+
const simdjson::padded_string input = R"([1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
88+
// This will spin up and tear down 1000 worker threads.
89+
for(size_t i = 0; i < 1; i++) {
90+
simdjson::dom::parser parser;
91+
simdjson::dom::document_stream stream;
92+
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
93+
for(auto doc: stream) {
94+
auto error = doc.error();
95+
if(error) {
96+
std::cout << "Expected no error but got " << error << std::endl;
97+
return false;
98+
}
99+
}
100+
}
101+
return true;
102+
}
103+
104+
bool stress_data_race_with_error() {
105+
std::cout << "Running " << __func__ << std::endl;
106+
// Intentionally broken
107+
const simdjson::padded_string input = R"([1,23] [1,23] [1,23] [1,23 [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
108+
// This will spin up and tear down 1000 worker threads.
109+
for(size_t i = 0; i < 1; i++) {
110+
simdjson::dom::parser parser;
111+
simdjson::dom::document_stream stream;
112+
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
113+
size_t count = 0;
114+
for(auto doc: stream) {
115+
auto error = doc.error();
116+
if(count <= 2) {
117+
if(error) {
118+
std::cout << "Expected no error but got " << error << std::endl;
119+
return false;
120+
}
121+
} else {
122+
if(!error) {
123+
std::cout << "Expected an error but got " << error << std::endl;
124+
return false;
125+
}
126+
break;
127+
}
128+
count++;
129+
}
130+
}
131+
return true;
132+
}
133+
134+
bool test_leading_spaces() {
135+
std::cout << "Running " << __func__ << std::endl;
136+
const simdjson::padded_string input = R"( [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;;
137+
size_t count = 0;
138+
simdjson::dom::parser parser;
139+
simdjson::dom::document_stream stream;
140+
ASSERT_SUCCESS(parser.parse_many(input, 32).get(stream));
141+
count = 0;
142+
for(auto doc: stream) {
143+
auto error = doc.error();
144+
if(error) {
145+
std::cout << "Expected no error but got " << error << std::endl;
146+
return false;
147+
}
148+
count++;
149+
}
150+
return count == 15;
151+
}
152+
153+
84154
bool issue1307() {
85155
std::cout << "Running " << __func__ << std::endl;
86156
const simdjson::padded_string input = decode_base64("AgAMACA=");
@@ -588,7 +658,10 @@ namespace document_stream_tests {
588658
}
589659

590660
bool run() {
591-
return simple_example() &&
661+
return stress_data_race() &&
662+
stress_data_race_with_error() &&
663+
test_leading_spaces() &&
664+
simple_example() &&
592665
truncated_window() &&
593666
truncated_window_unclosed_string() &&
594667
issue1307() &&

0 commit comments

Comments
 (0)