Skip to content

Commit 4ea866f

Browse files
committed
Move stage2 classes into their own files
1 parent a476531 commit 4ea866f

16 files changed

+117
-110
lines changed

HACKING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ simdjson's source structure, from the top level, looks like this:
2929
```c++
3030
namespace simdjson {
3131
namespace haswell {
32-
#include "generic/stage1_find_marks.h"
32+
#include "generic/stage1.h"
3333
}
3434
}
3535
```

src/arm64/stage1_find_marks.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
2-
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
1+
#ifndef SIMDJSON_ARM64_STAGE1_H
2+
#define SIMDJSON_ARM64_STAGE1_H
33

44
#include "simdjson.h"
55
#include "arm64/bitmask.h"
@@ -89,4 +89,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
8989
} // namespace arm64
9090
} // namespace simdjson
9191

92-
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
92+
#endif // SIMDJSON_ARM64_STAGE1_H

src/arm64/stage2_build_tape.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
2-
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
1+
#ifndef SIMDJSON_ARM64_STAGE2_H
2+
#define SIMDJSON_ARM64_STAGE2_H
33

44
#include "simdjson.h"
55
#include "arm64/implementation.h"
@@ -10,10 +10,11 @@ namespace simdjson {
1010
namespace arm64 {
1111

1212
#include "generic/atomparsing.h"
13-
#include "generic/stage2_build_tape.h"
14-
#include "generic/stage2_streaming_build_tape.h"
13+
#include "generic/structural_iterator.h"
14+
#include "generic/structural_parser.h"
15+
#include "generic/streaming_structural_parser.h"
1516

1617
} // namespace arm64
1718
} // namespace simdjson
1819

19-
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
20+
#endif // SIMDJSON_ARM64_STAGE2_H

src/fallback/stage1_find_marks.h renamed to src/fallback/stage1.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
2-
#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
1+
#ifndef SIMDJSON_FALLBACK_STAGE1_H
2+
#define SIMDJSON_FALLBACK_STAGE1_H
33

44
#include "simdjson.h"
55
#include "fallback/implementation.h"
@@ -211,4 +211,4 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
211211
} // namespace fallback
212212
} // namespace simdjson
213213

214-
#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
214+
#endif // SIMDJSON_FALLBACK_STAGE1_H
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
2-
#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
1+
#ifndef SIMDJSON_FALLBACK_STAGE2_H
2+
#define SIMDJSON_FALLBACK_STAGE2_H
33

44
#include "simdjson.h"
55

@@ -11,10 +11,11 @@ namespace simdjson {
1111
namespace fallback {
1212

1313
#include "generic/atomparsing.h"
14-
#include "generic/stage2_build_tape.h"
15-
#include "generic/stage2_streaming_build_tape.h"
14+
#include "generic/structural_iterator.h"
15+
#include "generic/structural_parser.h"
16+
#include "generic/streaming_structural_parser.h"
1617

1718
} // namespace fallback
1819
} // namespace simdjson
1920

20-
#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
21+
#endif // SIMDJSON_FALLBACK_STAGE2_H

src/generic/json_minifier.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// This file contains the common code every implementation uses in stage1
22
// It is intended to be included multiple times and compiled multiple times
33
// We assume the file in which it is included already includes
4-
// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
4+
// "simdjson/stage1.h" (this simplifies amalgamation)
55

66
namespace stage1 {
77

src/generic/json_structural_indexer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// This file contains the common code every implementation uses in stage1
22
// It is intended to be included multiple times and compiled multiple times
33
// We assume the file in which it is included already includes
4-
// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
4+
// "simdjson/stage1.h" (this simplifies amalgamation)
55

66
namespace stage1 {
77

src/generic/structural_iterator.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
namespace stage2 {
2+
3+
class structural_iterator {
4+
public:
5+
really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
6+
: buf{_buf},
7+
len{_len},
8+
structural_indexes{_structural_indexes},
9+
next_structural{next_structural_index}
10+
{}
11+
really_inline char advance_char() {
12+
idx = structural_indexes[next_structural];
13+
next_structural++;
14+
c = *current();
15+
return c;
16+
}
17+
really_inline char current_char() {
18+
return c;
19+
}
20+
really_inline const uint8_t* current() {
21+
return &buf[idx];
22+
}
23+
really_inline size_t remaining_len() {
24+
return len - idx;
25+
}
26+
template<typename F>
27+
really_inline bool with_space_terminated_copy(const F& f) {
28+
/**
29+
* We need to make a copy to make sure that the string is space terminated.
30+
* This is not about padding the input, which should already be padded up
31+
* to len + SIMDJSON_PADDING. However, we have no control at this stage
32+
* on how the padding was done. What if the input string was padded with nulls?
33+
* It is quite common for an input string to have an extra null character (C string).
34+
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
35+
* document, but the string "9\0" by itself is fine. So we make a copy and
36+
* pad the input with spaces when we know that there is just one input element.
37+
* This copy is relatively expensive, but it will almost never be called in
38+
* practice unless you are in the strange scenario where you have many JSON
39+
* documents made of single atoms.
40+
*/
41+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
42+
if (copy == nullptr) {
43+
return true;
44+
}
45+
memcpy(copy, buf, len);
46+
memset(copy + len, ' ', SIMDJSON_PADDING);
47+
bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
48+
free(copy);
49+
return result;
50+
}
51+
really_inline bool past_end(uint32_t n_structural_indexes) {
52+
return next_structural+1 > n_structural_indexes;
53+
}
54+
really_inline bool at_end(uint32_t n_structural_indexes) {
55+
return next_structural+1 == n_structural_indexes;
56+
}
57+
really_inline size_t next_structural_index() {
58+
return next_structural;
59+
}
60+
61+
const uint8_t* const buf;
62+
const size_t len;
63+
const uint32_t* const structural_indexes;
64+
size_t next_structural; // next structural index
65+
size_t idx{0}; // location of the structural character in the input (buf)
66+
uint8_t c{0}; // used to track the (structural) character we are looking at
67+
};
68+
69+
} // namespace stage2

src/generic/stage2_build_tape.h renamed to src/generic/structural_parser.h

Lines changed: 1 addition & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// This file contains the common code every implementation uses for stage2
22
// It is intended to be included multiple times and compiled multiple times
33
// We assume the file in which it is included already includes
4-
// "simdjson/stage2_build_tape.h" (this simplifies amalgation)
4+
// "simdjson/stage2.h" (this simplifies amalgamation)
55

66
namespace stage2 {
77

@@ -47,72 +47,6 @@ struct unified_machine_addresses {
4747
#undef FAIL_IF
4848
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
4949

50-
class structural_iterator {
51-
public:
52-
really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
53-
: buf{_buf},
54-
len{_len},
55-
structural_indexes{_structural_indexes},
56-
next_structural{next_structural_index}
57-
{}
58-
really_inline char advance_char() {
59-
idx = structural_indexes[next_structural];
60-
next_structural++;
61-
c = *current();
62-
return c;
63-
}
64-
really_inline char current_char() {
65-
return c;
66-
}
67-
really_inline const uint8_t* current() {
68-
return &buf[idx];
69-
}
70-
really_inline size_t remaining_len() {
71-
return len - idx;
72-
}
73-
template<typename F>
74-
really_inline bool with_space_terminated_copy(const F& f) {
75-
/**
76-
* We need to make a copy to make sure that the string is space terminated.
77-
* This is not about padding the input, which should already padded up
78-
* to len + SIMDJSON_PADDING. However, we have no control at this stage
79-
* on how the padding was done. What if the input string was padded with nulls?
80-
* It is quite common for an input string to have an extra null character (C string).
81-
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
82-
* document, but the string "9\0" by itself is fine. So we make a copy and
83-
* pad the input with spaces when we know that there is just one input element.
84-
* This copy is relatively expensive, but it will almost never be called in
85-
* practice unless you are in the strange scenario where you have many JSON
86-
* documents made of single atoms.
87-
*/
88-
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
89-
if (copy == nullptr) {
90-
return true;
91-
}
92-
memcpy(copy, buf, len);
93-
memset(copy + len, ' ', SIMDJSON_PADDING);
94-
bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
95-
free(copy);
96-
return result;
97-
}
98-
really_inline bool past_end(uint32_t n_structural_indexes) {
99-
return next_structural+1 > n_structural_indexes;
100-
}
101-
really_inline bool at_end(uint32_t n_structural_indexes) {
102-
return next_structural+1 == n_structural_indexes;
103-
}
104-
really_inline size_t next_structural_index() {
105-
return next_structural;
106-
}
107-
108-
const uint8_t* const buf;
109-
const size_t len;
110-
const uint32_t* const structural_indexes;
111-
size_t next_structural; // next structural index
112-
size_t idx{0}; // location of the structural character in the input (buf)
113-
uint8_t c{0}; // used to track the (structural) character we are looking at
114-
};
115-
11650
struct number_writer {
11751
parser &doc_parser;
11852

src/haswell/stage1_find_marks.h renamed to src/haswell/stage1.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
2-
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
1+
#ifndef SIMDJSON_HASWELL_STAGE1_H
2+
#define SIMDJSON_HASWELL_STAGE1_H
33

44
#include "simdjson.h"
55

@@ -80,4 +80,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
8080
} // namespace simdjson
8181
UNTARGET_REGION
8282

83-
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
83+
#endif // SIMDJSON_HASWELL_STAGE1_H
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
2-
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
1+
#ifndef SIMDJSON_HASWELL_STAGE2_H
2+
#define SIMDJSON_HASWELL_STAGE2_H
33

44
#include "simdjson.h"
55
#include "haswell/implementation.h"
@@ -11,11 +11,12 @@ namespace simdjson {
1111
namespace haswell {
1212

1313
#include "generic/atomparsing.h"
14-
#include "generic/stage2_build_tape.h"
15-
#include "generic/stage2_streaming_build_tape.h"
14+
#include "generic/structural_iterator.h"
15+
#include "generic/structural_parser.h"
16+
#include "generic/streaming_structural_parser.h"
1617

1718
} // namespace haswell
1819
} // namespace simdjson
1920
UNTARGET_REGION
2021

21-
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
22+
#endif // SIMDJSON_HASWELL_STAGE2_H

src/stage1_find_marks.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#if SIMDJSON_IMPLEMENTATION_ARM64
2-
#include "arm64/stage1_find_marks.h"
2+
#include "arm64/stage1.h"
33
#endif
44
#if SIMDJSON_IMPLEMENTATION_FALLBACK
5-
#include "fallback/stage1_find_marks.h"
5+
#include "fallback/stage1.h"
66
#endif
77
#if SIMDJSON_IMPLEMENTATION_HASWELL
8-
#include "haswell/stage1_find_marks.h"
8+
#include "haswell/stage1.h"
99
#endif
1010
#if SIMDJSON_IMPLEMENTATION_WESTMERE
11-
#include "westmere/stage1_find_marks.h"
11+
#include "westmere/stage1.h"
1212
#endif

src/stage2_build_tape.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ void found_bad_string(const uint8_t *buf);
1212
#endif
1313

1414
#if SIMDJSON_IMPLEMENTATION_ARM64
15-
#include "arm64/stage2_build_tape.h"
15+
#include "arm64/stage2.h"
1616
#endif
1717
#if SIMDJSON_IMPLEMENTATION_FALLBACK
18-
#include "fallback/stage2_build_tape.h"
18+
#include "fallback/stage2.h"
1919
#endif
2020
#if SIMDJSON_IMPLEMENTATION_HASWELL
21-
#include "haswell/stage2_build_tape.h"
21+
#include "haswell/stage2.h"
2222
#endif
2323
#if SIMDJSON_IMPLEMENTATION_WESTMERE
24-
#include "westmere/stage2_build_tape.h"
24+
#include "westmere/stage2.h"
2525
#endif

src/westmere/stage1_find_marks.h renamed to src/westmere/stage1.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
2-
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
1+
#ifndef SIMDJSON_WESTMERE_STAGE1_H
2+
#define SIMDJSON_WESTMERE_STAGE1_H
33

44
#include "simdjson.h"
55
#include "westmere/bitmask.h"
@@ -79,4 +79,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
7979
} // namespace simdjson
8080
UNTARGET_REGION
8181

82-
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
82+
#endif // SIMDJSON_WESTMERE_STAGE1_H
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
2-
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
1+
#ifndef SIMDJSON_WESTMERE_STAGE2_H
2+
#define SIMDJSON_WESTMERE_STAGE2_H
33

44
#include "simdjson.h"
55
#include "westmere/implementation.h"
@@ -11,10 +11,11 @@ namespace simdjson {
1111
namespace westmere {
1212

1313
#include "generic/atomparsing.h"
14-
#include "generic/stage2_build_tape.h"
15-
#include "generic/stage2_streaming_build_tape.h"
14+
#include "generic/structural_iterator.h"
15+
#include "generic/structural_parser.h"
16+
#include "generic/streaming_structural_parser.h"
1617

1718
} // namespace westmere
1819
} // namespace simdjson
1920
UNTARGET_REGION
20-
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
21+
#endif // SIMDJSON_WESTMERE_STAGE2_H

0 commit comments

Comments
 (0)