Eric-coder
diff --git a/‎.drone.yml
Lines changed: 13 additions & 0 deletions b/‎.drone.yml
Lines changed: 13 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md
Lines changed: 2 additions & 2 deletions b/‎CONTRIBUTING.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎Makefile
Lines changed: 2 additions & 2 deletions b/‎Makefile
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 47 additions & 19 deletions b/‎README.md
Lines changed: 47 additions & 19 deletions
diff --git a/‎amalgamation.sh
Lines changed: 1 addition & 1 deletion b/‎amalgamation.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark/benchmarker.h
Lines changed: 1 addition & 1 deletion b/‎benchmark/benchmarker.h
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark/minifiercompetition.cpp
Lines changed: 9 additions & 9 deletions b/‎benchmark/minifiercompetition.cpp
Lines changed: 9 additions & 9 deletions
diff --git a/‎benchmark/parse.cpp
Lines changed: 6 additions & 1 deletion b/‎benchmark/parse.cpp
Lines changed: 6 additions & 1 deletion
diff --git a/‎doc/tape.md
Lines changed: 3 additions & 3 deletions b/‎doc/tape.md
Lines changed: 3 additions & 3 deletions
diff --git a/‎include/CMakeLists.txt
Lines changed: 0 additions & 1 deletion b/‎include/CMakeLists.txt
Lines changed: 0 additions & 1 deletion
diff --git a/‎include/simdjson.h
Lines changed: 0 additions & 1 deletion b/‎include/simdjson.h
Lines changed: 0 additions & 1 deletion
diff --git a/‎include/simdjson/document.h
Lines changed: 14 additions & 1 deletion b/‎include/simdjson/document.h
Lines changed: 14 additions & 1 deletion
@@ -1,4 +1,17 @@
 kind: pipeline
+name: x64-quicktests-libc
+
+platform:
+  os: linux
+  arch: amd64
+
+steps:
+- name: quicktests
+  image: conanio/clang8
+  user: root
+  commands: [ EXTRAFLAGS=-stdlib=libc++ make quicktests ]
+---
+kind: pipeline
 name: x64-quicktests
 
 platform:
 
@@ -6,7 +6,7 @@ In particular, the following contributions are invited:
 
 - The library is focused on performance. Well-documented performance optimizations are invited.
 - Fixes to known or newly discovered bugs are always welcome. Typically, a bug fix should come with a test demonstrating that the bug has been fixed.
-- The simdjson library is advanced software and maintanability and flexibility are always a concern. Specific contributions to improve maintanability and flexibility are invited.
+- The simdjson library is advanced software and maintainability and flexibility are always a concern. Specific contributions to improve maintainability and flexibility are invited.
 
 
 
@@ -28,5 +28,5 @@ Contributors are encouraged to
 
 
 
-Though we do not have a formal code of conduct, we will not tolerate bullying, bigotery or intimidation. Everyone is welcome to contribute.
+Though we do not have a formal code of conduct, we will not tolerate bullying, bigotry or intimidation. Everyone is welcome to contribute.
 
@@ -53,15 +53,15 @@ endif # ifeq ($(SANITIZE),1)
 endif # ifeq ($(MEMSANITIZE),1)
 
 # Headers and sources
-SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
+SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/json_scanner.h src/generic/json_string_scanner.h src/generic/json_structural_indexer.h src/generic/json_minifier.h src/generic/buf_block_reader.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
 SRCHEADERS_ARM64=      src/arm64/bitmanipulation.h    src/arm64/bitmask.h    src/arm64/intrinsics.h    src/arm64/numberparsing.h    src/arm64/simd.h    src/arm64/stage1_find_marks.h    src/arm64/stage2_build_tape.h    src/arm64/stringparsing.h
 SRCHEADERS_HASWELL=  src/haswell/bitmanipulation.h  src/haswell/bitmask.h  src/haswell/intrinsics.h  src/haswell/numberparsing.h  src/haswell/simd.h  src/haswell/stage1_find_marks.h  src/haswell/stage2_build_tape.h  src/haswell/stringparsing.h
 SRCHEADERS_FALLBACK=  src/fallback/bitmanipulation.h src/fallback/implementation.h src/fallback/numberparsing.h src/fallback/stage1_find_marks.h src/fallback/stage2_build_tape.h src/fallback/stringparsing.h
 SRCHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
 SRCHEADERS_SRC=src/isadetection.h src/jsoncharutils.h src/simdprune_tables.h src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document_parser_callbacks.h
 SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE) $(SRCHEADERS_FALLBACK)
 
-INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
+INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
 
 ifeq ($(SIMDJSON_TEST_AMALGAMATED_HEADERS),1)
 	HEADERS=singleheader/simdjson.h
 
@@ -1,16 +1,44 @@
-#  simdjson : Parsing gigabytes of JSON per second
-[![Build Status](https://cloud.drone.io/api/badges/lemire/simdjson/status.svg)](https://cloud.drone.io/lemire/simdjson/)
-[![CircleCI](https://circleci.com/gh/lemire/simdjson.svg?style=svg)](https://circleci.com/gh/lemire/simdjson)
-[![Build Status](https://img.shields.io/appveyor/ci/lemire/simdjson/master.svg)](https://ci.appveyor.com/project/lemire/simdjson)
+# simdjson : Parsing gigabytes of JSON per second
+
+<img src="images/logo.png" width="10%" style="float: right">
+JSON is everywhere on the Internet. Servers spend a *lot* of time parsing it. We need a fresh approach. simdjson uses commonly available SIMD instructions and microparallel algorithms to parse JSON 2.5x faster than anything else out there.
+
+* **Ludicrous Speed:** Over 2.5x faster than other production-grade JSON parsers.
+* **Delightfully Easy:** First-class, easy to use API.
+* **Complete Validation:** Full JSON and UTF-8 validation, with no compromises.
+* **Rock-Solid Reliability:** From memory allocation to error handling, simdjson's design avoids surprises.
+
+This library is part of the [Awesome Modern C++](https://awesomecpp.com) list.
+
+[![Build Status](https://cloud.drone.io/api/badges/simdjson/simdjson/status.svg)](https://cloud.drone.io/simdjson/simdjson)
+[![CircleCI](https://circleci.com/gh/simdjson/simdjson.svg?style=svg)](https://circleci.com/gh/simdjson/simdjson)
+[![Build status](https://ci.appveyor.com/api/projects/status/ae77wp5v3lebmu6n/branch/master?svg=true)](https://ci.appveyor.com/project/lemire/simdjson-jmmti/branch/master)
 [![][license img]][license]
-[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/simdjson.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:simdjson)
 
+## Quick Start
 
-## A C++ library to see how fast we can parse JSON with complete validation.
+simdjson is easily consumable with a single .h and .cpp file.
 
-JSON documents are everywhere on the Internet. Servers spend a lot of time parsing these documents. We want to accelerate the parsing of JSON per se using commonly available SIMD instructions as much as possible while doing full validation (including character encoding). This library is part of the [Awesome Modern C++](https://awesomecpp.com) list.
+0. Prerequisites: `g++` or `clang++`.
+1. Pull [simdjson.h](singleheader/simdjson.h) and [simdjson.cpp](singleheader/simdjson.cpp) into a directory, along with the sample file [twitter.json](jsonexamples/twitter.json).
+   ```
+   wget https://raw.githubusercontent.com/simdjson/simdjson/master/singleheader/simdjson.h https://raw.githubusercontent.com/simdjson/simdjson/master/singleheader/simdjson.cpp https://raw.githubusercontent.com/simdjson/simdjson/master/jsonexamples/twitter.json
+   ```
+2. Create `parser.cpp`:
 
-<img src="images/logo.png" width="10%">
+   ```c++
+   #include "simdjson.h"
+   int main(void) {
+     simdjson::document::parser parser;
+     simdjson::document& tweets = parser.load("twitter.json");
+     std::cout << tweets["search_metadata"]["count"] << " results." << std::endl;
+   }
+   ```
+3. `g++ -o parser parser.cpp` (or clang++)
+4. `./parser`
+   ```
+   100 results.
+   ```
 
 ## Real-world usage
 
@@ -110,7 +138,7 @@ be concerned with computed gotos.
 
 ## Thread safety
 
-The simdjson library is mostly single-threaded. Thread safety is the responsability of the caller: it is unsafe to reuse a document::parser object between different threads.
+The simdjson library is mostly single-threaded. Thread safety is the responsibility of the caller: it is unsafe to reuse a document::parser object between different threads.
 
 If you are on an x64 processor, the runtime dispatching assigns the right code path the first time that parsing is attempted. The runtime dispatching is thread-safe.
 
@@ -136,23 +164,23 @@ All examples below use use `#include "simdjson.h"`, `#include "simdjson.cpp"` an
 The simplest API to get started is `document::parse()`, which allocates a new parser, parses a string, and returns the DOM. This is less efficient if you're going to read multiple documents, but as long as you're only parsing a single document, this will do just fine.
 
 ```c++
-auto [doc, error] = document::parse(string("[ 1, 2, 3 ]"));
-if (error) { cerr << "Error: " << error_message(error) << endl; exit(1); }
+auto [doc, error] = document::parse("[ 1, 2, 3 ]"_padded);
+if (error) { cerr << "Error: " << error << endl; exit(1); }
 cout << doc;
 ```
 
 If you're using exceptions, it gets even simpler (simdjson won't use exceptions internally, so you'll only pay the performance cost of exceptions in your own calling code):
 
 ```c++
-document doc = document::parse(string("[ 1, 2, 3 ]"));
-cout << doc;
+cout << document::parse("[ 1, 2, 3 ]"_padded);
 ```
 
-The simdjson library requires SIMDJSON_PADDING extra bytes at the end of a string (it doesn't matter if the bytes are initialized). The `padded_string` class is an easy way to ensure this is accomplished up front and prevent the extra allocation:
+If you're wondering why the examples above use `_padded`, it's because the simdjson library requires SIMDJSON_PADDING extra bytes at the end of a string (it doesn't matter if the bytes are initialized). `_padded`
+is a way of creating a `padded_string` instance, which ensures that enough padding has been allocated.
 
 ```c++
-document doc = document::parse(padded_string(string("[ 1, 2, 3 ]")));
-cout << doc;
+padded_string json = "[ 1, 2, 3 ]"_padded;
+cout << document::parse(json);
 ```
 
 You can also load from a file with `parser.load()`:
@@ -463,7 +491,7 @@ You then have access to the following methods on the resulting `simdjson::docume
 * `bool move_to_key(const char *key, uint32_t length)`: as above except that the target can contain NULL characters
 * `void move_to_value()`: when at a key location within an object, this moves to the accompanying value (located next to it).  This is equivalent to, but much faster than, calling `next()`.
 * `bool move_to_index(uint32_t index)`: when at `[`, go one level deep, and advance to the given index. If successful, we are left pointing at the value; if not, we are still pointing at the array
-* `bool move_to(const char *pointer, uint32_t length)`: Moves the iterator to the value correspoding to the json pointer. Always search from the root of the document. If successful, we are left pointing at the value, if not, we are still pointing the same value we were pointing before the call. The json pointer follows the rfc6901 standard's syntax: https://tools.ietf.org/html/rfc6901
+* `bool move_to(const char *pointer, uint32_t length)`: Moves the iterator to the value corresponding to the json pointer. Always searches from the root of the document. If successful, we are left pointing at the value; if not, we are still pointing at the same value we were pointing at before the call. The json pointer follows the rfc6901 standard's syntax: https://tools.ietf.org/html/rfc6901
 * `bool move_to(const std::string &pointer) `: same as above but with a std::string parameter
 * `bool next()`:   Within a given scope (series of nodes at the same depth within either an array or an object), we move forward. Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { and [. At the object ({) or at the array ([), you can issue a "down" to visit their content. valid if we're not at the end of a scope (returns true).
 * `bool prev()`:  Within a given scope (series of nodes at the same depth within either an
@@ -567,15 +595,15 @@ make allparsingcompetition
 ```
 
 Both the `parsingcompetition` and `allparsingcompetition` tools take a `-t` flag which produces
-a table-oriented output that can be conventiently parsed by other tools.
+a table-oriented output that can be conveniently parsed by other tools.
 
 
 ## Docker
 
 One can run tests and benchmarks using docker. It especially makes sense under Linux. Privileged access may be needed to get performance counters.
 
 ```
-git clone https://github.com/lemire/simdjson.git
+git clone https://github.com/simdjson/simdjson.git
 cd simdjson
 docker build -t simdjson .
 docker run --privileged -t simdjson
 
@@ -143,7 +143,7 @@ int main(int argc, char *argv[]) {
   // parse_many
   const char * filename2 = argv[2];
   for (auto result : parser.load_many(filename2)) {
-    error = result.error;
+    error = result.error();
   }
   if (error) {
     std::cout << "parse_many failed" << std::endl;
 
@@ -263,7 +263,7 @@ struct benchmarker {
     : filename(_filename), collector(_collector), stats(NULL) {
     verbose() << "[verbose] loading " << filename << endl;
     simdjson::error_code error;
-    std::tie(this->json, error) = padded_string::load(filename);
+    padded_string::load(filename).tie(this->json, error);
     if (error) {
       exit_error(string("Could not load the file ") + filename);
     }
 
@@ -98,16 +98,14 @@ int main(int argc, char *argv[]) {
       "despacing with RapidJSON Insitu", rapid_stringme_insitu((char *)buffer),
       memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
   memcpy(buffer, p.data(), p.size());
-
-  size_t outlength = simdjson::json_minify((const uint8_t *)buffer, p.size(),
-                                           (uint8_t *)buffer);
-  if (verbose)
-    std::cout << "json_minify length is " << outlength << std::endl;
-
+  size_t outlength;
   uint8_t *cbuffer = (uint8_t *)buffer;
-  BEST_TIME("json_minify", simdjson::json_minify(cbuffer, p.size(), cbuffer),
+  for (auto imple : simdjson::available_implementations) {
+    BEST_TIME((std::string("simdjson->minify+")+imple->name()).c_str(), (imple->minify(cbuffer, p.size(), cbuffer, outlength), outlength),
             outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
             !just_data);
+  }
+
   printf("minisize = %zu, original size = %zu  (minified down to %.2f percent "
          "of original) \n",
          outlength, p.size(), outlength * 100.0 / p.size());
@@ -121,8 +119,9 @@ int main(int argc, char *argv[]) {
             !just_data);
 
   char *mini_buffer = simdjson::internal::allocate_padded_buffer(p.size() + 1);
-  size_t minisize = simdjson::json_minify((const uint8_t *)p.data(), p.size(),
-                                          (uint8_t *)mini_buffer);
+  size_t minisize;
+  simdjson::active_implementation->minify((const uint8_t *)p.data(), p.size(),
+                                          (uint8_t *)mini_buffer, minisize);
   mini_buffer[minisize] = '\0';
 
   BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(),
@@ -171,6 +170,7 @@ int main(int argc, char *argv[]) {
                                  automated_reallocation),
             simdjson::SUCCESS, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
             !just_data);
+
   free(buffer);
   free(ast_buffer);
   free(mini_buffer);
 
@@ -109,7 +109,12 @@ struct option_struct {
         case 'a': {
           const implementation *impl = simdjson::available_implementations[optarg];
           if (!impl) {
-            exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a haswell, westmere or arm64");
+            std::string exit_message = string("Unsupported option value -a ") + optarg + ": expected -a  with one of ";
+            for (auto imple : simdjson::available_implementations) {
+              exit_message += imple->name();
+              exit_message += " ";
+            }
+            exit_usage(exit_message);
           }
           simdjson::active_implementation = impl;
           break;
 
@@ -84,12 +84,12 @@ Simple JSON nodes are represented with one tape element:
 ## Integer and Double values
 
 Integer values are represented as two 64-bit tape elements:
-- The 64-bit value `('l' << 56)` followed by the 64-bit integer value litterally. Integer values are assumed to be signed 64-bit values, using two's complement notation.
-- The 64-bit value `('u' << 56)` followed by the 64-bit integer value litterally. Integer values are assumed to be unsigned 64-bit values.
+- The 64-bit value `('l' << 56)` followed by the 64-bit integer value literally. Integer values are assumed to be signed 64-bit values, using two's complement notation.
+- The 64-bit value `('u' << 56)` followed by the 64-bit integer value literally. Integer values are assumed to be unsigned 64-bit values.
 
 
 Float values are represented as two 64-bit tape elements:
-- The 64-bit value `('d' << 56)` followed by the 64-bit double value litterally in standard IEEE 754 notation.
+- The 64-bit value `('d' << 56)` followed by the 64-bit double value literally in standard IEEE 754 notation.
 
 Performance consideration: We store numbers on the main tape because we believe that locality of reference is helpful for performance.
 
 
@@ -16,7 +16,6 @@ set(SIMDJSON_INCLUDE
     ${SIMDJSON_INCLUDE_DIR}/simdjson/inline/padded_string.h
     ${SIMDJSON_INCLUDE_DIR}/simdjson/internal/jsonformatutils.h
     ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
-    ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
     ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
     ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonstream.h
     ${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h
 
@@ -10,7 +10,6 @@
 #include "simdjson/implementation.h"
 #include "simdjson/document.h"
 #include "simdjson/document_stream.h"
-#include "simdjson/jsonminifier.h"
 
 // Deprecated API
 #include "simdjson/parsedjsoniterator.h"
 
@@ -1522,6 +1522,14 @@ class document::parser {
   //
   size_t _max_depth;
 
+  //
+  // The loaded buffer (reused each time load() is called)
+  //
+  std::unique_ptr<char[], decltype(&aligned_free_char)> loaded_bytes;
+
+  // Capacity of loaded_bytes buffer.
+  size_t _loaded_bytes_capacity{0};
+
   // all nodes are stored on the doc.tape using a 64-bit word.
   //
   // strings, double and ints are stored as
@@ -1543,6 +1551,11 @@ class document::parser {
   // and auto-allocate if not.
   inline error_code ensure_capacity(size_t desired_capacity) noexcept;
 
+  //
+  // Read the file into loaded_bytes
+  //
+  inline simdjson_result<size_t> read_file(const std::string &path) noexcept;
+
 #if SIMDJSON_EXCEPTIONS
   // Used internally to get the document
   inline const document &get_document() const noexcept(false);
@@ -1555,7 +1568,7 @@ class document::parser {
 /**
  * Minifies a JSON element or document, printing the smallest possible valid JSON.
  *
- *   document doc = document::parse("   [ 1 , 2 , 3 ] "_pad);
+ *   document doc = document::parse("   [ 1 , 2 , 3 ] "_padded);
  *   cout << minify(doc) << endl; // prints [1,2,3]
  *
  */
Original file line number	Diff line number	Diff line change
`@@ -143,7 +143,7 @@ int main(int argc, char *argv[]) {`
`143`	`143`	`// parse_many`
`144`	`144`	`const char * filename2 = argv[2];`
`145`	`145`	`for (auto result : parser.load_many(filename2)) {`
`146`		`- error = result.error;`
	`146`	`+ error = result.error();`
`147`	`147`	`}`
`148`	`148`	`if (error) {`
`149`	`149`	`std::cout << "parse_many failed" << std::endl;`
Original file line number	Diff line number	Diff line change
`@@ -263,7 +263,7 @@ struct benchmarker {`
`263`	`263`	`: filename(_filename), collector(_collector), stats(NULL) {`
`264`	`264`	`verbose() << "[verbose] loading " << filename << endl;`
`265`	`265`	`simdjson::error_code error;`
`266`		`- std::tie(this->json, error) = padded_string::load(filename);`
	`266`	`+ padded_string::load(filename).tie(this->json, error);`
`267`	`267`	`if (error) {`
`268`	`268`	`exit_error(string("Could not load the file ") + filename);`
`269`	`269`	`}`