millijson
Lightweight JSON parser for C++
Loading...
Searching...
No Matches
millijson.hpp
Go to the documentation of this file.
1#ifndef MILLIJSON_MILLIJSON_HPP
2#define MILLIJSON_MILLIJSON_HPP
3
4#include <memory>
5#include <vector>
6#include <cstddef>
7#include <cstdlib>
8#include <string>
9#include <stdexcept>
10#include <cmath>
11#include <unordered_map>
12#include <unordered_set>
13#include <cstdio>
14
24namespace millijson {
25
/**
 * Identifiers for the kinds of JSON value that can be represented.
 * The numeric values follow declaration order, starting from zero.
 */
enum Type {
    NUMBER = 0,
    NUMBER_AS_STRING = 1,
    STRING = 2,
    BOOLEAN = 3,
    NOTHING = 4,
    ARRAY = 5,
    OBJECT = 6
};
39
43class Base {
44public:
48 virtual Type type() const = 0;
49
53 Base() = default;
54 Base(Base&&) = default;
55 Base(const Base&) = default;
56 Base& operator=(Base&&) = default;
57 Base& operator=(const Base&) = default;
58 virtual ~Base() {}
62};
63
67class Number final : public Base {
68public:
72 Number(double x) : my_value(x) {}
73
74 Type type() const { return NUMBER; }
75
76public:
80 const double& value() const { return my_value; }
81
85 double& value() { return my_value; }
86
87private:
88 double my_value;
89};
90
94class NumberAsString final : public Base {
95public:
99 NumberAsString(std::string x) : my_value(x) {}
100
101 Type type() const { return NUMBER_AS_STRING; }
102
103public:
107 const std::string& value() const { return my_value; }
108
112 std::string& value() { return my_value; }
113
114private:
115 std::string my_value;
116};
117
121class String final : public Base {
122public:
126 String(std::string x) : my_value(std::move(x)) {}
127
128 Type type() const { return STRING; }
129
130public:
134 const std::string& value() const { return my_value; }
135
139 std::string& value() { return my_value; }
140
141private:
142 std::string my_value;
143};
144
148class Boolean final : public Base {
149public:
153 Boolean(bool x) : my_value(x) {}
154
155 Type type() const { return BOOLEAN; }
156
157public:
161 const bool& value() const { return my_value; }
162
166 bool& value() { return my_value; }
167
168private:
169 bool my_value;
170};
171
175class Nothing final : public Base {
176public:
177 Type type() const { return NOTHING; }
178};
179
183class Array final : public Base {
184public:
188 Array(std::vector<std::shared_ptr<Base> > x) : my_value(std::move(x)) {}
189
190 Type type() const { return ARRAY; }
191
192public:
196 const std::vector<std::shared_ptr<Base> >& value() const {
197 return my_value;
198 }
199
203 std::vector<std::shared_ptr<Base> >& value() {
204 return my_value;
205 }
206
207private:
208 std::vector<std::shared_ptr<Base> > my_value;
209};
210
214class Object final : public Base {
215public:
219 Object(std::unordered_map<std::string, std::shared_ptr<Base> > x) : my_value(std::move(x)) {}
220
221 Type type() const { return OBJECT; }
222
223public:
227 const std::unordered_map<std::string, std::shared_ptr<Base> >& value() const {
228 return my_value;
229 }
230
234 std::unordered_map<std::string, std::shared_ptr<Base> >& value() {
235 return my_value;
236 }
237
238private:
239 std::unordered_map<std::string, std::shared_ptr<Base> > my_value;
240};
241
251 bool number_as_string = false;
252};
253
// All chomp functions return whether any characters remain in 'input' once
// the whitespace has been skipped, which spares callers a separate valid() call.

/**
 * Skip over JSON whitespace, starting from the current character.
 *
 * @tparam Input_ Reader class providing `get()` and `advance()`.
 * @param input Reader to be advanced past any whitespace.
 * @param ok Whether `input` currently points at a readable character.
 * @return `true` if `input` now rests on a non-whitespace character,
 * `false` if the input was exhausted first (or `ok` was `false`).
 */
template<class Input_>
bool raw_chomp(Input_& input, bool ok) {
    for (; ok; ok = input.advance()) {
        const char current = input.get();
        // Permitted whitespace, see https://www.rfc-editor.org/rfc/rfc7159#section-2.
        const bool is_space = (current == ' ' || current == '\n' || current == '\r' || current == '\t');
        if (!is_space) {
            return true;
        }
    }
    return false;
}
273
/**
 * Skip whitespace starting at the current position, without advancing first.
 *
 * @tparam Input_ Reader class providing `valid()`, `get()` and `advance()`.
 * @param input Reader to be advanced past any whitespace.
 * @return Whether a non-whitespace character is available in `input`.
 */
template<class Input_>
bool check_and_chomp(Input_& input) {
    return raw_chomp(input, input.valid());
}
279
/**
 * Step past the current character and then skip any whitespace.
 *
 * @tparam Input_ Reader class providing `get()` and `advance()`.
 * @param input Reader to be advanced.
 * @return Whether a non-whitespace character is available in `input`.
 */
template<class Input_>
bool advance_and_chomp(Input_& input) {
    return raw_chomp(input, input.advance());
}
285
/**
 * @param val Character to test.
 * @return Whether `val` is one of the characters '0' to '9'.
 */
inline bool is_digit(char val) {
    if (val < '0') {
        return false;
    }
    return val <= '9';
}
289
/**
 * Check that the input continues with the remainder of an expected literal.
 *
 * The current character of `input` is assumed to have already been matched
 * against `ptr[0]` by the caller, so comparison starts at the second character.
 *
 * @tparam Input_ Reader class providing `get()` and `advance()`.
 * @param input Reader positioned on the first character of the literal.
 * @param ptr Expected literal.
 * @param len Number of characters in `ptr`. Passed explicitly, rather than
 * scanning for a terminating null, so the loop can be unrolled.
 * @return `true` if the rest of the literal was matched, in which case `input`
 * has been moved off its last character; `false` on mismatch or early end of input.
 */
template<class Input_>
bool is_expected_string(Input_& input, const char* ptr, std::size_t len) {
    std::size_t matched = 1;
    while (matched < len) {
        if (!input.advance() || input.get() != ptr[matched]) {
            return false;
        }
        ++matched;
    }

    input.advance(); // step off the final character of the literal.
    return true;
}
307
/**
 * Extract the contents of a JSON string, decoding all escape sequences.
 *
 * @tparam Input_ Reader class providing `get()`, `advance()` and `position()`.
 * @param input Reader positioned on the opening double quote.
 * On successful return, it has been moved just past the closing double quote.
 * @return The decoded contents of the string, without the enclosing quotes.
 * `\uXXXX` escapes are converted to UTF-8; each escape is converted on its own,
 * i.e., UTF-16 surrogate pairs are not combined into a single code point.
 * @throws std::runtime_error if the string is unterminated, contains an
 * unescaped ASCII control character (0-31 or 127), or has an invalid escape.
 */
template<class Input_>
std::string extract_string(Input_& input) {
    const unsigned long long start = input.position() + 1;

    // Get past the opening quote. If nothing follows it, we must stop here,
    // otherwise the get() below would read beyond the end of the input.
    if (!input.advance()) {
        throw std::runtime_error("unterminated string at position " + std::to_string(start));
    }

    std::string output;

    while (1) {
        const char next = input.get();
        switch (next) {
            case '"':
                input.advance(); // get past the closing quote.
                return output;

            case '\\':
                {
                    if (!input.advance()) {
                        throw std::runtime_error("unterminated string at position " + std::to_string(start));
                    }

                    const char escaped = input.get();
                    switch (escaped) {
                        case '"':
                            output += '"';
                            break;
                        case 'n':
                            output += '\n';
                            break;
                        case 'r':
                            output += '\r';
                            break;
                        case '\\':
                            output += '\\';
                            break;
                        case '/':
                            output += '/';
                            break;
                        case 'b':
                            output += '\b';
                            break;
                        case 'f':
                            output += '\f';
                            break;
                        case 't':
                            output += '\t';
                            break;
                        case 'u':
                            {
                                // Accumulate the four hexadecimal digits of the escape.
                                unsigned short mb = 0;
                                for (int i = 0; i < 4; ++i) {
                                    if (!input.advance()) {
                                        throw std::runtime_error("unterminated string at position " + std::to_string(start));
                                    }
                                    mb *= 16;
                                    const char val = input.get();
                                    switch (val) {
                                        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
                                            mb += val - '0';
                                            break;
                                        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                                            mb += (val - 'a') + 10;
                                            break;
                                        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                                            mb += (val - 'A') + 10;
                                            break;
                                        default:
                                            throw std::runtime_error("invalid unicode escape detected at position " + std::to_string(input.position() + 1));
                                    }
                                }

                                // Manually convert the code point to UTF-8. At most 3 bytes are
                                // needed because only 4 hex digits are available in the escape.
                                if (mb <= 127) {
                                    output += static_cast<char>(mb);
                                } else if (mb <= 2047) {
                                    output += static_cast<char>(static_cast<unsigned char>((mb >> 6) | 0b11000000));
                                    output += static_cast<char>(static_cast<unsigned char>((mb & 0b00111111) | 0b10000000));
                                } else {
                                    output += static_cast<char>(static_cast<unsigned char>((mb >> 12) | 0b11100000));
                                    output += static_cast<char>(static_cast<unsigned char>(((mb >> 6) & 0b00111111) | 0b10000000));
                                    output += static_cast<char>(static_cast<unsigned char>((mb & 0b00111111) | 0b10000000));
                                }
                            }
                            break;
                        default:
                            throw std::runtime_error("unrecognized escape '\\" + std::string(1, escaped) + "'");
                    }
                }
                break;

            default:
                // Unescaped ASCII control characters (0-31 and 127) are rejected.
                if (static_cast<unsigned char>(next) < 32 || next == static_cast<char>(127)) {
                    throw std::runtime_error("string contains ASCII control character at position " + std::to_string(input.position() + 1));
                }
                output += next;
                break;
        }

        if (!input.advance()) {
            throw std::runtime_error("unterminated string at position " + std::to_string(start));
        }
    }

    return output; // Technically unreachable, but whatever.
}
419
/**
 * Extract the unsigned part of a JSON number, starting from the current character.
 *
 * @tparam as_string_ Whether to collect the characters of the number into a
 * `std::string` instead of computing a `double`.
 * @tparam Input_ Reader class providing `get()`, `advance()` and `position()`.
 * @param input Reader positioned on the first digit of the number.
 * On return, it either rests on the terminating character (one of `,`, `]`,
 * `}`, space, `\r`, `\n` or `\t`) or has been exhausted.
 * @return The parsed `double` if `as_string_ = false`. Otherwise, a string of
 * the characters making up the number, except that a '+' exponent sign is not stored.
 * @throws std::runtime_error if the number is malformed.
 */
420template<bool as_string_, class Input_>
421typename std::conditional<as_string_, std::string, double>::type extract_number(Input_& input) {
422 unsigned long long start = input.position() + 1;
// 'value' is an empty string or a zero double, depending on 'as_string_'.
423 auto value = []{
424 if constexpr(as_string_) {
425 return std::string("");
426 } else {
427 return static_cast<double>(0);
428 }
429 }();
430 bool in_fraction = false;
431 bool in_exponent = false;
432
// Appends a character to 'value' in string mode; does nothing in double mode.
433 auto add_string_value = [&](char x) -> void {
434 if constexpr(as_string_) {
435 value += x;
436 }
437 };
438
439 // We assume we're starting from the absolute value, after removing any preceding negative sign.
440 char lead = input.get();
441 add_string_value(lead);
// A leading zero may only be followed by a fraction, an exponent or a terminator.
442 if (lead == '0') {
443 if (!input.advance()) {
444 return value;
445 }
446
447 auto after_zero = input.get();
448 switch (after_zero) {
449 case '.':
450 add_string_value(after_zero);
451 in_fraction = true;
452 break;
453 case 'e': case 'E':
454 add_string_value(after_zero);
455 in_exponent = true;
456 break;
457 case ',': case ']': case '}': case ' ': case '\r': case '\n': case '\t':
458 return value;
459 default:
460 throw std::runtime_error("invalid number starting with 0 at position " + std::to_string(start));
461 }
462
463 } else { // 'lead' must be a digit, as extract_number is only called when the current character is a digit.
464 if constexpr(!as_string_) {
465 value += lead - '0';
466 }
467
// Integral part: consume digits until a '.', an exponent marker, a terminator or the end of input.
468 while (input.advance()) {
469 char val = input.get();
470 switch (val) {
471 case '.':
472 add_string_value(val);
473 in_fraction = true;
474 goto integral_end;
475 case 'e': case 'E':
476 add_string_value(val);
477 in_exponent = true;
478 goto integral_end;
479 case ',': case ']': case '}': case ' ': case '\r': case '\n': case '\t':
480 goto total_end;
481 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
482 if constexpr(as_string_) {
483 value += val;
484 } else {
485 value *= 10;
486 value += val - '0';
487 }
488 break;
489 default:
490 throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
491 }
492 }
493
494integral_end:;
495 }
496
// Fractional part: at least one digit is required after the '.'.
497 if (in_fraction) {
498 if (!input.advance()) {
499 throw std::runtime_error("invalid number with trailing '.' at position " + std::to_string(start));
500 }
501
502 char val = input.get();
503 if (!is_digit(val)) {
504 throw std::runtime_error("'.' must be followed by at least one digit at position " + std::to_string(start));
505 }
506
// 'fractional' is the power of ten that the current fractional digit is divided by.
507 double fractional = 10;
508 if constexpr(as_string_) {
509 value += val;
510 } else {
511 value += (val - '0') / fractional;
512 }
513
514 while (input.advance()) {
515 char val = input.get();
516 switch (val) {
517 case 'e': case 'E':
518 in_exponent = true;
519 add_string_value(val);
520 goto fraction_end;
521 case ',': case ']': case '}': case ' ': case '\r': case '\n': case '\t':
522 goto total_end;
523 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
524 if constexpr(as_string_) {
525 value += val;
526 } else {
527 fractional *= 10;
528 value += (val - '0') / fractional;
529 }
530 break;
531 default:
532 throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
533 }
534 }
535
536fraction_end:;
537 }
538
// Exponent part: an optional sign followed by at least one digit.
539 if (in_exponent) {
540 double exponent = 0;
541 bool negative_exponent = false;
542
543 if (!input.advance()) {
544 throw std::runtime_error("invalid number with trailing 'e/E' at position " + std::to_string(start));
545 }
546
547 char val = input.get();
548 if (!is_digit(val)) {
// Only a '-' sign is recorded in string mode; a '+' sign is consumed without being stored.
549 if (val == '-') {
550 negative_exponent = true;
551 add_string_value(val);
552 } else if (val != '+') {
553 throw std::runtime_error("'e/E' should be followed by a sign or digit in number at position " + std::to_string(start));
554 }
555
556 if (!input.advance()) {
557 throw std::runtime_error("invalid number with trailing exponent sign at position " + std::to_string(start));
558 }
559 val = input.get();
560 if (!is_digit(val)) {
561 throw std::runtime_error("exponent sign must be followed by at least one digit in number at position " + std::to_string(start));
562 }
563 }
564
565 if constexpr(as_string_) {
566 value += val;
567 } else {
568 exponent += (val - '0');
569 }
570
571 while (input.advance()) {
572 char val = input.get();
573 switch (val) {
574 case ',': case ']': case '}': case ' ': case '\r': case '\n': case '\t':
575 goto exponent_end;
576 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
577 if constexpr(as_string_) {
578 value += val;
579 } else {
580 exponent *= 10;
581 exponent += (val - '0');
582 }
583 break;
584 default:
585 throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
586 }
587 }
588
// In double mode, scale the mantissa by 10^exponent; skipped when the exponent is zero.
589exponent_end:
590 if constexpr(!as_string_) {
591 if (exponent) {
592 if (negative_exponent) {
593 exponent *= -1;
594 }
595 value *= std::pow(10.0, exponent);
596 }
597 }
598 }
599
600total_end:
601 return value;
602}
603
604struct FakeProvisioner {
605 class FakeBase {
606 public:
607 virtual Type type() const = 0;
608 virtual ~FakeBase() {}
609 };
610 typedef FakeBase Base;
611
612 class FakeBoolean final : public FakeBase {
613 public:
614 Type type() const { return BOOLEAN; }
615 };
616 static FakeBoolean* new_boolean(bool) {
617 return new FakeBoolean;
618 }
619
620 class FakeNumber final : public FakeBase {
621 public:
622 Type type() const { return NUMBER; }
623 };
624 static FakeNumber* new_number(double) {
625 return new FakeNumber;
626 }
627
628 class FakeNumberAsString final : public FakeBase {
629 public:
630 Type type() const { return NUMBER_AS_STRING; }
631 };
632 static FakeNumberAsString* new_number_as_string(std::string) {
633 return new FakeNumberAsString;
634 }
635
636 class FakeString final : public FakeBase {
637 public:
638 Type type() const { return STRING; }
639 };
640 static FakeString* new_string(std::string) {
641 return new FakeString;
642 }
643
644 class FakeNothing final : public FakeBase {
645 public:
646 Type type() const { return NOTHING; }
647 };
648 static FakeNothing* new_nothing() {
649 return new FakeNothing;
650 }
651
652 class FakeArray final : public FakeBase {
653 public:
654 Type type() const { return ARRAY; }
655 };
656 static FakeArray* new_array(std::vector<std::shared_ptr<FakeBase> >) {
657 return new FakeArray;
658 }
659
660 class FakeObject final : public FakeBase {
661 public:
662 Type type() const { return OBJECT; }
663 };
664 static FakeObject* new_object(std::unordered_map<std::string, std::shared_ptr<FakeBase> >) {
665 return new FakeObject;
666 }
667};
668
669template<class Provisioner_, class Input_>
670std::shared_ptr<typename Provisioner_::Base> parse_internal(Input_& input, const ParseOptions& options) {
671 if (!check_and_chomp(input)) {
672 throw std::runtime_error("invalid JSON with no contents");
673 }
674
675 // The most natural algorithm for parsing nested JSON arrays/objects would involve recursion,
676 // but we avoid this to eliminate the associated risk of stack overflows (and maybe improve perf?).
677 // Instead, we use an iterative algorithm with a manual stack for the two nestable JSON types.
678 // We only have to worry about OBJECTs and ARRAYs so there's only two sets of states to manage.
679 std::vector<Type> stack;
680 typedef std::vector<std::shared_ptr<typename Provisioner_::Base> > ArrayContents;
681 std::vector<ArrayContents> array_stack;
682 struct ObjectContents {
683 ObjectContents() = default;
684 ObjectContents(std::string key) : key(std::move(key)) {}
685 std::unordered_map<std::string, std::shared_ptr<typename Provisioner_::Base> > mapping;
686 std::string key;
687 };
688 std::vector<ObjectContents> object_stack;
689
690 unsigned long long start = input.position() + 1;
691 auto extract_object_key = [&]() -> std::string {
692 char next = input.get();
693 if (next != '"') {
694 throw std::runtime_error("expected a string as the object key at position " + std::to_string(input.position() + 1));
695 }
696 auto key = extract_string(input);
697 if (!check_and_chomp(input)) {
698 throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
699 }
700 if (input.get() != ':') {
701 throw std::runtime_error("expected ':' to separate keys and values at position " + std::to_string(input.position() + 1));
702 }
703 if (!advance_and_chomp(input)) {
704 throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
705 }
706 return key;
707 };
708
709 std::shared_ptr<typename Provisioner_::Base> output;
710 while (1) {
711 const char current = input.get();
712 switch(current) {
713 case 't':
714 if (!is_expected_string(input, "true", 4)) {
715 throw std::runtime_error("expected a 'true' string at position " + std::to_string(start));
716 }
717 output.reset(Provisioner_::new_boolean(true));
718 break;
719
720 case 'f':
721 if (!is_expected_string(input, "false", 5)) {
722 throw std::runtime_error("expected a 'false' string at position " + std::to_string(start));
723 }
724 output.reset(Provisioner_::new_boolean(false));
725 break;
726
727 case 'n':
728 if (!is_expected_string(input, "null", 4)) {
729 throw std::runtime_error("expected a 'null' string at position " + std::to_string(start));
730 }
731 output.reset(Provisioner_::new_nothing());
732 break;
733
734 case '"':
735 output.reset(Provisioner_::new_string(extract_string(input)));
736 break;
737
738 case '[':
739 if (!advance_and_chomp(input)) {
740 throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
741 }
742 if (input.get() != ']') {
743 stack.push_back(ARRAY);
744 array_stack.emplace_back();
745 continue; // prepare to parse the first element of the array.
746 }
747 input.advance(); // move past the closing bracket.
748 output.reset(Provisioner_::new_array(std::vector<std::shared_ptr<typename Provisioner_::Base> >{}));
749 break;
750
751 case '{':
752 if (!advance_and_chomp(input)) {
753 throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
754 }
755 if (input.get() != '}') {
756 stack.push_back(OBJECT);
757 object_stack.emplace_back(extract_object_key());
758 continue; // prepare to parse the first value of the object.
759 }
760 input.advance(); // move past the closing brace.
761 output.reset(Provisioner_::new_object(std::unordered_map<std::string, std::shared_ptr<typename Provisioner_::Base> >{}));
762 break;
763
764 case '-':
765 if (!input.advance()) {
766 throw std::runtime_error("incomplete number starting at position " + std::to_string(start));
767 }
768 if (!is_digit(input.get())) {
769 throw std::runtime_error("invalid number starting at position " + std::to_string(start));
770 }
771 if (options.number_as_string) {
772 output.reset(Provisioner_::new_number_as_string("-" + extract_number<true>(input)));
773 } else {
774 output.reset(Provisioner_::new_number(-extract_number<false>(input)));
775 }
776 break;
777
778 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
779 if (options.number_as_string) {
780 output.reset(Provisioner_::new_number_as_string(extract_number<true>(input)));
781 } else {
782 output.reset(Provisioner_::new_number(extract_number<false>(input)));
783 }
784 break;
785
786 default:
787 throw std::runtime_error(std::string("unknown type starting with '") + std::string(1, current) + "' at position " + std::to_string(start));
788 }
789
790 while (1) {
791 if (stack.empty()) {
792 goto parse_finish; // double-break to save ourselves a conditional.
793 }
794
795 if (stack.back() == ARRAY) {
796 auto& contents = array_stack.back();
797 contents.emplace_back(std::move(output));
798
799 if (!check_and_chomp(input)) {
800 throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
801 }
802
803 char next = input.get();
804 if (next == ',') {
805 if (!advance_and_chomp(input)) {
806 throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
807 }
808 break; // prepare to parse the next entry of the array.
809 }
810 if (next != ']') {
811 throw std::runtime_error("unknown character '" + std::string(1, next) + "' in array at position " + std::to_string(input.position() + 1));
812 }
813 input.advance(); // skip the closing bracket.
814
815 output.reset(Provisioner_::new_array(std::move(contents)));
816 stack.pop_back();
817 array_stack.pop_back();
818
819 } else {
820 auto& mapping = object_stack.back().mapping;
821 auto& key = object_stack.back().key;
822 if (mapping.find(key) != mapping.end()) {
823 throw std::runtime_error("detected duplicate keys in the object at position " + std::to_string(input.position() + 1));
824 }
825 mapping[std::move(key)] = std::move(output); // consuming the key here.
826
827 if (!check_and_chomp(input)) {
828 throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
829 }
830
831 char next = input.get();
832 if (next == ',') {
833 if (!advance_and_chomp(input)) {
834 throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
835 }
836 key = extract_object_key();
837 break; // prepare to parse the next value of the object.
838 }
839 if (next != '}') {
840 throw std::runtime_error("unknown character '" + std::string(1, next) + "' in array at position " + std::to_string(input.position() + 1));
841 }
842 input.advance(); // skip the closing brace.
843
844 output.reset(Provisioner_::new_object(std::move(mapping)));
845 stack.pop_back();
846 object_stack.pop_back();
847 }
848 }
849 }
850
851parse_finish:;
852 if (check_and_chomp(input)) {
853 throw std::runtime_error("invalid JSON with trailing non-space characters at position " + std::to_string(input.position() + 1));
854 }
855 return output;
856}
870
875 static Boolean* new_boolean(bool x) {
876 return new Boolean(x);
877 }
878
883 static Number* new_number(double x) {
884 return new Number(x);
885 }
886
891 static NumberAsString* new_number_as_string(std::string x) {
892 return new NumberAsString(std::move(x));
893 }
894
899 static String* new_string(std::string x) {
900 return new String(std::move(x));
901 }
902
907 return new Nothing;
908 }
909
914 static Array* new_array(std::vector<std::shared_ptr<Base> > x) {
915 return new Array(std::move(x));
916 }
917
922 static Object* new_object(std::unordered_map<std::string, std::shared_ptr<Base> > x) {
923 return new Object(std::move(x));
924 }
925};
926
949template<class Provisioner_ = DefaultProvisioner, class Input_>
950std::shared_ptr<typename DefaultProvisioner::Base> parse(Input_& input, const ParseOptions& options) {
951 return parse_internal<Provisioner_>(input, options);
952}
953
966template<class Input_>
967Type validate(Input_& input, [[maybe_unused]] const ParseOptions& options) {
968 auto ptr = parse_internal<FakeProvisioner>(input, options);
969 return ptr->type();
970}
971
/**
 * Reader over an in-memory character buffer. The buffer is not copied.
 */
class RawReader {
public:
    /**
     * @param ptr Pointer to the first character of the buffer.
     * @param len Number of characters in the buffer.
     */
    RawReader(const char* ptr, std::size_t len) : my_pos(0), my_ptr(ptr), my_len(len) {}

    /**
     * @return Whether the current position lies inside the buffer.
     */
    bool valid() const {
        return my_pos < my_len;
    }

    /**
     * @return Character at the current position. Only call this while valid() is true.
     */
    char get() const {
        return my_ptr[my_pos];
    }

    /**
     * Move to the next character.
     * @return Whether the new position lies inside the buffer.
     */
    bool advance() {
        my_pos += 1;
        return my_pos < my_len;
    }

    /**
     * @return Zero-based index of the current position.
     */
    unsigned long long position() const {
        return my_pos;
    }

private:
    unsigned long long my_pos;
    const char* my_ptr;
    std::size_t my_len;
};
1016template<class Provisioner_ = DefaultProvisioner>
1017inline std::shared_ptr<typename Provisioner_::Base> parse_string(const char* ptr, std::size_t len, const ParseOptions& options) {
1018 RawReader input(ptr, len);
1019 return parse<Provisioner_>(input, options);
1020}
1021
1032inline Type validate_string(const char* ptr, std::size_t len, const ParseOptions& options) {
1033 RawReader input(ptr, len);
1034 return validate(input, options);
1035}
1036
/**
 * Reader that streams the contents of a file through a fixed-size buffer.
 *
 * The file handle is owned by the reader and closed on destruction, including
 * when the constructor itself throws. Readers cannot be copied, as two copies
 * would both attempt to close the same handle.
 */
class FileReader {
public:
    typedef typename std::vector<char>::size_type Size;

    /**
     * @param path Path to the file to be read.
     * @param buffer_size Size of the read buffer in bytes.
     * A value of zero is treated as 1, as an empty buffer could never hold any data.
     * @throws std::runtime_error if the file cannot be opened or the first read fails.
     */
    FileReader(const char* path, std::size_t buffer_size) :
        my_handle(std::fopen(path, "rb")),
        my_buffer(buffer_size ? check_buffer_size(buffer_size) : static_cast<Size>(1))
    {
        if (!my_handle) {
            throw std::runtime_error("failed to open file at '" + std::string(path) + "'");
        }
        fill();
    }

    FileReader(const FileReader&) = delete;
    FileReader& operator=(const FileReader&) = delete;

public:
    /**
     * @param buffer_size Requested buffer size.
     * @return `buffer_size`, capped at the largest value representable by `Size`.
     */
    static Size check_buffer_size(std::size_t buffer_size) {
        // Usually this is a no-op as the vector::size_type is a size_t.
        // But it doesn't hurt to confirm that it will fit properly.
        // Size is unsigned, so converting -1 yields its maximum value.
        constexpr Size max_size = static_cast<Size>(-1);
        if (buffer_size >= max_size) { // size_type should be unsigned, so at least this comparison is safe.
            return max_size;
        } else {
            return static_cast<Size>(buffer_size);
        }
    }

private:
    // Closes the handle whenever the owning pointer is destroyed while non-null.
    struct HandleCloser {
        void operator()(std::FILE* handle) const {
            std::fclose(handle);
        }
    };

    std::unique_ptr<std::FILE, HandleCloser> my_handle;
    std::vector<char> my_buffer;
    Size my_available = 0; // number of valid bytes currently in the buffer.
    Size my_index = 0; // position of the current character within the buffer.
    unsigned long long my_overall = 0; // number of bytes consumed by previous buffer fills.
    bool my_finished = false; // whether the end of the file has been reached.

public:
    /**
     * @return Character at the current position. Only call this while valid() is true.
     */
    char get() const {
        return my_buffer[my_index];
    }

    /**
     * @return Whether a character is available at the current position.
     */
    bool valid() const {
        return my_index < my_available;
    }

    /**
     * Move to the next character, refilling the buffer from the file if necessary.
     * @return Whether a character is available at the new position.
     * @throws std::runtime_error if reading from the file fails.
     */
    bool advance() {
        ++my_index;
        if (my_index < my_available) {
            return true;
        }

        my_index = 0;
        my_overall += my_available;
        fill();
        return valid();
    }

    /**
     * Read the next chunk of the file into the buffer.
     * Once the end of the file has been seen, subsequent calls leave the buffer empty.
     * @throws std::runtime_error if a short read was not caused by the end of the file.
     */
    void fill() {
        if (my_finished) {
            my_available = 0;
            return;
        }

        my_available = std::fread(my_buffer.data(), sizeof(char), my_buffer.size(), my_handle.get());
        if (my_available == my_buffer.size()) {
            return;
        }

        if (std::feof(my_handle.get())) {
            my_finished = true;
        } else {
            throw std::runtime_error("failed to read file (error " + std::to_string(std::ferror(my_handle.get())) + ")");
        }
    }

    /**
     * @return Zero-based offset of the current position from the start of the file.
     */
    unsigned long long position() const {
        return my_overall + my_index;
    }
};
1129 std::size_t buffer_size = 65536;
1130
1135};
1136
1145template<class Provisioner_ = DefaultProvisioner>
1146std::shared_ptr<Base> parse_file(const char* path, const FileReadOptions& options) {
1147 FileReader input(path, options.buffer_size);
1148 return parse(input, options.parse_options);
1149}
1150
1160inline Type validate_file(const char* path, const FileReadOptions& options) {
1161 FileReader input(path, options.buffer_size);
1162 return validate(input, options.parse_options);
1163}
1164
1168// Back-compatibility only.
1169template<class Provisioner_ = DefaultProvisioner, class Input_>
1170std::shared_ptr<typename DefaultProvisioner::Base> parse(Input_& input) {
1171 return parse<Provisioner_>(input, {});
1172}
1173
1174template<class Input_>
1175Type validate(Input_& input) {
1176 return validate(input, {});
1177}
1178
1179template<class Provisioner_ = DefaultProvisioner>
1180inline std::shared_ptr<typename Provisioner_::Base> parse_string(const char* ptr, std::size_t len) {
1181 return parse_string<Provisioner_>(ptr, len, {});
1182}
1183
1184inline Type validate_string(const char* ptr, std::size_t len) {
1185 RawReader input(ptr, len);
1186 return validate(input, {});
1187}
1192}
1193
1194#endif
JSON array.
Definition millijson.hpp:183
Type type() const
Definition millijson.hpp:190
std::vector< std::shared_ptr< Base > > & value()
Definition millijson.hpp:203
const std::vector< std::shared_ptr< Base > > & value() const
Definition millijson.hpp:196
Array(std::vector< std::shared_ptr< Base > > x)
Definition millijson.hpp:188
Virtual base class for all JSON types.
Definition millijson.hpp:43
virtual Type type() const =0
JSON boolean.
Definition millijson.hpp:148
Boolean(bool x)
Definition millijson.hpp:153
Type type() const
Definition millijson.hpp:155
const bool & value() const
Definition millijson.hpp:161
bool & value()
Definition millijson.hpp:166
JSON null.
Definition millijson.hpp:175
Type type() const
Definition millijson.hpp:177
JSON number as a string.
Definition millijson.hpp:94
const std::string & value() const
Definition millijson.hpp:107
Type type() const
Definition millijson.hpp:101
NumberAsString(std::string x)
Definition millijson.hpp:99
std::string & value()
Definition millijson.hpp:112
JSON number.
Definition millijson.hpp:67
double & value()
Definition millijson.hpp:85
Type type() const
Definition millijson.hpp:74
const double & value() const
Definition millijson.hpp:80
Number(double x)
Definition millijson.hpp:72
JSON object.
Definition millijson.hpp:214
const std::unordered_map< std::string, std::shared_ptr< Base > > & value() const
Definition millijson.hpp:227
std::unordered_map< std::string, std::shared_ptr< Base > > & value()
Definition millijson.hpp:234
Object(std::unordered_map< std::string, std::shared_ptr< Base > > x)
Definition millijson.hpp:219
Type type() const
Definition millijson.hpp:221
JSON string.
Definition millijson.hpp:121
const std::string & value() const
Definition millijson.hpp:134
std::string & value()
Definition millijson.hpp:139
Type type() const
Definition millijson.hpp:128
String(std::string x)
Definition millijson.hpp:126
A lightweight header-only JSON parser.
Type validate_file(const char *path, const FileReadOptions &options)
Definition millijson.hpp:1160
Type validate_string(const char *ptr, std::size_t len, const ParseOptions &options)
Definition millijson.hpp:1032
std::shared_ptr< typename DefaultProvisioner::Base > parse(Input_ &input, const ParseOptions &options)
Definition millijson.hpp:950
Type validate(Input_ &input, const ParseOptions &options)
Definition millijson.hpp:967
std::shared_ptr< typename Provisioner_::Base > parse_string(const char *ptr, std::size_t len, const ParseOptions &options)
Definition millijson.hpp:1017
Type
Definition millijson.hpp:30
std::shared_ptr< Base > parse_file(const char *path, const FileReadOptions &options)
Definition millijson.hpp:1146
Default methods to provision representations of JSON types.
Definition millijson.hpp:864
static Array * new_array(std::vector< std::shared_ptr< Base > > x)
Definition millijson.hpp:914
static NumberAsString * new_number_as_string(std::string x)
Definition millijson.hpp:891
static Number * new_number(double x)
Definition millijson.hpp:883
static Object * new_object(std::unordered_map< std::string, std::shared_ptr< Base > > x)
Definition millijson.hpp:922
static Nothing * new_nothing()
Definition millijson.hpp:906
static Boolean * new_boolean(bool x)
Definition millijson.hpp:875
::millijson::Base Base
Definition millijson.hpp:869
static String * new_string(std::string x)
Definition millijson.hpp:899
Options for parse_file() and validate_file().
Definition millijson.hpp:1125
std::size_t buffer_size
Definition millijson.hpp:1129
ParseOptions parse_options
Definition millijson.hpp:1134
Options for parse().
Definition millijson.hpp:245
bool number_as_string
Definition millijson.hpp:251