millijson
Lightweight JSON parser for C++
Loading...
Searching...
No Matches
millijson.hpp
Go to the documentation of this file.
1#ifndef MILLIJSON_MILLIJSON_HPP
2#define MILLIJSON_MILLIJSON_HPP
3
4#include <memory>
5#include <vector>
6#include <cctype>
7#include <cstdlib>
8#include <string>
9#include <stdexcept>
10#include <cmath>
11#include <unordered_map>
12#include <unordered_set>
13#include <cstdio>
14
25namespace millijson {
26
/**
 * All JSON value types known to the parser.
 * `NOTHING` is the type reported for a JSON `null`.
 */
enum Type {
    NUMBER = 0,
    STRING = 1,
    BOOLEAN = 2,
    NOTHING = 3,
    ARRAY = 4,
    OBJECT = 5
};
38
42struct Base {
46 virtual Type type() const = 0;
47
51 virtual ~Base() {}
59 double get_number() const;
60
64 const std::string& get_string() const;
65
69 bool get_boolean() const;
70
74 const std::unordered_map<std::string, std::shared_ptr<Base> >& get_object() const;
75
79 const std::vector<std::shared_ptr<Base> >& get_array() const;
80};
81
85struct Number : public Base {
89 Number(double v) : value(v) {}
94 Type type() const { return NUMBER; }
95
99 double value;
100};
101
105struct String : public Base {
109 String(std::string s) : value(std::move(s)) {}
114 Type type() const { return STRING; }
115
119 std::string value;
120};
121
125struct Boolean : public Base {
129 Boolean(bool v) : value(v) {}
134 Type type() const { return BOOLEAN; }
135
139 bool value;
140};
141
145struct Nothing : public Base {
146 Type type() const { return NOTHING; }
147};
148
152struct Array : public Base {
153 Type type() const { return ARRAY; }
154
158 std::vector<std::shared_ptr<Base> > values;
159
163 void add(std::shared_ptr<Base> value) {
164 values.push_back(std::move(value));
165 return;
166 }
167};
168
172struct Object : public Base {
173 Type type() const { return OBJECT; }
174
178 std::unordered_map<std::string, std::shared_ptr<Base> > values;
179
184 bool has(const std::string& key) const {
185 return values.find(key) != values.end();
186 }
187
192 void add(std::string key, std::shared_ptr<Base> value) {
193 values[std::move(key)] = std::move(value);
194 return;
195 }
196};
197
201inline double Base::get_number() const {
202 return static_cast<const Number*>(this)->value;
203}
204
205inline const std::string& Base::get_string() const {
206 return static_cast<const String*>(this)->value;
207}
208
209inline bool Base::get_boolean() const {
210 return static_cast<const Boolean*>(this)->value;
211}
212
213inline const std::unordered_map<std::string, std::shared_ptr<Base> >& Base::get_object() const {
214 return static_cast<const Object*>(this)->values;
215}
216
217inline const std::vector<std::shared_ptr<Base> >& Base::get_array() const {
218 return static_cast<const Array*>(this)->values;
219}
220
/**
 * Check whether a character is JSON whitespace.
 * Only the four whitespace characters permitted by
 * https://www.rfc-editor.org/rfc/rfc7159#section-2 are accepted.
 *
 * @param x Character to test.
 * @return Whether `x` is a space, line feed, carriage return or tab.
 */
inline bool isspace(char x) {
    switch (x) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return true;
        default:
            return false;
    }
}
225
/**
 * Skip over any whitespace at the current position of the input.
 * On return, the input is either exhausted or positioned at the first
 * non-whitespace character.
 *
 * @tparam Input Reader providing `valid()`, `get()` and `advance()`.
 * @param input Input to advance in place.
 */
template<class Input>
void chomp(Input& input) {
    for (bool more = input.valid(); more && isspace(input.get()); more = input.advance()) {
        // Nothing to do; the loop header consumes the whitespace.
    }
}
234
/**
 * Check that the input, starting at its current position, spells out `expected`.
 * Matching characters are consumed; on a mismatch or premature end of input,
 * the input is left at the offending position.
 *
 * @tparam Input Reader providing `valid()`, `get()` and `advance()`.
 * @param input Input to read from.
 * @param expected Text that must appear next in the input.
 * @return Whether all of `expected` was matched.
 */
template<class Input>
bool is_expected_string(Input& input, const std::string& expected) {
    for (size_t i = 0, n = expected.size(); i < n; ++i) {
        if (!input.valid() || input.get() != expected[i]) {
            return false;
        }
        input.advance();
    }
    return true;
}
248
/**
 * Extract a JSON string literal from the input.
 *
 * The input must be positioned at the opening double quote. On success, the
 * input is left just past the closing double quote. Escape sequences are
 * decoded; `\uXXXX` escapes are converted to UTF-8 (one to three bytes, as
 * each escape is decoded on its own).
 *
 * @tparam Input Reader providing `valid()`, `get()`, `advance()` and `position()`.
 * @param input Input to read from.
 * @return The decoded contents of the string, without the surrounding quotes.
 *
 * @throws std::runtime_error if the string is unterminated, contains an
 * unescaped ASCII control character, or contains an invalid escape sequence.
 */
template<class Input>
std::string extract_string(Input& input) {
    size_t start = input.position() + 1;
    std::string output;

    // Get past the opening quote. The input may end right here (e.g., a lone
    // '"'), in which case calling get() below would read past the end.
    if (!input.advance()) {
        throw std::runtime_error("unterminated string at position " + std::to_string(start));
    }

    while (1) {
        char next = input.get();
        switch (next) {
            case '"':
                input.advance(); // get past the closing quote.
                return output;

            case '\\':
                if (!input.advance()) {
                    throw std::runtime_error("unterminated string at position " + std::to_string(start));
                } else {
                    char next2 = input.get();
                    switch (next2) {
                        case '"':
                            output += '"';
                            break;
                        case 'n':
                            output += '\n';
                            break;
                        case 'r':
                            output += '\r';
                            break;
                        case '\\':
                            output += '\\';
                            break;
                        case '/':
                            output += '/';
                            break;
                        case 'b':
                            output += '\b';
                            break;
                        case 'f':
                            output += '\f';
                            break;
                        case 't':
                            output += '\t';
                            break;
                        case 'u':
                            {
                                // Accumulate the four hex digits into a 16-bit code unit.
                                unsigned short mb = 0;
                                for (size_t i = 0; i < 4; ++i) {
                                    if (!input.advance()){
                                        throw std::runtime_error("unterminated string at position " + std::to_string(start));
                                    }
                                    mb *= 16;
                                    char val = input.get();
                                    switch (val) {
                                        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
                                            mb += val - '0';
                                            break;
                                        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                                            mb += (val - 'a') + 10;
                                            break;
                                        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                                            mb += (val - 'A') + 10;
                                            break;
                                        default:
                                            throw std::runtime_error("invalid unicode escape detected at position " + std::to_string(input.position() + 1));
                                    }
                                }

                                // Manually convert Unicode code points to UTF-8. We only allow
                                // 3 bytes at most because there's only 4 hex digits in JSON.
                                if (mb <= 127) {
                                    output += static_cast<char>(mb);
                                } else if (mb <= 2047) {
                                    unsigned char left = (mb >> 6) | 0b11000000;
                                    output += static_cast<char>(left);
                                    unsigned char right = (mb & 0b00111111) | 0b10000000;
                                    output += static_cast<char>(right);
                                } else {
                                    unsigned char left = (mb >> 12) | 0b11100000;
                                    output += static_cast<char>(left);
                                    unsigned char middle = ((mb >> 6) & 0b00111111) | 0b10000000;
                                    output += static_cast<char>(middle);
                                    unsigned char right = (mb & 0b00111111) | 0b10000000;
                                    output += static_cast<char>(right);
                                }
                            }
                            break;
                        default:
                            throw std::runtime_error("unrecognized escape '\\" + std::string(1, next2) + "'");
                    }
                }
                break;

            // Unescaped control characters (0-31) are not allowed inside JSON strings.
            case (char) 0: case (char) 1: case (char) 2: case (char) 3: case (char) 4: case (char) 5: case (char) 6: case (char) 7: case (char) 8: case (char) 9:
            case (char)10: case (char)11: case (char)12: case (char)13: case (char)14: case (char)15: case (char)16: case (char)17: case (char)18: case (char)19:
            case (char)20: case (char)21: case (char)22: case (char)23: case (char)24: case (char)25: case (char)26: case (char)27: case (char)28: case (char)29:
            case (char)30: case (char)31:
                throw std::runtime_error("string contains ASCII control character at position " + std::to_string(input.position() + 1));

            default:
                output += next;
                break;
        }

        // Running out of input before the closing quote means the string never ended.
        if (!input.advance()) {
            throw std::runtime_error("unterminated string at position " + std::to_string(start));
        }
    }

    return output; // Technically unreachable, but whatever.
}
356
/**
 * Extract the absolute value of a JSON number from the input.
 *
 * The input must be positioned at the first digit, i.e., after any leading
 * negative sign has been consumed by the caller. Parsing stops at the end of
 * the input or at a terminator (`,`, `]`, `}` or whitespace), which is left
 * unconsumed.
 *
 * @tparam Input Reader providing `get()`, `advance()` and `position()`.
 * @param input Input to read from.
 * @return The parsed non-negative value.
 *
 * @throws std::runtime_error if the text at the current position is not a
 * valid JSON number, including when it does not start with a digit.
 */
template<class Input>
double extract_number(Input& input) {
    size_t start = input.position() + 1;
    double value = 0;
    double fractional = 10;
    double exponent = 0;
    bool negative_exponent = false;

    // Plain ASCII range check: std::isdigit() is undefined for negative 'char'
    // values, which do turn up when the input contains non-ASCII bytes.
    auto is_digit = [](char v) -> bool {
        return v >= '0' && v <= '9';
    };

    auto is_terminator = [](char v) -> bool {
        return v == ',' || v == ']' || v == '}' || isspace(v);
    };

    bool in_fraction = false;
    bool in_exponent = false;

    // We assume we're starting from the absolute value, after removing any preceding negative sign.
    char lead = input.get();
    if (lead == '0') {
        if (!input.advance()) {
            return 0;
        }

        // A leading zero may only be followed by a fraction, an exponent or a terminator.
        char val = input.get();
        if (val == '.') {
            in_fraction = true;
        } else if (val == 'e' || val == 'E') {
            in_exponent = true;
        } else if (is_terminator(val)) {
            return value;
        } else {
            throw std::runtime_error("invalid number starting with 0 at position " + std::to_string(start));
        }

    } else if (is_digit(lead)) {
        value += lead - '0';

        while (input.advance()) {
            char val = input.get();
            if (val == '.') {
                in_fraction = true;
                break;
            } else if (val == 'e' || val == 'E') {
                in_exponent = true;
                break;
            } else if (is_terminator(val)) {
                return value;
            } else if (!is_digit(val)) {
                throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
            }
            value *= 10;
            value += val - '0';
        }

    } else {
        // Reachable when a negative sign is followed by a non-digit, e.g., "-," or "[-]".
        // Returning zero here would silently accept such input as the number -0.
        throw std::runtime_error("invalid number starting with '" + std::string(1, lead) + "' at position " + std::to_string(start));
    }

    if (in_fraction) {
        if (!input.advance()) {
            throw std::runtime_error("invalid number with trailing '.' at position " + std::to_string(start));
        }

        char val = input.get();
        if (!is_digit(val)) {
            throw std::runtime_error("'.' must be followed by at least one digit at position " + std::to_string(start));
        }
        value += (val - '0') / fractional;

        while (input.advance()) {
            char val = input.get();
            if (val == 'e' || val == 'E') {
                in_exponent = true;
                break;
            } else if (is_terminator(val)) {
                return value;
            } else if (!is_digit(val)) {
                throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
            }
            fractional *= 10;
            value += (val - '0') / fractional;
        }
    }

    if (in_exponent) {
        if (!input.advance()) {
            throw std::runtime_error("invalid number with trailing 'e/E' at position " + std::to_string(start));
        }

        // The exponent may start with an optional sign, which must be followed by a digit.
        char val = input.get();
        if (!is_digit(val)) {
            if (val == '-') {
                negative_exponent = true;
            } else if (val != '+') {
                throw std::runtime_error("'e/E' should be followed by a sign or digit in number at position " + std::to_string(start));
            }

            if (!input.advance()) {
                throw std::runtime_error("invalid number with trailing exponent sign at position " + std::to_string(start));
            }
            val = input.get();
            if (!is_digit(val)) {
                throw std::runtime_error("exponent sign must be followed by at least one digit in number at position " + std::to_string(start));
            }
        }

        exponent += (val - '0');

        while (input.advance()) {
            char val = input.get();
            if (is_terminator(val)) {
                break;
            } else if (!is_digit(val)) {
                throw std::runtime_error("invalid number containing '" + std::string(1, val) + "' at position " + std::to_string(start));
            }
            exponent *= 10;
            exponent += (val - '0');
        }

        // Skip the multiplication for a zero exponent, where it would be a no-op.
        if (exponent) {
            if (negative_exponent) {
                exponent *= -1;
            }
            value *= std::pow(10.0, exponent);
        }
    }

    return value;
}
485
486struct DefaultProvisioner {
487 typedef ::millijson::Base base;
488
489 static Boolean* new_boolean(bool x) {
490 return new Boolean(x);
491 }
492
493 static Number* new_number(double x) {
494 return new Number(x);
495 }
496
497 static String* new_string(std::string x) {
498 return new String(std::move(x));
499 }
500
501 static Nothing* new_nothing() {
502 return new Nothing;
503 }
504
505 static Array* new_array() {
506 return new Array;
507 }
508
509 static Object* new_object() {
510 return new Object;
511 }
512};
513
514struct FakeProvisioner {
515 struct FakeBase {
516 virtual Type type() const = 0;
517 virtual ~FakeBase() {}
518 };
519 typedef FakeBase base;
520
521 struct FakeBoolean : public FakeBase {
522 Type type() const { return BOOLEAN; }
523 };
524 static FakeBoolean* new_boolean(bool) {
525 return new FakeBoolean;
526 }
527
528 struct FakeNumber : public FakeBase {
529 Type type() const { return NUMBER; }
530 };
531 static FakeNumber* new_number(double) {
532 return new FakeNumber;
533 }
534
535 struct FakeString : public FakeBase {
536 Type type() const { return STRING; }
537 };
538 static FakeString* new_string(std::string) {
539 return new FakeString;
540 }
541
542 struct FakeNothing : public FakeBase {
543 Type type() const { return NOTHING; }
544 };
545 static FakeNothing* new_nothing() {
546 return new FakeNothing;
547 }
548
549 struct FakeArray : public FakeBase {
550 Type type() const { return ARRAY; }
551 void add(std::shared_ptr<FakeBase>) {}
552 };
553 static FakeArray* new_array() {
554 return new FakeArray;
555 }
556
557 struct FakeObject : public FakeBase {
558 Type type() const { return OBJECT; }
559 std::unordered_set<std::string> keys;
560 bool has(const std::string& key) const {
561 return keys.find(key) != keys.end();
562 }
563 void add(std::string key, std::shared_ptr<FakeBase>) {
564 keys.insert(std::move(key));
565 }
566 };
567 static FakeObject* new_object() {
568 return new FakeObject;
569 }
570};
571
/**
 * Parse a single JSON value starting at the current position of the input.
 *
 * The input must be valid and positioned at the first character of the value
 * (no leading whitespace). On success, the input is left just past the end of
 * the value. Arrays and objects are parsed by recursion.
 *
 * @tparam Provisioner Class providing the `base` type and the `new_*` factory
 * functions used to create each JSON value.
 * @tparam Input Reader providing `valid()`, `get()`, `advance()` and `position()`.
 * @param input Input to read from.
 * @return Shared pointer to the parsed value.
 *
 * @throws std::runtime_error if the input is not valid JSON, or if an object
 * contains duplicate keys.
 */
template<class Provisioner, class Input>
std::shared_ptr<typename Provisioner::base> parse_thing(Input& input) {
    std::shared_ptr<typename Provisioner::base> output;

    size_t start = input.position() + 1;
    const char current = input.get();

    if (current == 't') {
        if (!is_expected_string(input, "true")) {
            throw std::runtime_error("expected a 'true' string at position " + std::to_string(start));
        }
        output.reset(Provisioner::new_boolean(true));

    } else if (current == 'f') {
        if (!is_expected_string(input, "false")) {
            throw std::runtime_error("expected a 'false' string at position " + std::to_string(start));
        }
        output.reset(Provisioner::new_boolean(false));

    } else if (current == 'n') {
        if (!is_expected_string(input, "null")) {
            throw std::runtime_error("expected a 'null' string at position " + std::to_string(start));
        }
        output.reset(Provisioner::new_nothing());

    } else if (current == '"') {
        output.reset(Provisioner::new_string(extract_string(input)));

    } else if (current == '[') {
        // Hand ownership to 'output' immediately so the array is released if parsing throws.
        auto ptr = Provisioner::new_array();
        output.reset(ptr);

        input.advance();
        chomp(input);
        if (!input.valid()) {
            throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
        }

        if (input.get() != ']') {
            while (1) {
                ptr->add(parse_thing<Provisioner>(input));

                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
                }

                char next = input.get();
                if (next == ']') {
                    break;
                } else if (next != ',') {
                    throw std::runtime_error("unknown character '" + std::string(1, next) + "' in array at position " + std::to_string(input.position() + 1));
                }

                input.advance();
                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated array starting at position " + std::to_string(start));
                }
            }
        }

        input.advance(); // skip the closing bracket.

    } else if (current == '{') {
        // Hand ownership to 'output' immediately so the object is released if parsing throws.
        auto ptr = Provisioner::new_object();
        output.reset(ptr);

        input.advance();
        chomp(input);
        if (!input.valid()) {
            throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
        }

        if (input.get() != '}') {
            while (1) {
                char next = input.get();
                if (next != '"') {
                    throw std::runtime_error("expected a string as the object key at position " + std::to_string(input.position() + 1));
                }
                auto key = extract_string(input);
                if (ptr->has(key)) {
                    throw std::runtime_error("detected duplicate keys in the object at position " + std::to_string(input.position() + 1));
                }

                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
                }
                if (input.get() != ':') {
                    throw std::runtime_error("expected ':' to separate keys and values at position " + std::to_string(input.position() + 1));
                }

                input.advance();
                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
                }
                ptr->add(std::move(key), parse_thing<Provisioner>(input)); // consuming the key here.

                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
                }

                next = input.get();
                if (next == '}') {
                    break;
                } else if (next != ',') {
                    throw std::runtime_error("unknown character '" + std::string(1, next) + "' in object at position " + std::to_string(input.position() + 1));
                }

                input.advance();
                chomp(input);
                if (!input.valid()) {
                    throw std::runtime_error("unterminated object starting at position " + std::to_string(start));
                }
            }
        }

        input.advance(); // skip the closing brace.

    } else if (current == '-') {
        // extract_number() only handles the absolute value, so consume the sign here.
        if (!input.advance()) {
            throw std::runtime_error("incomplete number starting at position " + std::to_string(start));
        }
        output.reset(Provisioner::new_number(-extract_number(input)));

    } else if (current >= '0' && current <= '9') {
        // Plain range check, as std::isdigit() is undefined for negative 'char' values.
        output.reset(Provisioner::new_number(extract_number(input)));

    } else {
        throw std::runtime_error(std::string("unknown type starting with '") + std::string(1, current) + "' at position " + std::to_string(start));
    }

    return output;
}
709
710template<class Provisioner, class Input>
711std::shared_ptr<typename Provisioner::base> parse_thing_with_chomp(Input& input) {
712 chomp(input);
713 auto output = parse_thing<Provisioner>(input);
714 chomp(input);
715 if (input.valid()) {
716 throw std::runtime_error("invalid json with trailing non-space characters at position " + std::to_string(input.position() + 1));
717 }
718 return output;
719}
735template<class Input>
736std::shared_ptr<Base> parse(Input& input) {
737 return parse_thing_with_chomp<DefaultProvisioner>(input);
738}
739
748template<class Input>
749Type validate(Input& input) {
750 auto ptr = parse_thing_with_chomp<FakeProvisioner>(input);
751 return ptr->type();
752}
753
/**
 * Reader over an in-memory character buffer.
 * The buffer is not copied, so it must outlive the reader.
 */
struct RawReader {
    /**
     * @param p Pointer to the first character of the buffer.
     * @param n Number of characters in the buffer.
     */
    RawReader(const char* p, size_t n) : ptr_(p), len_(n) {}

    size_t pos_ = 0;    // index of the current character.
    const char * ptr_;  // start of the buffer.
    size_t len_;        // length of the buffer.

    // Current character; only meaningful while valid() is true.
    char get() const {
        return *(ptr_ + pos_);
    }

    // Whether the current position is still inside the buffer.
    bool valid() const {
        return len_ > pos_;
    }

    // Move to the next character and report whether one is available.
    bool advance() {
        pos_ += 1;
        return pos_ < len_;
    }

    // Zero-based index of the current character.
    size_t position() const {
        return pos_;
    }
};
789inline std::shared_ptr<Base> parse_string(const char* ptr, size_t len) {
790 RawReader input(ptr, len);
791 return parse(input);
792}
793
801inline Type validate_string(const char* ptr, size_t len) {
802 RawReader input(ptr, len);
803 return validate(input);
804}
805
/**
 * Buffered reader over a file on disk.
 *
 * The file is read in chunks of at most `buffer.size()` bytes. The reader
 * owns the file handle and closes it on destruction, so it cannot be copied.
 */
struct FileReader {
    /**
     * @param p Path to the file.
     * @param b Size of the read buffer in bytes. A size of zero is treated as
     * one, as an empty buffer could never hold any data.
     *
     * @throws std::runtime_error if the file cannot be opened or read.
     */
    FileReader(const char* p, size_t b) : handle(nullptr), buffer(b ? b : 1) {
        // Open the file only after the buffer is allocated, so that a failed
        // allocation cannot leak an open handle.
        handle = std::fopen(p, "rb");
        if (!handle) {
            throw std::runtime_error("failed to open file at '" + std::string(p) + "'");
        }

        // The destructor does not run if the constructor throws, so close the
        // handle ourselves if the first read fails.
        try {
            fill();
        } catch (...) {
            std::fclose(handle);
            throw;
        }
    }

    ~FileReader() {
        std::fclose(handle);
    }

    // Copying would result in the same handle being closed twice.
    FileReader(const FileReader&) = delete;
    FileReader& operator=(const FileReader&) = delete;

    FILE* handle;             // open file, closed by the destructor.
    std::vector<char> buffer; // storage for the current chunk.
    size_t available = 0;     // number of bytes of 'buffer' that hold file data.
    size_t index = 0;         // position of the current character within 'buffer'.
    size_t overall = 0;       // number of bytes consumed in previous chunks.
    bool finished = false;    // whether the end of the file has been reached.

    // Current character; only meaningful while valid() is true.
    char get() const {
        return buffer[index];
    }

    // Whether a current character is available.
    bool valid() const {
        return index < available;
    }

    // Move to the next character, loading the next chunk from the file if the
    // current one is used up. Reports whether a character is available.
    bool advance() {
        ++index;
        if (index < available) {
            return true;
        }

        index = 0;
        overall += available;
        fill();
        return valid();
    }

    // Load the next chunk into the buffer; yields an empty chunk once the end
    // of the file has been seen.
    void fill() {
        if (finished) {
            available = 0;
            return;
        }

        available = std::fread(buffer.data(), sizeof(char), buffer.size(), handle);
        if (available == buffer.size()) {
            return;
        }

        // A short read is either the end of the file or a read error.
        if (std::feof(handle)) {
            finished = true;
        } else {
            throw std::runtime_error("failed to read file (error " + std::to_string(std::ferror(handle)) + ")");
        }
    }

    // Zero-based offset of the current character from the start of the file.
    size_t position() const {
        return overall + index;
    }
};
879inline std::shared_ptr<Base> parse_file(const char* path, size_t buffer_size = 65536) {
880 FileReader input(path, buffer_size);
881 return parse(input);
882}
883
891inline Type validate_file(const char* path, size_t buffer_size = 65536) {
892 FileReader input(path, buffer_size);
893 return validate(input);
894}
895
896}
897
898#endif
A lightweight header-only JSON parser.
std::shared_ptr< Base > parse(Input &input)
Definition millijson.hpp:736
Type validate(Input &input)
Definition millijson.hpp:749
std::shared_ptr< Base > parse_string(const char *ptr, size_t len)
Definition millijson.hpp:789
std::shared_ptr< Base > parse_file(const char *path, size_t buffer_size=65536)
Definition millijson.hpp:879
Type validate_file(const char *path, size_t buffer_size=65536)
Definition millijson.hpp:891
Type validate_string(const char *ptr, size_t len)
Definition millijson.hpp:801
Type
Definition millijson.hpp:30
JSON array.
Definition millijson.hpp:152
Type type() const
Definition millijson.hpp:153
void add(std::shared_ptr< Base > value)
Definition millijson.hpp:163
std::vector< std::shared_ptr< Base > > values
Definition millijson.hpp:158
Virtual base class for all JSON types.
Definition millijson.hpp:42
const std::string & get_string() const
const std::unordered_map< std::string, std::shared_ptr< Base > > & get_object() const
bool get_boolean() const
virtual Type type() const =0
const std::vector< std::shared_ptr< Base > > & get_array() const
double get_number() const
JSON boolean.
Definition millijson.hpp:125
bool value
Definition millijson.hpp:139
Type type() const
Definition millijson.hpp:134
JSON null.
Definition millijson.hpp:145
Type type() const
Definition millijson.hpp:146
JSON number.
Definition millijson.hpp:85
Type type() const
Definition millijson.hpp:94
double value
Definition millijson.hpp:99
JSON object.
Definition millijson.hpp:172
void add(std::string key, std::shared_ptr< Base > value)
Definition millijson.hpp:192
std::unordered_map< std::string, std::shared_ptr< Base > > values
Definition millijson.hpp:178
Type type() const
Definition millijson.hpp:173
bool has(const std::string &key) const
Definition millijson.hpp:184
JSON string.
Definition millijson.hpp:105
Type type() const
Definition millijson.hpp:114
std::string value
Definition millijson.hpp:119