45inline const std::vector<std::shared_ptr<millijson::Base> >& extract_array(
46 const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
47 const std::string& name,
48 const std::string& path)
50 auto vIt = properties.find(name);
51 if (vIt == properties.end()) {
52 throw std::runtime_error(
"expected '" + name +
"' property for object at '" + path +
"'");
55 const auto& values_ptr = vIt->second;
56 if (values_ptr->type() != millijson::ARRAY) {
57 throw std::runtime_error(
"expected an array in '" + path +
"." + name +
"'");
60 return static_cast<const millijson::Array*
>(values_ptr.get())->values;
63inline const millijson::Array* has_names(
const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
const std::string& path) {
64 auto nIt = properties.find(
"names");
65 if (nIt == properties.end()) {
69 const auto name_ptr = nIt->second;
70 if (name_ptr->type() != millijson::ARRAY) {
71 throw std::runtime_error(
"expected an array in '" + path +
".names'");
73 return static_cast<const millijson::Array*
>(name_ptr.get());
76template<
class Destination>
77void fill_names(
const millijson::Array* names_ptr, Destination* dest,
const std::string& path) {
78 const auto& names = names_ptr->values;
79 if (names.size() != dest->size()) {
80 throw std::runtime_error(
"length of 'names' and 'values' should be the same in '" + path +
"'");
83 for (
size_t i = 0; i < names.size(); ++i) {
84 if (names[i]->type() != millijson::STRING) {
85 throw std::runtime_error(
"expected a string at '" + path +
".names[" + std::to_string(i) +
"]'");
87 dest->set_name(i,
static_cast<const millijson::String*
>(names[i].get())->value);
91template<
class Function>
92auto process_array_or_scalar_values(
93 const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
94 const std::string& path,
97 auto vIt = properties.find(
"values");
98 if (vIt == properties.end()) {
99 throw std::runtime_error(
"expected 'values' property for object at '" + path +
"'");
102 auto names_ptr = has_names(properties, path);
103 bool has_names = names_ptr != NULL;
105 typename std::invoke_result<Function,std::vector<std::shared_ptr<millijson::Base> >,bool,
bool>::type out_ptr;
107 const auto& values_ptr = vIt->second;
108 if (values_ptr->type() == millijson::ARRAY) {
109 out_ptr = fun(
static_cast<const millijson::Array*
>(values_ptr.get())->values, has_names,
false);
111 std::vector<std::shared_ptr<millijson::Base> > temp { values_ptr };
112 out_ptr = fun(temp, has_names,
true);
116 fill_names(names_ptr, out_ptr, path);
121template<
class Destination,
class Function>
122void extract_integers(
const std::vector<std::shared_ptr<millijson::Base> >& values, Destination* dest, Function check,
const std::string& path,
const Version& version) {
123 for (
size_t i = 0; i < values.size(); ++i) {
124 if (values[i]->type() == millijson::NOTHING) {
125 dest->set_missing(i);
129 if (values[i]->type() != millijson::NUMBER) {
130 throw std::runtime_error(
"expected a number at '" + path +
".values[" + std::to_string(i) +
"]'");
133 auto val =
static_cast<const millijson::Number*
>(values[i].get())->value;
134 if (val != std::floor(val)) {
135 throw std::runtime_error(
"expected an integer at '" + path +
".values[" + std::to_string(i) +
"]'");
138 constexpr double upper = std::numeric_limits<int32_t>::max();
139 constexpr double lower = std::numeric_limits<int32_t>::min();
140 if (val < lower || val > upper) {
141 throw std::runtime_error(
"value at '" + path +
".values[" + std::to_string(i) +
"]' cannot be represented by a 32-bit signed integer");
145 if (version.equals(1, 0) && val == -2147483648) {
146 dest->set_missing(i);
155template<
class Destination,
class Function>
156void extract_strings(
const std::vector<std::shared_ptr<millijson::Base> >& values, Destination* dest, Function check,
const std::string& path) {
157 for (
size_t i = 0; i < values.size(); ++i) {
158 if (values[i]->type() == millijson::NOTHING) {
159 dest->set_missing(i);
163 if (values[i]->type() != millijson::STRING) {
164 throw std::runtime_error(
"expected a string at '" + path +
".values[" + std::to_string(i) +
"]'");
167 const auto& str =
static_cast<const millijson::String*
>(values[i].get())->value;
/**
 * Convert one JSON value into the object representation allocated through `Provisioner`.
 *
 * The value must be a JSON object carrying a string `type` property. The branches visible
 * here handle "nothing", "external", "integer", "factor", "boolean", "number", "string"
 * and "list", plus "ordered", "date" and "date-time" when `version.equals(1, 0)`.
 * Any other type string is reported as an error.
 *
 * @tparam Provisioner Supplies the static `new_*()` factory functions used for allocation.
 * @tparam Externals Provides `size()` and `get()` for resolving "external" references.
 * @param contents JSON value to convert.
 * @param ext Source of external objects.
 * @param path Location of `contents` in the document; used in error messages and extended
 * with ".values[i]" when recursing into list elements.
 * @param version Format version of the document; enables the version-1.0-only type names.
 * @return NOTE(review): presumably `output`; the return statement is not visible in this
 * listing, so confirm against the full file.
 * @throws std::runtime_error if the JSON does not have the expected layout.
 */
173template<
class Provisioner,
class Externals>
174std::shared_ptr<Base> parse_object(
const millijson::Base* contents, Externals& ext,
const std::string& path,
const Version& version) {
// Every R object must be encoded as a JSON object.
175 if (contents->type() != millijson::OBJECT) {
176 throw std::runtime_error(
"each R object should be represented by a JSON object at '" + path +
"'");
179 auto optr =
static_cast<const millijson::Object*
>(contents);
180 const auto& map = optr->values;
// The 'type' string selects which kind of object is built below.
182 auto tIt = map.find(
"type");
183 if (tIt == map.end()) {
184 throw std::runtime_error(
"missing 'type' property for JSON object at '" + path +
"'");
186 const auto& type_ptr = tIt->second;
187 if (type_ptr->type() != millijson::STRING) {
188 throw std::runtime_error(
"expected a string at '" + path +
".type'");
190 const auto& type =
static_cast<const millijson::String*
>(type_ptr.get())->value;
192 std::shared_ptr<Base> output;
193 if (type ==
"nothing") {
194 output.reset(Provisioner::new_Nothing());
196 }
else if (type ==
"external") {
// Externals are referenced through a numeric 'index' property.
197 auto iIt = map.find(
"index");
198 if (iIt == map.end()) {
199 throw std::runtime_error(
"expected 'index' property for 'external' type at '" + path +
"'");
201 const auto& index_ptr = iIt->second;
202 if (index_ptr->type() != millijson::NUMBER) {
203 throw std::runtime_error(
"expected a number at '" + path +
".index'");
205 auto index =
static_cast<const millijson::Number*
>(index_ptr.get())->value;
// The index must be integral and lie within [0, ext.size()).
207 if (index != std::floor(index)) {
208 throw std::runtime_error(
"expected an integer at '" + path +
".index'");
209 }
else if (index < 0 || index >=
static_cast<double>(ext.size())) {
210 throw std::runtime_error(
"external index out of range at '" + path +
".index'");
212 output.reset(Provisioner::new_External(ext.get(index)));
214 }
else if (type ==
"integer") {
// The lambda allocates the destination for the 'values' and fills it; no extra per-value check is needed.
215 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
216 auto ptr = Provisioner::new_Integer(vals.size(), named, scalar);
218 extract_integers(vals, ptr, [](int32_t) ->
void {}, path, version);
222 }
else if (type ==
"factor" || (version.equals(1, 0) && type ==
"ordered")) {
// Orderedness is taken from the "ordered" type name or from an optional boolean 'ordered' property.
223 bool ordered =
false;
224 if (type ==
"ordered") {
227 auto oIt = map.find(
"ordered");
228 if (oIt != map.end()) {
229 if (oIt->second->type() != millijson::BOOLEAN) {
230 throw std::runtime_error(
"expected a boolean at '" + path +
".ordered'");
232 auto optr =
static_cast<const millijson::Boolean*
>((oIt->second).get());
233 ordered = optr->value;
237 const std::string levels_name =
"levels";
238 const auto& lvals = extract_array(map, levels_name, path);
239 int32_t nlevels = lvals.size();
240 auto fptr = process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
241 auto ptr = Provisioner::new_Factor(vals.size(), named, scalar, nlevels, ordered);
// Each code must be a valid zero-based index into the levels.
243 extract_integers(vals, ptr, [&](int32_t x) ->
void {
244 if (x < 0 || x >= nlevels) {
245 throw std::runtime_error(
"factor indices of out of range of levels in '" + path +
"'");
// Levels must be strings, and duplicates are rejected.
251 std::unordered_set<std::string> existing;
252 for (
size_t l = 0; l < lvals.size(); ++l) {
253 if (lvals[l]->type() != millijson::STRING) {
254 throw std::runtime_error(
"expected strings at '" + path +
".levels[" + std::to_string(l) +
"]'");
257 const auto& level =
static_cast<const millijson::String*
>(lvals[l].get())->value;
258 if (existing.find(level) != existing.end()) {
259 throw std::runtime_error(
"detected duplicate string at '" + path +
".levels[" + std::to_string(l) +
"]'");
261 fptr->set_level(l, level);
262 existing.insert(level);
265 }
else if (type ==
"boolean") {
266 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
267 auto ptr = Provisioner::new_Boolean(vals.size(), named, scalar);
// JSON nulls are tested for first; any other entry must be a boolean.
270 for (
size_t i = 0; i < vals.size(); ++i) {
271 if (vals[i]->type() == millijson::NOTHING) {
276 if (vals[i]->type() != millijson::BOOLEAN) {
277 throw std::runtime_error(
"expected a boolean at '" + path +
".values[" + std::to_string(i) +
"]'");
279 ptr->set(i,
static_cast<const millijson::Boolean*
>(vals[i].get())->value);
285 }
else if (type ==
"number") {
286 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
287 auto ptr = Provisioner::new_Number(vals.size(), named, scalar);
// Entries may be JSON numbers, or strings that spell out non-finite values.
290 for (
size_t i = 0; i < vals.size(); ++i) {
291 if (vals[i]->type() == millijson::NOTHING) {
296 if (vals[i]->type() == millijson::NUMBER) {
297 ptr->set(i,
static_cast<const millijson::Number*
>(vals[i].get())->value);
298 }
else if (vals[i]->type() == millijson::STRING) {
299 auto str =
static_cast<const millijson::String*
>(vals[i].get())->value;
// NOTE(review): the condition guarding the NaN assignment is not visible in this listing.
301 ptr->set(i, std::numeric_limits<double>::quiet_NaN());
302 }
else if (str ==
"Inf") {
303 ptr->set(i, std::numeric_limits<double>::infinity());
304 }
else if (str ==
"-Inf") {
305 ptr->set(i, -std::numeric_limits<double>::infinity());
307 throw std::runtime_error(
"unsupported string '" + str +
"' at '" + path +
".values[" + std::to_string(i) +
"]'");
310 throw std::runtime_error(
"expected a number at '" + path +
".values[" + std::to_string(i) +
"]'");
317 }
else if (type ==
"string" || (version.equals(1, 0) && (type ==
"date" || type ==
"date-time"))) {
// Under version 1.0 the format is derived from the type name.
319 if (version.equals(1, 0)) {
320 if (type ==
"date") {
321 format = StringVector::DATE;
322 }
else if (type ==
"date-time") {
323 format = StringVector::DATETIME;
// An optional 'format' property may request "date" or "date-time"; other values are rejected.
326 auto fIt = map.find(
"format");
327 if (fIt != map.end()) {
328 if (fIt->second->type() != millijson::STRING) {
329 throw std::runtime_error(
"expected a string at '" + path +
".format'");
331 auto fptr =
static_cast<const millijson::String*
>(fIt->second.get());
332 if (fptr->value ==
"date") {
333 format = StringVector::DATE;
334 }
else if (fptr->value ==
"date-time") {
335 format = StringVector::DATETIME;
337 throw std::runtime_error(
"unsupported format '" + fptr->value +
"' at '" + path +
".format'");
342 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
343 auto ptr = Provisioner::new_String(vals.size(), named, scalar, format);
// Choose the per-string validator that matches the format.
346 if (format == StringVector::NONE) {
347 extract_strings(vals, ptr, [](
const std::string&) ->
void {}, path);
348 }
else if (format == StringVector::DATE) {
349 extract_strings(vals, ptr, [&](
const std::string& x) ->
void {
350 if (!ritsuko::is_date(x.c_str(), x.size())) {
351 throw std::runtime_error(
"dates should follow YYYY-MM-DD formatting in '" + path +
".values'");
354 }
else if (format == StringVector::DATETIME) {
355 extract_strings(vals, ptr, [&](
const std::string& x) ->
void {
356 if (!ritsuko::is_rfc3339(x.c_str(), x.size())) {
357 throw std::runtime_error(
"date-times should follow the Internet Date/Time format in '" + path +
".values'");
365 }
else if (type ==
"list") {
366 auto names_ptr = has_names(map, path);
367 bool has_names = names_ptr != NULL;
369 const std::string values_name =
"values";
370 const auto& vals = extract_array(map, values_name, path);
371 auto ptr = Provisioner::new_List(vals.size(), has_names);
// Each list element is parsed recursively, with its position appended to the path.
374 for (
size_t i = 0; i < vals.size(); ++i) {
375 ptr->set(i, parse_object<Provisioner>(vals[i].get(), ext, path +
".values[" + std::to_string(i) +
"]", version));
379 fill_names(names_ptr, ptr, path);
// No branch matched the type string.
383 throw std::runtime_error(
"unknown object type '" + type +
"' at '" + path +
".type'");
425template<
class Provisioner,
class Externals>
427 std::shared_ptr<millijson::Base> contents;
428 if (options.parallel) {
429 byteme::PerByte bytestream(&reader);
430 contents = millijson::parse(bytestream);
432 byteme::PerByteParallel bytestream(&reader);
433 contents = millijson::parse(bytestream);
437 if (contents->type() == millijson::OBJECT) {
438 auto optr =
static_cast<const millijson::Object*
>(contents.get());
439 const auto& map = optr->values;
440 auto vIt = map.find(
"version");
441 if (vIt != map.end()) {
442 if (vIt->second->type() != millijson::STRING) {
443 throw std::runtime_error(
"expected a string in 'version'");
445 const auto& vstr =
static_cast<const millijson::String*
>(vIt->second.get())->value;
446 auto vraw = ritsuko::parse_version_string(vstr.c_str(), vstr.size(),
true);
447 version.major = vraw.major;
448 version.minor = vraw.minor;
452 ExternalTracker etrack(std::move(ext));
453 auto output = parse_object<Provisioner>(contents.get(), etrack,
"", version);
455 if (options.strict_list && output->type() != LIST) {
456 throw std::runtime_error(
"top-level object should represent an R list");
460 return ParsedList(std::move(output), std::move(version));
477template<
class Provisioner>
479 DummyExternals ext(0);
500template<
class Provisioner,
class Externals>
502 byteme::SomeFileReader reader(file.c_str());
520template<
class Provisioner>
522 DummyExternals ext(0);
544template<
class Provisioner,
class Externals>
546 byteme::SomeBufferReader reader(buffer, len);
565template<
class Provisioner>
567 DummyExternals ext(0);
580 DummyExternals ext(num_external);
594 DummyExternals ext(num_external);
609 DummyExternals ext(num_external);