43inline const std::vector<std::shared_ptr<millijson::Base> >& extract_array(
44 const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
45 const std::string& name,
46 const std::string& path)
48 auto vIt = properties.find(name);
49 if (vIt == properties.end()) {
50 throw std::runtime_error(
"expected '" + name +
"' property for object at '" + path +
"'");
53 const auto& values_ptr = vIt->second;
54 if (values_ptr->type() != millijson::ARRAY) {
55 throw std::runtime_error(
"expected an array in '" + path +
"." + name +
"'");
58 return static_cast<const millijson::Array*
>(values_ptr.get())->value();
61inline const millijson::Array* has_names(
const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
const std::string& path) {
62 auto nIt = properties.find(
"names");
63 if (nIt == properties.end()) {
67 const auto name_ptr = nIt->second;
68 if (name_ptr->type() != millijson::ARRAY) {
69 throw std::runtime_error(
"expected an array in '" + path +
".names'");
71 return static_cast<const millijson::Array*
>(name_ptr.get());
74template<
class Destination_>
75void fill_names(
const millijson::Array* names_ptr, Destination_* dest,
const std::string& path) {
76 const auto& names = names_ptr->value();
77 if (names.size() != dest->size()) {
78 throw std::runtime_error(
"length of 'names' and 'values' should be the same in '" + path +
"'");
81 for (
size_t i = 0; i < names.size(); ++i) {
82 if (names[i]->type() != millijson::STRING) {
83 throw std::runtime_error(
"expected a string at '" + path +
".names[" + std::to_string(i) +
"]'");
85 dest->set_name(i,
static_cast<const millijson::String*
>(names[i].get())->value());
89template<
class Function_>
90auto process_array_or_scalar_values(
91 const std::unordered_map<std::string, std::shared_ptr<millijson::Base> >& properties,
92 const std::string& path,
95 auto vIt = properties.find(
"values");
96 if (vIt == properties.end()) {
97 throw std::runtime_error(
"expected 'values' property for object at '" + path +
"'");
100 auto names_ptr = has_names(properties, path);
101 bool has_names = names_ptr != NULL;
103 typename std::invoke_result<Function_,std::vector<std::shared_ptr<millijson::Base> >,bool,
bool>::type out_ptr;
105 const auto& values_ptr = vIt->second;
106 if (values_ptr->type() == millijson::ARRAY) {
107 out_ptr = fun(
static_cast<const millijson::Array*
>(values_ptr.get())->value(), has_names,
false);
109 std::vector<std::shared_ptr<millijson::Base> > temp { values_ptr };
110 out_ptr = fun(temp, has_names,
true);
114 fill_names(names_ptr, out_ptr, path);
119template<
class Destination_,
class Function_>
120void extract_integers(
const std::vector<std::shared_ptr<millijson::Base> >& values, Destination_* dest, Function_ check,
const std::string& path,
const Version& version) {
121 for (
size_t i = 0; i < values.size(); ++i) {
122 if (values[i]->type() == millijson::NOTHING) {
123 dest->set_missing(i);
127 if (values[i]->type() != millijson::NUMBER) {
128 throw std::runtime_error(
"expected a number at '" + path +
".values[" + std::to_string(i) +
"]'");
131 auto val =
static_cast<const millijson::Number*
>(values[i].get())->value();
132 if (val != std::floor(val)) {
133 throw std::runtime_error(
"expected an integer at '" + path +
".values[" + std::to_string(i) +
"]'");
136 constexpr double upper = std::numeric_limits<int32_t>::max();
137 constexpr double lower = std::numeric_limits<int32_t>::min();
138 if (val < lower || val > upper) {
139 throw std::runtime_error(
"value at '" + path +
".values[" + std::to_string(i) +
"]' cannot be represented by a 32-bit signed integer");
143 if (version.equals(1, 0) && val == -2147483648) {
144 dest->set_missing(i);
153template<
class Destination_,
class Function_>
154void extract_strings(
const std::vector<std::shared_ptr<millijson::Base> >& values, Destination_* dest, Function_ check,
const std::string& path) {
155 for (
size_t i = 0; i < values.size(); ++i) {
156 if (values[i]->type() == millijson::NOTHING) {
157 dest->set_missing(i);
161 if (values[i]->type() != millijson::STRING) {
162 throw std::runtime_error(
"expected a string at '" + path +
".values[" + std::to_string(i) +
"]'");
165 const auto& str =
static_cast<const millijson::String*
>(values[i].get())->value();
171template<
class Provisioner_,
class Externals_>
172std::shared_ptr<Base> parse_object(
const millijson::Base* contents, Externals_& ext,
const std::string& path,
const Version& version) {
173 if (contents->type() != millijson::OBJECT) {
174 throw std::runtime_error(
"each R object should be represented by a JSON object at '" + path +
"'");
176 const auto& map =
static_cast<const millijson::Object*
>(contents)->value();
178 auto tIt = map.find(
"type");
179 if (tIt == map.end()) {
180 throw std::runtime_error(
"missing 'type' property for JSON object at '" + path +
"'");
182 const auto& type_ptr = tIt->second;
183 if (type_ptr->type() != millijson::STRING) {
184 throw std::runtime_error(
"expected a string at '" + path +
".type'");
186 const auto& type =
static_cast<const millijson::String*
>(type_ptr.get())->value();
188 std::shared_ptr<Base> output;
189 if (type ==
"nothing") {
190 output.reset(Provisioner_::new_Nothing());
192 }
else if (type ==
"external") {
193 auto iIt = map.find(
"index");
194 if (iIt == map.end()) {
195 throw std::runtime_error(
"expected 'index' property for 'external' type at '" + path +
"'");
197 const auto& index_ptr = iIt->second;
198 if (index_ptr->type() != millijson::NUMBER) {
199 throw std::runtime_error(
"expected a number at '" + path +
".index'");
201 auto index =
static_cast<const millijson::Number*
>(index_ptr.get())->value();
203 if (index != std::floor(index)) {
204 throw std::runtime_error(
"expected an integer at '" + path +
".index'");
205 }
else if (index < 0 || index >=
static_cast<double>(ext.size())) {
206 throw std::runtime_error(
"external index out of range at '" + path +
".index'");
208 output.reset(Provisioner_::new_External(ext.get(index)));
210 }
else if (type ==
"integer") {
211 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
212 auto ptr = Provisioner_::new_Integer(vals.size(), named, scalar);
214 extract_integers(vals, ptr, [](int32_t) ->
void {}, path, version);
218 }
else if (type ==
"factor" || (version.equals(1, 0) && type ==
"ordered")) {
219 bool ordered =
false;
220 if (type ==
"ordered") {
223 auto oIt = map.find(
"ordered");
224 if (oIt != map.end()) {
225 if (oIt->second->type() != millijson::BOOLEAN) {
226 throw std::runtime_error(
"expected a boolean at '" + path +
".ordered'");
228 ordered =
static_cast<const millijson::Boolean*
>((oIt->second).get())->value();
232 const std::string levels_name =
"levels";
233 const auto& lvals = extract_array(map, levels_name, path);
234 int32_t nlevels = lvals.size();
235 auto fptr = process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
236 auto ptr = Provisioner_::new_Factor(vals.size(), named, scalar, nlevels, ordered);
238 extract_integers(vals, ptr, [&](int32_t x) ->
void {
239 if (x < 0 || x >= nlevels) {
240 throw std::runtime_error(
"factor indices of out of range of levels in '" + path +
"'");
246 std::unordered_set<std::string> existing;
247 for (
size_t l = 0; l < lvals.size(); ++l) {
248 if (lvals[l]->type() != millijson::STRING) {
249 throw std::runtime_error(
"expected strings at '" + path +
".levels[" + std::to_string(l) +
"]'");
252 const auto& level =
static_cast<const millijson::String*
>(lvals[l].get())->value();
253 if (existing.find(level) != existing.end()) {
254 throw std::runtime_error(
"detected duplicate string at '" + path +
".levels[" + std::to_string(l) +
"]'");
256 fptr->set_level(l, level);
257 existing.insert(level);
260 }
else if (type ==
"boolean") {
261 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
262 auto ptr = Provisioner_::new_Boolean(vals.size(), named, scalar);
265 for (
size_t i = 0; i < vals.size(); ++i) {
266 if (vals[i]->type() == millijson::NOTHING) {
271 if (vals[i]->type() != millijson::BOOLEAN) {
272 throw std::runtime_error(
"expected a boolean at '" + path +
".values[" + std::to_string(i) +
"]'");
274 ptr->set(i,
static_cast<const millijson::Boolean*
>(vals[i].get())->value());
280 }
else if (type ==
"number") {
281 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
282 auto ptr = Provisioner_::new_Number(vals.size(), named, scalar);
285 for (
size_t i = 0; i < vals.size(); ++i) {
286 if (vals[i]->type() == millijson::NOTHING) {
291 if (vals[i]->type() == millijson::NUMBER) {
292 ptr->set(i,
static_cast<const millijson::Number*
>(vals[i].get())->value());
293 }
else if (vals[i]->type() == millijson::STRING) {
294 auto str =
static_cast<const millijson::String*
>(vals[i].get())->value();
296 ptr->set(i, std::numeric_limits<double>::quiet_NaN());
297 }
else if (str ==
"Inf") {
298 ptr->set(i, std::numeric_limits<double>::infinity());
299 }
else if (str ==
"-Inf") {
300 ptr->set(i, -std::numeric_limits<double>::infinity());
302 throw std::runtime_error(
"unsupported string '" + str +
"' at '" + path +
".values[" + std::to_string(i) +
"]'");
305 throw std::runtime_error(
"expected a number at '" + path +
".values[" + std::to_string(i) +
"]'");
312 }
else if (type ==
"string" || (version.equals(1, 0) && (type ==
"date" || type ==
"date-time"))) {
314 if (version.equals(1, 0)) {
315 if (type ==
"date") {
316 format = StringVector::DATE;
317 }
else if (type ==
"date-time") {
318 format = StringVector::DATETIME;
321 auto fIt = map.find(
"format");
322 if (fIt != map.end()) {
323 if (fIt->second->type() != millijson::STRING) {
324 throw std::runtime_error(
"expected a string at '" + path +
".format'");
326 auto fptr =
static_cast<const millijson::String*
>(fIt->second.get());
327 if (fptr->value() ==
"date") {
328 format = StringVector::DATE;
329 }
else if (fptr->value() ==
"date-time") {
330 format = StringVector::DATETIME;
332 throw std::runtime_error(
"unsupported format '" + fptr->value() +
"' at '" + path +
".format'");
337 process_array_or_scalar_values(map, path, [&](
const auto& vals,
bool named,
bool scalar) ->
auto {
338 auto ptr = Provisioner_::new_String(vals.size(), named, scalar, format);
341 if (format == StringVector::NONE) {
342 extract_strings(vals, ptr, [](
const std::string&) ->
void {}, path);
343 }
else if (format == StringVector::DATE) {
344 extract_strings(vals, ptr, [&](
const std::string& x) ->
void {
345 if (!ritsuko::is_date(x.c_str(), x.size())) {
346 throw std::runtime_error(
"dates should follow YYYY-MM-DD formatting in '" + path +
".values'");
349 }
else if (format == StringVector::DATETIME) {
350 extract_strings(vals, ptr, [&](
const std::string& x) ->
void {
351 if (!ritsuko::is_rfc3339(x.c_str(), x.size())) {
352 throw std::runtime_error(
"date-times should follow the Internet Date/Time format in '" + path +
".values'");
360 }
else if (type ==
"list") {
361 auto names_ptr = has_names(map, path);
362 bool has_names = names_ptr != NULL;
364 const std::string values_name =
"values";
365 const auto& vals = extract_array(map, values_name, path);
366 auto ptr = Provisioner_::new_List(vals.size(), has_names);
369 for (
size_t i = 0; i < vals.size(); ++i) {
370 ptr->set(i, parse_object<Provisioner_>(vals[i].get(), ext, path +
".values[" + std::to_string(i) +
"]", version));
374 fill_names(names_ptr, ptr, path);
378 throw std::runtime_error(
"unknown object type '" + type +
"' at '" + path +
".type'");
// NOTE(review): the declaration line of this function template (its name, return type and
// parameter list) is not visible in this copy, and neither are several statements and the
// closing braces. The comments below describe only the statements that are visible.
// The body uses `ext`, `options`, and `version`, whose declarations are not shown.
426template<
class Provisioner_,
class Externals_>
// Byte-wise source for the JSON parser; how it gets initialised is not visible here.
428 std::unique_ptr<byteme::PerByteInterface<char> > pb;
// Parse the entire input into a millijson value.
434 auto contents = millijson::parse(*pb);
// If the top-level value is a JSON object, look for an optional "version" property.
437 if (contents->type() == millijson::OBJECT) {
438 const auto& map =
static_cast<const millijson::Object*
>(contents.get())->value();
439 auto vIt = map.find(
"version");
440 if (vIt != map.end()) {
// When present, "version" must be a string.
441 if (vIt->second->type() != millijson::STRING) {
442 throw std::runtime_error(
"expected a string in 'version'");
444 const auto& vstr =
static_cast<const millijson::String*
>(vIt->second.get())->value();
// Parse the version string and copy its major/minor parts into `version`.
// NOTE(review): the meaning of the trailing `true` argument is not visible here -
// confirm against the ritsuko documentation.
445 auto vraw = ritsuko::parse_version_string(vstr.c_str(), vstr.size(),
true);
446 version.major = vraw.major;
447 version.minor = vraw.minor;
// Wrap the caller's externals (moved in) in a tracker that parse_object() consumes.
451 ExternalTracker etrack(std::move(ext));
// Convert the top-level value; the empty string is the path of the root.
452 auto output = parse_object<Provisioner_>(contents.get(), etrack,
"", version);
// With `options.strict_list` set, anything other than a list at the top level is rejected.
454 if (options.
strict_list && output->type() != LIST) {
455 throw std::runtime_error(
"top-level object should represent an R list");
// Hand the converted object and the detected version back to the caller.
459 return ParsedList(std::move(output), std::move(version));
479template<
class Provisioner_,
class Externals_>
482 byteme::SomeFileReaderOptions sopt;
483 sopt.buffer_size = options.buffer_size;
507template<
class Provisioner_,
class Externals_>