|
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
std::pair< bool, Type_ > | choose_missing_integer_placeholder (Iterator start, Iterator end, Mask mask) |
|
template<class Iterator , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
std::pair< bool, Type_ > | choose_missing_integer_placeholder (Iterator start, Iterator end) |
|
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
std::pair< bool, Type_ > | choose_missing_float_placeholder (Iterator start, Iterator end, Mask mask, bool skip_nan) |
|
template<class Iterator , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
std::pair< bool, Type_ > | choose_missing_float_placeholder (Iterator start, Iterator end, bool skip_nan=false) |
|
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
IntegerExtremes | find_integer_extremes (Iterator start, Iterator end, Mask mask) |
|
template<class Iterator , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
IntegerExtremes | find_integer_extremes (Iterator start, Iterator end) |
|
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
FloatExtremes | find_float_extremes (Iterator start, Iterator end, Mask mask, bool skip_nan) |
|
template<class Iterator , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type > |
FloatExtremes | find_float_extremes (Iterator start, Iterator end, bool skip_nan=false) |
|
bool | is_date_prefix (const char *ptr) |
|
bool | is_date (const char *ptr, size_t len) |
|
bool | is_rfc3339_suffix (const char *ptr, size_t len) |
|
bool | is_rfc3339 (const char *ptr, size_t len) |
|
Version | parse_version_string (const char *version_string, size_t size, bool skip_patch=false) |
|
double | r_missing_value () |
|
template<typename Float_ > |
bool | are_floats_identical (const Float_ *x, const Float_ *y) |
|
Assorted helper functions for parsing and validation.
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type >
std::pair< bool, Type_ > ritsuko::choose_missing_float_placeholder (Iterator start, Iterator end, Mask mask, bool skip_nan)
Choose an appropriate placeholder for missing values in a floating-point dataset, after ignoring all masked values. This will try the various IEEE special values (NaN, Inf, -Inf) and then some type-specific boundaries (the minimum, the maximum, and for signed types, 0) before sorting the dataset and searching for an unused float.
- Template Parameters
-
Iterator | Forward iterator for floating-point values. |
Mask | Random access iterator for mask values. |
Type_ | Float type pointed to by Iterator. |
- Parameters
-
start | Start of the dataset. |
end | End of the dataset. |
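mask | Start of the mask vector. This should have the same length as end - start ; each entry is true if the corresponding value of the float dataset is masked, and false otherwise. |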
skip_nan | Whether to skip NaN as a potential placeholder. Useful in frameworks like R that need special consideration of NaN payloads. |
- Returns
- Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true.
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type >
std::pair< bool, Type_ > ritsuko::choose_missing_integer_placeholder (Iterator start, Iterator end, Mask mask)
Choose an appropriate placeholder for missing values in an integer dataset, after ignoring all the masked values. This will try the various special values (the minimum, the maximum, and for signed types, 0) before sorting the dataset and searching for an unused integer value.
- Template Parameters
-
Iterator | Forward iterator for integer values. |
Mask | Random access iterator for mask values. |
Type_ | Integer type pointed to by Iterator. |
- Parameters
-
start | Start of the dataset. |
end | End of the dataset. |
mask | Start of the mask vector. This should have the same length as end - start ; each entry is true if the corresponding value of the integer dataset is masked, and false otherwise. |
- Returns
- Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true.
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type >
FloatExtremes ritsuko::find_float_extremes (Iterator start, Iterator end, Mask mask, bool skip_nan)
Check for the presence of extreme values in a floating-point dataset. This can be used to choose a missing placeholder value in an online fashion, by calling this function on blocks of the dataset; if any of the extreme values are absent from all blocks, they can be used as the missing value placeholder. By contrast, choose_missing_float_placeholder() requires access to the full dataset.
- Template Parameters
-
Iterator | Forward iterator for float values. |
Mask | Random access iterator for mask values. |
Type_ | Float type pointed to by Iterator. |
- Parameters
-
start | Start of the dataset. |
end | End of the dataset. |
mask | Start of the mask vector. This should have the same length as end - start ; each entry is true if the corresponding value of the float dataset is masked, and false otherwise. |
skip_nan | Whether to skip searches for NaN. Useful in frameworks like R that need special consideration of NaN payloads. |
- Returns
- Whether extreme values are present in [start, end). If skip_nan = true, FloatExtremes::has_nan is set to false and should be ignored. If Type_ is not an IEEE754-compliant float, users should ignore FloatExtremes::has_nan, FloatExtremes::has_negative_inf and FloatExtremes::has_positive_inf.
template<class Iterator , class Mask , class Type_ = typename std::remove_cv<typename std::remove_reference<decltype(*(std::declval<Iterator>()))>::type>::type >
IntegerExtremes ritsuko::find_integer_extremes (Iterator start, Iterator end, Mask mask)
Check for the presence of extreme values in an integer dataset. This can be used to choose a missing placeholder value in an online fashion, by calling this function on blocks of the dataset; if any of the extreme values are absent from all blocks, they can be used as the missing value placeholder. By contrast, choose_missing_integer_placeholder() requires access to the full dataset.
- Template Parameters
-
Iterator | Forward iterator for integer values. |
Mask | Random access iterator for mask values. |
Type_ | Integer type pointed to by Iterator. |
- Parameters
-
start | Start of the dataset. |
end | End of the dataset. |
mask | Start of the mask vector. This should have the same length as end - start ; each entry is true if the corresponding value of the integer dataset is masked, and false otherwise. |
- Returns
- Whether extreme values are present in [start, end). If Type_ is unsigned, IntegerExtremes::has_lowest and IntegerExtremes::has_zero are the same.