fix and update (#677)

This commit is contained in:
qicosmos 2024-05-15 17:48:22 +08:00 committed by GitHub
parent 9968b5b2e9
commit 6e684c01ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 668 additions and 127 deletions

View File

@ -3,27 +3,48 @@
#include "dragonbox_to_chars.h"
#include "fast_float.h"
#include "iguana/define.h"
#include "itoa.hpp"
namespace iguana {
template <typename T>
struct is_char_type
: std::disjunction<std::is_same<T, char>, std::is_same<T, unsigned char>,
std::is_same<T, signed char>, std::is_same<T, wchar_t>,
: std::disjunction<std::is_same<T, char>, std::is_same<T, wchar_t>,
std::is_same<T, char16_t>, std::is_same<T, char32_t>> {};
// Catch-all overload of to_chars_float. Its void* return type differs from
// char*, which is what the detection alias below keys on. It must never be
// executed: every call throws std::runtime_error.
inline void *to_chars_float(...) {
  throw std::runtime_error("not allowed to invoke");
  return nullptr;  // unreachable; keeps the function well-formed
}
// Detection alias: std::true_type when the unqualified call
// to_chars_float(T, char *) has return type exactly char *, std::false_type
// for any other return type.
template <typename T, typename Ret = decltype(to_chars_float(
                          std::declval<T>(), std::declval<char *>()))>
using return_of_tochars = std::bool_constant<std::is_same_v<Ret, char *>>;
// std::true_type is only a placeholder argument here; any type would do.
using has_to_chars_float = iguana::return_of_tochars<std::true_type>;
namespace detail {
template <typename U>
// check_number==true: check if the string [first, last) is a legal number
template <bool check_number = true, typename U>
std::pair<const char *, std::errc> from_chars(const char *first,
const char *last,
U &value) noexcept {
const char *last, U &value) {
using T = std::decay_t<U>;
if constexpr (std::is_floating_point_v<T>) {
auto [p, ec] = fast_float::from_chars(first, last, value);
if constexpr (check_number) {
if (p != last || ec != std::errc{})
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
}
return {p, ec};
}
else {
auto [p, ec] = std::from_chars(first, last, value);
if constexpr (check_number) {
if (p != last || ec != std::errc{})
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
}
return {p, ec};
}
}
@ -33,7 +54,12 @@ template <typename T>
char *to_chars(char *buffer, T value) noexcept {
using U = std::decay_t<T>;
if constexpr (std::is_floating_point_v<U>) {
return jkj::dragonbox::to_chars(value, buffer);
if constexpr (has_to_chars_float::value) {
return static_cast<char *>(to_chars_float(value, buffer));
}
else {
return jkj::dragonbox::to_chars(value, buffer);
}
}
else if constexpr (std::is_signed_v<U> && (sizeof(U) >= 8)) {
return xtoa(value, buffer, 10, 1); // int64_t
@ -41,9 +67,13 @@ char *to_chars(char *buffer, T value) noexcept {
else if constexpr (std::is_unsigned_v<U> && (sizeof(U) >= 8)) {
return xtoa(value, buffer, 10, 0); // uint64_t
}
else if constexpr (std::is_integral_v<U> && !is_char_type<U>::value) {
else if constexpr (std::is_integral_v<U> && (sizeof(U) > 1)) {
return itoa_fwd(value, buffer); // only support more than 2 bytes intergal
}
else if constexpr (!is_char_type<U>::value) {
return itoa_fwd(static_cast<int>(value),
buffer); // only support more than 2 bytes intergal
}
else {
static_assert(!sizeof(U), "only support arithmetic type except char type");
}

View File

@ -67,6 +67,15 @@ template <typename T, typename... Us>
struct has_type<T, std::tuple<Us...>>
: std::disjunction<std::is_same<T, Us>...> {};
// Primary template: empty for anything that is not a pointer to member.
template <typename MemberPtr>
struct member_tratis {};

// Pointer-to-member specialization: exposes the member's type as value_type
// and the class it belongs to as owner_type.
template <typename Value, typename Class>
struct member_tratis<Value Class::*> {
  using value_type = Value;
  using owner_type = Class;
};
// True only when T is exactly int64_t or uint64_t (cv-qualified or reference
// types do not match).
template <typename T>
inline constexpr bool is_int64_v =
    std::disjunction_v<std::is_same<T, int64_t>, std::is_same<T, uint64_t>>;

View File

@ -68,8 +68,8 @@ IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) {
if (size == 0)
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
const auto start = &*it;
auto [p, ec] = detail::from_chars(start, start + size, value);
if (ec != std::errc{})
auto [p, ec] = detail::from_chars<false>(start, start + size, value);
if (ec != std::errc{} || !can_follow_number(*p))
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
it += (p - &*it);
}
@ -82,9 +82,7 @@ IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) {
buffer[i] = *it++;
++i;
}
auto [p, ec] = detail::from_chars(buffer, buffer + i, value);
if (ec != std::errc{})
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
detail::from_chars(buffer, buffer + i, value);
}
}
@ -499,6 +497,44 @@ IGUANA_INLINE void skip_object_value(It &&it, It &&end) {
}
}
// Attempts to parse [it, end) as `value_type` and store the result in `value`.
//
// On success the parsed object is moved into `value`, the local iterator pair
// (after from_json_impl has consumed its input) is written to `temp_it` /
// `temp_end`, and true is returned.
// If anything derived from std::exception is thrown, false is returned; when
// the throw comes from from_json_impl, `value`, `temp_it` and `temp_end` are
// left untouched because only the local `val` was being written.
template <typename value_type, typename U, typename It>
IGUANA_INLINE bool from_json_variant_impl(U &value, It it, It end, It &temp_it,
                                          It &temp_end) {
  try {
    value_type val;
    from_json_impl(val, it, end);
    // `val` is not used afterwards, so hand it over instead of copying.
    value = std::move(val);
    temp_it = it;
    temp_end = end;
    return true;
  } catch (const std::exception &) {
    // A failed alternative is not an error here; the caller decides.
    return false;
  }
}
// Parses a std::variant by trying its alternatives in declaration order
// (one per index in Idx...) and keeping the first one that parses.
//
// On success `it` / `end` are replaced by the iterator pair reported by the
// winning attempt. Throws std::runtime_error when no alternative can parse
// the input, instead of returning with `value` and `it` silently unchanged.
template <typename U, typename It, size_t... Idx>
IGUANA_INLINE void from_json_variant(U &value, It &it, It &end,
                                     std::index_sequence<Idx...>) {
  using variant_type = std::remove_reference_t<U>;
  static_assert(!has_duplicate_type_v<variant_type>,
                "don't allow same type in std::variant");
  It temp_it = it;
  It temp_end = end;
  // Fold over || short-circuits: later alternatives are not attempted once
  // one attempt has returned true.
  const bool parsed =
      (from_json_variant_impl<variant_element_t<Idx, variant_type>>(
           value, it, end, temp_it, temp_end) ||
       ...);
  if (!parsed)
    IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse variant"); }
  it = temp_it;
  end = temp_end;
}
// from_json_impl overload selected when U is a std::variant: forwards to
// from_json_variant with one index per alternative.
template <typename U, typename It, std::enable_if_t<variant_v<U>, int> = 0>
IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) {
  using variant_type = std::remove_reference_t<U>;
  constexpr auto alternative_count = std::variant_size_v<variant_type>;
  from_json_variant(value, it, end,
                    std::make_index_sequence<alternative_count>{});
}
} // namespace detail
template <typename T, typename It, std::enable_if_t<refletable_v<T>, int>>
@ -608,6 +644,31 @@ IGUANA_INLINE void from_json(T &value, const View &view) {
from_json(value, std::begin(view), std::end(view));
}
// Parses a single field of a reflected object straight out of JSON text.
//
// `member` is the pointer to the field to fill; `value` is the object that
// receives it; `str` is the JSON text to search.
// The field is located textually: first occurrence of the field name, then
// the next ':', then everything up to the next ',' (or up to and including
// the next '}' for the last reflected field).
// NOTE(review): because the cut is textual, a value that itself contains ','
// or '}' (strings, nested objects/arrays) looks like it gets truncated —
// confirm intended scope.
//
// Throws std::runtime_error when the field name or the ':' after it cannot be
// found, rather than slicing `str` at a position derived from npos.
template <
    auto member,
    typename Parant = typename member_tratis<decltype(member)>::owner_type,
    typename T>
IGUANA_INLINE void from_json(T &value, std::string_view str) {
  constexpr size_t duplicate_count =
      iguana::duplicate_count<std::remove_reference_t<Parant>, member>();
  static_assert(duplicate_count != 1, "the member is not belong to the object");
  static_assert(duplicate_count == 2, "has duplicate field name");
  constexpr auto name = name_of<member>();
  constexpr size_t index = index_of<member>();
  constexpr size_t member_count = member_count_of<member>();

  const size_t name_pos = str.find(name);
  if (name_pos == std::string_view::npos)
    IGUANA_UNLIKELY { throw std::runtime_error("Failed to find field name"); }
  str = str.substr(name_pos + name.size());

  const size_t colon_pos = str.find(':');
  if (colon_pos == std::string_view::npos)
    IGUANA_UNLIKELY { throw std::runtime_error("Failed to find ':'"); }
  const size_t pos = colon_pos + 1;

  if constexpr (index == member_count - 1) {  // last field
    // Keep the closing '}' so the value is followed by a terminator.
    str = str.substr(pos, str.find('}') - pos + 1);
  }
  else {
    str = str.substr(pos, str.find(',') - pos);
  }
  detail::from_json_impl(value.*member, std::begin(str), std::end(str));
}
template <typename T, typename View,
std::enable_if_t<json_view_v<View>, int> = 0>
IGUANA_INLINE void from_json(T &value, const View &view,

View File

@ -18,10 +18,7 @@ class numeric_str {
if (val_.empty())
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
T res;
auto [_, ec] =
detail::from_chars(val_.data(), val_.data() + val_.size(), res);
if (ec != std::errc{})
IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); }
detail::from_chars(val_.data(), val_.data() + val_.size(), res);
return res;
}
@ -214,4 +211,28 @@ IGUANA_INLINE bool is_numeric(char c) noexcept {
return static_cast<bool>(is_num[static_cast<unsigned int>(c)]);
}
// Returns true when `c` is marked in the table below as a character that may
// directly follow a number:
// '\0' '\t' '\n' '\v' '\f' '\r' ' ' '"' ',' ']' '}'
IGUANA_INLINE bool can_follow_number(char c) noexcept {
  static constexpr int can_follow_num[256] = {
      // 0 1 2 3 4 5 6 7 8 9 A B C D E F
      1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,  // 0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 1
      1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,  // 2
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 3
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 4
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  // 5
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 6
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  // 7
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 8
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 9
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // C
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // D
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // E
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F
  };
  // Index through unsigned char: where plain char is signed, converting a
  // negative value straight to unsigned int yields an index far past the
  // end of the table.
  return static_cast<bool>(can_follow_num[static_cast<unsigned char>(c)]);
}
} // namespace iguana

View File

@ -245,6 +245,8 @@ IGUANA_INLINE void to_json_impl(Stream &s, T &&t) {
template <bool Is_writing_escape, typename Stream, typename T,
std::enable_if_t<variant_v<T>, int>>
IGUANA_INLINE void to_json_impl(Stream &s, T &&t) {
static_assert(!has_duplicate_type_v<std::remove_reference_t<T>>,
"don't allow same type in std::variant");
std::visit(
[&s](auto value) {
to_json_impl<Is_writing_escape>(s, value);

View File

@ -910,6 +910,120 @@ constexpr const std::string_view get_name() {
return M::name();
}
namespace detail {
// Returns true only when `ptr` and `ele` have the same type and compare
// equal; values of different types never match.
template <typename T, typename U>
constexpr bool get_index_imple(T ptr, U ele) {
  if constexpr (!std::is_same_v<T, U>) {
    return false;
  }
  else {
    return ele == ptr;
  }
}
// Returns the position of the first tuple element that get_index_imple
// reports as equal to `ptr`, or the number of elements visited (sizeof...(I))
// when none matches.
template <typename T, typename Tuple, size_t... I>
constexpr size_t member_index_impl(T ptr, Tuple &tp,
                                   std::index_sequence<I...>) {
  size_t position = 0;
  // Each operand is true on a match; a miss bumps the counter and yields
  // false, so the || fold stops right at the first match.
  (void)(... ||
         (get_index_imple(ptr, std::get<I>(tp)) || (++position, false)));
  return position;
}
// Convenience wrapper: runs member_index_impl over every element of `tp`.
template <typename T, typename Tuple>
constexpr size_t member_index(T ptr, Tuple &tp) {
  constexpr size_t element_count = std::tuple_size_v<std::decay_t<Tuple>>;
  return member_index_impl(ptr, tp,
                           std::make_index_sequence<element_count>{});
}
} // namespace detail
template <auto member>
constexpr size_t index_of() {
using namespace detail;
using T = typename member_tratis<decltype(member)>::owner_type;
using M = Reflect_members<T>;
constexpr auto tp = M::apply_impl();
constexpr size_t Size = std::tuple_size_v<decltype(tp)>;
constexpr size_t index = member_index(member, tp);
static_assert(index < Size, "out of range");
return index;
}
// Array of index_of<m>() for each listed member pointer, in argument order.
template <auto... members>
constexpr std::array<size_t, sizeof...(members)> indexs_of() {
  constexpr std::array<size_t, sizeof...(members)> indexes{
      index_of<members>()...};
  return indexes;
}
template <auto member>
constexpr auto name_of() {
using T = typename member_tratis<decltype(member)>::owner_type;
using M = Reflect_members<T>;
constexpr auto s = M::arr()[index_of<member>()];
return std::string_view(s.data(), s.size());
}
// Array of name_of<m>() for each listed member pointer, in argument order.
template <auto... members>
constexpr std::array<std::string_view, sizeof...(members)> names_of() {
  constexpr std::array<std::string_view, sizeof...(members)> names{
      name_of<members>()...};
  return names;
}
template <auto member>
constexpr auto member_count_of() {
using T = typename member_tratis<decltype(member)>::owner_type;
using M = Reflect_members<T>;
return M::value();
}
// Declared ahead of its definition because check_duplicate calls it.
template <typename T, auto member>
constexpr size_t duplicate_count();

// Adds to `index`:
//  - 1 when `member` is the very same member pointer as `ptr`;
//  - duplicate_count<value_type, ptr>() when the member's value type is
//    itself a reflected type.
template <auto ptr, typename Member>
constexpr void check_duplicate(Member member, size_t &index) {
  using value_type = typename member_tratis<Member>::value_type;
  index += detail::get_index_imple(ptr, member) ? 1 : 0;
  if constexpr (is_reflection_v<value_type>) {
    index += iguana::duplicate_count<value_type, ptr>();
  }
}
template <typename T, auto member>
constexpr size_t duplicate_count() {
using M = Reflect_members<T>;
constexpr auto name = name_of<member>();
constexpr auto arr = M::arr();
constexpr auto tp = M::apply_impl();
size_t index = 0;
std::apply(
[&](auto... ele) {
(check_duplicate<member>(ele, index), ...);
},
tp);
for (auto &s : arr) {
if (s == name) {
index++;
break;
}
}
return index;
}
template <typename T>
constexpr const std::string_view get_fields() {
using M = Reflect_members<T>;

View File

@ -139,6 +139,10 @@ struct is_variant<std::variant<T...>> : std::true_type {};
// is_variant<...>::value for T with cv-qualifiers and references removed.
template <typename T>
inline constexpr bool variant_v = is_variant<std::remove_cvref_t<T>>::value;
// Type of alternative number Idx of the variant T (a reference on T is
// ignored; const on T carries over to the alternative).
template <size_t Idx, typename T>
using variant_element_t =
    std::variant_alternative_t<Idx, std::remove_reference_t<T>>;
// is_reflection_v for T with cv-qualifiers and references removed.
template <typename T>
inline constexpr bool refletable_v = is_reflection_v<std::remove_cvref_t<T>>;
@ -201,10 +205,59 @@ inline constexpr auto has_qoute = [](uint64_t chunk) IGUANA__INLINE_LAMBDA {
0b0010001000100010001000100010001000100010001000100010001000100010);
};
// Decodes one UTF-8 sequence at `it` (via decode_utf8) and appends it to `ss`
// as an escaped code point.
//
//  - JSON mode (default): "\uXXXX" for U+0000..U+D7FF and U+E000..U+FFFF,
//    and a UTF-16 surrogate pair "\uXXXX\uXXXX" for U+10000..U+10FFFF.
//  - XML mode (is_xml_serialization == true): a single character reference
//    "&#xH...;" holding the code point itself. Surrogate halves are never
//    written, since they are not legal XML characters.
//
// `it` is passed by reference; decode_utf8 is expected to leave it after the
// decoded sequence (callers continue their loop without advancing) — confirm
// against decode_utf8.
//
// Throws std::runtime_error("illegal unicode character") when decode_utf8
// reports failure and std::runtime_error("illegal codepoint") for values in
// the surrogate range or above U+10FFFF. Validation happens before anything
// is appended, so `ss` is not left with a dangling escape prefix.
template <bool is_xml_serialization = false, typename Stream, typename Ch>
IGUANA_INLINE void write_unicode_to_string(Ch& it, Stream& ss) {
  static constexpr char hexDigits[16] = {'0', '1', '2', '3', '4', '5',
                                         '6', '7', '8', '9', 'A', 'B',
                                         'C', 'D', 'E', 'F'};
  unsigned codepoint = 0;
  if (!decode_utf8(it, codepoint))
    IGUANA_UNLIKELY { throw std::runtime_error("illegal unicode character"); }

  const bool fits_four_digits =
      codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF);
  if (!fits_four_digits && (codepoint < 0x010000 || codepoint > 0x10FFFF))
    IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); }

  // Appends the low 16 bits of `v` as exactly four uppercase hex digits.
  const auto write_hex4 = [&ss](unsigned v) {
    ss.push_back(hexDigits[(v >> 12) & 15]);
    ss.push_back(hexDigits[(v >> 8) & 15]);
    ss.push_back(hexDigits[(v >> 4) & 15]);
    ss.push_back(hexDigits[v & 15]);
  };

  if constexpr (is_xml_serialization) {
    ss.append("&#x");
    if (!fits_four_digits) {
      // U+10000..U+10FFFF: one or two extra leading digits.
      if (codepoint > 0xFFFFF) {
        ss.push_back(hexDigits[(codepoint >> 20) & 15]);
      }
      ss.push_back(hexDigits[(codepoint >> 16) & 15]);
    }
    write_hex4(codepoint);
    ss.push_back(';');
  }
  else {
    ss.push_back('\\');
    ss.push_back('u');
    if (fits_four_digits) {
      write_hex4(codepoint);
    }
    else {
      // Surrogate pair
      const unsigned s = codepoint - 0x010000;
      const unsigned lead = (s >> 10) + 0xD800;
      const unsigned trail = (s & 0x3FF) + 0xDC00;
      write_hex4(lead);
      ss.push_back('\\');
      ss.push_back('u');
      write_hex4(trail);
    }
  }
}
// https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/writer.h
template <typename Ch, typename SizeType, typename Stream>
inline void write_string_with_escape(const Ch* it, SizeType length,
Stream& ss) {
IGUANA_INLINE void write_string_with_escape(const Ch* it, SizeType length,
Stream& ss) {
static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
static const char escape[256] = {
@ -227,40 +280,7 @@ inline void write_string_with_escape(const Ch* it, SizeType length,
std::advance(end, length);
while (it < end) {
if (static_cast<unsigned>(*it) >= 0x80)
IGUANA_UNLIKELY {
unsigned codepoint = 0;
if (!decode_utf8(it, codepoint))
IGUANA_UNLIKELY {
throw std::runtime_error("illegal unicode character");
}
ss.push_back('\\');
ss.push_back('u');
if (codepoint <= 0xD7FF ||
(codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
ss.push_back(hexDigits[(codepoint >> 12) & 15]);
ss.push_back(hexDigits[(codepoint >> 8) & 15]);
ss.push_back(hexDigits[(codepoint >> 4) & 15]);
ss.push_back(hexDigits[(codepoint)&15]);
}
else {
if (codepoint < 0x010000 || codepoint > 0x10FFFF)
IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); }
// Surrogate pair
unsigned s = codepoint - 0x010000;
unsigned lead = (s >> 10) + 0xD800;
unsigned trail = (s & 0x3FF) + 0xDC00;
ss.push_back(hexDigits[(lead >> 12) & 15]);
ss.push_back(hexDigits[(lead >> 8) & 15]);
ss.push_back(hexDigits[(lead >> 4) & 15]);
ss.push_back(hexDigits[(lead)&15]);
ss.push_back('\\');
ss.push_back('u');
ss.push_back(hexDigits[(trail >> 12) & 15]);
ss.push_back(hexDigits[(trail >> 8) & 15]);
ss.push_back(hexDigits[(trail >> 4) & 15]);
ss.push_back(hexDigits[(trail)&15]);
}
}
IGUANA_UNLIKELY { write_unicode_to_string(it, ss); }
else if (escape[static_cast<unsigned char>(*it)])
IGUANA_UNLIKELY {
ss.push_back('\\');
@ -281,4 +301,41 @@ inline void write_string_with_escape(const Ch* it, SizeType length,
}
}
// Returns true when at least two elements of `arr` compare equal
// (pairwise comparison, so quadratic in N). Empty and single-element arrays
// yield false.
template <typename T, size_t N>
IGUANA_INLINE constexpr bool has_duplicate(const std::array<T, N>& arr) {
  // size_t indices: no signed/unsigned comparison against the array size.
  for (size_t i = 0; i < N; ++i) {
    for (size_t j = i + 1; j < N; ++j) {
      if (arr[i] == arr[j]) {
        return true;
      }
    }
  }
  return false;
}
#if defined(__clang__) || defined(_MSC_VER) || \
    (defined(__GNUC__) && __GNUC__ > 8)
// True when two of Types... produce the same iguana::type_string.
template <typename... Types>
IGUANA_INLINE constexpr bool has_duplicate_type() {
  return has_duplicate(std::array<std::string_view, sizeof...(Types)>{
      iguana::type_string<Types>()...});
}

// Primary template: anything that is not a std::variant has no duplicates.
template <typename T>
struct has_duplicate_type_in_variant : std::false_type {};

// std::variant: check its alternative list.
template <typename... Alternatives>
struct has_duplicate_type_in_variant<std::variant<Alternatives...>> {
  static constexpr bool value = has_duplicate_type<Alternatives...>();
};

template <typename T>
inline constexpr bool has_duplicate_type_v =
    has_duplicate_type_in_variant<T>::value;
#else
// Other compilers (e.g. GCC 8 and older): the check is disabled and always
// reports "no duplicates".
template <typename T>
inline constexpr bool has_duplicate_type_v = false;
#endif
} // namespace iguana

View File

@ -0,0 +1,8 @@
#pragma once
// Note: Update the version when releasing a new version.
// IGUANA_VERSION % 100 is the sub-minor version
// IGUANA_VERSION / 100 % 1000 is the minor version
// IGUANA_VERSION / 100000 is the major version
#define IGUANA_VERSION 100004 // 1.0.4

View File

@ -28,15 +28,26 @@ template <typename U, typename It, std::enable_if_t<plain_v<U>, int> = 0>
IGUANA_INLINE void parse_value(U &&value, It &&begin, It &&end) {
using T = std::decay_t<U>;
if constexpr (string_container_v<T>) {
value = T(&*begin, static_cast<size_t>(std::distance(begin, end)));
if constexpr (string_view_v<T>) {
value = T(&*begin, static_cast<size_t>(std::distance(begin, end)));
}
else {
// TODO: When not parsing the value in the attribute, it is not necessary
// to unescape'and "
value.clear();
auto pre = begin;
while (advance_until_character<'&'>(begin, end)) {
value.append(T(&*pre, static_cast<size_t>(std::distance(pre, begin))));
parse_escape_xml(value, begin, end);
pre = begin;
}
value.append(T(&*pre, static_cast<size_t>(std::distance(pre, begin))));
}
}
else if constexpr (num_v<T>) {
auto size = std::distance(begin, end);
const auto start = &*begin;
auto [p, ec] = detail::from_chars(start, start + size, value);
if (ec != std::errc{})
IGUANA_UNLIKELY
throw std::runtime_error("Failed to parse number");
detail::from_chars(start, start + size, value);
}
else if constexpr (char_v<T>) {
if (static_cast<size_t>(std::distance(begin, end)) != 1)
@ -90,9 +101,19 @@ IGUANA_INLINE void parse_attr(U &&value, It &&it, It &&end) {
parse_value(key, key_begin, key_end);
skip_sapces_and_newline(it, end);
match<'"'>(it, end);
auto value_begin = it;
auto value_end = skip_pass<'"'>(it, end);
auto value_begin = it + 1;
auto value_end = value_begin;
if (*it == '"')
IGUANA_LIKELY {
++it;
value_end = skip_pass<'"'>(it, end);
}
else if (*it == '\'') {
++it;
value_end = skip_pass<'\''>(it, end);
}
else
IGUANA_UNLIKELY { throw std::runtime_error("expected quote or apos"); }
value_type v;
parse_value(v, value_begin, value_end);
value.emplace(std::move(key), std::move(v));
@ -121,17 +142,8 @@ IGUANA_INLINE void parse_item(U &value, It &&it, It &&end,
match<'<'>(it, end);
if (*it == '?' || *it == '!')
IGUANA_UNLIKELY {
// skip <?
if (*(it + 1) == '[') {
--it;
return;
}
else {
skip_till<'>'>(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
}
--it;
return;
}
auto start = it;
skip_till_greater_or_space(it, end);
@ -223,24 +235,48 @@ IGUANA_INLINE void skip_object_value(It &&it, It &&end, std::string_view name) {
throw std::runtime_error("unclosed tag: " + std::string(name));
}
// Skips a processing instruction "<? ... ?>".
// Keeps advancing to the next '>' (via skip_till) until the character just
// before `it` is '?', then steps over that '>'.
template <typename It>
IGUANA_INLINE void skip_instructions(It &&it, It &&end) {
  for (;;) {
    if (*(it - 1) == '?') {
      break;
    }
    ++it;
    skip_till<'>'>(it, end);
  }
  ++it;
}
// Skips the remainder of a CDATA section: steps over the current character,
// moves to the next ']' and then requires "]>" right after it.
// NOTE(review): looks like a lone ']' inside the CDATA text would stop the
// scan at the wrong place — confirm against skip_till/match.
template <typename It>
IGUANA_INLINE void skip_cdata(It &&it, It &&end) {
  skip_till<']'>(++it, end);
  match<']', '>'>(++it, end);
}
// Skips a comment "<!-- ... -->".
// Keeps advancing to the next '>' until the two characters just before `it`
// are both '-', then steps over that '>'.
template <typename It>
IGUANA_INLINE void skip_comment(It &&it, It &&end) {
  while (!(*(it - 1) == '-' && *(it - 2) == '-')) {
    ++it;
    skip_till<'>'>(it, end);
  }
  ++it;
}
// return true means reach the close tag
template <size_t cdata_idx, typename T, typename It,
std::enable_if_t<refletable_v<T>, int> = 0>
IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) {
skip_sapces_and_newline(it, end);
IGUANA_INLINE auto skip_till_close_tag(T &value, It &&it, It &&end) {
while (true) {
skip_sapces_and_newline(it, end);
match<'<'>(it, end);
if (*it == '/')
IGUANA_UNLIKELY {
// </tag>
return true; // reach the close tag
// reach the close tag
return true;
}
else if (*it == '?')
IGUANA_UNLIKELY {
// <? ... ?>
skip_till<'>'>(it, end);
++it;
skip_sapces_and_newline(it, end);
skip_instructions(it, end);
continue;
}
else if (*it == '!')
@ -249,12 +285,7 @@ IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) {
if (*it == '[') {
// <![
if constexpr (cdata_idx == iguana::get_value<std::decay_t<T>>()) {
++it;
skip_till<']'>(it, end);
++it;
match<']', '>'>(it, end);
skip_sapces_and_newline(it, end);
continue;
skip_cdata(it, end);
}
else {
// if parse cdata
@ -274,23 +305,53 @@ IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) {
&*vb, static_cast<size_t>(std::distance(vb, ve)));
}
match<']', '>'>(it, end);
skip_sapces_and_newline(it, end);
continue;
}
}
else {
else if (*it == '-') {
// <!-- -->
// <!D
skip_comment(it, end);
}
else {
// <!D... >
skip_till<'>'>(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
}
continue;
}
return false;
}
}
// Consumes everything in front of the first ordinary tag: whitespace,
// "<? ... ?>" instructions, "<!-- ... -->" comments and other "<! ... >"
// declarations. Returns with `it` just after the '<' of the first tag whose
// next character is neither '?' nor '!'.
template <typename It>
IGUANA_INLINE void skip_till_first_key(It &&it, It &&end) {
  while (it != end) {
    skip_sapces_and_newline(it, end);
    match<'<'>(it, end);
    const auto marker = *it;
    if (marker != '?' && marker != '!') {
      return;  // ordinary tag reached
    }
    if (marker == '?')
      IGUANA_UNLIKELY {
        skip_instructions(it, end);
        continue;
      }
    // marker == '!'
    ++it;
    if (*it == '-') {
      // <!-- -->
      skip_comment(it, end);
    }
    else {
      // <!D... >
      skip_till<'>'>(it, end);
      ++it;
    }
  }
}
template <typename T>
IGUANA_INLINE void check_required(std::string_view key_set) {
if constexpr (iguana::has_iguana_required_arr_v<T>) {
@ -313,7 +374,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
constexpr auto cdata_idx = get_type_index<is_cdata_t, U>();
skip_till<'>'>(it, end);
++it;
if (skip_till_key<cdata_idx>(value, it, end)) {
if (skip_till_close_tag<cdata_idx>(value, it, end)) {
match_close_tag(it, end, name);
return;
}
@ -344,7 +405,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
key_set.append(key).append(", ");
}
}
if (skip_till_key<cdata_idx>(value, it, end))
if (skip_till_close_tag<cdata_idx>(value, it, end))
IGUANA_UNLIKELY {
match_close_tag(it, end, name);
parse_done = true;
@ -389,7 +450,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
skip_object_value(it, end, key);
#endif
}
if (skip_till_key<cdata_idx>(value, it, end)) {
if (skip_till_close_tag<cdata_idx>(value, it, end)) {
match_close_tag(it, end, name);
check_required<U>(key_set);
return;
@ -405,17 +466,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
template <typename It, typename U, std::enable_if_t<attr_v<U>, int> = 0>
IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) {
while (it != end) {
skip_sapces_and_newline(it, end);
match<'<'>(it, end);
if (*it == '?') {
skip_till<'>'>(it, end);
++it;
}
else {
break;
}
}
detail::skip_till_first_key(it, end);
auto start = it;
skip_till_greater_or_space(it, end);
std::string_view key =
@ -426,17 +477,7 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) {
template <typename It, typename U, std::enable_if_t<refletable_v<U>, int> = 0>
IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) {
while (it != end) {
skip_sapces_and_newline(it, end);
match<'<'>(it, end);
if (*it == '?') {
skip_till<'>'>(it, end);
++it; // skip >
}
else {
break;
}
}
detail::skip_till_first_key(it, end);
auto start = it;
skip_till_greater_or_space(it, end);
std::string_view key =

View File

@ -2,8 +2,8 @@
#include "util.hpp"
namespace iguana {
template <typename T, typename map_type = std::unordered_map<std::string_view,
std::string_view>>
template <typename T,
typename map_type = std::unordered_map<std::string, std::string>>
class xml_attr_t {
public:
T &value() { return val_; }
@ -17,6 +17,10 @@ class xml_attr_t {
map_type attr_;
};
// Variant of xml_attr_t whose attribute container is an ordered std::map
// with std::string_view keys and values.
// NOTE(review): the views presumably refer to the parsed input, which would
// then have to outlive the object — confirm against the reader.
template <typename T>
using xml_attr_view_t =
xml_attr_t<T, std::map<std::string_view, std::string_view>>;
template <typename T = std::string_view,
std::enable_if_t<string_container_v<T>, int> = 0>
class xml_cdata_t {
@ -72,12 +76,24 @@ inline constexpr auto has_square_bracket =
0b0101110101011101010111010101110101011101010111010101110101011101);
};
// Applies has_zero to `chunk` XOR-ed with '&' (0x26) repeated in all eight
// bytes; presumably nonzero when some byte of `chunk` is '&'.
inline constexpr auto has_and = [](uint64_t chunk) IGUANA__INLINE_LAMBDA {
  constexpr uint64_t ampersands = 0x2626262626262626;
  return has_zero(chunk ^ ampersands);
};
// Applies has_zero to `chunk` XOR-ed with '=' (0x3D) repeated in all eight
// bytes; presumably nonzero when some byte of `chunk` is '='.
inline constexpr auto has_equal = [](uint64_t chunk) IGUANA__INLINE_LAMBDA {
  constexpr uint64_t equal_signs = 0x3D3D3D3D3D3D3D3D;
  return has_zero(chunk ^ equal_signs);
};
// Applies has_zero to `chunk` XOR-ed with '\'' (0x27) repeated in all eight
// bytes; presumably nonzero when some byte of `chunk` is an apostrophe.
inline constexpr auto has_apos = [](uint64_t chunk) IGUANA__INLINE_LAMBDA {
  constexpr uint64_t apostrophes = 0x2727272727272727;
  return has_zero(chunk ^ apostrophes);
};
template <typename It>
IGUANA_INLINE void skip_sapces_and_newline(It &&it, It &&end) {
while (it != end && (static_cast<uint8_t>(*it) < 33)) {
@ -104,6 +120,35 @@ IGUANA_INLINE void match_close_tag(It &&it, It &&end, std::string_view key) {
// ++it;
}
// Advances `it` towards `end` looking for character `c`.
// Returns true with `it` positioned on the match, or false with `it` at the
// end of the range when `c` does not occur. Only c == '&' is supported.
template <char c, typename It>
IGUANA_INLINE bool advance_until_character(It &&it, It &&end) {
  static_assert(contiguous_iterator<std::decay_t<It>>);
  if (std::distance(it, end) >= 7)
    IGUANA_LIKELY {
      // Scan eight bytes per step while a full word is still available.
      const auto word_limit = end - 7;
      while (it < word_limit) {
        const auto word = *reinterpret_cast<const uint64_t *>(&*it);
        uint64_t hits;
        if constexpr (c == '&')
          hits = has_and(word);
        else
          static_assert(!c, "not support this character");
        if (hits != 0) {
          // Lowest set bit -> byte offset of the match inside the word.
          it += (countr_zero(hits) >> 3);
          return true;
        }
        it += 8;
      }
    }
  // Tail end of buffer: byte-by-byte.
  for (; it < end; ++it) {
    if (*it == c)
      return true;
  }
  return false;
}
template <char c, typename It>
IGUANA_INLINE void skip_till(It &&it, It &&end) {
static_assert(contiguous_iterator<std::decay_t<It>>);
@ -126,6 +171,8 @@ IGUANA_INLINE void skip_till(It &&it, It &&end) {
test = has_square_bracket(chunk);
else if constexpr (c == '=')
test = has_equal(chunk);
else if constexpr (c == '\'')
test = has_apos(chunk);
else
static_assert(!c, "not support this character");
if (test != 0) {
@ -186,4 +233,104 @@ IGUANA_INLINE auto skip_pass(It &&it, It &&end) {
return res + 1;
}
// Returns true when at least sizeof...(C) characters remain in [it, end) and
// they equal C... in order. Comparison stops at the first mismatch; `it` is
// advanced over every character that was compared.
template <char... C, typename It>
IGUANA_INLINE bool is_match(It &&it, const It &end) {
  const auto available = static_cast<size_t>(std::distance(it, end));
  return available >= sizeof...(C) && (... && (*it++ == C));
}
// Decodes one XML reference and appends the result to `value`.
//
// Precondition: `it` points at '&' and it != end.
// Recognised forms:
//   &amp; &apos; &quot; &gt; &lt;   -> the corresponding character
//   &#xHHHH;  /  &#DDDD;            -> the code point, appended through
//                                      encode_utf8; the closing ';' is
//                                      enforced by match<';'>
// Anything else — including an incomplete or unknown named entity and a
// trailing '&' — follows the loose policy: '&' is copied as-is and `it`
// moves one character forward.
//
// `it` always advances by at least one character, so a caller that loops on
// the next '&' makes progress. No character at or beyond `end` is read here.
template <typename U, typename It, std::enable_if_t<string_v<U>, int> = 0>
IGUANA_INLINE void parse_escape_xml(U &value, It &&it, It &&end) {
  // Byte -> hexadecimal digit value; 255 marks "not a digit".
  static constexpr unsigned char lookup_digits[256] = {
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x00
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x10
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x20
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   255, 255, 255, 255, 255, 255,  // 0x30
      255, 10,  11,  12,  13,  14,  15,  255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x40
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x50
      255, 10,  11,  12,  13,  14,  15,  255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x60
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x70
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x80
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0x90
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0xA0
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0xB0
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0xC0
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0xD0
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // 0xE0
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255   // 0xF0
  };

  const auto remaining = std::distance(it, end);
  if (remaining >= 2) {  // there is a character after '&' to inspect
    switch (*(it + 1)) {
      // &amp; &apos;
      case 'a':
        if (is_match<'m', 'p', ';'>(it + 2, end)) {
          value.push_back('&');
          it += 5;
          return;
        }
        if (is_match<'p', 'o', 's', ';'>(it + 2, end)) {
          value.push_back('\'');
          it += 6;
          return;
        }
        break;
      // &quot;
      case 'q':
        if (is_match<'u', 'o', 't', ';'>(it + 2, end)) {
          value.push_back('\"');
          it += 6;
          return;
        }
        break;
      // &gt;
      case 'g':
        if (is_match<'t', ';'>(it + 2, end)) {
          value.push_back('>');
          it += 4;
          return;
        }
        break;
      // &lt;
      case 'l':
        if (is_match<'t', ';'>(it + 2, end)) {
          value.push_back('<');
          it += 4;
          return;
        }
        break;
      // &#x...; (hexadecimal) or &#...; (decimal)
      case '#': {
        const bool is_hex = remaining >= 3 && *(it + 2) == 'x';
        const unsigned base = is_hex ? 16 : 10;
        it += is_hex ? 3 : 2;
        unsigned long codepoint = 0;
        while (it != end) {
          const unsigned digit =
              lookup_digits[static_cast<unsigned char>(*it)];
          // Rejects non-digits (255) and, for decimal references, the hex
          // letters (10..15) as well.
          if (digit >= base)
            break;
          codepoint = codepoint * base + digit;
          ++it;
        }
        encode_utf8(value, codepoint);
        match<';'>(it, end);
        return;
      }
      default:
        break;
    }
  }
  // loose policy: allow a bare '&' and step over it
  value.push_back(*(it++));
}
} // namespace iguana

View File

@ -6,6 +6,58 @@
namespace iguana {
#ifdef XML_ATTR_USE_APOS
#define XML_ATTR_DELIMITER '\''
#else
#define XML_ATTR_DELIMITER '\"'
#endif
// TODO: improve by precalculating the output size
//
// Appends the `length` characters starting at `it` to `ss`, XML-escaped:
//   '&' -> "&amp;"   '>' -> "&gt;"   '<' -> "&lt;"
// When escape_quote_apos is true the attribute delimiter is escaped as well:
// '"' -> "&quot;" if XML_ATTR_DELIMITER is '"', otherwise '\'' -> "&apos;".
// With XML_ESCAPE_UNICODE defined, characters >= 0x80 are handed to
// write_unicode_to_string<true>.
template <bool escape_quote_apos, typename Ch, typename SizeType,
          typename Stream>
IGUANA_INLINE void render_string_with_escape_xml(const Ch *it, SizeType length,
                                                 Stream &ss) {
  const Ch *const stop = it + length;
  while (it < stop) {
#ifdef XML_ESCAPE_UNICODE
    if (static_cast<unsigned>(*it) >= 0x80)
      IGUANA_UNLIKELY {
        // Consumes the whole multi-byte sequence itself.
        write_unicode_to_string<true>(it, ss);
        continue;
      }
#endif
    const Ch current = *it;
    ++it;
    if constexpr (escape_quote_apos) {
      if constexpr (XML_ATTR_DELIMITER == '\"') {
        if (current == '"')
          IGUANA_UNLIKELY {
            ss.append("&quot;");
            continue;
          }
      }
      else {
        if (current == '\'')
          IGUANA_UNLIKELY {
            ss.append("&apos;");
            continue;
          }
      }
    }
    switch (current) {
      case '&':
        ss.append("&amp;");
        break;
      case '>':
        ss.append("&gt;");
        break;
      case '<':
        ss.append("&lt;");
        break;
      default:
        ss.push_back(current);
        break;
    }
  }
}
template <bool pretty, size_t spaces, typename Stream, typename T,
std::enable_if_t<sequence_container_v<T>, int> = 0>
IGUANA_INLINE void render_xml_value(Stream &ss, const T &value,
@ -39,10 +91,12 @@ IGUANA_INLINE void render_head(Stream &ss, std::string_view str) {
ss.push_back('>');
}
template <typename Stream, typename T, std::enable_if_t<plain_v<T>, int> = 0>
template <bool escape_quote_apos = false, typename Stream, typename T,
std::enable_if_t<plain_v<T>, int> = 0>
IGUANA_INLINE void render_value(Stream &ss, const T &value) {
if constexpr (string_container_v<T>) {
ss.append(value.data(), value.size());
render_string_with_escape_xml<escape_quote_apos>(value.data(), value.size(),
ss);
}
else if constexpr (num_v<T>) {
char temp[65];
@ -91,9 +145,9 @@ inline void render_xml_attr(Stream &ss, const T &value, std::string_view name) {
ss.push_back(' ');
render_value(ss, k);
ss.push_back('=');
ss.push_back('"');
render_value(ss, v);
ss.push_back('"');
ss.push_back(XML_ATTR_DELIMITER);
render_value<true>(ss, v);
ss.push_back(XML_ATTR_DELIMITER);
}
ss.push_back('>');
}

View File

@ -113,10 +113,7 @@ IGUANA_INLINE void parse_value(U &value, It &&value_begin, It &&value_end) {
IGUANA_UNLIKELY { return; }
auto size = std::distance(value_begin, value_end);
const auto start = &*value_begin;
auto [p, ec] = detail::from_chars(start, start + size, value);
if (ec != std::errc{})
IGUANA_UNLIKELY
throw std::runtime_error("Failed to parse number");
detail::from_chars(start, start + size, value);
}
// string_view should be used for string with ' " ?