Browse Source

Initial pass for generalized encoding support for parser

master
Jessica James 3 years ago
parent
commit
548d8c62e0
  1. 11
      src/common/parser/parser.cpp
  2. 148
      src/common/parsers/json.cpp
  3. 6
      src/common/serialize.cpp
  4. 84
      src/include/jessilib/parser.hpp
  5. 181
      src/include/jessilib/parsers/json.hpp
  6. 12
      src/include/jessilib/serialize.hpp
  7. 61
      src/include/jessilib/unicode_base.hpp
  8. 57
      src/test/parser.cpp
  9. 45
      src/test/parsers/json.cpp

11
src/common/parser/parser.cpp

@ -22,8 +22,8 @@
namespace jessilib {
object parser::deserialize(std::istream& in_stream) {
std::vector<char8_t> data;
object parser::deserialize_bytes(std::istream& in_stream, encoding in_read_encoding) {
std::vector<byte_type> data;
// Read entire stream into data
char buffer[1024];
@ -33,12 +33,13 @@ object parser::deserialize(std::istream& in_stream) {
}
// Pass data to deserialize
return deserialize(std::u8string_view{ &data.front(), data.size() });
return deserialize_bytes(bytes_view_type{ &data.front(), data.size() }, in_read_encoding);
}
void parser::serialize(std::ostream& in_stream, const object& in_object) {
void parser::serialize_bytes(std::ostream& in_stream, const object& in_object, encoding in_write_encoding) {
// TODO: replace this method
in_stream << jessilib::string_view_cast<char>(serialize(in_object));
auto bytes = serialize_bytes(in_object, in_write_encoding);
in_stream << bytes;
}
} // namespace jessilib

148
src/common/parsers/json.cpp

@ -17,141 +17,53 @@
*/
#include "parsers/json.hpp"
#include <charconv>
using namespace std::literals;
namespace jessilib {
/**
 * Builds a quoted JSON string literal from UTF-8 text.
 *
 * Backslash and double-quote are escaped with a leading backslash; codepoints below 0x20 are
 * written as a \u00XX escape; every other decoded sequence is copied through unchanged.
 *
 * @tparam CharT Character type of the returned string
 * @param in_string UTF-8 text to quote and escape
 * @return The escaped text wrapped in double quotes
 */
template<typename CharT>
std::basic_string<CharT> make_json_string(std::u8string_view in_string) {
std::basic_string<CharT> result;
// Room for the text plus the two surrounding quotes (escapes may still force a reallocation)
result.reserve(in_string.size() + 2);
result = '\"';
decode_result decode;
// Loop ends when decode_codepoint reports 0 units -- presumably end of input or an undecodable sequence; TODO confirm
while ((decode = decode_codepoint(in_string)).units != 0) {
if (decode.codepoint == U'\\') { // backslash
result += '\\';
result += '\\';
}
else if (decode.codepoint == U'\"') { // quotation
result += '\\';
result += '\"';
}
else if (decode.codepoint < 0x20) { // control characters
result += '\\';
result += 'u';
result += '0';
result += '0';
// append the remaining 2 hexadecimal digits after the "\u00" prefix, zero-padding as needed
char data[2]; // Will only ever use 2 chars
char* data_end = data + sizeof(data);
auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16);
if (to_chars_result.ptr == data) {
// No bytes written
result += '0';
result += '0';
}
else if (to_chars_result.ptr != data_end) {
// 1 byte written
result += '0';
result += data[0];
}
else {
// 2 bytes written
result += data[0];
result += data[1];
}
}
else {
// Valid UTF-8 sequence; copy it over
result.append(in_string.data(), decode.units);
}
// Advance past the sequence that was just handled
in_string.remove_prefix(decode.units);
}
result += '\"';
return result;
}
object json_parser::deserialize(std::u8string_view in_data) {
object json_parser::deserialize_bytes(bytes_view_type in_data, encoding in_write_encoding) {
object result;
if (in_write_encoding == encoding::utf_8) {
std::u8string_view data_view = jessilib::string_view_cast<char8_t>(in_data);
deserialize_json<char8_t, true>(result, data_view);
return result;
}
std::u8string json_parser::serialize(const object& in_object) {
static const object::array_type s_null_array;
static const object::map_type s_null_map;
switch (in_object.type()) {
case object::type::null:
return u8"null"s;
case object::type::boolean:
if (in_object.get<bool>()) {
return u8"true"s;
else if (in_write_encoding == encoding::utf_16) {
std::u16string_view data_view = jessilib::string_view_cast<char16_t>(in_data);
deserialize_json<char16_t, true>(result, data_view);
}
return u8"false"s;
case object::type::integer:
return static_cast<std::u8string>(jessilib::string_view_cast<char8_t>(std::to_string(in_object.get<intmax_t>())));
case object::type::decimal:
return static_cast<std::u8string>(jessilib::string_view_cast<char8_t>(std::to_string(in_object.get<long double>())));
case object::type::text:
return make_json_string<char8_t>(in_object.get<std::u8string>());
case object::type::array: {
if (in_object.size() == 0) {
return u8"[]"s;
else if (in_write_encoding == encoding::utf_32) {
std::u32string_view data_view = jessilib::string_view_cast<char32_t>(in_data);
deserialize_json<char32_t, true>(result, data_view);
}
std::u8string result;
result = '[';
// Serialize all objects in array
for (auto& obj : in_object.get<object::array_type>(s_null_array)) {
result += json_parser::serialize(obj);
result += ',';
else if (in_write_encoding == encoding::wchar) {
std::wstring_view data_view = jessilib::string_view_cast<wchar_t>(in_data);
deserialize_json<wchar_t, true>(result, data_view);
}
// Return result
result.back() = ']';
return result;
}
case object::type::map: {
if (in_object.size() == 0) {
return u8"{}"s;
}
std::u8string result;
result = '{';
// Serialize all objects in map
for (auto& item : in_object.get<object::map_type>(s_null_map)) {
result += make_json_string<char8_t>(item.first);
result += ':';
result += json_parser::serialize(item.second);
result += ',';
else if (in_write_encoding == encoding::multibyte) {
// TODO: support without copying... somehow
auto u8_data = mbstring_to_ustring<char8_t>(jessilib::string_view_cast<char>(in_data));
std::u8string_view data_view = u8_data.second;
deserialize_json<char8_t, true>(result, data_view);
}
// Return result
result.back() = '}';
return result;
}
std::string json_parser::serialize_bytes(const object& in_object, encoding in_write_encoding) {
switch (in_write_encoding) {
case encoding::utf_8:
return serialize_impl<char8_t, char>(in_object);
case encoding::utf_16:
return serialize_impl<char16_t, char>(in_object);
case encoding::utf_32:
return serialize_impl<char16_t, char>(in_object);
case encoding::wchar:
return serialize_impl<char16_t, char>(in_object);
default:
throw std::invalid_argument{ "Invalid data type: " + std::to_string(static_cast<size_t>(in_object.type())) };
break;
}
return {};
}
} // namespace jessilib

6
src/common/serialize.cpp

@ -54,16 +54,16 @@ object deserialize_object(std::u8string_view in_data, const std::string& in_form
}
object deserialize_object(std::istream& in_stream, const std::string& in_format) {
return get_parser(in_format)->deserialize(in_stream);
return get_parser(in_format)->deserialize_bytes(in_stream, encoding::utf_8);
}
/** Serialization */
std::u8string serialize_object(const object& in_object, const std::string& in_format) {
return get_parser(in_format)->serialize(in_object);
return get_parser(in_format)->serialize<char8_t>(in_object);
}
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format) {
get_parser(in_format)->serialize(in_stream, in_object);
get_parser(in_format)->serialize_bytes(in_stream, in_object, encoding::utf_8);
}
} // namespace jessilib

84
src/include/jessilib/parser.hpp

@ -20,6 +20,7 @@
#include <memory>
#include "object.hpp"
#include "unicode_base.hpp"
#include "impl/parser_manager.hpp"
namespace jessilib {
@ -27,20 +28,50 @@ namespace jessilib {
class parser {
public:
virtual ~parser() = default;
using byte_type = uint8_t;
using bytes_view_type = std::basic_string_view<byte_type>;
/** Interface methods */
/**
* Deserializes an object directly from a stream
* Deserializes an object directly from a stream of bytes
* May throw: invalid_argument
*
* @param in_stream Stream to deserialize object from
* @return A valid (possibly null) object
*/
virtual object deserialize(std::istream& in_stream);
virtual object deserialize(std::u8string_view in_data) = 0; // TODO: serialize from arbitrary unicode strings
virtual void serialize(std::ostream& in_stream, const object& in_object);
virtual std::u8string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings
virtual object deserialize_bytes(std::istream& in_stream, encoding in_read_encoding);
virtual object deserialize_bytes(bytes_view_type in_data, encoding in_read_encoding) = 0;
virtual void serialize_bytes(std::ostream& in_stream, const object& in_object, encoding in_write_encoding);
virtual std::string serialize_bytes(const object& in_object, encoding in_write_encoding) = 0;
/**
 * Deserializes an object from text of any character type that has a default encoding
 *
 * The text's code units are handed to deserialize_bytes as raw bytes, together with the
 * encoding that default_encoding_info associates with CharT.
 *
 * @tparam CharT Character type of the text
 * @param in_text Text to deserialize
 * @return The object produced by deserialize_bytes
 */
template<typename CharT>
object deserialize(std::basic_string_view<CharT> in_text) {
    // Byte length is the number of code units times the size of one unit
    const byte_type* const first_byte = reinterpret_cast<const byte_type*>(in_text.data());
    const size_t byte_count = in_text.size() * sizeof(CharT);
    return deserialize_bytes(bytes_view_type{ first_byte, byte_count }, default_encoding_info<CharT>::text_encoding);
}
// Perhaps this could be condensed down to a simple method such that: serialize(out_variant, in_object, encoding)?
virtual std::u8string serialize_u8(const object& in_object) = 0;
virtual std::u16string serialize_u16(const object& in_object) = 0;
virtual std::u32string serialize_u32(const object& in_object) = 0;
virtual std::wstring serialize_w(const object& in_object) = 0;
/**
 * Serializes an object to text of the requested character type
 *
 * Dispatches at compile time to the serialize_* virtual method that matches CharT.
 *
 * @tparam CharT One of char8_t, char16_t, char32_t or wchar_t; any other type fails to compile
 * @param in_object Object to serialize
 * @return Serialized text
 */
template<typename CharT>
std::basic_string<CharT> serialize(const object& in_object) {
    if constexpr (std::is_same_v<CharT, char8_t>) {
        return serialize_u8(in_object);
    }
    else if constexpr (std::is_same_v<CharT, char16_t>) {
        return serialize_u16(in_object);
    }
    else if constexpr (std::is_same_v<CharT, char32_t>) {
        return serialize_u32(in_object);
    }
    else {
        // Reject unsupported types at compile time; otherwise the function would compile and
        // then flow off the end without returning a value (undefined behavior)
        static_assert(std::is_same_v<CharT, wchar_t>,
            "parser::serialize<CharT>: CharT must be char8_t, char16_t, char32_t or wchar_t");
        return serialize_w(in_object);
    }
}
}; // parser
template<typename T>
@ -57,4 +88,47 @@ public:
impl::parser_manager::id m_id;
}; // parser_registration
/**
 * Copies simple (one code unit per character) UTF-8 text into a new string
 *
 * When OutCharT and ResultCharT have the same size, each input unit is converted to one
 * ResultCharT. When ResultCharT is `char` and OutCharT is wider, each input unit is widened to
 * OutCharT and its bytes are written to the result, _as if_ the result held OutCharT units.
 *
 * @tparam OutCharT Code unit type the output represents
 * @tparam ResultCharT Character type of the returned string; same size as OutCharT, or `char`
 * @param in_string Text to copy
 * @return The copied text (empty when in_string is empty)
 */
template<typename OutCharT, typename ResultCharT = OutCharT>
std::basic_string<ResultCharT> simple_copy(std::u8string_view in_string) {
    // Any other pairing has no defined output; fail the build instead of returning nothing
    static_assert(sizeof(OutCharT) == sizeof(ResultCharT) || std::is_same_v<ResultCharT, char>,
        "Invalid use of simple_copy: ResultCharT must match OutCharT in size, or be char");

    std::basic_string<ResultCharT> result;
    if constexpr (sizeof(OutCharT) == sizeof(ResultCharT)) {
        result.assign(in_string.begin(), in_string.end());
    }
    else {
        // Copy in_string into result _as if_ result were of OutCharT
        result.reserve(in_string.size() * sizeof(OutCharT));
        for (OutCharT codepoint : in_string) {
            // TODO: Assuming native for now, but we need to account for endianness later
            result.append(reinterpret_cast<const char*>(&codepoint), sizeof(codepoint));
        }
    }

    return result;
}
template<typename OutCharT, typename ResultCharT>
void simple_append(std::basic_string<ResultCharT>& out_string, std::u8string_view in_string) {
if constexpr (sizeof(OutCharT) == sizeof(ResultCharT)) {
out_string.append(in_string.begin(), in_string.end());
}
else if constexpr (std::is_same_v<ResultCharT, char>) {
// Copy in_string into result _as if_ result were of OutCharT
for (OutCharT codepoint : in_string) {
// TODO: Assuming native for now, but we need to account for endianness later
out_string.append(reinterpret_cast<const char*>(&codepoint), sizeof(codepoint));
}
}
// else // Invalid use of simple_append
}
/**
 * Appends a single simple (one code unit) character to out_string
 *
 * Same-size OutCharT/ResultCharT: the character is appended as one ResultCharT. ResultCharT
 * `char` with a differently sized OutCharT: the character is widened to OutCharT and its bytes
 * are appended. Any other pairing appends nothing.
 *
 * @tparam OutCharT Code unit type the output represents
 * @tparam ResultCharT Character type of out_string
 * @param out_string String to append to
 * @param in_character Character to append
 */
template<typename OutCharT, typename ResultCharT>
void simple_append(std::basic_string<ResultCharT>& out_string, char8_t in_character) {
    constexpr bool same_unit_size = sizeof(OutCharT) == sizeof(ResultCharT);
    constexpr bool writes_raw_bytes = std::is_same_v<ResultCharT, char>;

    if constexpr (same_unit_size) {
        out_string.push_back(in_character);
    }
    else if constexpr (writes_raw_bytes) {
        // Emit the character as the bytes of an OutCharT
        // TODO: Assuming native for now, but we need to account for endianness later
        const OutCharT widened = in_character;
        out_string.append(reinterpret_cast<const char*>(&widened), sizeof(OutCharT));
    }
}
} // namespace jessilib

181
src/include/jessilib/parsers/json.hpp

@ -18,6 +18,7 @@
#pragma once
#include "fmt/xchar.h" // fmt::format
#include "jessilib/parser.hpp"
#include "jessilib/unicode.hpp" // join
#include "jessilib/unicode_syntax.hpp" // syntax trees
@ -29,8 +30,23 @@ namespace jessilib {
class json_parser : public parser {
public:
/** deserialize/serialize overrides */
virtual object deserialize(std::u8string_view in_data) override;
virtual std::u8string serialize(const object& in_object) override;
object deserialize_bytes(bytes_view_type in_data, encoding in_write_encoding) override;
std::string serialize_bytes(const object& in_object, encoding in_write_encoding) override;
std::u8string serialize_u8(const object& in_object) override { return serialize_impl<char8_t>(in_object); }
std::u16string serialize_u16(const object& in_object) override { return serialize_impl<char16_t>(in_object); }
std::u32string serialize_u32(const object& in_object) override { return serialize_impl<char32_t>(in_object); }
std::wstring serialize_w(const object& in_object) override { return serialize_impl<wchar_t>(in_object); }
/**
 * Serializes an object to a freshly created JSON string
 *
 * Convenience wrapper over the appending serialize_impl overload.
 *
 * @tparam CharT Code unit type of the JSON text
 * @tparam ResultCharT Character type of the returned string
 * @param in_object Object to serialize
 * @return JSON text for in_object
 */
template<typename CharT, typename ResultCharT = CharT>
std::basic_string<ResultCharT> serialize_impl(const object& in_object) {
    std::basic_string<ResultCharT> serialized;
    serialize_impl<CharT, ResultCharT>(serialized, in_object);
    return serialized;
}
template<typename CharT, typename ResultCharT = CharT>
void serialize_impl(std::basic_string<ResultCharT>& out_string, const object& in_object);
};
/**
@ -451,4 +467,165 @@ bool deserialize_json(object& out_object, std::basic_string_view<CharT>& inout_r
(context, inout_read_view);
}
template<typename CharT, typename ResultCharT>
void make_json_string(std::basic_string<ResultCharT>& out_string, std::u8string_view in_string) {
using namespace std::literals;
out_string.reserve(out_string.size() + in_string.size() + 2);
simple_append<CharT, ResultCharT>(out_string, '\"');
decode_result decode;
while ((decode = decode_codepoint(in_string)).units != 0) {
if (decode.codepoint == U'\\') { // backslash
simple_append<CharT, ResultCharT>(out_string, u8"\\\\"sv);
}
else if (decode.codepoint == U'\"') { // quotation
simple_append<CharT, ResultCharT>(out_string, u8"\\\""sv);
}
else if (decode.codepoint < 0x20) { // control characters
simple_append<CharT, ResultCharT>(out_string, u8"\\u00"sv);
// overwrite last 2 zeroes with correct hexadecimal sequence
char data[2]; // Will only ever use 2 chars
char* data_end = data + sizeof(data);
auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16);
if (to_chars_result.ptr == data) {
// No bytes written
simple_append<CharT, ResultCharT>(out_string, u8"00"sv);
}
else if (to_chars_result.ptr != data_end) {
// 1 byte written
simple_append<CharT, ResultCharT>(out_string, '0');
simple_append<CharT, ResultCharT>(out_string, data[0]);
}
else {
// 2 bytes written
simple_append<CharT, ResultCharT>(out_string, std::u8string_view{ reinterpret_cast<char8_t*>(data), sizeof(data) });
}
}
else {
if constexpr (sizeof(CharT) == sizeof(char8_t) && sizeof(CharT) == sizeof(ResultCharT)) {
// Valid UTF-8 sequence; copy it over
out_string.append(reinterpret_cast<const ResultCharT*>(in_string.data()), decode.units);
}
else if constexpr (sizeof(CharT) == sizeof(ResultCharT)){
// Valid UTF-8 codepoint; append it
encode_codepoint(out_string, decode.codepoint);
}
else {
// Valid UTF-8 codepoint; encode & append it
encode_buffer_type<CharT> buffer;
size_t units_written = encode_codepoint(buffer, decode.codepoint);
out_string.append(reinterpret_cast<ResultCharT*>(buffer), units_written * sizeof(CharT));
}
}
in_string.remove_prefix(decode.units);
}
simple_append<CharT, ResultCharT>(out_string, '\"');
}
// Null-terminated "{}" in the requested character type; passed to fmt::format as the format
// string when serializing numbers
template<typename CharT>
static constexpr CharT empty_format_arg[3]{ '{', '}', 0 };
/**
 * Serializes an object as JSON text, appending the result to out_string
 *
 * @tparam CharT Code unit type of the JSON text to produce
 * @tparam ResultCharT Character type of out_string: either the same size as CharT, or `char` to
 *         receive the CharT code units as raw bytes. Other pairings are not supported.
 * @param out_string String the JSON text is appended to
 * @param in_object Object to serialize
 * @throws std::invalid_argument if in_object reports a type that is not handled here
 */
template<typename CharT, typename ResultCharT>
void json_parser::serialize_impl(std::basic_string<ResultCharT>& out_string, const object& in_object) {
    using namespace std::literals;
    static const object::array_type s_null_array;
    static const object::map_type s_null_map;

    switch (in_object.type()) {
        case object::type::null:
            simple_append<CharT, ResultCharT>(out_string, u8"null"sv);
            return;

        case object::type::boolean:
            if (in_object.get<bool>()) {
                simple_append<CharT, ResultCharT>(out_string, u8"true"sv);
                return;
            }
            simple_append<CharT, ResultCharT>(out_string, u8"false"sv);
            return;

        case object::type::integer:
            if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
                out_string += fmt::format(empty_format_arg<ResultCharT>, in_object.get<intmax_t>());
            }
            else if constexpr (std::is_same_v<ResultCharT, char>){
                // Format as CharT text, then append its code units as raw bytes
                auto encoded = fmt::format(empty_format_arg<CharT>, in_object.get<intmax_t>());
                out_string.append(reinterpret_cast<ResultCharT*>(encoded.data()), encoded.size() * sizeof(CharT));
            }
            return;

        case object::type::decimal:
            if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
                out_string += fmt::format(empty_format_arg<ResultCharT>, in_object.get<long double>());
            }
            else if constexpr (std::is_same_v<ResultCharT, char>){
                // Format as CharT text, then append its code units as raw bytes
                auto encoded = fmt::format(empty_format_arg<CharT>, in_object.get<long double>());
                out_string.append(reinterpret_cast<ResultCharT*>(encoded.data()), encoded.size() * sizeof(CharT));
            }
            return;

        case object::type::text:
            make_json_string<CharT, ResultCharT>(out_string, in_object.get<std::u8string>());
            return;

        case object::type::array: {
            if (in_object.size() == 0) {
                simple_append<CharT, ResultCharT>(out_string, u8"[]"sv);
                // Must stop here: there is no trailing comma to replace, so continuing would
                // emit "[]]"
                return;
            }

            simple_append<CharT, ResultCharT>(out_string, '[');

            // Serialize all objects in array
            for (auto& obj : in_object.get<object::array_type>(s_null_array)) {
                json_parser::serialize_impl<CharT, ResultCharT>(out_string, obj);
                simple_append<CharT, ResultCharT>(out_string, ',');
            }

            // Replace last comma with ']'
            if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
                out_string.back() = ']';
            }
            else if constexpr (std::is_same_v<ResultCharT, char>) {
                // The comma occupies sizeof(CharT) bytes of the byte string
                out_string.erase(out_string.size() - sizeof(CharT));
                simple_append<CharT, ResultCharT>(out_string, ']');
            }
            // else // not supported

            return;
        }

        case object::type::map: {
            if (in_object.size() == 0) {
                simple_append<CharT, ResultCharT>(out_string, u8"{}"sv);
                // Must stop here: there is no trailing comma to replace, so continuing would
                // emit "{}}"
                return;
            }

            simple_append<CharT, ResultCharT>(out_string, '{');

            // Serialize all objects in map
            for (auto& item : in_object.get<object::map_type>(s_null_map)) {
                make_json_string<CharT, ResultCharT>(out_string, item.first);
                simple_append<CharT, ResultCharT>(out_string, ':');
                json_parser::serialize_impl<CharT, ResultCharT>(out_string, item.second);
                simple_append<CharT, ResultCharT>(out_string, ',');
            }

            // Replace last comma with '}'
            if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
                out_string.back() = '}';
            }
            else if constexpr (std::is_same_v<ResultCharT, char>) {
                // The comma occupies sizeof(CharT) bytes of the byte string
                out_string.erase(out_string.size() - sizeof(CharT));
                simple_append<CharT, ResultCharT>(out_string, '}');
            }
            // else // not supported

            return;
        }

        default:
            throw std::invalid_argument{ "Invalid data type: " + std::to_string(static_cast<size_t>(in_object.type())) };
    }
}
} // namespace jessilib

12
src/include/jessilib/serialize.hpp

@ -29,13 +29,15 @@ public:
};
/** Deserialization */
object deserialize_object(const std::u8string& in_data, const std::string& in_format);
object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format);
object deserialize_object(std::u8string_view in_data, const std::string& in_format);
object deserialize_object(std::istream& in_stream, const std::string& in_format);
object deserialize_object(std::u16string_view in_data, const std::string& in_format);
object deserialize_object(std::u32string_view in_data, const std::string& in_format);
object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format);
//object deserialize_object(std::u8string_view in_data, const std::string& in_format);
object deserialize_object(std::istream& in_stream, const std::string& in_format); // TODO: add encoding param
/** Serialization */
std::u8string serialize_object(const object& in_object, const std::string& in_format);
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format);
std::u8string serialize_object(const object& in_object, const std::string& in_format); // TODO: templatize?
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format); // TODO: add encoding param
} // namespace jessilib

61
src/include/jessilib/unicode_base.hpp

@ -169,6 +169,67 @@ struct unicode_traits<wchar_t> : std::true_type {
template<typename CharT>
using encode_buffer_type = CharT[unicode_traits<CharT>::max_units_per_codepoint];
// enum representing the character encodings I intend to support
// (encoding_info pairs each value with the character type that holds its code units)
enum class encoding {
utf_8, // The most common and arguably superior encoding for files and networking protocols not in straight ASCII
utf_16,
utf_32,
wchar, // essentially only really for std::wcout / std::wcin
multibyte // essentially only really for std::cout / std::cin
};
// Compile-time description of an encoding:
//   data_type     - character type that holds one code unit of the encoding
//   text_encoding - the encoding value being described
// Only the specializations below are defined; the primary template is left incomplete.
template<encoding EncodingV>
struct encoding_info;
template<>
struct encoding_info<encoding::utf_8> {
using data_type = char8_t;
static constexpr encoding text_encoding = encoding::utf_8;
};
template<>
struct encoding_info<encoding::utf_16> {
using data_type = char16_t;
static constexpr encoding text_encoding = encoding::utf_16;
};
template<>
struct encoding_info<encoding::utf_32> {
using data_type = char32_t;
static constexpr encoding text_encoding = encoding::utf_32;
};
template<>
struct encoding_info<encoding::wchar> {
using data_type = wchar_t;
static constexpr encoding text_encoding = encoding::wchar;
};
template<>
struct encoding_info<encoding::multibyte> {
using data_type = char;
static constexpr encoding text_encoding = encoding::multibyte;
};
// Maps a character type to the encoding_info of the encoding assumed for text of that type.
// NOTE(review): no specialization for plain `char` appears in the lines shown here, so
// default_encoding_info<char> would be an incomplete type -- confirm whether that is intended.
template<typename CharT>
struct default_encoding_info;
template<>
struct default_encoding_info<char8_t> : public encoding_info<encoding::utf_8> {
};
template<>
struct default_encoding_info<char16_t> : public encoding_info<encoding::utf_16> {
};
template<>
struct default_encoding_info<char32_t> : public encoding_info<encoding::utf_32> {
};
template<>
struct default_encoding_info<wchar_t> : public encoding_info<encoding::wchar> {
};
/** single-unit helper utilities */
char32_t fold(char32_t in_codepoint); // Folds codepoint for case-insensitive checks (not for human output)
constexpr int as_base(char32_t in_character, unsigned int base); // The value represented by in_character in terms of base if valid, -1 otherwise

57
src/test/parser.cpp

@ -30,14 +30,63 @@ using namespace std::literals;
class test_parser : public parser {
public:
/** deserialize/serialize overrides */
virtual object deserialize(std::u8string_view in_data) override {
return deserialize_impl(in_data);
object deserialize_bytes(bytes_view_type in_data, encoding in_write_encoding) override {
std::u8string u8_string;
switch (in_write_encoding) {
case encoding::utf_8:
u8_string = string_view_cast<char8_t>(in_data);
break;
case encoding::utf_16:
u8_string = jessilib::string_cast<char8_t>(string_view_cast<char16_t>(in_data));
break;
case encoding::utf_32:
u8_string = jessilib::string_cast<char8_t>(string_view_cast<char32_t>(in_data));
break;
case encoding::wchar:
u8_string = jessilib::string_cast<char8_t>(string_view_cast<wchar_t>(in_data));
break;
case encoding::multibyte:
u8_string = mbstring_to_ustring<char8_t>(string_view_cast<char>(in_data)).second;
break;
}
virtual std::u8string serialize(const object& in_object) override {
return deserialize_impl(std::u8string_view{ u8_string });
}
std::string serialize_bytes(const object& in_object, encoding in_write_encoding) override {
std::u8string u8_serialized = serialize_impl(in_object);
switch (in_write_encoding) {
case encoding::utf_8:
return { u8_serialized.begin(), u8_serialized.end() };
case encoding::utf_16: {
auto casted = string_cast<char16_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(char16_t) };
}
case encoding::utf_32: {
auto casted = string_cast<char32_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(char32_t) };
}
case encoding::wchar: {
auto casted = string_cast<wchar_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(wchar_t) };
}
case encoding::multibyte:
return ustring_to_mbstring(u8_serialized).second;
}
return {};
}
// UTF-8 serialization goes through the replaceable serialize_impl test hook
virtual std::u8string serialize_u8(const object& in_object) override {
return serialize_impl(in_object);
}
// The remaining encodings convert the UTF-8 result with string_cast
std::u16string serialize_u16(const object& in_object) override { return string_cast<char16_t>(serialize_u8(in_object)); }
std::u32string serialize_u32(const object& in_object) override { return string_cast<char32_t>(serialize_u8(in_object)); }
std::wstring serialize_w(const object& in_object) override { return string_cast<wchar_t>(serialize_u8(in_object)); }
/** helpers */
static void reset() {
serialize_impl = &serialize_default;
@ -54,7 +103,7 @@ public:
}
static object deserialize_default(std::u8string_view in_data) {
return object{ string_view_cast<char8_t>(in_data) };
return object{ in_data };
}
/** static members */

45
src/test/parsers/json.cpp

@ -25,26 +25,26 @@ using namespace std::literals;
TEST(JsonParser, serialize_null) {
json_parser parser;
EXPECT_EQ(parser.serialize({}), u8"null");
EXPECT_EQ(parser.serialize<char8_t>({}), u8"null");
}
TEST(JsonParser, serialize_boolean) {
json_parser parser;
EXPECT_EQ(parser.serialize(true), u8"true");
EXPECT_EQ(parser.serialize(false), u8"false");
EXPECT_EQ(parser.serialize<char8_t>(true), u8"true");
EXPECT_EQ(parser.serialize<char8_t>(false), u8"false");
}
TEST(JsonParser, serialize_integer) {
json_parser parser;
EXPECT_EQ(parser.serialize(1234), u8"1234");
EXPECT_EQ(parser.serialize<char8_t>(1234), u8"1234");
}
TEST(JsonParser, serialize_decimal) {
json_parser parser;
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize(12.34).c_str())), 12.34);
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize(1234.0).c_str())), 1234.0);
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize<char8_t>(12.34).c_str())), 12.34);
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize<char8_t>(1234.0).c_str())), 1234.0);
}
// necessary due to some sort of bug with EXPECT_EQ on MSVC
@ -56,10 +56,15 @@ void expect_eq(LeftT in_left, RightT in_right) {
TEST(JsonParser, serialize_string) {
json_parser parser;
EXPECT_EQ(parser.serialize(u8"text"), u8R"json("text")json");
expect_eq(parser.serialize(u8"\"text\""), u8R"json("\"text\"")json");
expect_eq(parser.serialize(u8"\"te\x01xt\""), u8R"json("\"te\u0001xt\"")json");
expect_eq(parser.serialize(u8"\"te\x10xt\""), u8R"json("\"te\u0010xt\"")json");
EXPECT_EQ(parser.serialize<char8_t>(u8"text"), u8R"json("text")json");
expect_eq(parser.serialize<char8_t>(u8"\"text\""), u8R"json("\"text\"")json");
expect_eq(parser.serialize<char8_t>(u8"\"te\x01xt\""), u8R"json("\"te\u0001xt\"")json");
expect_eq(parser.serialize<char8_t>(u8"\"te\x10xt\""), u8R"json("\"te\u0010xt\"")json");
EXPECT_EQ(parser.serialize<char8_t>(u8"text"), u8R"json("text")json");
EXPECT_EQ(parser.serialize<char16_t>(u8"text"), uR"json("text")json");
EXPECT_EQ(parser.serialize<char32_t>(u8"text"), UR"json("text")json");
EXPECT_EQ(parser.serialize<wchar_t>(u8"text"), LR"json("text")json");
}
TEST(JsonParser, serialize_array) {
@ -71,7 +76,7 @@ TEST(JsonParser, serialize_array) {
object{}
};
EXPECT_EQ(parser.serialize(array),
EXPECT_EQ(parser.serialize<char8_t>(array),
u8R"json([true,1234,"text",null])json");
}
@ -84,7 +89,7 @@ TEST(JsonParser, serialize_map) {
obj[u8"some_string"] = u8"text";
obj[u8"some_null"];
EXPECT_EQ(parser.serialize(obj),
EXPECT_EQ(parser.serialize<char8_t>(obj),
u8R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json");
}
@ -143,14 +148,14 @@ TEST(JsonParser, deserialize_string) {
TEST(JsonParser, deserialize_array) {
json_parser parser;
constexpr const char8_t* json_data = u8R"json([
constexpr std::u8string_view json_data = u8R"json([
true,
false,
1234,
12.34,
0.1234,
"text"
])json";
])json"sv;
auto array = parser.deserialize(json_data).get<std::vector<object>>();
ASSERT_EQ(array.size(), 6U);
@ -165,7 +170,7 @@ TEST(JsonParser, deserialize_array) {
TEST(JsonParser, deserialize_array_nested) {
json_parser parser;
constexpr const char8_t* json_data = u8R"json([
constexpr std::u8string_view json_data = u8R"json([
true,
false,
1234
@ -178,7 +183,7 @@ TEST(JsonParser, deserialize_array_nested) {
12.34,
0.1234,
"text"
])json";
])json"sv;
auto array = parser.deserialize(json_data).get<std::vector<object>>();
ASSERT_EQ(array.size(), 9U);
@ -211,14 +216,14 @@ TEST(JsonParser, deserialize_array_nested) {
TEST(JsonParser, deserialize_map) {
json_parser parser;
constexpr const char8_t* json_data = u8R"json({
constexpr std::u8string_view json_data = u8R"json({
"some_true":true,
"some_false" : false,
"some_int": 1234,
"some_double" : 12.34,
"some_other_double" :0.1234,
"some_text" : "text"
})json";
})json"sv;
object obj = parser.deserialize(json_data);
EXPECT_EQ(obj.size(), 6U);
@ -233,7 +238,7 @@ TEST(JsonParser, deserialize_map) {
TEST(JsonParser, deserialize_map_nested) {
json_parser parser;
constexpr const char8_t* json_data = u8R"json({
constexpr std::u8string_view json_data = u8R"json({
"some_text" : "text",
"some_object" : {
"some_null_object": {}
@ -246,7 +251,7 @@ TEST(JsonParser, deserialize_map_nested) {
"makes toot": true
}},
"some other text":" asdf "
})json";
})json"sv;
object obj = parser.deserialize(json_data);
EXPECT_EQ(obj.size(), 4U);

Loading…
Cancel
Save