Compare commits

...

5 Commits

  1. 13
      src/common/parser/parser.cpp
  2. 147
      src/common/parsers/json.cpp
  3. 18
      src/common/serialize.cpp
  4. 2
      src/include/jessilib/http_query.hpp
  5. 84
      src/include/jessilib/parser.hpp
  6. 202
      src/include/jessilib/parsers/json.hpp
  7. 14
      src/include/jessilib/serialize.hpp
  8. 42
      src/include/jessilib/unicode.hpp
  9. 75
      src/include/jessilib/unicode_base.hpp
  10. 8
      src/include/jessilib/unicode_compare.hpp
  11. 9
      src/include/jessilib/unicode_syntax.hpp
  12. 86
      src/test/http_query.cpp
  13. 89
      src/test/parser.cpp
  14. 71
      src/test/parsers/json.cpp
  15. 14
      src/test/unicode.cpp
  16. 23
      src/test/unicode_sequence.cpp

13
src/common/parser/parser.cpp

@ -17,12 +17,13 @@
*/
#include "parser.hpp"
#include "unicode.hpp"
#include <istream>
namespace jessilib {
object parser::deserialize(std::istream& in_stream) {
std::vector<char> data;
object parser::deserialize_bytes(std::istream& in_stream, encoding in_read_encoding) {
std::vector<byte_type> data;
// Read entire stream into data
char buffer[1024];
@ -32,11 +33,13 @@ object parser::deserialize(std::istream& in_stream) {
}
// Pass data to deserialize
return deserialize(std::string_view{ &data.front(), data.size() });
return deserialize_bytes(bytes_view_type{ &data.front(), data.size() }, in_read_encoding);
}
void parser::serialize(std::ostream& in_stream, const object& in_object) {
in_stream << serialize(in_object);
void parser::serialize_bytes(std::ostream& in_stream, const object& in_object, encoding in_write_encoding) {
// TODO: replace this method
auto bytes = serialize_bytes(in_object, in_write_encoding);
in_stream << bytes;
}
} // namespace jessilib

147
src/common/parsers/json.cpp

@ -17,128 +17,53 @@
*/
#include "parsers/json.hpp"
#include <charconv>
using namespace std::literals;
namespace jessilib {
std::string make_json_string(std::u8string_view in_string) {
std::string result;
result.reserve(in_string.size() + 2);
result = '\"';
decode_result decode;
while ((decode = decode_codepoint(in_string)).units != 0) {
if (decode.codepoint == U'\\') { // backslash
result += '\\';
result += '\\';
}
else if (decode.codepoint == U'\"') { // quotation
result += '\\';
result += '\"';
}
else if (decode.codepoint < 0x20) { // control characters
result += "\\u0000"sv;
// overwrite last 2 zeroes with correct hexadecimal sequence
char* data_end = result.data() + result.size();
char* data = data_end - 2; // Will only ever use 2 chars
auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16);
if (to_chars_result.ec == std::errc{} && to_chars_result.ptr != data_end) {
// Only 1 byte written; shift it over
*to_chars_result.ptr = *(to_chars_result.ptr - 1);
// And fill in the zeroes
*(to_chars_result.ptr - 1) = '0';
}
}
else {
// Valid UTF-8 sequence; copy it over
result.append(reinterpret_cast<const char*>(in_string.data()), decode.units);
}
object json_parser::deserialize_bytes(bytes_view_type in_data, encoding in_write_encoding) {
object result;
in_string.remove_prefix(decode.units);
if (in_write_encoding == encoding::utf_8) {
std::u8string_view data_view = jessilib::string_view_cast<char8_t>(in_data);
deserialize_json<char8_t, true>(result, data_view);
}
else if (in_write_encoding == encoding::utf_16) {
std::u16string_view data_view = jessilib::string_view_cast<char16_t>(in_data);
deserialize_json<char16_t, true>(result, data_view);
}
else if (in_write_encoding == encoding::utf_32) {
std::u32string_view data_view = jessilib::string_view_cast<char32_t>(in_data);
deserialize_json<char32_t, true>(result, data_view);
}
else if (in_write_encoding == encoding::wchar) {
std::wstring_view data_view = jessilib::string_view_cast<wchar_t>(in_data);
deserialize_json<wchar_t, true>(result, data_view);
}
else if (in_write_encoding == encoding::multibyte) {
// TODO: support without copying... somehow
auto u8_data = mbstring_to_ustring<char8_t>(jessilib::string_view_cast<char>(in_data));
std::u8string_view data_view = u8_data.second;
deserialize_json<char8_t, true>(result, data_view);
}
result += '\"';
return result;
}
object json_parser::deserialize(std::string_view in_data) {
object result;
deserialize_json<char, true>(result, in_data);
return result;
}
std::string json_parser::serialize(const object& in_object) {
static const object::array_type s_null_array;
static const object::map_type s_null_map;
switch (in_object.type()) {
case object::type::null:
return "null"s;
case object::type::boolean:
if (in_object.get<bool>()) {
return "true"s;
}
return "false"s;
case object::type::integer:
return std::to_string(in_object.get<intmax_t>());
case object::type::decimal:
return std::to_string(in_object.get<long double>());
case object::type::text:
return make_json_string(in_object.get<std::u8string>());
case object::type::array: {
if (in_object.size() == 0) {
return "[]"s;
}
std::string result;
result = '[';
// Serialize all objects in array
for (auto& obj : in_object.get<object::array_type>(s_null_array)) {
result += json_parser::serialize(obj);
result += ',';
}
// Return result
result.back() = ']';
return result;
}
case object::type::map: {
if (in_object.size() == 0) {
return "{}"s;
}
std::string result;
result = '{';
// Serialize all objects in map
for (auto& item : in_object.get<object::map_type>(s_null_map)) {
result += make_json_string(item.first);
result += ":"sv;
result += json_parser::serialize(item.second);
result += ',';
}
// Return result
result.back() = '}';
return result;
}
std::string json_parser::serialize_bytes(const object& in_object, encoding in_write_encoding) {
switch (in_write_encoding) {
case encoding::utf_8:
return serialize_impl<char8_t, char>(in_object);
case encoding::utf_16:
return serialize_impl<char16_t, char>(in_object);
case encoding::utf_32:
return serialize_impl<char16_t, char>(in_object);
case encoding::wchar:
return serialize_impl<char16_t, char>(in_object);
default:
throw std::invalid_argument{ "Invalid data type: " + std::to_string(static_cast<size_t>(in_object.type())) };
break;
}
return {};
}
} // namespace jessilib

18
src/common/serialize.cpp

@ -41,29 +41,29 @@ std::shared_ptr<parser> get_parser(const std::string& in_format) {
}
/** Deserialization */
object deserialize_object(const std::string& in_data, const std::string& in_format) {
return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format);
object deserialize_object(const std::u8string& in_data, const std::string& in_format) {
return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format);
}
object deserialize_object(const std::vector<char>& in_data, const std::string& in_format) {
return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format);
object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format) {
return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format);
}
object deserialize_object(std::string_view in_data, const std::string& in_format) {
object deserialize_object(std::u8string_view in_data, const std::string& in_format) {
return get_parser(in_format)->deserialize(in_data);
}
object deserialize_object(std::istream& in_stream, const std::string& in_format) {
return get_parser(in_format)->deserialize(in_stream);
return get_parser(in_format)->deserialize_bytes(in_stream, encoding::utf_8);
}
/** Serialization */
std::string serialize_object(const object& in_object, const std::string& in_format) {
return get_parser(in_format)->serialize(in_object);
std::u8string serialize_object(const object& in_object, const std::string& in_format) {
return get_parser(in_format)->serialize<char8_t>(in_object);
}
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format) {
get_parser(in_format)->serialize(in_stream, in_object);
get_parser(in_format)->serialize_bytes(in_stream, in_object, encoding::utf_8);
}
} // namespace jessilib

2
src/include/jessilib/http_query.hpp

@ -166,7 +166,7 @@ constexpr bool deserialize_html_form(ContainerT& out_container, std::basic_strin
HTMLFormContext<CharT, ContainerT> context{ out_container, inout_string.data() };
constexpr auto& html_form_tree = html_form_root_tree<CharT, ContainerT>;
static_assert(is_sorted<char, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted");
static_assert(is_sorted<CharT, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted");
std::basic_string_view<CharT> read_view{ inout_string };
if (apply_syntax_tree<CharT, decltype(context), html_form_tree, std::size(html_form_tree), html_form_default_action>

84
src/include/jessilib/parser.hpp

@ -20,6 +20,7 @@
#include <memory>
#include "object.hpp"
#include "unicode_base.hpp"
#include "impl/parser_manager.hpp"
namespace jessilib {
@ -27,20 +28,50 @@ namespace jessilib {
class parser {
public:
virtual ~parser() = default;
using byte_type = uint8_t;
using bytes_view_type = std::basic_string_view<byte_type>;
/** Interface methods */
/**
* Deserializes an object directly from a stream
* Deserializes an object directly from a stream of bytes
* May throw: invalid_argument
*
* @param in_stream Stream to deserialize object from
* @return A valid (possibly null) object
*/
virtual object deserialize(std::istream& in_stream);
virtual object deserialize(std::string_view in_data) = 0; // TODO: serialize from arbitrary unicode strings
virtual void serialize(std::ostream& in_stream, const object& in_object);
virtual std::string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings
virtual object deserialize_bytes(std::istream& in_stream, encoding in_read_encoding);
virtual object deserialize_bytes(bytes_view_type in_data, encoding in_read_encoding) = 0;
virtual void serialize_bytes(std::ostream& in_stream, const object& in_object, encoding in_write_encoding);
virtual std::string serialize_bytes(const object& in_object, encoding in_write_encoding) = 0;
/**
 * Deserializes an object from text, using the encoding associated with the text's character type
 *
 * The text is reinterpreted as a view over its underlying bytes and handed to deserialize_bytes, together with
 * default_encoding_info<CharT>::text_encoding.
 *
 * @param in_text Text to deserialize an object from
 * @return Whatever deserialize_bytes returns for the text's bytes
 */
template<typename CharT>
object deserialize(std::basic_string_view<CharT> in_text) {
	const byte_type* raw_bytes = reinterpret_cast<const byte_type*>(in_text.data());
	const size_t byte_count = in_text.size() * sizeof(CharT);
	return deserialize_bytes(bytes_view_type{ raw_bytes, byte_count }, default_encoding_info<CharT>::text_encoding);
}
// Perhaps this could be condensed down to a simple method such that: serialize(out_variant, in_object, encoding)?
virtual std::u8string serialize_u8(const object& in_object) = 0;
virtual std::u16string serialize_u16(const object& in_object) = 0;
virtual std::u32string serialize_u32(const object& in_object) = 0;
virtual std::wstring serialize_w(const object& in_object) = 0;
/**
 * Serializes an object to text of the requested code unit type
 *
 * Dispatches at compile time to serialize_u8 / serialize_u16 / serialize_u32 / serialize_w.
 *
 * @tparam CharT One of char8_t, char16_t, char32_t, or wchar_t
 * @param in_object Object to serialize
 * @return Result of the serialize_* method matching CharT
 */
template<typename CharT>
std::basic_string<CharT> serialize(const object& in_object) {
	// Reject unsupported unit types at compile time; without this, such an instantiation would have no
	// return statement at all and flow off the end of a value-returning function
	static_assert(std::is_same_v<CharT, char8_t>
		|| std::is_same_v<CharT, char16_t>
		|| std::is_same_v<CharT, char32_t>
		|| std::is_same_v<CharT, wchar_t>,
		"serialize<CharT> only supports char8_t, char16_t, char32_t, and wchar_t");

	if constexpr (std::is_same_v<CharT, char8_t>) {
		return serialize_u8(in_object);
	}
	else if constexpr (std::is_same_v<CharT, char16_t>) {
		return serialize_u16(in_object);
	}
	else if constexpr (std::is_same_v<CharT, char32_t>) {
		return serialize_u32(in_object);
	}
	else { // wchar_t; guaranteed by the static_assert above
		return serialize_w(in_object);
	}
}
}; // parser
template<typename T>
@ -57,4 +88,47 @@ public:
impl::parser_manager::id m_id;
}; // parser_registration
/**
 * Copies a UTF-8 string into a new string, widening each code unit individually to OutCharT
 *
 * No multi-unit sequences are decoded; every char8_t unit is converted on its own. When ResultCharT has the same
 * size as OutCharT, the units are copied directly. When ResultCharT is char, each widened unit is appended as its
 * raw bytes (native byte order), _as if_ the result were a string of OutCharT.
 *
 * @tparam OutCharT Code unit type the text is logically encoded as
 * @tparam ResultCharT Element type of the returned string
 * @param in_string String to copy
 * @return Copy of in_string; empty for an empty input or an unsupported OutCharT/ResultCharT combination
 */
template<typename OutCharT, typename ResultCharT = OutCharT>
std::basic_string<ResultCharT> simple_copy(std::u8string_view in_string) {
	std::basic_string<ResultCharT> result;

	if constexpr (sizeof(OutCharT) == sizeof(ResultCharT)) {
		result.assign(in_string.begin(), in_string.end());
	}
	else if constexpr (std::is_same_v<ResultCharT, char>) {
		// Copy in_string into result _as if_ result were of OutCharT
		result.reserve(in_string.size() * sizeof(OutCharT));
		for (OutCharT codepoint : in_string) {
			// TODO: Assuming native for now, but we need to account for endianness later
			// Append (rather than return) so that every unit is copied, not just the first
			result.append(reinterpret_cast<const char*>(&codepoint), sizeof(codepoint));
		}
	}
	// else // Invalid use of simple_copy

	// Single exit point: guarantees a value is returned even for empty input
	return result;
}
/**
 * Appends a UTF-8 string to out_string, widening each code unit individually to OutCharT
 *
 * When OutCharT and ResultCharT have the same size, the units are appended directly. When ResultCharT is char,
 * the raw bytes of each widened unit are appended instead. Any other combination appends nothing.
 *
 * @param out_string String to append to
 * @param in_string Units to append
 */
template<typename OutCharT, typename ResultCharT>
void simple_append(std::basic_string<ResultCharT>& out_string, std::u8string_view in_string) {
	constexpr bool same_unit_size = sizeof(OutCharT) == sizeof(ResultCharT);

	if constexpr (same_unit_size) {
		out_string.append(in_string.begin(), in_string.end());
	}
	else if constexpr (std::is_same_v<ResultCharT, char>) {
		// Write each unit's bytes into out_string, as though out_string stored OutCharT
		for (auto unit_itr = in_string.begin(); unit_itr != in_string.end(); ++unit_itr) {
			// TODO: Assuming native for now, but we need to account for endianness later
			OutCharT widened_unit = *unit_itr;
			const char* unit_bytes = reinterpret_cast<const char*>(&widened_unit);
			out_string.append(unit_bytes, sizeof(widened_unit));
		}
	}
	// else // Invalid use of simple_append
}
/**
 * Appends a single UTF-8 code unit to out_string, widened to OutCharT
 *
 * When OutCharT and ResultCharT have the same size, the unit is appended directly. When ResultCharT is char,
 * the raw bytes of the widened unit are appended instead. Any other combination appends nothing.
 *
 * @param out_string String to append to
 * @param in_character Unit to append
 */
template<typename OutCharT, typename ResultCharT>
void simple_append(std::basic_string<ResultCharT>& out_string, char8_t in_character) {
	if constexpr (sizeof(OutCharT) == sizeof(ResultCharT)) {
		out_string.push_back(in_character);
	}
	else if constexpr (std::is_same_v<ResultCharT, char>) {
		// Write the unit's bytes into out_string, as though out_string stored OutCharT
		// TODO: Assuming native for now, but we need to account for endianness later
		OutCharT widened_unit = in_character;
		const char* unit_bytes = reinterpret_cast<const char*>(&widened_unit);
		out_string.append(unit_bytes, sizeof(widened_unit));
	}
}
} // namespace jessilib

202
src/include/jessilib/parsers/json.hpp

@ -18,6 +18,7 @@
#pragma once
#include "fmt/xchar.h" // fmt::format
#include "jessilib/parser.hpp"
#include "jessilib/unicode.hpp" // join
#include "jessilib/unicode_syntax.hpp" // syntax trees
@ -29,8 +30,23 @@ namespace jessilib {
class json_parser : public parser {
public:
/** deserialize/serialize overrides */
virtual object deserialize(std::string_view in_data) override;
virtual std::string serialize(const object& in_object) override;
object deserialize_bytes(bytes_view_type in_data, encoding in_write_encoding) override;
std::string serialize_bytes(const object& in_object, encoding in_write_encoding) override;
std::u8string serialize_u8(const object& in_object) override { return serialize_impl<char8_t>(in_object); }
std::u16string serialize_u16(const object& in_object) override { return serialize_impl<char16_t>(in_object); }
std::u32string serialize_u32(const object& in_object) override { return serialize_impl<char32_t>(in_object); }
std::wstring serialize_w(const object& in_object) override { return serialize_impl<wchar_t>(in_object); }
/**
 * Serializes an object to JSON and returns it as a new string
 *
 * Convenience wrapper: forwards to the appending serialize_impl overload with an initially empty string.
 *
 * @param in_object Object to serialize
 * @return String filled in by the appending overload
 */
template<typename CharT, typename ResultCharT = CharT>
std::basic_string<ResultCharT> serialize_impl(const object& in_object) {
	std::basic_string<ResultCharT> serialized_text;
	serialize_impl<CharT, ResultCharT>(serialized_text, in_object);
	return serialized_text;
}
template<typename CharT, typename ResultCharT = CharT>
void serialize_impl(std::basic_string<ResultCharT>& out_string, const object& in_object);
};
/**
@ -101,7 +117,9 @@ constexpr syntax_tree_member<CharT, ContextT> make_keyword_value_pair() {
// Unexpected character; throw if appropriate
if constexpr (ContextT::use_exceptions) {
using namespace std::literals;
throw std::invalid_argument{ jessilib::join<std::string>("Invalid JSON data; unexpected token: '"sv, inout_read_view, "' when parsing null"sv) };
throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
inout_read_view,
u8"' when parsing null"sv) };
}
return std::numeric_limits<size_t>::max();
@ -164,7 +182,8 @@ size_t string_start_action(ContextT& inout_context, std::basic_string_view<CharT
if constexpr (ContextT::use_exceptions) {
using namespace std::literals;
throw std::invalid_argument {
jessilib::join_mbstring("Invalid JSON data; invalid token or end of string: "sv, std::u8string_view{ string_data })
jessilib::join_mbstring(u8"Invalid JSON data; invalid token or end of string: "sv,
std::u8string_view{ string_data })
};
}
@ -239,7 +258,9 @@ size_t array_start_action(ContextT& inout_context, std::basic_string_view<CharT>
// Invalid JSON!
if constexpr (ContextT::use_exceptions) {
using namespace std::literals;
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data: expected ',' or ']', instead encountered: "sv, inout_read_view) };
throw std::invalid_argument{ jessilib::join_mbstring(
u8"Invalid JSON data: expected ',' or ']', instead encountered: "sv,
inout_read_view) };
}
return std::numeric_limits<size_t>::max();
@ -285,9 +306,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view<Cha
// Assert that we've reached the start of a key
if (front != '\"') {
if constexpr (ContextT::use_exceptions) {
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data; unexpected token: '"sv,
throw std::invalid_argument{
jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
decode_codepoint(inout_read_view).codepoint,
"' when parsing object map (expected '\"' instead)"sv) };
u8"' when parsing object map (expected '\"' instead)"sv) };
}
return std::numeric_limits<size_t>::max();
@ -312,9 +334,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view<Cha
}
front = inout_read_view.front();
if (front != ':') {
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data; unexpected token: '"sv,
throw std::invalid_argument{
jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
decode_codepoint(inout_read_view).codepoint,
"' when parsing map key (expected ':' instead)"sv) };
u8"' when parsing map key (expected ':' instead)"sv) };
}
inout_read_view.remove_prefix(1); // strip ':'
@ -444,4 +467,165 @@ bool deserialize_json(object& out_object, std::basic_string_view<CharT>& inout_r
(context, inout_read_view);
}
/**
 * Appends in_string to out_string as a quoted JSON string literal
 *
 * Backslashes and quotation marks are escaped with a leading backslash, and codepoints below 0x20 are written as
 * \u00XX escapes. All other codepoints are copied or re-encoded as CharT units. Processing stops at the first
 * position where decode_codepoint reports 0 units (presumably end of input or an invalid sequence -- confirm
 * against decode_codepoint), after which the closing quote is appended.
 *
 * @tparam CharT Code unit type the JSON text is encoded as
 * @tparam ResultCharT Element type of out_string
 * @param out_string String to append the JSON string literal to
 * @param in_string UTF-8 text to escape and append
 */
template<typename CharT, typename ResultCharT>
void make_json_string(std::basic_string<ResultCharT>& out_string, std::u8string_view in_string) {
using namespace std::literals;
// Room for the text plus the two surrounding quotes; escapes may still require further growth
out_string.reserve(out_string.size() + in_string.size() + 2);
simple_append<CharT, ResultCharT>(out_string, '\"');
decode_result decode;
while ((decode = decode_codepoint(in_string)).units != 0) {
if (decode.codepoint == U'\\') { // backslash
simple_append<CharT, ResultCharT>(out_string, u8"\\\\"sv);
}
else if (decode.codepoint == U'\"') { // quotation
simple_append<CharT, ResultCharT>(out_string, u8"\\\""sv);
}
else if (decode.codepoint < 0x20) { // control characters
simple_append<CharT, ResultCharT>(out_string, u8"\\u00"sv);
// append the remaining 2 hexadecimal digits, zero-padding on the left as needed
char data[2]; // Will only ever use 2 chars
char* data_end = data + sizeof(data);
auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16);
if (to_chars_result.ptr == data) {
// No bytes written
simple_append<CharT, ResultCharT>(out_string, u8"00"sv);
}
else if (to_chars_result.ptr != data_end) {
// 1 byte written
simple_append<CharT, ResultCharT>(out_string, '0');
simple_append<CharT, ResultCharT>(out_string, data[0]);
}
else {
// 2 bytes written
simple_append<CharT, ResultCharT>(out_string, std::u8string_view{ reinterpret_cast<char8_t*>(data), sizeof(data) });
}
}
else {
if constexpr (sizeof(CharT) == sizeof(char8_t) && sizeof(CharT) == sizeof(ResultCharT)) {
// Valid UTF-8 sequence; copy it over
out_string.append(reinterpret_cast<const ResultCharT*>(in_string.data()), decode.units);
}
else if constexpr (sizeof(CharT) == sizeof(ResultCharT)){
// Valid UTF-8 codepoint; append it
encode_codepoint(out_string, decode.codepoint);
}
else {
// Valid UTF-8 codepoint; encode & append it
// The units are encoded as CharT first, then appended as their raw bytes
encode_buffer_type<CharT> buffer;
size_t units_written = encode_codepoint(buffer, decode.codepoint);
out_string.append(reinterpret_cast<ResultCharT*>(buffer), units_written * sizeof(CharT));
}
}
// Advance past the codepoint just handled
in_string.remove_prefix(decode.units);
}
simple_append<CharT, ResultCharT>(out_string, '\"');
}
// Null-terminated "{}" format string in units of CharT; passed to fmt::format by json_parser::serialize_impl
template<typename CharT>
static constexpr CharT empty_format_arg[3]{ '{', '}', 0 };
/**
 * Serializes an object as JSON text, appending the result to out_string
 *
 * Arrays and maps are serialized recursively. When ResultCharT is the same size as CharT, the text is appended
 * as-is; when ResultCharT is char, each CharT unit is appended as its raw bytes instead.
 * May throw: invalid_argument (object types without a case below)
 *
 * @tparam CharT Code unit type the JSON text is encoded as
 * @tparam ResultCharT Element type of out_string
 * @param out_string String to append the JSON text to
 * @param in_object Object to serialize
 */
template<typename CharT, typename ResultCharT>
void json_parser::serialize_impl(std::basic_string<ResultCharT>& out_string, const object& in_object) {
	using namespace std::literals;
	static const object::array_type s_null_array;
	static const object::map_type s_null_map;

	switch (in_object.type()) {
		case object::type::null:
			simple_append<CharT, ResultCharT>(out_string, u8"null"sv);
			return;

		case object::type::boolean:
			if (in_object.get<bool>()) {
				simple_append<CharT, ResultCharT>(out_string, u8"true"sv);
				return;
			}
			simple_append<CharT, ResultCharT>(out_string, u8"false"sv);
			return;

		case object::type::integer:
			if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
				out_string += fmt::format(empty_format_arg<ResultCharT>, in_object.get<intmax_t>());
			}
			else if constexpr (std::is_same_v<ResultCharT, char>){
				// Format as CharT units, then append those units' raw bytes
				auto encoded = fmt::format(empty_format_arg<CharT>, in_object.get<intmax_t>());
				out_string.append(reinterpret_cast<ResultCharT*>(encoded.data()), encoded.size() * sizeof(CharT));
			}
			return;

		case object::type::decimal:
			if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
				out_string += fmt::format(empty_format_arg<ResultCharT>, in_object.get<long double>());
			}
			else if constexpr (std::is_same_v<ResultCharT, char>){
				// Format as CharT units, then append those units' raw bytes
				auto encoded = fmt::format(empty_format_arg<CharT>, in_object.get<long double>());
				out_string.append(reinterpret_cast<ResultCharT*>(encoded.data()), encoded.size() * sizeof(CharT));
			}
			return;

		case object::type::text:
			make_json_string<CharT, ResultCharT>(out_string, in_object.get<std::u8string>());
			return;

		case object::type::array: {
			if (in_object.size() == 0) {
				simple_append<CharT, ResultCharT>(out_string, u8"[]"sv);
				// Done; falling through would append another '[' and turn it into ']', yielding "[]]"
				return;
			}

			simple_append<CharT, ResultCharT>(out_string, '[');

			// Serialize all objects in array
			for (auto& obj : in_object.get<object::array_type>(s_null_array)) {
				json_parser::serialize_impl<CharT, ResultCharT>(out_string, obj);
				simple_append<CharT, ResultCharT>(out_string, ',');
			}

			// Replace last comma with ']'
			if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
				out_string.back() = ']';
			}
			else if constexpr (std::is_same_v<ResultCharT, char>) {
				// The trailing comma occupies sizeof(CharT) bytes; drop all of them before appending ']'
				out_string.erase(out_string.size() - sizeof(CharT));
				simple_append<CharT, ResultCharT>(out_string, ']');
			}
			// else // not supported
			return;
		}

		case object::type::map: {
			if (in_object.size() == 0) {
				simple_append<CharT, ResultCharT>(out_string, u8"{}"sv);
				// Done; falling through would append another '{' and turn it into '}', yielding "{}}"
				return;
			}

			simple_append<CharT, ResultCharT>(out_string, '{');

			// Serialize all objects in map
			for (auto& item : in_object.get<object::map_type>(s_null_map)) {
				make_json_string<CharT, ResultCharT>(out_string, item.first);
				simple_append<CharT, ResultCharT>(out_string, ':');
				json_parser::serialize_impl<CharT, ResultCharT>(out_string, item.second);
				simple_append<CharT, ResultCharT>(out_string, ',');
			}

			// Replace last comma with '}'
			if constexpr (sizeof(CharT) == sizeof(ResultCharT)) {
				out_string.back() = '}';
			}
			else if constexpr (std::is_same_v<ResultCharT, char>) {
				// The trailing comma occupies sizeof(CharT) bytes; drop all of them before appending '}'
				out_string.erase(out_string.size() - sizeof(CharT));
				simple_append<CharT, ResultCharT>(out_string, '}');
			}
			// else // not supported
			return;
		}

		default:
			throw std::invalid_argument{ "Invalid data type: " + std::to_string(static_cast<size_t>(in_object.type())) };
	}
}
} // namespace jessilib

14
src/include/jessilib/serialize.hpp

@ -29,13 +29,15 @@ public:
};
/** Deserialization */
object deserialize_object(const std::string& in_data, const std::string& in_format);
object deserialize_object(const std::vector<char>& in_data, const std::string& in_format);
object deserialize_object(std::string_view in_data, const std::string& in_format);
object deserialize_object(std::istream& in_stream, const std::string& in_format);
object deserialize_object(std::u8string_view in_data, const std::string& in_format);
object deserialize_object(std::u16string_view in_data, const std::string& in_format);
object deserialize_object(std::u32string_view in_data, const std::string& in_format);
object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format);
//object deserialize_object(std::u8string_view in_data, const std::string& in_format);
object deserialize_object(std::istream& in_stream, const std::string& in_format); // TODO: add encoding param
/** Serialization */
std::string serialize_object(const object& in_object, const std::string& in_format);
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format);
std::u8string serialize_object(const object& in_object, const std::string& in_format); // TODO: templatize?
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format); // TODO: add encoding param
} // namespace jessilib

42
src/include/jessilib/unicode.hpp

@ -19,6 +19,7 @@
#pragma once
#include <cuchar>
#include <climits>
#include "unicode_compare.hpp"
namespace jessilib {
@ -112,24 +113,30 @@ bool is_valid(const InT& in_string) {
template<typename OutCharT, typename InT>
std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) {
using InCharT = typename impl_unicode::is_string<InT>::type;
size_t in_string_bytes = in_string.size() * sizeof(InCharT);
if constexpr (sizeof(OutCharT) > sizeof(InCharT)) {
// The output type is larger than the input type; verify no partial codepoints
if (in_string_bytes % sizeof(OutCharT) != 0) {
// This cannot be used to produce a valid result
return {};
}
if constexpr (sizeof(InCharT) == sizeof(OutCharT)) {
return { reinterpret_cast<const OutCharT*>(in_string.data()), in_string.size() };
}
else {
size_t in_string_bytes = in_string.size() * sizeof(InCharT);
if constexpr (sizeof(OutCharT) > sizeof(InCharT)) {
// The output type is larger than the input type; verify no partial codepoints
if (in_string_bytes % sizeof(OutCharT) != 0) {
// This cannot be used to produce a valid result
return {};
}
}
size_t out_string_units = in_string_bytes / sizeof(OutCharT);
const OutCharT* data_begin = reinterpret_cast<const OutCharT*>(in_string.data());
return { data_begin, out_string_units };
size_t out_string_units = in_string_bytes / sizeof(OutCharT);
const OutCharT* data_begin = reinterpret_cast<const OutCharT*>(in_string.data());
return { data_begin, out_string_units };
}
}
template<typename OutCharT, typename InT>
std::basic_string<OutCharT> string_cast(const InT& in_string) {
static_assert(impl_unicode::is_string<InT>::value == true);
using InCharT = typename impl_unicode::is_string<InT>::type;
using InCharT = std::remove_cvref_t<typename impl_unicode::is_string<InT>::type>;
using InEquivalentT = typename unicode_traits<InCharT>::equivalent_type;
using InViewT = std::basic_string_view<InCharT>;
using OutT = std::basic_string<OutCharT>;
@ -223,9 +230,9 @@ std::pair<bool, std::string> ustring_to_mbstring(std::basic_string_view<CharT> i
while ((decode = decode_codepoint(in_string)).units != 0) {
in_string.remove_prefix(decode.units);
char buffer[MB_CUR_MAX]; // MB_LEN_MAX
char buffer[MB_LEN_MAX]; // MB_LEN_MAX is constant, MB_CUR_MAX is not, and C++ doesn't have VLAs
size_t bytes_written = std::c32rtomb(buffer, decode.codepoint, &mbstate);
if (bytes_written > MB_CUR_MAX) {
if (bytes_written > MB_LEN_MAX) {
// Invalid codepoint; return
result.first = false;
return result;
@ -238,6 +245,11 @@ std::pair<bool, std::string> ustring_to_mbstring(std::basic_string_view<CharT> i
return result;
}
/**
 * Convenience overload of ustring_to_mbstring accepting an owning string
 *
 * @param in_string String to convert; viewed (not copied) and forwarded to the string_view overload
 * @return Result of the string_view overload
 */
template<typename CharT>
std::pair<bool, std::string> ustring_to_mbstring(const std::basic_string<CharT>& in_string) {
	std::basic_string_view<CharT> string_view{ in_string };
	return ustring_to_mbstring(string_view);
}
/**
* Searches a string for a specified substring
*
@ -392,7 +404,7 @@ size_t findi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<
ADAPT_BASIC_STRING(findi)
using find_if_predicate_type = bool(*)(char32_t, char*, size_t);
/*using find_if_predicate_type = bool(*)(char32_t, char*, size_t);
inline void find_if(std::basic_string<char>& in_string, find_if_predicate_type in_predicate) {
using CharT = char;
CharT* ptr = in_string.data();
@ -422,7 +434,7 @@ inline void find_if(std::basic_string_view<char>& in_string, find_if_view_predic
in_string_view.remove_prefix(decode.units);
ptr += decode.units;
}
}
}*/
namespace impl_join {

75
src/include/jessilib/unicode_base.hpp

@ -70,7 +70,7 @@ constexpr size_t encode_codepoint(CharT* out_buffer, char32_t in_codepoint);
std::u8string encode_codepoint_u8(char32_t in_codepoint);
std::u16string encode_codepoint_u16(char32_t in_codepoint);
std::u32string encode_codepoint_u32(char32_t in_codepoint);
std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32
std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32
/** decode_codepoint */
@ -130,15 +130,21 @@ constexpr decode_result decode_surrogate_pair(char16_t in_high_surrogate, char16
template<typename CharT>
struct unicode_traits : std::false_type {};
#ifdef JESSILIB_CHAR_AS_UTF8
template<>
struct unicode_traits<char> : std::true_type {
using equivalent_type = char8_t; // DEPRECATE
static constexpr size_t max_units_per_codepoint = 4;
};
#endif // JESSILIB_CHAR_AS_UTF8
template<>
struct unicode_traits<char8_t> : std::true_type {
using equivalent_type = char; // DEPRECATE
#ifdef JESSILIB_CHAR_AS_UTF8
using equivalent_type = char;
#else // JESSILIB_CHAR_AS_UTF8
using equivalent_type = char8_t;
#endif // JESSILIB_CHAR_AS_UTF8
static constexpr size_t max_units_per_codepoint = 4;
};
@ -163,6 +169,67 @@ struct unicode_traits<wchar_t> : std::true_type {
template<typename CharT>
using encode_buffer_type = CharT[unicode_traits<CharT>::max_units_per_codepoint];
// enum representing the character encodings I intend to support
// Each enumerator has a matching encoding_info specialization naming the code unit type used for it
enum class encoding {
utf_8, // The most common and arguably superior encoding for files and networking protocols not in straight ASCII
utf_16,
utf_32,
wchar, // essentially only really for std::wcout / std::wcin
multibyte // essentially only really for std::cout / std::cin
};
// Compile-time description of an encoding: data_type is the code unit type used to hold text in that encoding,
// and text_encoding repeats the encoding value itself. Only the specializations below are defined.
template<encoding EncodingV>
struct encoding_info;
// UTF-8 text is held in char8_t units
template<>
struct encoding_info<encoding::utf_8> {
using data_type = char8_t;
static constexpr encoding text_encoding = encoding::utf_8;
};
// UTF-16 text is held in char16_t units
template<>
struct encoding_info<encoding::utf_16> {
using data_type = char16_t;
static constexpr encoding text_encoding = encoding::utf_16;
};
// UTF-32 text is held in char32_t units
template<>
struct encoding_info<encoding::utf_32> {
using data_type = char32_t;
static constexpr encoding text_encoding = encoding::utf_32;
};
// Wide-character text is held in wchar_t units
template<>
struct encoding_info<encoding::wchar> {
using data_type = wchar_t;
static constexpr encoding text_encoding = encoding::wchar;
};
// Multibyte text is held in plain char units
template<>
struct encoding_info<encoding::multibyte> {
using data_type = char;
static constexpr encoding text_encoding = encoding::multibyte;
};
// Maps a character type to the encoding_info of the encoding assumed for text of that type.
// NOTE(review): no specialization for plain char is visible here, so default_encoding_info<char> would be an
// incomplete type -- confirm whether that is intentional.
template<typename CharT>
struct default_encoding_info;
// char8_t text defaults to UTF-8
template<>
struct default_encoding_info<char8_t> : public encoding_info<encoding::utf_8> {
};
// char16_t text defaults to UTF-16
template<>
struct default_encoding_info<char16_t> : public encoding_info<encoding::utf_16> {
};
// char32_t text defaults to UTF-32
template<>
struct default_encoding_info<char32_t> : public encoding_info<encoding::utf_32> {
};
// wchar_t text defaults to the wchar encoding
template<>
struct default_encoding_info<wchar_t> : public encoding_info<encoding::wchar> {
};
/** single-unit helper utilities */
char32_t fold(char32_t in_codepoint); // Folds codepoint for case-insensitive checks (not for human output)
constexpr int as_base(char32_t in_character, unsigned int base); // The value represented by in_character in terms of base if valid, -1 otherwise
@ -338,9 +405,11 @@ constexpr size_t encode_codepoint_utf(T& out_destination, char32_t in_codepoint)
else if constexpr (std::is_same_v<CharT, wchar_t>) {
return encode_codepoint_w<T>(out_destination, in_codepoint);
}
#ifdef JESSILIB_CHAR_AS_UTF8
else if constexpr (std::is_same_v<CharT, char>) {
return encode_codepoint_utf8<CharT, T>(out_destination, in_codepoint);
}
#endif // JESSILIB_CHAR_AS_UTF8
}
template<typename CharT>
@ -476,9 +545,11 @@ constexpr decode_result decode_codepoint(std::basic_string_view<CharT> in_string
return decode_codepoint_utf32<wchar_t>(in_string);
}
}
#ifdef JESSILIB_CHAR_AS_UTF8
else if constexpr (std::is_same_v<CharT, char>) {
return decode_codepoint_utf8(in_string);
}
#endif // JESSILIB_CHAR_AS_UTF8
}
template<typename CharT>

8
src/include/jessilib/unicode_compare.hpp

@ -285,6 +285,7 @@ struct text_hash {
return hash;
}
#ifdef JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size());
}
@ -292,6 +293,7 @@ struct text_hash {
auto operator()(std::basic_string_view<char> in_key) const noexcept {
return hash(in_key.data(), in_key.data() + in_key.size());
}
#endif // JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size());
@ -321,10 +323,12 @@ struct text_hash {
struct text_equal {
using is_transparent = std::true_type;
#ifdef JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
}
#endif // JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
@ -370,6 +374,7 @@ struct text_hashi {
return hash;
}
#ifdef JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size());
}
@ -377,6 +382,7 @@ struct text_hashi {
auto operator()(std::basic_string_view<char> in_key) const noexcept {
return hash(in_key.data(), in_key.data() + in_key.size());
}
#endif // JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size());
@ -406,10 +412,12 @@ struct text_hashi {
struct text_equali {
using is_transparent = std::true_type;
#ifdef JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
}
#endif // JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {

9
src/include/jessilib/unicode_syntax.hpp

@ -25,7 +25,7 @@
#pragma once
#include "unicode_base.hpp"
#include "unicode.hpp"
namespace jessilib {
@ -76,10 +76,9 @@ template<typename CharT, typename ContextT, bool UseExceptionsV = false>
size_t fail_action(decode_result, ContextT&, std::basic_string_view<CharT>& in_read_view) {
using namespace std::literals;
if constexpr (UseExceptionsV) {
std::string exception = "Invalid parse data; unexpected token: '"s;
jessilib::encode_codepoint(exception, in_read_view.front());
exception += "' when parsing data";
throw std::invalid_argument{ exception };
throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid parse data; unexpected token: '"sv,
jessilib::decode_codepoint(in_read_view).codepoint,
u8"' when parsing data"sv) };
}
return std::numeric_limits<size_t>::max();
}

86
src/test/http_query.cpp

@ -22,20 +22,20 @@
using namespace std::literals;
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::u8string
#if defined(__cpp_lib_constexpr_string) && (__GNUC__ >= 12 || _MSC_VER >= 1929)
constexpr std::string query_constexpr(std::string_view in_expression) {
std::string result{ static_cast<std::string>(in_expression) };
constexpr std::u8string query_constexpr(std::u8string_view in_expression) {
std::u8string result{ static_cast<std::u8string>(in_expression) };
jessilib::deserialize_http_query(result);
return result;
}
ASSERT_COMPILES_CONSTEXPR(return query_constexpr("test"s) == "test"s);
ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first+second"s) == "first second"s);
ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first%20second"s) == "first second"s);
ASSERT_COMPILES_CONSTEXPR(return query_constexpr(u8"test"s) == u8"test"s);
ASSERT_COMPILES_CONSTEXPR(return query_constexpr(u8"first+second"s) == u8"first second"s);
ASSERT_COMPILES_CONSTEXPR(return query_constexpr(u8"first%20second"s) == u8"first second"s);
#endif // __cpp_lib_constexpr_string
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
using utf8_char_types = ::testing::Types<char, char8_t>;
using char_types = ::testing::Types</*char,*/ char8_t, char16_t, char32_t>;
using utf8_char_types = ::testing::Types</*char,*/ char8_t>;
template<typename T>
class QuerySequenceTest : public ::testing::Test {
@ -162,77 +162,77 @@ TYPED_TEST(QuerySequenceTest, invalids_2len_trailing) {
}
TEST(HtmlFormParser, empty) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text;
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text;
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.empty());
EXPECT_TRUE(parsed_result.empty());
}
TEST(HtmlFormParser, one_key) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "key";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"key";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_EQ(query_text, "key");
EXPECT_EQ(query_text, u8"key");
EXPECT_EQ(parsed_result.size(), 1);
EXPECT_EQ(parsed_result[0].first, query_text);
EXPECT_TRUE(parsed_result[0].second.empty());
}
TEST(HtmlFormParser, one_key_and_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "key=value";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"key=value";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvalue"));
EXPECT_TRUE(query_text.starts_with(u8"keyvalue"));
EXPECT_EQ(parsed_result.size(), 1);
EXPECT_EQ(parsed_result[0].first, "key");
EXPECT_EQ(parsed_result[0].second, "value");
EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, u8"value");
}
TEST(HtmlFormParser, one_key_and_value_trailing) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "key=value&";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"key=value&";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvalue"));
EXPECT_TRUE(query_text.starts_with(u8"keyvalue"));
EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key");
EXPECT_EQ(parsed_result[0].second, "value");
EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_TRUE(parsed_result[1].first.empty());
EXPECT_TRUE(parsed_result[1].second.empty());
}
TEST(HtmlFormParser, two_key_one_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "key=value&second_key";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"key=value&second_key";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvaluesecond_key"));
EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_key"));
EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key");
EXPECT_EQ(parsed_result[0].second, "value");
EXPECT_EQ(parsed_result[1].first, "second_key");
EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_TRUE(parsed_result[1].second.empty());
}
TEST(HtmlFormParser, two_key_two_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "key=value&second_key=second=value";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"key=value&second_key=second=value";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvaluesecond_keysecond=value"));
EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_keysecond=value"));
EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key");
EXPECT_EQ(parsed_result[0].second, "value");
EXPECT_EQ(parsed_result[1].first, "second_key");
EXPECT_EQ(parsed_result[1].second, "second=value");
EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_EQ(parsed_result[1].second, u8"second=value");
}
TEST(HtmlFormParser, some_sequences) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
std::string query_text = "k+y=va+u%20&%73econd%5Fke%79=second_valu%65";
std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::u8string query_text = u8"k+y=va+u%20&%73econd%5Fke%79=second_valu%65";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("k yva u second_keysecond_value"));
EXPECT_TRUE(query_text.starts_with(u8"k yva u second_keysecond_value"));
EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "k y");
EXPECT_EQ(parsed_result[0].second, "va u ");
EXPECT_EQ(parsed_result[1].first, "second_key");
EXPECT_EQ(parsed_result[1].second, "second_value");
EXPECT_EQ(parsed_result[0].first, u8"k y");
EXPECT_EQ(parsed_result[0].second, u8"va u ");
EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_EQ(parsed_result[1].second, u8"second_value");
}

89
src/test/parser.cpp

@ -30,14 +30,63 @@ using namespace std::literals;
class test_parser : public parser {
public:
/** deserialize/serialize overrides */
virtual object deserialize(std::string_view in_data) override {
return deserialize_impl(in_data);
/**
 * Converts raw input bytes to UTF-8 text and forwards that text to deserialize_impl.
 *
 * @param in_data Raw bytes to deserialize, interpreted according to in_read_encoding
 * @param in_read_encoding Encoding that in_data is read as
 * @return The object produced by deserialize_impl for the converted UTF-8 text
 */
object deserialize_bytes(bytes_view_type in_data, encoding in_read_encoding) override {
	std::u8string u8_string;
	switch (in_read_encoding) {
		case encoding::utf_8:
			// Input is already UTF-8; only the view's character type changes before the copy
			u8_string = string_view_cast<char8_t>(in_data);
			break;
		case encoding::utf_16:
			u8_string = jessilib::string_cast<char8_t>(string_view_cast<char16_t>(in_data));
			break;
		case encoding::utf_32:
			u8_string = jessilib::string_cast<char8_t>(string_view_cast<char32_t>(in_data));
			break;
		case encoding::wchar:
			u8_string = jessilib::string_cast<char8_t>(string_view_cast<wchar_t>(in_data));
			break;
		case encoding::multibyte:
			// Only the converted string (.second) is used; the first element of the result is ignored here
			u8_string = mbstring_to_ustring<char8_t>(string_view_cast<char>(in_data)).second;
			break;
	}

	// An encoding value matching none of the cases leaves u8_string empty
	return deserialize_impl(std::u8string_view{ u8_string });
}
virtual std::string serialize(const object& in_object) override {
std::string serialize_bytes(const object& in_object, encoding in_write_encoding) override {
std::u8string u8_serialized = serialize_impl(in_object);
switch (in_write_encoding) {
case encoding::utf_8:
return { u8_serialized.begin(), u8_serialized.end() };
case encoding::utf_16: {
auto casted = string_cast<char16_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(char16_t) };
}
case encoding::utf_32: {
auto casted = string_cast<char32_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(char32_t) };
}
case encoding::wchar: {
auto casted = string_cast<wchar_t>(u8_serialized);
return { reinterpret_cast<const char*>(casted.data()), casted.size() * sizeof(wchar_t) };
}
case encoding::multibyte:
return ustring_to_mbstring(u8_serialized).second;
}
return {};
}
/** Serializes in_object to UTF-8 text by delegating to serialize_impl */
std::u8string serialize_u8(const object& in_object) override {
	return serialize_impl(in_object);
}
/** Serializes in_object via serialize_u8, then converts the result to char16_t units with string_cast */
std::u16string serialize_u16(const object& in_object) override {
	return string_cast<char16_t>(serialize_u8(in_object));
}
/** Serializes in_object via serialize_u8, then converts the result to char32_t units with string_cast */
std::u32string serialize_u32(const object& in_object) override {
	return string_cast<char32_t>(serialize_u8(in_object));
}
/** Serializes in_object via serialize_u8, then converts the result to wchar_t units with string_cast */
std::wstring serialize_w(const object& in_object) override {
	return string_cast<wchar_t>(serialize_u8(in_object));
}
/** helpers */
static void reset() {
serialize_impl = &serialize_default;
@ -45,26 +94,26 @@ public:
}
/** default serialize/deserialize implementations */
static std::string serialize_default(const object& in_object) {
static std::u8string serialize_default(const object& in_object) {
if (in_object.has<std::u8string>()) {
return string_cast<char>(in_object.get<std::u8string>());
return in_object.get<std::u8string>();
}
return static_cast<std::string>(DEFAULT_SERIALIZE_RESULT);
return static_cast<std::u8string>(DEFAULT_SERIALIZE_RESULT);
}
static object deserialize_default(std::string_view in_data) {
return object{ string_view_cast<char8_t>(in_data) };
static object deserialize_default(std::u8string_view in_data) {
return object{ in_data };
}
/** static members */
static constexpr std::string_view DEFAULT_SERIALIZE_RESULT = "serialize_result"sv;
static std::function<std::string(const object&)> serialize_impl;
static std::function<object(std::string_view)> deserialize_impl;
static constexpr std::u8string_view DEFAULT_SERIALIZE_RESULT = u8"serialize_result"sv;
static std::function<std::u8string(const object&)> serialize_impl;
static std::function<object(std::u8string_view)> deserialize_impl;
};
std::function<std::string(const object&)> test_parser::serialize_impl{ &serialize_default };
std::function<object(std::string_view)> test_parser::deserialize_impl{ &deserialize_default };
std::function<std::u8string(const object&)> test_parser::serialize_impl{ &serialize_default };
std::function<object(std::u8string_view)> test_parser::deserialize_impl{ &deserialize_default };
parser_registration<test_parser> test_parser_registration{ "test" };
@ -84,30 +133,30 @@ class ParserTest : public base_test {
TEST_F(ParserTest, find_parser) {
EXPECT_NO_THROW(serialize_object(u8"test_data", "test"));
EXPECT_NO_THROW(deserialize_object("test_data"sv, "test"));
EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test"));
EXPECT_THROW(serialize_object(u8"test_data", "invalid_format_test"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "invalid_format_test"), format_not_available);
EXPECT_THROW(deserialize_object(u8"test_data"sv, "invalid_format_test"), format_not_available);
}
TEST_F(ParserTest, temp_parser) {
EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available);
{
parser_registration<test_parser> test_tmp_registration{ "test_tmp" };
EXPECT_NO_THROW(serialize_object(u8"test_data", "test_tmp"));
EXPECT_NO_THROW(deserialize_object("test_data"sv, "test_tmp"));
EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test_tmp"));
}
EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available);
}
TEST_F(ParserTest, serialize) {
EXPECT_EQ(serialize_object(u8"test_data", "test"), "test_data");
EXPECT_EQ(serialize_object(u8"test_data", "test"), u8"test_data");
}
TEST_F(ParserTest, deserialize) {
EXPECT_EQ(deserialize_object("test_data"sv, "test").get<std::u8string>(), u8"test_data");
EXPECT_EQ(deserialize_object(u8"test_data"sv, "test").get<std::u8string>(), u8"test_data");
}

71
src/test/parsers/json.cpp

@ -25,26 +25,26 @@ using namespace std::literals;
TEST(JsonParser, serialize_null) {
json_parser parser;
EXPECT_EQ(parser.serialize({}), "null");
EXPECT_EQ(parser.serialize<char8_t>({}), u8"null");
}
TEST(JsonParser, serialize_boolean) {
json_parser parser;
EXPECT_EQ(parser.serialize(true), "true");
EXPECT_EQ(parser.serialize(false), "false");
EXPECT_EQ(parser.serialize<char8_t>(true), u8"true");
EXPECT_EQ(parser.serialize<char8_t>(false), u8"false");
}
TEST(JsonParser, serialize_integer) {
json_parser parser;
EXPECT_EQ(parser.serialize(1234), "1234");
EXPECT_EQ(parser.serialize<char8_t>(1234), u8"1234");
}
TEST(JsonParser, serialize_decimal) {
json_parser parser;
EXPECT_DOUBLE_EQ(std::atof(parser.serialize(12.34).c_str()), 12.34);
EXPECT_DOUBLE_EQ(std::atof(parser.serialize(1234.0).c_str()), 1234.0);
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize<char8_t>(12.34).c_str())), 12.34);
EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize<char8_t>(1234.0).c_str())), 1234.0);
}
// necessary due to some sort of bug with EXPECT_EQ on MSVC
@ -56,10 +56,15 @@ void expect_eq(LeftT in_left, RightT in_right) {
TEST(JsonParser, serialize_string) {
json_parser parser;
EXPECT_EQ(parser.serialize(u8"text"), R"json("text")json");
expect_eq(parser.serialize(u8"\"text\""), R"json("\"text\"")json");
expect_eq(parser.serialize(u8"\"te\x01xt\""), R"json("\"te\u0001xt\"")json");
expect_eq(parser.serialize(u8"\"te\x10xt\""), R"json("\"te\u0010xt\"")json");
EXPECT_EQ(parser.serialize<char8_t>(u8"text"), u8R"json("text")json");
expect_eq(parser.serialize<char8_t>(u8"\"text\""), u8R"json("\"text\"")json");
expect_eq(parser.serialize<char8_t>(u8"\"te\x01xt\""), u8R"json("\"te\u0001xt\"")json");
expect_eq(parser.serialize<char8_t>(u8"\"te\x10xt\""), u8R"json("\"te\u0010xt\"")json");
EXPECT_EQ(parser.serialize<char8_t>(u8"text"), u8R"json("text")json");
EXPECT_EQ(parser.serialize<char16_t>(u8"text"), uR"json("text")json");
EXPECT_EQ(parser.serialize<char32_t>(u8"text"), UR"json("text")json");
EXPECT_EQ(parser.serialize<wchar_t>(u8"text"), LR"json("text")json");
}
TEST(JsonParser, serialize_array) {
@ -71,8 +76,8 @@ TEST(JsonParser, serialize_array) {
object{}
};
EXPECT_EQ(parser.serialize(array),
R"json([true,1234,"text",null])json");
EXPECT_EQ(parser.serialize<char8_t>(array),
u8R"json([true,1234,"text",null])json");
}
TEST(JsonParser, serialize_map) {
@ -84,44 +89,44 @@ TEST(JsonParser, serialize_map) {
obj[u8"some_string"] = u8"text";
obj[u8"some_null"];
EXPECT_EQ(parser.serialize(obj),
R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json");
EXPECT_EQ(parser.serialize<char8_t>(obj),
u8R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json");
}
TEST(JsonParser, deserialize_null) {
json_parser parser;
EXPECT_EQ(parser.deserialize("null"sv), object{});
EXPECT_EQ(parser.deserialize(u8"null"sv), object{});
}
TEST(JsonParser, deserialize_boolean) {
json_parser parser;
EXPECT_EQ(parser.deserialize("true"sv), true);
EXPECT_EQ(parser.deserialize("false"sv), false);
EXPECT_EQ(parser.deserialize(u8"true"sv), true);
EXPECT_EQ(parser.deserialize(u8"false"sv), false);
}
TEST(JsonParser, deserialize_integer) {
json_parser parser;
EXPECT_EQ(parser.deserialize("1234"sv), 1234);
EXPECT_EQ(parser.deserialize("-1234"sv), -1234);
EXPECT_EQ(parser.deserialize(u8"1234"sv), 1234);
EXPECT_EQ(parser.deserialize(u8"-1234"sv), -1234);
}
TEST(JsonParser, deserialize_decimal) {
json_parser parser;
EXPECT_DOUBLE_EQ(parser.deserialize("12.34"sv).get<double>(), 12.34);
EXPECT_DOUBLE_EQ(parser.deserialize("1234."sv).get<double>(), 1234.0);
EXPECT_DOUBLE_EQ(parser.deserialize("0.1234"sv).get<double>(), 0.1234);
EXPECT_THROW(parser.deserialize(".1234"sv), std::invalid_argument);
EXPECT_DOUBLE_EQ(parser.deserialize("-12.34"sv).get<double>(), -12.34);
EXPECT_DOUBLE_EQ(parser.deserialize(u8"12.34"sv).get<double>(), 12.34);
EXPECT_DOUBLE_EQ(parser.deserialize(u8"1234."sv).get<double>(), 1234.0);
EXPECT_DOUBLE_EQ(parser.deserialize(u8"0.1234"sv).get<double>(), 0.1234);
EXPECT_THROW(parser.deserialize(u8".1234"sv), std::invalid_argument);
EXPECT_DOUBLE_EQ(parser.deserialize(u8"-12.34"sv).get<double>(), -12.34);
}
TEST(JsonParser, deserialize_string) {
json_parser parser;
EXPECT_EQ(parser.deserialize(R"json("text")json"sv), u8"text");
EXPECT_EQ(parser.deserialize(u8R"json("text")json"sv), u8"text");
object obj;
std::u8string_view u8text = u8R"json("text")json"sv;
@ -143,14 +148,14 @@ TEST(JsonParser, deserialize_string) {
TEST(JsonParser, deserialize_array) {
json_parser parser;
constexpr const char* json_data = R"json([
constexpr std::u8string_view json_data = u8R"json([
true,
false,
1234,
12.34,
0.1234,
"text"
])json";
])json"sv;
auto array = parser.deserialize(json_data).get<std::vector<object>>();
ASSERT_EQ(array.size(), 6U);
@ -165,7 +170,7 @@ TEST(JsonParser, deserialize_array) {
TEST(JsonParser, deserialize_array_nested) {
json_parser parser;
constexpr const char* json_data = R"json([
constexpr std::u8string_view json_data = u8R"json([
true,
false,
1234
@ -178,7 +183,7 @@ TEST(JsonParser, deserialize_array_nested) {
12.34,
0.1234,
"text"
])json";
])json"sv;
auto array = parser.deserialize(json_data).get<std::vector<object>>();
ASSERT_EQ(array.size(), 9U);
@ -211,14 +216,14 @@ TEST(JsonParser, deserialize_array_nested) {
TEST(JsonParser, deserialize_map) {
json_parser parser;
constexpr const char* json_data = R"json({
constexpr std::u8string_view json_data = u8R"json({
"some_true":true,
"some_false" : false,
"some_int": 1234,
"some_double" : 12.34,
"some_other_double" :0.1234,
"some_text" : "text"
})json";
})json"sv;
object obj = parser.deserialize(json_data);
EXPECT_EQ(obj.size(), 6U);
@ -233,7 +238,7 @@ TEST(JsonParser, deserialize_map) {
TEST(JsonParser, deserialize_map_nested) {
json_parser parser;
constexpr const char* json_data = R"json({
constexpr std::u8string_view json_data = u8R"json({
"some_text" : "text",
"some_object" : {
"some_null_object": {}
@ -246,7 +251,7 @@ TEST(JsonParser, deserialize_map_nested) {
"makes toot": true
}},
"some other text":" asdf "
})json";
})json"sv;
object obj = parser.deserialize(json_data);
EXPECT_EQ(obj.size(), 4U);

14
src/test/unicode.cpp

@ -128,12 +128,18 @@ TEST(UTF32Test, decode_codepoint) {
DECODE_CODEPOINT_TEST(U"\U0001F604"sv, U'\U0001F604', 1U);
}
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
// Type list of character-type pairs. Pairs involving plain `char` are only listed when
// JESSILIB_CHAR_AS_UTF8 is defined; otherwise only char8_t/char16_t/char32_t combinations are listed.
#ifdef JESSILIB_CHAR_AS_UTF8
// All 16 ordered pairs over { char, char8_t, char16_t, char32_t }
using char_type_combos = ::testing::Types<
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>,
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#else // JESSILIB_CHAR_AS_UTF8
// All 9 ordered pairs over { char8_t, char16_t, char32_t }
using char_type_combos = ::testing::Types<
std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#endif // JESSILIB_CHAR_AS_UTF8
template<typename T>
class UnicodeFullTest : public ::testing::Test {
@ -157,9 +163,9 @@ TYPED_TEST(UnicodeFullTest, string_cast) {
}
TEST(UTF8Test, string_view_cast) {
auto abcd_str = jessilib::string_cast<char8_t>(U"ABCD");
auto view = string_view_cast<char>(abcd_str);
EXPECT_TRUE(equals(view, abcd_str));
std::string_view abcd_str = "ABCD";
auto view = string_view_cast<char8_t>(abcd_str);
EXPECT_TRUE(equals(view, u8"ABCD"sv));
}
/** equals */

23
src/test/unicode_sequence.cpp

@ -34,13 +34,20 @@ ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("test"s) == "test"s);
ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("\\r\\n"s) == "\r\n"s);
#endif // __cpp_lib_constexpr_string
// Character type lists for the typed tests. Plain `char` is only included when
// JESSILIB_CHAR_AS_UTF8 is defined.
// NOTE(review): utf8_char_types is only defined in the JESSILIB_CHAR_AS_UTF8 branch — confirm nothing
// references it when the macro is not defined.
#ifdef JESSILIB_CHAR_AS_UTF8
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
using utf8_char_types = ::testing::Types<char, char8_t>;
// All 16 ordered pairs over { char, char8_t, char16_t, char32_t }
using char_type_combos = ::testing::Types<
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>,
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#else // JESSILIB_CHAR_AS_UTF8
using char_types = ::testing::Types<char8_t, char16_t, char32_t>;
// All 9 ordered pairs over { char8_t, char16_t, char32_t }
using char_type_combos = ::testing::Types<
std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#endif // JESSILIB_CHAR_AS_UTF8
template<typename T>
class UnicodeSequenceTest : public ::testing::Test {
@ -51,8 +58,8 @@ TYPED_TEST_SUITE(UnicodeSequenceTest, char_types);
constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing
#define TEST_CPP_SEQUENCE(expr) \
{ auto parsed_string = jessilib::string_cast<TypeParam>(#expr); \
auto normal_string = jessilib::string_cast<TypeParam>(expr); \
{ auto parsed_string = jessilib::string_cast<TypeParam>(reinterpret_cast<const char8_t*>(#expr)); \
auto normal_string = jessilib::string_cast<TypeParam>(reinterpret_cast<const char8_t*>(expr)); \
parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \
jessilib::apply_cpp_escape_sequences(parsed_string); \
EXPECT_EQ(parsed_string, normal_string); }
@ -130,7 +137,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) {
std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 2; ++min_length) {
parsed_string = jessilib::string_cast<TypeParam>("\\x");
parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -145,7 +152,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 4; ++min_length) {
// "\x0" -> "\xffff"
parsed_string = jessilib::string_cast<TypeParam>("\\x");
parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -161,7 +168,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 8; ++min_length) {
// "\x0" -> "\x0010ffff"
parsed_string = jessilib::string_cast<TypeParam>("\\x");
parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -175,7 +182,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
TYPED_TEST(UnicodeSequenceTest, cpp_u16) {
// "u0000" -> "uffff" with & without leading zeroes
for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) {
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\u");
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>(u8"\\u");
parsed_string += make_hex_string<TypeParam>(codepoint, 4);
jessilib::apply_cpp_escape_sequences(parsed_string);
@ -188,7 +195,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_u16) {
TYPED_TEST(UnicodeSequenceTest, cpp_u32) {
// "U00000000" -> "U000100FF" with & without leading zeroes
for (unsigned int codepoint = 0; codepoint <= MAX_LOOP_CODEPOINT; ++codepoint) {
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\U");
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>(u8"\\U");
parsed_string += make_hex_string<TypeParam>(codepoint, 8);
jessilib::apply_cpp_escape_sequences(parsed_string);

Loading…
Cancel
Save