Browse Source

Initial pass for removing 'char' support from unicode methods, expected to be replaced by another mechanism

master
Jessica James 3 years ago
parent
commit
3a12fc56f8
  1. 8
      src/common/parser/parser.cpp
  2. 67
      src/common/parsers/json.cpp
  3. 14
      src/common/serialize.cpp
  4. 2
      src/include/jessilib/http_query.hpp
  5. 4
      src/include/jessilib/parser.hpp
  6. 25
      src/include/jessilib/parsers/json.hpp
  7. 10
      src/include/jessilib/serialize.hpp
  8. 32
      src/include/jessilib/unicode.hpp
  9. 14
      src/include/jessilib/unicode_base.hpp
  10. 8
      src/include/jessilib/unicode_compare.hpp
  11. 9
      src/include/jessilib/unicode_syntax.hpp
  12. 80
      src/test/http_query.cpp
  13. 36
      src/test/parser.cpp
  14. 54
      src/test/parsers/json.cpp
  15. 14
      src/test/unicode.cpp
  16. 23
      src/test/unicode_sequence.cpp

8
src/common/parser/parser.cpp

@ -17,12 +17,13 @@
*/ */
#include "parser.hpp" #include "parser.hpp"
#include "unicode.hpp"
#include <istream> #include <istream>
namespace jessilib { namespace jessilib {
object parser::deserialize(std::istream& in_stream) { object parser::deserialize(std::istream& in_stream) {
std::vector<char> data; std::vector<char8_t> data;
// Read entire stream into data // Read entire stream into data
char buffer[1024]; char buffer[1024];
@ -32,11 +33,12 @@ object parser::deserialize(std::istream& in_stream) {
} }
// Pass data to deserialize // Pass data to deserialize
return deserialize(std::string_view{ &data.front(), data.size() }); return deserialize(std::u8string_view{ &data.front(), data.size() });
} }
void parser::serialize(std::ostream& in_stream, const object& in_object) { void parser::serialize(std::ostream& in_stream, const object& in_object) {
in_stream << serialize(in_object); // TODO: replace this method
in_stream << jessilib::string_view_cast<char>(serialize(in_object));
} }
} // namespace jessilib } // namespace jessilib

67
src/common/parsers/json.cpp

@ -23,8 +23,9 @@ using namespace std::literals;
namespace jessilib { namespace jessilib {
std::string make_json_string(std::u8string_view in_string) { template<typename CharT>
std::string result; std::basic_string<CharT> make_json_string(std::u8string_view in_string) {
std::basic_string<CharT> result;
result.reserve(in_string.size() + 2); result.reserve(in_string.size() + 2);
result = '\"'; result = '\"';
@ -39,23 +40,34 @@ std::string make_json_string(std::u8string_view in_string) {
result += '\"'; result += '\"';
} }
else if (decode.codepoint < 0x20) { // control characters else if (decode.codepoint < 0x20) { // control characters
result += "\\u0000"sv; result += '\\';
result += 'u';
result += '0';
result += '0';
// overwrite last 2 zeroes with correct hexadecimal sequence // overwrite last 2 zeroes with correct hexadecimal sequence
char* data_end = result.data() + result.size(); char data[2]; // Will only ever use 2 chars
char* data = data_end - 2; // Will only ever use 2 chars char* data_end = data + sizeof(data);
auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16); auto to_chars_result = std::to_chars(data, data_end, static_cast<uint32_t>(decode.codepoint), 16);
if (to_chars_result.ec == std::errc{} && to_chars_result.ptr != data_end) { if (to_chars_result.ptr == data) {
// Only 1 byte written; shift it over // No bytes written
*to_chars_result.ptr = *(to_chars_result.ptr - 1); result += '0';
result += '0';
// And fill in the zeroes }
*(to_chars_result.ptr - 1) = '0'; else if (to_chars_result.ptr != data_end) {
// 1 byte written
result += '0';
result += data[0];
}
else {
// 2 bytes written
result += data[0];
result += data[1];
} }
} }
else { else {
// Valid UTF-8 sequence; copy it over // Valid UTF-8 sequence; copy it over
result.append(reinterpret_cast<const char*>(in_string.data()), decode.units); result.append(in_string.data(), decode.units);
} }
in_string.remove_prefix(decode.units); in_string.remove_prefix(decode.units);
@ -67,41 +79,42 @@ std::string make_json_string(std::u8string_view in_string) {
object json_parser::deserialize(std::string_view in_data) { object json_parser::deserialize(std::u8string_view in_data) {
object result; object result;
deserialize_json<char, true>(result, in_data); std::u8string_view data_view = jessilib::string_view_cast<char8_t>(in_data);
deserialize_json<char8_t, true>(result, data_view);
return result; return result;
} }
std::string json_parser::serialize(const object& in_object) { std::u8string json_parser::serialize(const object& in_object) {
static const object::array_type s_null_array; static const object::array_type s_null_array;
static const object::map_type s_null_map; static const object::map_type s_null_map;
switch (in_object.type()) { switch (in_object.type()) {
case object::type::null: case object::type::null:
return "null"s; return u8"null"s;
case object::type::boolean: case object::type::boolean:
if (in_object.get<bool>()) { if (in_object.get<bool>()) {
return "true"s; return u8"true"s;
} }
return "false"s; return u8"false"s;
case object::type::integer: case object::type::integer:
return std::to_string(in_object.get<intmax_t>()); return static_cast<std::u8string>(jessilib::string_view_cast<char8_t>(std::to_string(in_object.get<intmax_t>())));
case object::type::decimal: case object::type::decimal:
return std::to_string(in_object.get<long double>()); return static_cast<std::u8string>(jessilib::string_view_cast<char8_t>(std::to_string(in_object.get<long double>())));
case object::type::text: case object::type::text:
return make_json_string(in_object.get<std::u8string>()); return make_json_string<char8_t>(in_object.get<std::u8string>());
case object::type::array: { case object::type::array: {
if (in_object.size() == 0) { if (in_object.size() == 0) {
return "[]"s; return u8"[]"s;
} }
std::string result; std::u8string result;
result = '['; result = '[';
// Serialize all objects in array // Serialize all objects in array
@ -117,16 +130,16 @@ std::string json_parser::serialize(const object& in_object) {
case object::type::map: { case object::type::map: {
if (in_object.size() == 0) { if (in_object.size() == 0) {
return "{}"s; return u8"{}"s;
} }
std::string result; std::u8string result;
result = '{'; result = '{';
// Serialize all objects in map // Serialize all objects in map
for (auto& item : in_object.get<object::map_type>(s_null_map)) { for (auto& item : in_object.get<object::map_type>(s_null_map)) {
result += make_json_string(item.first); result += make_json_string<char8_t>(item.first);
result += ":"sv; result += ':';
result += json_parser::serialize(item.second); result += json_parser::serialize(item.second);
result += ','; result += ',';
} }

14
src/common/serialize.cpp

@ -41,15 +41,15 @@ std::shared_ptr<parser> get_parser(const std::string& in_format) {
} }
/** Deserialization */ /** Deserialization */
object deserialize_object(const std::string& in_data, const std::string& in_format) { object deserialize_object(const std::u8string& in_data, const std::string& in_format) {
return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format); return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format);
} }
object deserialize_object(const std::vector<char>& in_data, const std::string& in_format) { object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format) {
return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format); return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format);
} }
object deserialize_object(std::string_view in_data, const std::string& in_format) { object deserialize_object(std::u8string_view in_data, const std::string& in_format) {
return get_parser(in_format)->deserialize(in_data); return get_parser(in_format)->deserialize(in_data);
} }
@ -58,7 +58,7 @@ object deserialize_object(std::istream& in_stream, const std::string& in_format)
} }
/** Serialization */ /** Serialization */
std::string serialize_object(const object& in_object, const std::string& in_format) { std::u8string serialize_object(const object& in_object, const std::string& in_format) {
return get_parser(in_format)->serialize(in_object); return get_parser(in_format)->serialize(in_object);
} }
@ -66,4 +66,4 @@ void serialize_object(std::ostream& in_stream, const object& in_object, const st
get_parser(in_format)->serialize(in_stream, in_object); get_parser(in_format)->serialize(in_stream, in_object);
} }
} // namespace jessilib } // namespace jessilib

2
src/include/jessilib/http_query.hpp

@ -166,7 +166,7 @@ constexpr bool deserialize_html_form(ContainerT& out_container, std::basic_strin
HTMLFormContext<CharT, ContainerT> context{ out_container, inout_string.data() }; HTMLFormContext<CharT, ContainerT> context{ out_container, inout_string.data() };
constexpr auto& html_form_tree = html_form_root_tree<CharT, ContainerT>; constexpr auto& html_form_tree = html_form_root_tree<CharT, ContainerT>;
static_assert(is_sorted<char, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted"); static_assert(is_sorted<CharT, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted");
std::basic_string_view<CharT> read_view{ inout_string }; std::basic_string_view<CharT> read_view{ inout_string };
if (apply_syntax_tree<CharT, decltype(context), html_form_tree, std::size(html_form_tree), html_form_default_action> if (apply_syntax_tree<CharT, decltype(context), html_form_tree, std::size(html_form_tree), html_form_default_action>

4
src/include/jessilib/parser.hpp

@ -38,9 +38,9 @@ public:
* @return A valid (possibly null) object * @return A valid (possibly null) object
*/ */
virtual object deserialize(std::istream& in_stream); virtual object deserialize(std::istream& in_stream);
virtual object deserialize(std::string_view in_data) = 0; // TODO: deserialize from arbitrary unicode strings virtual object deserialize(std::u8string_view in_data) = 0; // TODO: deserialize from arbitrary unicode strings
virtual void serialize(std::ostream& in_stream, const object& in_object); virtual void serialize(std::ostream& in_stream, const object& in_object);
virtual std::string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings virtual std::u8string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings
}; // parser }; // parser
template<typename T> template<typename T>

25
src/include/jessilib/parsers/json.hpp

@ -29,8 +29,8 @@ namespace jessilib {
class json_parser : public parser { class json_parser : public parser {
public: public:
/** deserialize/serialize overrides */ /** deserialize/serialize overrides */
virtual object deserialize(std::string_view in_data) override; virtual object deserialize(std::u8string_view in_data) override;
virtual std::string serialize(const object& in_object) override; virtual std::u8string serialize(const object& in_object) override;
}; };
/** /**
@ -101,7 +101,9 @@ constexpr syntax_tree_member<CharT, ContextT> make_keyword_value_pair() {
// Unexpected character; throw if appropriate // Unexpected character; throw if appropriate
if constexpr (ContextT::use_exceptions) { if constexpr (ContextT::use_exceptions) {
using namespace std::literals; using namespace std::literals;
throw std::invalid_argument{ jessilib::join<std::string>("Invalid JSON data; unexpected token: '"sv, inout_read_view, "' when parsing null"sv) }; throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
inout_read_view,
u8"' when parsing null"sv) };
} }
return std::numeric_limits<size_t>::max(); return std::numeric_limits<size_t>::max();
@ -164,7 +166,8 @@ size_t string_start_action(ContextT& inout_context, std::basic_string_view<CharT
if constexpr (ContextT::use_exceptions) { if constexpr (ContextT::use_exceptions) {
using namespace std::literals; using namespace std::literals;
throw std::invalid_argument { throw std::invalid_argument {
jessilib::join_mbstring("Invalid JSON data; invalid token or end of string: "sv, std::u8string_view{ string_data }) jessilib::join_mbstring(u8"Invalid JSON data; invalid token or end of string: "sv,
std::u8string_view{ string_data })
}; };
} }
@ -239,7 +242,9 @@ size_t array_start_action(ContextT& inout_context, std::basic_string_view<CharT>
// Invalid JSON! // Invalid JSON!
if constexpr (ContextT::use_exceptions) { if constexpr (ContextT::use_exceptions) {
using namespace std::literals; using namespace std::literals;
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data: expected ',' or ']', instead encountered: "sv, inout_read_view) }; throw std::invalid_argument{ jessilib::join_mbstring(
u8"Invalid JSON data: expected ',' or ']', instead encountered: "sv,
inout_read_view) };
} }
return std::numeric_limits<size_t>::max(); return std::numeric_limits<size_t>::max();
@ -285,9 +290,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view<Cha
// Assert that we've reached the start of a key // Assert that we've reached the start of a key
if (front != '\"') { if (front != '\"') {
if constexpr (ContextT::use_exceptions) { if constexpr (ContextT::use_exceptions) {
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data; unexpected token: '"sv, throw std::invalid_argument{
jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
decode_codepoint(inout_read_view).codepoint, decode_codepoint(inout_read_view).codepoint,
"' when parsing object map (expected '\"' instead)"sv) }; u8"' when parsing object map (expected '\"' instead)"sv) };
} }
return std::numeric_limits<size_t>::max(); return std::numeric_limits<size_t>::max();
@ -312,9 +318,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view<Cha
} }
front = inout_read_view.front(); front = inout_read_view.front();
if (front != ':') { if (front != ':') {
throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data; unexpected token: '"sv, throw std::invalid_argument{
jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv,
decode_codepoint(inout_read_view).codepoint, decode_codepoint(inout_read_view).codepoint,
"' when parsing map key (expected ':' instead)"sv) }; u8"' when parsing map key (expected ':' instead)"sv) };
} }
inout_read_view.remove_prefix(1); // strip ':' inout_read_view.remove_prefix(1); // strip ':'

10
src/include/jessilib/serialize.hpp

@ -29,13 +29,13 @@ public:
}; };
/** Deserialization */ /** Deserialization */
object deserialize_object(const std::string& in_data, const std::string& in_format); object deserialize_object(const std::u8string& in_data, const std::string& in_format);
object deserialize_object(const std::vector<char>& in_data, const std::string& in_format); object deserialize_object(const std::vector<char8_t>& in_data, const std::string& in_format);
object deserialize_object(std::string_view in_data, const std::string& in_format); object deserialize_object(std::u8string_view in_data, const std::string& in_format);
object deserialize_object(std::istream& in_stream, const std::string& in_format); object deserialize_object(std::istream& in_stream, const std::string& in_format);
/** Serialization */ /** Serialization */
std::string serialize_object(const object& in_object, const std::string& in_format); std::u8string serialize_object(const object& in_object, const std::string& in_format);
void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format); void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format);
} // namespace jessilib } // namespace jessilib

32
src/include/jessilib/unicode.hpp

@ -112,24 +112,30 @@ bool is_valid(const InT& in_string) {
template<typename OutCharT, typename InT> template<typename OutCharT, typename InT>
std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) { std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) {
using InCharT = typename impl_unicode::is_string<InT>::type; using InCharT = typename impl_unicode::is_string<InT>::type;
size_t in_string_bytes = in_string.size() * sizeof(InCharT);
if constexpr (sizeof(OutCharT) > sizeof(InCharT)) { if constexpr (sizeof(InCharT) == sizeof(OutCharT)) {
// The output type is larger than the input type; verify no partial codepoints return { reinterpret_cast<const OutCharT*>(in_string.data()), in_string.size() };
if (in_string_bytes % sizeof(OutCharT) != 0) {
// This cannot be used to produce a valid result
return {};
}
} }
else {
size_t in_string_bytes = in_string.size() * sizeof(InCharT);
if constexpr (sizeof(OutCharT) > sizeof(InCharT)) {
// The output type is larger than the input type; verify no partial codepoints
if (in_string_bytes % sizeof(OutCharT) != 0) {
// This cannot be used to produce a valid result
return {};
}
}
size_t out_string_units = in_string_bytes / sizeof(OutCharT); size_t out_string_units = in_string_bytes / sizeof(OutCharT);
const OutCharT* data_begin = reinterpret_cast<const OutCharT*>(in_string.data()); const OutCharT* data_begin = reinterpret_cast<const OutCharT*>(in_string.data());
return { data_begin, out_string_units }; return { data_begin, out_string_units };
}
} }
template<typename OutCharT, typename InT> template<typename OutCharT, typename InT>
std::basic_string<OutCharT> string_cast(const InT& in_string) { std::basic_string<OutCharT> string_cast(const InT& in_string) {
static_assert(impl_unicode::is_string<InT>::value == true); static_assert(impl_unicode::is_string<InT>::value == true);
using InCharT = typename impl_unicode::is_string<InT>::type; using InCharT = std::remove_cvref_t<typename impl_unicode::is_string<InT>::type>;
using InEquivalentT = typename unicode_traits<InCharT>::equivalent_type; using InEquivalentT = typename unicode_traits<InCharT>::equivalent_type;
using InViewT = std::basic_string_view<InCharT>; using InViewT = std::basic_string_view<InCharT>;
using OutT = std::basic_string<OutCharT>; using OutT = std::basic_string<OutCharT>;
@ -392,7 +398,7 @@ size_t findi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<
ADAPT_BASIC_STRING(findi) ADAPT_BASIC_STRING(findi)
using find_if_predicate_type = bool(*)(char32_t, char*, size_t); /*using find_if_predicate_type = bool(*)(char32_t, char*, size_t);
inline void find_if(std::basic_string<char>& in_string, find_if_predicate_type in_predicate) { inline void find_if(std::basic_string<char>& in_string, find_if_predicate_type in_predicate) {
using CharT = char; using CharT = char;
CharT* ptr = in_string.data(); CharT* ptr = in_string.data();
@ -422,7 +428,7 @@ inline void find_if(std::basic_string_view<char>& in_string, find_if_view_predic
in_string_view.remove_prefix(decode.units); in_string_view.remove_prefix(decode.units);
ptr += decode.units; ptr += decode.units;
} }
} }*/
namespace impl_join { namespace impl_join {

14
src/include/jessilib/unicode_base.hpp

@ -70,7 +70,7 @@ constexpr size_t encode_codepoint(CharT* out_buffer, char32_t in_codepoint);
std::u8string encode_codepoint_u8(char32_t in_codepoint); std::u8string encode_codepoint_u8(char32_t in_codepoint);
std::u16string encode_codepoint_u16(char32_t in_codepoint); std::u16string encode_codepoint_u16(char32_t in_codepoint);
std::u32string encode_codepoint_u32(char32_t in_codepoint); std::u32string encode_codepoint_u32(char32_t in_codepoint);
std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32 std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32
/** decode_codepoint */ /** decode_codepoint */
@ -130,15 +130,21 @@ constexpr decode_result decode_surrogate_pair(char16_t in_high_surrogate, char16
template<typename CharT> template<typename CharT>
struct unicode_traits : std::false_type {}; struct unicode_traits : std::false_type {};
#ifdef JESSILIB_CHAR_AS_UTF8
template<> template<>
struct unicode_traits<char> : std::true_type { struct unicode_traits<char> : std::true_type {
using equivalent_type = char8_t; // DEPRECATE using equivalent_type = char8_t; // DEPRECATE
static constexpr size_t max_units_per_codepoint = 4; static constexpr size_t max_units_per_codepoint = 4;
}; };
#endif // JESSILIB_CHAR_AS_UTF8
template<> template<>
struct unicode_traits<char8_t> : std::true_type { struct unicode_traits<char8_t> : std::true_type {
using equivalent_type = char; // DEPRECATE #ifdef JESSILIB_CHAR_AS_UTF8
using equivalent_type = char;
#else // JESSILIB_CHAR_AS_UTF8
using equivalent_type = char8_t;
#endif // JESSILIB_CHAR_AS_UTF8
static constexpr size_t max_units_per_codepoint = 4; static constexpr size_t max_units_per_codepoint = 4;
}; };
@ -338,9 +344,11 @@ constexpr size_t encode_codepoint_utf(T& out_destination, char32_t in_codepoint)
else if constexpr (std::is_same_v<CharT, wchar_t>) { else if constexpr (std::is_same_v<CharT, wchar_t>) {
return encode_codepoint_w<T>(out_destination, in_codepoint); return encode_codepoint_w<T>(out_destination, in_codepoint);
} }
#ifdef JESSILIB_CHAR_AS_UTF8
else if constexpr (std::is_same_v<CharT, char>) { else if constexpr (std::is_same_v<CharT, char>) {
return encode_codepoint_utf8<CharT, T>(out_destination, in_codepoint); return encode_codepoint_utf8<CharT, T>(out_destination, in_codepoint);
} }
#endif // JESSILIB_CHAR_AS_UTF8
} }
template<typename CharT> template<typename CharT>
@ -476,9 +484,11 @@ constexpr decode_result decode_codepoint(std::basic_string_view<CharT> in_string
return decode_codepoint_utf32<wchar_t>(in_string); return decode_codepoint_utf32<wchar_t>(in_string);
} }
} }
#ifdef JESSILIB_CHAR_AS_UTF8
else if constexpr (std::is_same_v<CharT, char>) { else if constexpr (std::is_same_v<CharT, char>) {
return decode_codepoint_utf8(in_string); return decode_codepoint_utf8(in_string);
} }
#endif // JESSILIB_CHAR_AS_UTF8
} }
template<typename CharT> template<typename CharT>

8
src/include/jessilib/unicode_compare.hpp

@ -285,6 +285,7 @@ struct text_hash {
return hash; return hash;
} }
#ifdef JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8 auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
} }
@ -292,6 +293,7 @@ struct text_hash {
auto operator()(std::basic_string_view<char> in_key) const noexcept { auto operator()(std::basic_string_view<char> in_key) const noexcept {
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
} }
#endif // JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8 auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
@ -321,10 +323,12 @@ struct text_hash {
struct text_equal { struct text_equal {
using is_transparent = std::true_type; using is_transparent = std::true_type;
#ifdef JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT> template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs); return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
} }
#endif // JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT> template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
@ -370,6 +374,7 @@ struct text_hashi {
return hash; return hash;
} }
#ifdef JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8 auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
} }
@ -377,6 +382,7 @@ struct text_hashi {
auto operator()(std::basic_string_view<char> in_key) const noexcept { auto operator()(std::basic_string_view<char> in_key) const noexcept {
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
} }
#endif // JESSILIB_CHAR_AS_UTF8
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8 auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
return hash(in_key.data(), in_key.data() + in_key.size()); return hash(in_key.data(), in_key.data() + in_key.size());
@ -406,10 +412,12 @@ struct text_hashi {
struct text_equali { struct text_equali {
using is_transparent = std::true_type; using is_transparent = std::true_type;
#ifdef JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT> template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs); return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
} }
#endif // JESSILIB_CHAR_AS_UTF8
template<typename LhsCharT, typename RhsCharT> template<typename LhsCharT, typename RhsCharT>
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {

9
src/include/jessilib/unicode_syntax.hpp

@ -25,7 +25,7 @@
#pragma once #pragma once
#include "unicode_base.hpp" #include "unicode.hpp"
namespace jessilib { namespace jessilib {
@ -76,10 +76,9 @@ template<typename CharT, typename ContextT, bool UseExceptionsV = false>
size_t fail_action(decode_result, ContextT&, std::basic_string_view<CharT>& in_read_view) { size_t fail_action(decode_result, ContextT&, std::basic_string_view<CharT>& in_read_view) {
using namespace std::literals; using namespace std::literals;
if constexpr (UseExceptionsV) { if constexpr (UseExceptionsV) {
std::string exception = "Invalid parse data; unexpected token: '"s; throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid parse data; unexpected token: '"sv,
jessilib::encode_codepoint(exception, in_read_view.front()); jessilib::decode_codepoint(in_read_view).codepoint,
exception += "' when parsing data"; u8"' when parsing data"sv) };
throw std::invalid_argument{ exception };
} }
return std::numeric_limits<size_t>::max(); return std::numeric_limits<size_t>::max();
} }

80
src/test/http_query.cpp

@ -22,10 +22,10 @@
using namespace std::literals; using namespace std::literals;
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string // Compile-time tests for constexpr on compilers which support C++20 constexpr std::u8string
#if defined(__cpp_lib_constexpr_string) && (__GNUC__ >= 12 || _MSC_VER >= 1929) #if defined(__cpp_lib_constexpr_string) && (__GNUC__ >= 12 || _MSC_VER >= 1929)
constexpr std::string query_constexpr(std::string_view in_expression) { constexpr std::u8string query_constexpr(std::u8string_view in_expression) {
std::string result{ static_cast<std::string>(in_expression) }; std::u8string result{ static_cast<std::u8string>(in_expression) };
jessilib::deserialize_http_query(result); jessilib::deserialize_http_query(result);
return result; return result;
} }
@ -34,8 +34,8 @@ ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first+second"s) == "first seco
ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first%20second"s) == "first second"s); ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first%20second"s) == "first second"s);
#endif // __cpp_lib_constexpr_string #endif // __cpp_lib_constexpr_string
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>; using char_types = ::testing::Types</*char,*/ char8_t, char16_t, char32_t>;
using utf8_char_types = ::testing::Types<char, char8_t>; using utf8_char_types = ::testing::Types</*char,*/ char8_t>;
template<typename T> template<typename T>
class QuerySequenceTest : public ::testing::Test { class QuerySequenceTest : public ::testing::Test {
@ -162,77 +162,77 @@ TYPED_TEST(QuerySequenceTest, invalids_2len_trailing) {
} }
TEST(HtmlFormParser, empty) { TEST(HtmlFormParser, empty) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text; std::u8string query_text;
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.empty()); EXPECT_TRUE(query_text.empty());
EXPECT_TRUE(parsed_result.empty()); EXPECT_TRUE(parsed_result.empty());
} }
TEST(HtmlFormParser, one_key) { TEST(HtmlFormParser, one_key) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "key"; std::u8string query_text = u8"key";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_EQ(query_text, "key"); EXPECT_EQ(query_text, u8"key");
EXPECT_EQ(parsed_result.size(), 1); EXPECT_EQ(parsed_result.size(), 1);
EXPECT_EQ(parsed_result[0].first, query_text); EXPECT_EQ(parsed_result[0].first, query_text);
EXPECT_TRUE(parsed_result[0].second.empty()); EXPECT_TRUE(parsed_result[0].second.empty());
} }
TEST(HtmlFormParser, one_key_and_value) { TEST(HtmlFormParser, one_key_and_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "key=value"; std::u8string query_text = u8"key=value";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvalue")); EXPECT_TRUE(query_text.starts_with(u8"keyvalue"));
EXPECT_EQ(parsed_result.size(), 1); EXPECT_EQ(parsed_result.size(), 1);
EXPECT_EQ(parsed_result[0].first, "key"); EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, "value"); EXPECT_EQ(parsed_result[0].second, u8"value");
} }
TEST(HtmlFormParser, one_key_and_value_trailing) { TEST(HtmlFormParser, one_key_and_value_trailing) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "key=value&"; std::u8string query_text = u8"key=value&";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvalue")); EXPECT_TRUE(query_text.starts_with(u8"keyvalue"));
EXPECT_EQ(parsed_result.size(), 2); EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key"); EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, "value"); EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_TRUE(parsed_result[1].first.empty()); EXPECT_TRUE(parsed_result[1].first.empty());
EXPECT_TRUE(parsed_result[1].second.empty()); EXPECT_TRUE(parsed_result[1].second.empty());
} }
TEST(HtmlFormParser, two_key_one_value) { TEST(HtmlFormParser, two_key_one_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "key=value&second_key"; std::u8string query_text = u8"key=value&second_key";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvaluesecond_key")); EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_key"));
EXPECT_EQ(parsed_result.size(), 2); EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key"); EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, "value"); EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_EQ(parsed_result[1].first, "second_key"); EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_TRUE(parsed_result[1].second.empty()); EXPECT_TRUE(parsed_result[1].second.empty());
} }
TEST(HtmlFormParser, two_key_two_value) { TEST(HtmlFormParser, two_key_two_value) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "key=value&second_key=second=value"; std::u8string query_text = u8"key=value&second_key=second=value";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("keyvaluesecond_keysecond=value")); EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_keysecond=value"));
EXPECT_EQ(parsed_result.size(), 2); EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "key"); EXPECT_EQ(parsed_result[0].first, u8"key");
EXPECT_EQ(parsed_result[0].second, "value"); EXPECT_EQ(parsed_result[0].second, u8"value");
EXPECT_EQ(parsed_result[1].first, "second_key"); EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_EQ(parsed_result[1].second, "second=value"); EXPECT_EQ(parsed_result[1].second, u8"second=value");
} }
TEST(HtmlFormParser, some_sequences) { TEST(HtmlFormParser, some_sequences) {
std::vector<std::pair<std::string_view, std::string_view>> parsed_result; std::vector<std::pair<std::u8string_view, std::u8string_view>> parsed_result;
std::string query_text = "k+y=va+u%20&%73econd%5Fke%79=second_valu%65"; std::u8string query_text = u8"k+y=va+u%20&%73econd%5Fke%79=second_valu%65";
EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
EXPECT_TRUE(query_text.starts_with("k yva u second_keysecond_value")); EXPECT_TRUE(query_text.starts_with(u8"k yva u second_keysecond_value"));
EXPECT_EQ(parsed_result.size(), 2); EXPECT_EQ(parsed_result.size(), 2);
EXPECT_EQ(parsed_result[0].first, "k y"); EXPECT_EQ(parsed_result[0].first, u8"k y");
EXPECT_EQ(parsed_result[0].second, "va u "); EXPECT_EQ(parsed_result[0].second, u8"va u ");
EXPECT_EQ(parsed_result[1].first, "second_key"); EXPECT_EQ(parsed_result[1].first, u8"second_key");
EXPECT_EQ(parsed_result[1].second, "second_value"); EXPECT_EQ(parsed_result[1].second, u8"second_value");
} }

36
src/test/parser.cpp

@ -30,11 +30,11 @@ using namespace std::literals;
class test_parser : public parser { class test_parser : public parser {
public: public:
/** deserialize/serialize overrides */ /** deserialize/serialize overrides */
virtual object deserialize(std::string_view in_data) override { virtual object deserialize(std::u8string_view in_data) override {
return deserialize_impl(in_data); return deserialize_impl(in_data);
} }
virtual std::string serialize(const object& in_object) override { virtual std::u8string serialize(const object& in_object) override {
return serialize_impl(in_object); return serialize_impl(in_object);
} }
@ -45,26 +45,26 @@ public:
} }
/** default serialize/deserialize implementations */ /** default serialize/deserialize implementations */
static std::string serialize_default(const object& in_object) { static std::u8string serialize_default(const object& in_object) {
if (in_object.has<std::u8string>()) { if (in_object.has<std::u8string>()) {
return string_cast<char>(in_object.get<std::u8string>()); return in_object.get<std::u8string>();
} }
return static_cast<std::string>(DEFAULT_SERIALIZE_RESULT); return static_cast<std::u8string>(DEFAULT_SERIALIZE_RESULT);
} }
static object deserialize_default(std::string_view in_data) { static object deserialize_default(std::u8string_view in_data) {
return object{ string_view_cast<char8_t>(in_data) }; return object{ string_view_cast<char8_t>(in_data) };
} }
/** static members */ /** static members */
static constexpr std::string_view DEFAULT_SERIALIZE_RESULT = "serialize_result"sv; static constexpr std::u8string_view DEFAULT_SERIALIZE_RESULT = u8"serialize_result"sv;
static std::function<std::string(const object&)> serialize_impl; static std::function<std::u8string(const object&)> serialize_impl;
static std::function<object(std::string_view)> deserialize_impl; static std::function<object(std::u8string_view)> deserialize_impl;
}; };
std::function<std::string(const object&)> test_parser::serialize_impl{ &serialize_default }; std::function<std::u8string(const object&)> test_parser::serialize_impl{ &serialize_default };
std::function<object(std::string_view)> test_parser::deserialize_impl{ &deserialize_default }; std::function<object(std::u8string_view)> test_parser::deserialize_impl{ &deserialize_default };
parser_registration<test_parser> test_parser_registration{ "test" }; parser_registration<test_parser> test_parser_registration{ "test" };
@ -84,30 +84,30 @@ class ParserTest : public base_test {
TEST_F(ParserTest, find_parser) { TEST_F(ParserTest, find_parser) {
EXPECT_NO_THROW(serialize_object(u8"test_data", "test")); EXPECT_NO_THROW(serialize_object(u8"test_data", "test"));
EXPECT_NO_THROW(deserialize_object("test_data"sv, "test")); EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test"));
EXPECT_THROW(serialize_object(u8"test_data", "invalid_format_test"), format_not_available); EXPECT_THROW(serialize_object(u8"test_data", "invalid_format_test"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "invalid_format_test"), format_not_available); EXPECT_THROW(deserialize_object(u8"test_data"sv, "invalid_format_test"), format_not_available);
} }
TEST_F(ParserTest, temp_parser) { TEST_F(ParserTest, temp_parser) {
EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available); EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available); EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available);
{ {
parser_registration<test_parser> test_tmp_registration{ "test_tmp" }; parser_registration<test_parser> test_tmp_registration{ "test_tmp" };
EXPECT_NO_THROW(serialize_object(u8"test_data", "test_tmp")); EXPECT_NO_THROW(serialize_object(u8"test_data", "test_tmp"));
EXPECT_NO_THROW(deserialize_object("test_data"sv, "test_tmp")); EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test_tmp"));
} }
EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available); EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available);
EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available); EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available);
} }
TEST_F(ParserTest, serialize) { TEST_F(ParserTest, serialize) {
EXPECT_EQ(serialize_object(u8"test_data", "test"), "test_data"); EXPECT_EQ(serialize_object(u8"test_data", "test"), u8"test_data");
} }
TEST_F(ParserTest, deserialize) { TEST_F(ParserTest, deserialize) {
EXPECT_EQ(deserialize_object("test_data"sv, "test").get<std::u8string>(), u8"test_data"); EXPECT_EQ(deserialize_object(u8"test_data"sv, "test").get<std::u8string>(), u8"test_data");
} }

54
src/test/parsers/json.cpp

@ -25,26 +25,26 @@ using namespace std::literals;
TEST(JsonParser, serialize_null) { TEST(JsonParser, serialize_null) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.serialize({}), "null"); EXPECT_EQ(parser.serialize({}), u8"null");
} }
TEST(JsonParser, serialize_boolean) { TEST(JsonParser, serialize_boolean) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.serialize(true), "true"); EXPECT_EQ(parser.serialize(true), u8"true");
EXPECT_EQ(parser.serialize(false), "false"); EXPECT_EQ(parser.serialize(false), u8"false");
} }
TEST(JsonParser, serialize_integer) { TEST(JsonParser, serialize_integer) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.serialize(1234), "1234"); EXPECT_EQ(parser.serialize(1234), u8"1234");
} }
TEST(JsonParser, serialize_decimal) { TEST(JsonParser, serialize_decimal) {
json_parser parser; json_parser parser;
EXPECT_DOUBLE_EQ(std::atof(parser.serialize(12.34).c_str()), 12.34); EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize(12.34).c_str())), 12.34);
EXPECT_DOUBLE_EQ(std::atof(parser.serialize(1234.0).c_str()), 1234.0); EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast<const char*>(parser.serialize(1234.0).c_str())), 1234.0);
} }
// necessary due to some sort of bug with EXPECT_EQ on MSVC // necessary due to some sort of bug with EXPECT_EQ on MSVC
@ -56,10 +56,10 @@ void expect_eq(LeftT in_left, RightT in_right) {
TEST(JsonParser, serialize_string) { TEST(JsonParser, serialize_string) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.serialize(u8"text"), R"json("text")json"); EXPECT_EQ(parser.serialize(u8"text"), u8R"json("text")json");
expect_eq(parser.serialize(u8"\"text\""), R"json("\"text\"")json"); expect_eq(parser.serialize(u8"\"text\""), u8R"json("\"text\"")json");
expect_eq(parser.serialize(u8"\"te\x01xt\""), R"json("\"te\u0001xt\"")json"); expect_eq(parser.serialize(u8"\"te\x01xt\""), u8R"json("\"te\u0001xt\"")json");
expect_eq(parser.serialize(u8"\"te\x10xt\""), R"json("\"te\u0010xt\"")json"); expect_eq(parser.serialize(u8"\"te\x10xt\""), u8R"json("\"te\u0010xt\"")json");
} }
TEST(JsonParser, serialize_array) { TEST(JsonParser, serialize_array) {
@ -72,7 +72,7 @@ TEST(JsonParser, serialize_array) {
}; };
EXPECT_EQ(parser.serialize(array), EXPECT_EQ(parser.serialize(array),
R"json([true,1234,"text",null])json"); u8R"json([true,1234,"text",null])json");
} }
TEST(JsonParser, serialize_map) { TEST(JsonParser, serialize_map) {
@ -85,43 +85,43 @@ TEST(JsonParser, serialize_map) {
obj[u8"some_null"]; obj[u8"some_null"];
EXPECT_EQ(parser.serialize(obj), EXPECT_EQ(parser.serialize(obj),
R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json"); u8R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json");
} }
TEST(JsonParser, deserialize_null) { TEST(JsonParser, deserialize_null) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.deserialize("null"sv), object{}); EXPECT_EQ(parser.deserialize(u8"null"sv), object{});
} }
TEST(JsonParser, deserialize_boolean) { TEST(JsonParser, deserialize_boolean) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.deserialize("true"sv), true); EXPECT_EQ(parser.deserialize(u8"true"sv), true);
EXPECT_EQ(parser.deserialize("false"sv), false); EXPECT_EQ(parser.deserialize(u8"false"sv), false);
} }
TEST(JsonParser, deserialize_integer) { TEST(JsonParser, deserialize_integer) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.deserialize("1234"sv), 1234); EXPECT_EQ(parser.deserialize(u8"1234"sv), 1234);
EXPECT_EQ(parser.deserialize("-1234"sv), -1234); EXPECT_EQ(parser.deserialize(u8"-1234"sv), -1234);
} }
TEST(JsonParser, deserialize_decimal) { TEST(JsonParser, deserialize_decimal) {
json_parser parser; json_parser parser;
EXPECT_DOUBLE_EQ(parser.deserialize("12.34"sv).get<double>(), 12.34); EXPECT_DOUBLE_EQ(parser.deserialize(u8"12.34"sv).get<double>(), 12.34);
EXPECT_DOUBLE_EQ(parser.deserialize("1234."sv).get<double>(), 1234.0); EXPECT_DOUBLE_EQ(parser.deserialize(u8"1234."sv).get<double>(), 1234.0);
EXPECT_DOUBLE_EQ(parser.deserialize("0.1234"sv).get<double>(), 0.1234); EXPECT_DOUBLE_EQ(parser.deserialize(u8"0.1234"sv).get<double>(), 0.1234);
EXPECT_THROW(parser.deserialize(".1234"sv), std::invalid_argument); EXPECT_THROW(parser.deserialize(u8".1234"sv), std::invalid_argument);
EXPECT_DOUBLE_EQ(parser.deserialize("-12.34"sv).get<double>(), -12.34); EXPECT_DOUBLE_EQ(parser.deserialize(u8"-12.34"sv).get<double>(), -12.34);
} }
TEST(JsonParser, deserialize_string) { TEST(JsonParser, deserialize_string) {
json_parser parser; json_parser parser;
EXPECT_EQ(parser.deserialize(R"json("text")json"sv), u8"text"); EXPECT_EQ(parser.deserialize(u8R"json("text")json"sv), u8"text");
object obj; object obj;
std::u8string_view u8text = u8R"json("text")json"sv; std::u8string_view u8text = u8R"json("text")json"sv;
@ -143,7 +143,7 @@ TEST(JsonParser, deserialize_string) {
TEST(JsonParser, deserialize_array) { TEST(JsonParser, deserialize_array) {
json_parser parser; json_parser parser;
constexpr const char* json_data = R"json([ constexpr const char8_t* json_data = u8R"json([
true, true,
false, false,
1234, 1234,
@ -165,7 +165,7 @@ TEST(JsonParser, deserialize_array) {
TEST(JsonParser, deserialize_array_nested) { TEST(JsonParser, deserialize_array_nested) {
json_parser parser; json_parser parser;
constexpr const char* json_data = R"json([ constexpr const char8_t* json_data = u8R"json([
true, true,
false, false,
1234 1234
@ -211,7 +211,7 @@ TEST(JsonParser, deserialize_array_nested) {
TEST(JsonParser, deserialize_map) { TEST(JsonParser, deserialize_map) {
json_parser parser; json_parser parser;
constexpr const char* json_data = R"json({ constexpr const char8_t* json_data = u8R"json({
"some_true":true, "some_true":true,
"some_false" : false, "some_false" : false,
"some_int": 1234, "some_int": 1234,
@ -233,7 +233,7 @@ TEST(JsonParser, deserialize_map) {
TEST(JsonParser, deserialize_map_nested) { TEST(JsonParser, deserialize_map_nested) {
json_parser parser; json_parser parser;
constexpr const char* json_data = R"json({ constexpr const char8_t* json_data = u8R"json({
"some_text" : "text", "some_text" : "text",
"some_object" : { "some_object" : {
"some_null_object": {} "some_null_object": {}

14
src/test/unicode.cpp

@ -128,12 +128,18 @@ TEST(UTF32Test, decode_codepoint) {
DECODE_CODEPOINT_TEST(U"\U0001F604"sv, U'\U0001F604', 1U); DECODE_CODEPOINT_TEST(U"\U0001F604"sv, U'\U0001F604', 1U);
} }
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>; #ifdef JESSILIB_CHAR_AS_UTF8
using char_type_combos = ::testing::Types< using char_type_combos = ::testing::Types<
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>, std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>,
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>, std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>, std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>; std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#else // JESSILIB_CHAR_AS_UTF8
using char_type_combos = ::testing::Types<
std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#endif // JESSILIB_CHAR_AS_UTF8
template<typename T> template<typename T>
class UnicodeFullTest : public ::testing::Test { class UnicodeFullTest : public ::testing::Test {
@ -157,9 +163,9 @@ TYPED_TEST(UnicodeFullTest, string_cast) {
} }
TEST(UTF8Test, string_view_cast) { TEST(UTF8Test, string_view_cast) {
auto abcd_str = jessilib::string_cast<char8_t>(U"ABCD"); std::string_view abcd_str = "ABCD";
auto view = string_view_cast<char>(abcd_str); auto view = string_view_cast<char8_t>(abcd_str);
EXPECT_TRUE(equals(view, abcd_str)); EXPECT_TRUE(equals(view, u8"ABCD"sv));
} }
/** equals */ /** equals */

23
src/test/unicode_sequence.cpp

@ -34,13 +34,20 @@ ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("test"s) == "test"s);
ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("\\r\\n"s) == "\r\n"s); ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("\\r\\n"s) == "\r\n"s);
#endif // __cpp_lib_constexpr_string #endif // __cpp_lib_constexpr_string
#ifdef JESSILIB_CHAR_AS_UTF8
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>; using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
using utf8_char_types = ::testing::Types<char, char8_t>;
using char_type_combos = ::testing::Types< using char_type_combos = ::testing::Types<
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>, std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>,
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>, std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>, std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>; std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#else // JESSILIB_CHAR_AS_UTF8
using char_types = ::testing::Types<char8_t, char16_t, char32_t>;
using char_type_combos = ::testing::Types<
std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
#endif // JESSILIB_CHAR_AS_UTF8
template<typename T> template<typename T>
class UnicodeSequenceTest : public ::testing::Test { class UnicodeSequenceTest : public ::testing::Test {
@ -51,8 +58,8 @@ TYPED_TEST_SUITE(UnicodeSequenceTest, char_types);
constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing
#define TEST_CPP_SEQUENCE(expr) \ #define TEST_CPP_SEQUENCE(expr) \
{ auto parsed_string = jessilib::string_cast<TypeParam>(#expr); \ { auto parsed_string = jessilib::string_cast<TypeParam>(reinterpret_cast<const char8_t*>(#expr)); \
auto normal_string = jessilib::string_cast<TypeParam>(expr); \ auto normal_string = jessilib::string_cast<TypeParam>(reinterpret_cast<const char8_t*>(expr)); \
parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \ parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \
jessilib::apply_cpp_escape_sequences(parsed_string); \ jessilib::apply_cpp_escape_sequences(parsed_string); \
EXPECT_EQ(parsed_string, normal_string); } EXPECT_EQ(parsed_string, normal_string); }
@ -130,7 +137,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) { for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) {
std::basic_string<TypeParam> parsed_string; std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 2; ++min_length) { for (size_t min_length = 0; min_length <= 2; ++min_length) {
parsed_string = jessilib::string_cast<TypeParam>("\\x"); parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string); jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -145,7 +152,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
std::basic_string<TypeParam> parsed_string; std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 4; ++min_length) { for (size_t min_length = 0; min_length <= 4; ++min_length) {
// "\x0" -> "\xffff" // "\x0" -> "\xffff"
parsed_string = jessilib::string_cast<TypeParam>("\\x"); parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string); jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -161,7 +168,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
std::basic_string<TypeParam> parsed_string; std::basic_string<TypeParam> parsed_string;
for (size_t min_length = 0; min_length <= 8; ++min_length) { for (size_t min_length = 0; min_length <= 8; ++min_length) {
// "\x0" -> "\x0010ffff" // "\x0" -> "\x0010ffff"
parsed_string = jessilib::string_cast<TypeParam>("\\x"); parsed_string = jessilib::string_cast<TypeParam>(u8"\\x");
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
jessilib::apply_cpp_escape_sequences(parsed_string); jessilib::apply_cpp_escape_sequences(parsed_string);
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
@ -175,7 +182,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
TYPED_TEST(UnicodeSequenceTest, cpp_u16) { TYPED_TEST(UnicodeSequenceTest, cpp_u16) {
// "u0000" -> "uffff" with & without leading zeroes // "u0000" -> "uffff" with & without leading zeroes
for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) {
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\u"); std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>(u8"\\u");
parsed_string += make_hex_string<TypeParam>(codepoint, 4); parsed_string += make_hex_string<TypeParam>(codepoint, 4);
jessilib::apply_cpp_escape_sequences(parsed_string); jessilib::apply_cpp_escape_sequences(parsed_string);
@ -188,7 +195,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_u16) {
TYPED_TEST(UnicodeSequenceTest, cpp_u32) { TYPED_TEST(UnicodeSequenceTest, cpp_u32) {
// "U00000000" -> "U000100FF" with & without leading zeroes // "U00000000" -> "U000100FF" with & without leading zeroes
for (unsigned int codepoint = 0; codepoint <= MAX_LOOP_CODEPOINT; ++codepoint) { for (unsigned int codepoint = 0; codepoint <= MAX_LOOP_CODEPOINT; ++codepoint) {
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\U"); std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>(u8"\\U");
parsed_string += make_hex_string<TypeParam>(codepoint, 8); parsed_string += make_hex_string<TypeParam>(codepoint, 8);
jessilib::apply_cpp_escape_sequences(parsed_string); jessilib::apply_cpp_escape_sequences(parsed_string);

Loading…
Cancel
Save