From 3a12fc56f8beb8e1aa3bccb3b1cd9fcb7aaa761c Mon Sep 17 00:00:00 2001 From: Jessica James Date: Sun, 12 Dec 2021 18:45:38 -0600 Subject: [PATCH] Initial pass for removing 'char' support from unicode methods, expected to be replaced by another mechanism --- src/common/parser/parser.cpp | 8 ++- src/common/parsers/json.cpp | 67 ++++++++++++-------- src/common/serialize.cpp | 14 ++--- src/include/jessilib/http_query.hpp | 2 +- src/include/jessilib/parser.hpp | 4 +- src/include/jessilib/parsers/json.hpp | 25 +++++--- src/include/jessilib/serialize.hpp | 10 +-- src/include/jessilib/unicode.hpp | 32 ++++++---- src/include/jessilib/unicode_base.hpp | 14 ++++- src/include/jessilib/unicode_compare.hpp | 8 +++ src/include/jessilib/unicode_syntax.hpp | 9 ++- src/test/http_query.cpp | 80 ++++++++++++------------ src/test/parser.cpp | 36 +++++------ src/test/parsers/json.cpp | 54 ++++++++-------- src/test/unicode.cpp | 14 +++-- src/test/unicode_sequence.cpp | 23 ++++--- 16 files changed, 229 insertions(+), 171 deletions(-) diff --git a/src/common/parser/parser.cpp b/src/common/parser/parser.cpp index d364052..39ab6e8 100644 --- a/src/common/parser/parser.cpp +++ b/src/common/parser/parser.cpp @@ -17,12 +17,13 @@ */ #include "parser.hpp" +#include "unicode.hpp" #include namespace jessilib { object parser::deserialize(std::istream& in_stream) { - std::vector data; + std::vector data; // Read entire stream into data char buffer[1024]; @@ -32,11 +33,12 @@ object parser::deserialize(std::istream& in_stream) { } // Pass data to deserialize - return deserialize(std::string_view{ &data.front(), data.size() }); + return deserialize(std::u8string_view{ &data.front(), data.size() }); } void parser::serialize(std::ostream& in_stream, const object& in_object) { - in_stream << serialize(in_object); + // TODO: replace this method + in_stream << jessilib::string_view_cast(serialize(in_object)); } } // namespace jessilib diff --git a/src/common/parsers/json.cpp b/src/common/parsers/json.cpp 
index d27503d..4d616ee 100644 --- a/src/common/parsers/json.cpp +++ b/src/common/parsers/json.cpp @@ -23,8 +23,9 @@ using namespace std::literals; namespace jessilib { -std::string make_json_string(std::u8string_view in_string) { - std::string result; +template +std::basic_string make_json_string(std::u8string_view in_string) { + std::basic_string result; result.reserve(in_string.size() + 2); result = '\"'; @@ -39,23 +40,34 @@ std::string make_json_string(std::u8string_view in_string) { result += '\"'; } else if (decode.codepoint < 0x20) { // control characters - result += "\\u0000"sv; + result += '\\'; + result += 'u'; + result += '0'; + result += '0'; // overwrite last 2 zeroes with correct hexadecimal sequence - char* data_end = result.data() + result.size(); - char* data = data_end - 2; // Will only ever use 2 chars + char data[2]; // Will only ever use 2 chars + char* data_end = data + sizeof(data); auto to_chars_result = std::to_chars(data, data_end, static_cast(decode.codepoint), 16); - if (to_chars_result.ec == std::errc{} && to_chars_result.ptr != data_end) { - // Only 1 byte written; shift it over - *to_chars_result.ptr = *(to_chars_result.ptr - 1); - - // And fill in the zeroes - *(to_chars_result.ptr - 1) = '0'; + if (to_chars_result.ptr == data) { + // No bytes written + result += '0'; + result += '0'; + } + else if (to_chars_result.ptr != data_end) { + // 1 byte written + result += '0'; + result += data[0]; + } + else { + // 2 bytes written + result += data[0]; + result += data[1]; } } else { // Valid UTF-8 sequence; copy it over - result.append(reinterpret_cast(in_string.data()), decode.units); + result.append(in_string.data(), decode.units); } in_string.remove_prefix(decode.units); @@ -67,41 +79,42 @@ std::string make_json_string(std::u8string_view in_string) { -object json_parser::deserialize(std::string_view in_data) { +object json_parser::deserialize(std::u8string_view in_data) { object result; - deserialize_json(result, in_data); + 
std::u8string_view data_view = jessilib::string_view_cast(in_data); + deserialize_json(result, data_view); return result; } -std::string json_parser::serialize(const object& in_object) { +std::u8string json_parser::serialize(const object& in_object) { static const object::array_type s_null_array; static const object::map_type s_null_map; switch (in_object.type()) { case object::type::null: - return "null"s; + return u8"null"s; case object::type::boolean: if (in_object.get()) { - return "true"s; + return u8"true"s; } - return "false"s; + return u8"false"s; case object::type::integer: - return std::to_string(in_object.get()); + return static_cast(jessilib::string_view_cast(std::to_string(in_object.get()))); case object::type::decimal: - return std::to_string(in_object.get()); + return static_cast(jessilib::string_view_cast(std::to_string(in_object.get()))); case object::type::text: - return make_json_string(in_object.get()); + return make_json_string(in_object.get()); case object::type::array: { if (in_object.size() == 0) { - return "[]"s; + return u8"[]"s; } - std::string result; + std::u8string result; result = '['; // Serialize all objects in array @@ -117,16 +130,16 @@ std::string json_parser::serialize(const object& in_object) { case object::type::map: { if (in_object.size() == 0) { - return "{}"s; + return u8"{}"s; } - std::string result; + std::u8string result; result = '{'; // Serialize all objects in map for (auto& item : in_object.get(s_null_map)) { - result += make_json_string(item.first); - result += ":"sv; + result += make_json_string(item.first); + result += ':'; result += json_parser::serialize(item.second); result += ','; } diff --git a/src/common/serialize.cpp b/src/common/serialize.cpp index 5839122..1254347 100644 --- a/src/common/serialize.cpp +++ b/src/common/serialize.cpp @@ -41,15 +41,15 @@ std::shared_ptr get_parser(const std::string& in_format) { } /** Deserialization */ -object deserialize_object(const std::string& in_data, const 
std::string& in_format) { - return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format); +object deserialize_object(const std::u8string& in_data, const std::string& in_format) { + return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format); } -object deserialize_object(const std::vector& in_data, const std::string& in_format) { - return deserialize_object(std::string_view{ &in_data.front(), in_data.size() }, in_format); +object deserialize_object(const std::vector& in_data, const std::string& in_format) { + return deserialize_object(std::u8string_view{ &in_data.front(), in_data.size() }, in_format); } -object deserialize_object(std::string_view in_data, const std::string& in_format) { +object deserialize_object(std::u8string_view in_data, const std::string& in_format) { return get_parser(in_format)->deserialize(in_data); } @@ -58,7 +58,7 @@ object deserialize_object(std::istream& in_stream, const std::string& in_format) } /** Serialization */ -std::string serialize_object(const object& in_object, const std::string& in_format) { +std::u8string serialize_object(const object& in_object, const std::string& in_format) { return get_parser(in_format)->serialize(in_object); } @@ -66,4 +66,4 @@ void serialize_object(std::ostream& in_stream, const object& in_object, const st get_parser(in_format)->serialize(in_stream, in_object); } -} // namespace jessilib \ No newline at end of file +} // namespace jessilib diff --git a/src/include/jessilib/http_query.hpp b/src/include/jessilib/http_query.hpp index d733fab..923f3c9 100644 --- a/src/include/jessilib/http_query.hpp +++ b/src/include/jessilib/http_query.hpp @@ -166,7 +166,7 @@ constexpr bool deserialize_html_form(ContainerT& out_container, std::basic_strin HTMLFormContext context{ out_container, inout_string.data() }; constexpr auto& html_form_tree = html_form_root_tree; - static_assert(is_sorted(), "Tree must be pre-sorted"); + static_assert(is_sorted(), "Tree 
must be pre-sorted"); std::basic_string_view read_view{ inout_string }; if (apply_syntax_tree diff --git a/src/include/jessilib/parser.hpp b/src/include/jessilib/parser.hpp index eb56c46..cfd529e 100644 --- a/src/include/jessilib/parser.hpp +++ b/src/include/jessilib/parser.hpp @@ -38,9 +38,9 @@ public: * @return A valid (possibly null) object */ virtual object deserialize(std::istream& in_stream); - virtual object deserialize(std::string_view in_data) = 0; // TODO: serialize from arbitrary unicode strings + virtual object deserialize(std::u8string_view in_data) = 0; // TODO: deserialize from arbitrary unicode strings virtual void serialize(std::ostream& in_stream, const object& in_object); - virtual std::string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings + virtual std::u8string serialize(const object& in_object) = 0; // TODO: serialize to arbitrary unicode strings }; // parser template diff --git a/src/include/jessilib/parsers/json.hpp b/src/include/jessilib/parsers/json.hpp index 3202f81..0ede3a2 100644 --- a/src/include/jessilib/parsers/json.hpp +++ b/src/include/jessilib/parsers/json.hpp @@ -29,8 +29,8 @@ namespace jessilib { class json_parser : public parser { public: /** deserialize/serialize overrides */ - virtual object deserialize(std::string_view in_data) override; - virtual std::string serialize(const object& in_object) override; + virtual object deserialize(std::u8string_view in_data) override; + virtual std::u8string serialize(const object& in_object) override; }; /** @@ -101,7 +101,9 @@ constexpr syntax_tree_member make_keyword_value_pair() { // Unexpected character; throw if appropriate if constexpr (ContextT::use_exceptions) { using namespace std::literals; - throw std::invalid_argument{ jessilib::join("Invalid JSON data; unexpected token: '"sv, inout_read_view, "' when parsing null"sv) }; + throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid JSON data; unexpected token: '"sv, + inout_read_view,
+ u8"' when parsing null"sv) }; } return std::numeric_limits::max(); @@ -164,7 +166,8 @@ size_t string_start_action(ContextT& inout_context, std::basic_string_view // Invalid JSON! if constexpr (ContextT::use_exceptions) { using namespace std::literals; - throw std::invalid_argument{ jessilib::join_mbstring("Invalid JSON data: expected ',' or ']', instead encountered: "sv, inout_read_view) }; + throw std::invalid_argument{ jessilib::join_mbstring( + u8"Invalid JSON data: expected ',' or ']', instead encountered: "sv, + inout_read_view) }; } return std::numeric_limits::max(); @@ -285,9 +290,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view::max(); @@ -312,9 +318,10 @@ size_t make_map_start_action(ContextT& inout_context, std::basic_string_view& in_data, const std::string& in_format); -object deserialize_object(std::string_view in_data, const std::string& in_format); +object deserialize_object(const std::u8string& in_data, const std::string& in_format); +object deserialize_object(const std::vector& in_data, const std::string& in_format); +object deserialize_object(std::u8string_view in_data, const std::string& in_format); object deserialize_object(std::istream& in_stream, const std::string& in_format); /** Serialization */ -std::string serialize_object(const object& in_object, const std::string& in_format); +std::u8string serialize_object(const object& in_object, const std::string& in_format); void serialize_object(std::ostream& in_stream, const object& in_object, const std::string& in_format); -} // namespace jessilib \ No newline at end of file +} // namespace jessilib diff --git a/src/include/jessilib/unicode.hpp b/src/include/jessilib/unicode.hpp index 14ca215..6b74baa 100644 --- a/src/include/jessilib/unicode.hpp +++ b/src/include/jessilib/unicode.hpp @@ -112,24 +112,30 @@ bool is_valid(const InT& in_string) { template std::basic_string_view string_view_cast(const InT& in_string) { using InCharT = typename 
impl_unicode::is_string::type; - size_t in_string_bytes = in_string.size() * sizeof(InCharT); - if constexpr (sizeof(OutCharT) > sizeof(InCharT)) { - // The output type is larger than the input type; verify no partial codepoints - if (in_string_bytes % sizeof(OutCharT) != 0) { - // This cannot be used to produce a valid result - return {}; - } + + if constexpr (sizeof(InCharT) == sizeof(OutCharT)) { + return { reinterpret_cast(in_string.data()), in_string.size() }; } + else { + size_t in_string_bytes = in_string.size() * sizeof(InCharT); + if constexpr (sizeof(OutCharT) > sizeof(InCharT)) { + // The output type is larger than the input type; verify no partial codepoints + if (in_string_bytes % sizeof(OutCharT) != 0) { + // This cannot be used to produce a valid result + return {}; + } + } - size_t out_string_units = in_string_bytes / sizeof(OutCharT); - const OutCharT* data_begin = reinterpret_cast(in_string.data()); - return { data_begin, out_string_units }; + size_t out_string_units = in_string_bytes / sizeof(OutCharT); + const OutCharT* data_begin = reinterpret_cast(in_string.data()); + return { data_begin, out_string_units }; + } } template std::basic_string string_cast(const InT& in_string) { static_assert(impl_unicode::is_string::value == true); - using InCharT = typename impl_unicode::is_string::type; + using InCharT = std::remove_cvref_t::type>; using InEquivalentT = typename unicode_traits::equivalent_type; using InViewT = std::basic_string_view; using OutT = std::basic_string; @@ -392,7 +398,7 @@ size_t findi(std::basic_string_view in_string, std::basic_string_view< ADAPT_BASIC_STRING(findi) -using find_if_predicate_type = bool(*)(char32_t, char*, size_t); +/*using find_if_predicate_type = bool(*)(char32_t, char*, size_t); inline void find_if(std::basic_string& in_string, find_if_predicate_type in_predicate) { using CharT = char; CharT* ptr = in_string.data(); @@ -422,7 +428,7 @@ inline void find_if(std::basic_string_view& in_string, find_if_view_predic 
in_string_view.remove_prefix(decode.units); ptr += decode.units; } -} +}*/ namespace impl_join { diff --git a/src/include/jessilib/unicode_base.hpp b/src/include/jessilib/unicode_base.hpp index d3c08ab..350d2ed 100644 --- a/src/include/jessilib/unicode_base.hpp +++ b/src/include/jessilib/unicode_base.hpp @@ -70,7 +70,7 @@ constexpr size_t encode_codepoint(CharT* out_buffer, char32_t in_codepoint); std::u8string encode_codepoint_u8(char32_t in_codepoint); std::u16string encode_codepoint_u16(char32_t in_codepoint); std::u32string encode_codepoint_u32(char32_t in_codepoint); -std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32 +std::wstring encode_codepoint_w(char32_t in_codepoint); // ASSUMES UTF-16 OR UTF-32 /** decode_codepoint */ @@ -130,15 +130,21 @@ constexpr decode_result decode_surrogate_pair(char16_t in_high_surrogate, char16 template struct unicode_traits : std::false_type {}; +#ifdef JESSILIB_CHAR_AS_UTF8 template<> struct unicode_traits : std::true_type { using equivalent_type = char8_t; // DEPRECATE static constexpr size_t max_units_per_codepoint = 4; }; +#endif // JESSILIB_CHAR_AS_UTF8 template<> struct unicode_traits : std::true_type { - using equivalent_type = char; // DEPRECATE +#ifdef JESSILIB_CHAR_AS_UTF8 + using equivalent_type = char; +#else // JESSILIB_CHAR_AS_UTF8 + using equivalent_type = char8_t; +#endif // JESSILIB_CHAR_AS_UTF8 static constexpr size_t max_units_per_codepoint = 4; }; @@ -338,9 +344,11 @@ constexpr size_t encode_codepoint_utf(T& out_destination, char32_t in_codepoint) else if constexpr (std::is_same_v) { return encode_codepoint_w(out_destination, in_codepoint); } +#ifdef JESSILIB_CHAR_AS_UTF8 else if constexpr (std::is_same_v) { return encode_codepoint_utf8(out_destination, in_codepoint); } +#endif // JESSILIB_CHAR_AS_UTF8 } template @@ -476,9 +484,11 @@ constexpr decode_result decode_codepoint(std::basic_string_view in_string return decode_codepoint_utf32(in_string); } } +#ifdef
JESSILIB_CHAR_AS_UTF8 else if constexpr (std::is_same_v) { return decode_codepoint_utf8(in_string); } +#endif // JESSILIB_CHAR_AS_UTF8 } template diff --git a/src/include/jessilib/unicode_compare.hpp b/src/include/jessilib/unicode_compare.hpp index 97a4c31..1ae511d 100644 --- a/src/include/jessilib/unicode_compare.hpp +++ b/src/include/jessilib/unicode_compare.hpp @@ -285,6 +285,7 @@ struct text_hash { return hash; } +#ifdef JESSILIB_CHAR_AS_UTF8 auto operator()(const std::basic_string& in_key) const noexcept { // ASSUMES UTF-8 return hash(in_key.data(), in_key.data() + in_key.size()); } @@ -292,6 +293,7 @@ struct text_hash { auto operator()(std::basic_string_view in_key) const noexcept { return hash(in_key.data(), in_key.data() + in_key.size()); } +#endif // JESSILIB_CHAR_AS_UTF8 auto operator()(const std::basic_string& in_key) const noexcept { // ASSUMES UTF-8 return hash(in_key.data(), in_key.data() + in_key.size()); @@ -321,10 +323,12 @@ struct text_hash { struct text_equal { using is_transparent = std::true_type; +#ifdef JESSILIB_CHAR_AS_UTF8 template bool operator()(std::basic_string_view in_lhs, std::basic_string_view in_rhs) const noexcept { return equals(in_lhs, in_rhs); } +#endif // JESSILIB_CHAR_AS_UTF8 template bool operator()(std::basic_string_view in_lhs, const std::basic_string& in_rhs) const noexcept { @@ -370,6 +374,7 @@ struct text_hashi { return hash; } +#ifdef JESSILIB_CHAR_AS_UTF8 auto operator()(const std::basic_string& in_key) const noexcept { // ASSUMES UTF-8 return hash(in_key.data(), in_key.data() + in_key.size()); } @@ -377,6 +382,7 @@ struct text_hashi { auto operator()(std::basic_string_view in_key) const noexcept { return hash(in_key.data(), in_key.data() + in_key.size()); } +#endif // JESSILIB_CHAR_AS_UTF8 auto operator()(const std::basic_string& in_key) const noexcept { // ASSUMES UTF-8 return hash(in_key.data(), in_key.data() + in_key.size()); @@ -406,10 +412,12 @@ struct text_hashi { struct text_equali { using is_transparent = 
std::true_type; +#ifdef JESSILIB_CHAR_AS_UTF8 template bool operator()(std::basic_string_view in_lhs, std::basic_string_view in_rhs) const noexcept { return equalsi(in_lhs, in_rhs); } +#endif // JESSILIB_CHAR_AS_UTF8 template bool operator()(std::basic_string_view in_lhs, const std::basic_string& in_rhs) const noexcept { diff --git a/src/include/jessilib/unicode_syntax.hpp b/src/include/jessilib/unicode_syntax.hpp index 3571215..ad84760 100644 --- a/src/include/jessilib/unicode_syntax.hpp +++ b/src/include/jessilib/unicode_syntax.hpp @@ -25,7 +25,7 @@ #pragma once -#include "unicode_base.hpp" +#include "unicode.hpp" namespace jessilib { @@ -76,10 +76,9 @@ template size_t fail_action(decode_result, ContextT&, std::basic_string_view& in_read_view) { using namespace std::literals; if constexpr (UseExceptionsV) { - std::string exception = "Invalid parse data; unexpected token: '"s; - jessilib::encode_codepoint(exception, in_read_view.front()); - exception += "' when parsing data"; - throw std::invalid_argument{ exception }; + throw std::invalid_argument{ jessilib::join_mbstring(u8"Invalid parse data; unexpected token: '"sv, + jessilib::decode_codepoint(in_read_view).codepoint, + u8"' when parsing data"sv) }; } return std::numeric_limits::max(); } diff --git a/src/test/http_query.cpp b/src/test/http_query.cpp index f885a83..7337c5d 100644 --- a/src/test/http_query.cpp +++ b/src/test/http_query.cpp @@ -22,10 +22,10 @@ using namespace std::literals; -// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string +// Compile-time tests for constexpr on compilers which support C++20 constexpr std::u8string #if defined(__cpp_lib_constexpr_string) && (__GNUC__ >= 12 || _MSC_VER >= 1929) -constexpr std::string query_constexpr(std::string_view in_expression) { - std::string result{ static_cast(in_expression) }; +constexpr std::u8string query_constexpr(std::u8string_view in_expression) { + std::u8string result{ static_cast(in_expression) }; 
jessilib::deserialize_http_query(result); return result; } @@ -34,8 +34,8 @@ ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first+second"s) == "first seco ASSERT_COMPILES_CONSTEXPR(return query_constexpr("first%20second"s) == "first second"s); #endif // __cpp_lib_constexpr_string -using char_types = ::testing::Types; -using utf8_char_types = ::testing::Types; +using char_types = ::testing::Types; +using utf8_char_types = ::testing::Types; template class QuerySequenceTest : public ::testing::Test { @@ -162,77 +162,77 @@ TYPED_TEST(QuerySequenceTest, invalids_2len_trailing) { } TEST(HtmlFormParser, empty) { - std::vector> parsed_result; - std::string query_text; + std::vector> parsed_result; + std::u8string query_text; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); EXPECT_TRUE(query_text.empty()); EXPECT_TRUE(parsed_result.empty()); } TEST(HtmlFormParser, one_key) { - std::vector> parsed_result; - std::string query_text = "key"; + std::vector> parsed_result; + std::u8string query_text = u8"key"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_EQ(query_text, "key"); + EXPECT_EQ(query_text, u8"key"); EXPECT_EQ(parsed_result.size(), 1); EXPECT_EQ(parsed_result[0].first, query_text); EXPECT_TRUE(parsed_result[0].second.empty()); } TEST(HtmlFormParser, one_key_and_value) { - std::vector> parsed_result; - std::string query_text = "key=value"; + std::vector> parsed_result; + std::u8string query_text = u8"key=value"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_TRUE(query_text.starts_with("keyvalue")); + EXPECT_TRUE(query_text.starts_with(u8"keyvalue")); EXPECT_EQ(parsed_result.size(), 1); - EXPECT_EQ(parsed_result[0].first, "key"); - EXPECT_EQ(parsed_result[0].second, "value"); + EXPECT_EQ(parsed_result[0].first, u8"key"); + EXPECT_EQ(parsed_result[0].second, u8"value"); } TEST(HtmlFormParser, one_key_and_value_trailing) { - std::vector> parsed_result; - std::string 
query_text = "key=value&"; + std::vector> parsed_result; + std::u8string query_text = u8"key=value&"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_TRUE(query_text.starts_with("keyvalue")); + EXPECT_TRUE(query_text.starts_with(u8"keyvalue")); EXPECT_EQ(parsed_result.size(), 2); - EXPECT_EQ(parsed_result[0].first, "key"); - EXPECT_EQ(parsed_result[0].second, "value"); + EXPECT_EQ(parsed_result[0].first, u8"key"); + EXPECT_EQ(parsed_result[0].second, u8"value"); EXPECT_TRUE(parsed_result[1].first.empty()); EXPECT_TRUE(parsed_result[1].second.empty()); } TEST(HtmlFormParser, two_key_one_value) { - std::vector> parsed_result; - std::string query_text = "key=value&second_key"; + std::vector> parsed_result; + std::u8string query_text = u8"key=value&second_key"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_TRUE(query_text.starts_with("keyvaluesecond_key")); + EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_key")); EXPECT_EQ(parsed_result.size(), 2); - EXPECT_EQ(parsed_result[0].first, "key"); - EXPECT_EQ(parsed_result[0].second, "value"); - EXPECT_EQ(parsed_result[1].first, "second_key"); + EXPECT_EQ(parsed_result[0].first, u8"key"); + EXPECT_EQ(parsed_result[0].second, u8"value"); + EXPECT_EQ(parsed_result[1].first, u8"second_key"); EXPECT_TRUE(parsed_result[1].second.empty()); } TEST(HtmlFormParser, two_key_two_value) { - std::vector> parsed_result; - std::string query_text = "key=value&second_key=second=value"; + std::vector> parsed_result; + std::u8string query_text = u8"key=value&second_key=second=value"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_TRUE(query_text.starts_with("keyvaluesecond_keysecond=value")); + EXPECT_TRUE(query_text.starts_with(u8"keyvaluesecond_keysecond=value")); EXPECT_EQ(parsed_result.size(), 2); - EXPECT_EQ(parsed_result[0].first, "key"); - EXPECT_EQ(parsed_result[0].second, "value"); - EXPECT_EQ(parsed_result[1].first, 
"second_key"); - EXPECT_EQ(parsed_result[1].second, "second=value"); + EXPECT_EQ(parsed_result[0].first, u8"key"); + EXPECT_EQ(parsed_result[0].second, u8"value"); + EXPECT_EQ(parsed_result[1].first, u8"second_key"); + EXPECT_EQ(parsed_result[1].second, u8"second=value"); } TEST(HtmlFormParser, some_sequences) { - std::vector> parsed_result; - std::string query_text = "k+y=va+u%20&%73econd%5Fke%79=second_valu%65"; + std::vector> parsed_result; + std::u8string query_text = u8"k+y=va+u%20&%73econd%5Fke%79=second_valu%65"; EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text)); - EXPECT_TRUE(query_text.starts_with("k yva u second_keysecond_value")); + EXPECT_TRUE(query_text.starts_with(u8"k yva u second_keysecond_value")); EXPECT_EQ(parsed_result.size(), 2); - EXPECT_EQ(parsed_result[0].first, "k y"); - EXPECT_EQ(parsed_result[0].second, "va u "); - EXPECT_EQ(parsed_result[1].first, "second_key"); - EXPECT_EQ(parsed_result[1].second, "second_value"); + EXPECT_EQ(parsed_result[0].first, u8"k y"); + EXPECT_EQ(parsed_result[0].second, u8"va u "); + EXPECT_EQ(parsed_result[1].first, u8"second_key"); + EXPECT_EQ(parsed_result[1].second, u8"second_value"); } diff --git a/src/test/parser.cpp b/src/test/parser.cpp index 8427ee4..ee20f8b 100644 --- a/src/test/parser.cpp +++ b/src/test/parser.cpp @@ -30,11 +30,11 @@ using namespace std::literals; class test_parser : public parser { public: /** deserialize/serialize overrides */ - virtual object deserialize(std::string_view in_data) override { + virtual object deserialize(std::u8string_view in_data) override { return deserialize_impl(in_data); } - virtual std::string serialize(const object& in_object) override { + virtual std::u8string serialize(const object& in_object) override { return serialize_impl(in_object); } @@ -45,26 +45,26 @@ public: } /** default serialize/deserialize implementations */ - static std::string serialize_default(const object& in_object) { + static std::u8string serialize_default(const 
object& in_object) { if (in_object.has()) { - return string_cast(in_object.get()); + return in_object.get(); } - return static_cast(DEFAULT_SERIALIZE_RESULT); + return static_cast(DEFAULT_SERIALIZE_RESULT); } - static object deserialize_default(std::string_view in_data) { + static object deserialize_default(std::u8string_view in_data) { return object{ string_view_cast(in_data) }; } /** static members */ - static constexpr std::string_view DEFAULT_SERIALIZE_RESULT = "serialize_result"sv; - static std::function serialize_impl; - static std::function deserialize_impl; + static constexpr std::u8string_view DEFAULT_SERIALIZE_RESULT = u8"serialize_result"sv; + static std::function serialize_impl; + static std::function deserialize_impl; }; -std::function test_parser::serialize_impl{ &serialize_default }; -std::function test_parser::deserialize_impl{ &deserialize_default }; +std::function test_parser::serialize_impl{ &serialize_default }; +std::function test_parser::deserialize_impl{ &deserialize_default }; parser_registration test_parser_registration{ "test" }; @@ -84,30 +84,30 @@ class ParserTest : public base_test { TEST_F(ParserTest, find_parser) { EXPECT_NO_THROW(serialize_object(u8"test_data", "test")); - EXPECT_NO_THROW(deserialize_object("test_data"sv, "test")); + EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test")); EXPECT_THROW(serialize_object(u8"test_data", "invalid_format_test"), format_not_available); - EXPECT_THROW(deserialize_object("test_data"sv, "invalid_format_test"), format_not_available); + EXPECT_THROW(deserialize_object(u8"test_data"sv, "invalid_format_test"), format_not_available); } TEST_F(ParserTest, temp_parser) { EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available); - EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available); + EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available); { parser_registration test_tmp_registration{ "test_tmp" }; 
EXPECT_NO_THROW(serialize_object(u8"test_data", "test_tmp")); - EXPECT_NO_THROW(deserialize_object("test_data"sv, "test_tmp")); + EXPECT_NO_THROW(deserialize_object(u8"test_data"sv, "test_tmp")); } EXPECT_THROW(serialize_object(u8"test_data", "test_tmp"), format_not_available); - EXPECT_THROW(deserialize_object("test_data"sv, "test_tmp"), format_not_available); + EXPECT_THROW(deserialize_object(u8"test_data"sv, "test_tmp"), format_not_available); } TEST_F(ParserTest, serialize) { - EXPECT_EQ(serialize_object(u8"test_data", "test"), "test_data"); + EXPECT_EQ(serialize_object(u8"test_data", "test"), u8"test_data"); } TEST_F(ParserTest, deserialize) { - EXPECT_EQ(deserialize_object("test_data"sv, "test").get(), u8"test_data"); + EXPECT_EQ(deserialize_object(u8"test_data"sv, "test").get(), u8"test_data"); } diff --git a/src/test/parsers/json.cpp b/src/test/parsers/json.cpp index 514c4d6..3bacba4 100644 --- a/src/test/parsers/json.cpp +++ b/src/test/parsers/json.cpp @@ -25,26 +25,26 @@ using namespace std::literals; TEST(JsonParser, serialize_null) { json_parser parser; - EXPECT_EQ(parser.serialize({}), "null"); + EXPECT_EQ(parser.serialize({}), u8"null"); } TEST(JsonParser, serialize_boolean) { json_parser parser; - EXPECT_EQ(parser.serialize(true), "true"); - EXPECT_EQ(parser.serialize(false), "false"); + EXPECT_EQ(parser.serialize(true), u8"true"); + EXPECT_EQ(parser.serialize(false), u8"false"); } TEST(JsonParser, serialize_integer) { json_parser parser; - EXPECT_EQ(parser.serialize(1234), "1234"); + EXPECT_EQ(parser.serialize(1234), u8"1234"); } TEST(JsonParser, serialize_decimal) { json_parser parser; - EXPECT_DOUBLE_EQ(std::atof(parser.serialize(12.34).c_str()), 12.34); - EXPECT_DOUBLE_EQ(std::atof(parser.serialize(1234.0).c_str()), 1234.0); + EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast(parser.serialize(12.34).c_str())), 12.34); + EXPECT_DOUBLE_EQ(std::atof(reinterpret_cast(parser.serialize(1234.0).c_str())), 1234.0); } // necessary due to some sort of bug with 
EXPECT_EQ on MSVC @@ -56,10 +56,10 @@ void expect_eq(LeftT in_left, RightT in_right) { TEST(JsonParser, serialize_string) { json_parser parser; - EXPECT_EQ(parser.serialize(u8"text"), R"json("text")json"); - expect_eq(parser.serialize(u8"\"text\""), R"json("\"text\"")json"); - expect_eq(parser.serialize(u8"\"te\x01xt\""), R"json("\"te\u0001xt\"")json"); - expect_eq(parser.serialize(u8"\"te\x10xt\""), R"json("\"te\u0010xt\"")json"); + EXPECT_EQ(parser.serialize(u8"text"), u8R"json("text")json"); + expect_eq(parser.serialize(u8"\"text\""), u8R"json("\"text\"")json"); + expect_eq(parser.serialize(u8"\"te\x01xt\""), u8R"json("\"te\u0001xt\"")json"); + expect_eq(parser.serialize(u8"\"te\x10xt\""), u8R"json("\"te\u0010xt\"")json"); } TEST(JsonParser, serialize_array) { @@ -72,7 +72,7 @@ TEST(JsonParser, serialize_array) { }; EXPECT_EQ(parser.serialize(array), - R"json([true,1234,"text",null])json"); + u8R"json([true,1234,"text",null])json"); } TEST(JsonParser, serialize_map) { @@ -85,43 +85,43 @@ TEST(JsonParser, serialize_map) { obj[u8"some_null"]; EXPECT_EQ(parser.serialize(obj), - R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json"); + u8R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json"); } TEST(JsonParser, deserialize_null) { json_parser parser; - EXPECT_EQ(parser.deserialize("null"sv), object{}); + EXPECT_EQ(parser.deserialize(u8"null"sv), object{}); } TEST(JsonParser, deserialize_boolean) { json_parser parser; - EXPECT_EQ(parser.deserialize("true"sv), true); - EXPECT_EQ(parser.deserialize("false"sv), false); + EXPECT_EQ(parser.deserialize(u8"true"sv), true); + EXPECT_EQ(parser.deserialize(u8"false"sv), false); } TEST(JsonParser, deserialize_integer) { json_parser parser; - EXPECT_EQ(parser.deserialize("1234"sv), 1234); - EXPECT_EQ(parser.deserialize("-1234"sv), -1234); + EXPECT_EQ(parser.deserialize(u8"1234"sv), 1234); + EXPECT_EQ(parser.deserialize(u8"-1234"sv), -1234); } TEST(JsonParser, 
deserialize_decimal) { json_parser parser; - EXPECT_DOUBLE_EQ(parser.deserialize("12.34"sv).get(), 12.34); - EXPECT_DOUBLE_EQ(parser.deserialize("1234."sv).get(), 1234.0); - EXPECT_DOUBLE_EQ(parser.deserialize("0.1234"sv).get(), 0.1234); - EXPECT_THROW(parser.deserialize(".1234"sv), std::invalid_argument); - EXPECT_DOUBLE_EQ(parser.deserialize("-12.34"sv).get(), -12.34); + EXPECT_DOUBLE_EQ(parser.deserialize(u8"12.34"sv).get(), 12.34); + EXPECT_DOUBLE_EQ(parser.deserialize(u8"1234."sv).get(), 1234.0); + EXPECT_DOUBLE_EQ(parser.deserialize(u8"0.1234"sv).get(), 0.1234); + EXPECT_THROW(parser.deserialize(u8".1234"sv), std::invalid_argument); + EXPECT_DOUBLE_EQ(parser.deserialize(u8"-12.34"sv).get(), -12.34); } TEST(JsonParser, deserialize_string) { json_parser parser; - EXPECT_EQ(parser.deserialize(R"json("text")json"sv), u8"text"); + EXPECT_EQ(parser.deserialize(u8R"json("text")json"sv), u8"text"); object obj; std::u8string_view u8text = u8R"json("text")json"sv; @@ -143,7 +143,7 @@ TEST(JsonParser, deserialize_string) { TEST(JsonParser, deserialize_array) { json_parser parser; - constexpr const char* json_data = R"json([ + constexpr const char8_t* json_data = u8R"json([ true, false, 1234, @@ -165,7 +165,7 @@ TEST(JsonParser, deserialize_array) { TEST(JsonParser, deserialize_array_nested) { json_parser parser; - constexpr const char* json_data = R"json([ + constexpr const char8_t* json_data = u8R"json([ true, false, 1234 @@ -211,7 +211,7 @@ TEST(JsonParser, deserialize_array_nested) { TEST(JsonParser, deserialize_map) { json_parser parser; - constexpr const char* json_data = R"json({ + constexpr const char8_t* json_data = u8R"json({ "some_true":true, "some_false" : false, "some_int": 1234, @@ -233,7 +233,7 @@ TEST(JsonParser, deserialize_map) { TEST(JsonParser, deserialize_map_nested) { json_parser parser; - constexpr const char* json_data = R"json({ + constexpr const char8_t* json_data = u8R"json({ "some_text" : "text", "some_object" : { "some_null_object": {} diff 
--git a/src/test/unicode.cpp b/src/test/unicode.cpp index 848ecb4..055ca04 100644 --- a/src/test/unicode.cpp +++ b/src/test/unicode.cpp @@ -128,12 +128,18 @@ TEST(UTF32Test, decode_codepoint) { DECODE_CODEPOINT_TEST(U"\U0001F604"sv, U'\U0001F604', 1U); } -using char_types = ::testing::Types; +#ifdef JESSILIB_CHAR_AS_UTF8 using char_type_combos = ::testing::Types< std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair>; +#else // JESSILIB_CHAR_AS_UTF8 +using char_type_combos = ::testing::Types< + std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair>; +#endif // JESSILIB_CHAR_AS_UTF8 template class UnicodeFullTest : public ::testing::Test { @@ -157,9 +163,9 @@ TYPED_TEST(UnicodeFullTest, string_cast) { } TEST(UTF8Test, string_view_cast) { - auto abcd_str = jessilib::string_cast(U"ABCD"); - auto view = string_view_cast(abcd_str); - EXPECT_TRUE(equals(view, abcd_str)); + std::string_view abcd_str = "ABCD"; + auto view = string_view_cast(abcd_str); + EXPECT_TRUE(equals(view, u8"ABCD"sv)); } /** equals */ diff --git a/src/test/unicode_sequence.cpp b/src/test/unicode_sequence.cpp index 291f8e7..e65d643 100644 --- a/src/test/unicode_sequence.cpp +++ b/src/test/unicode_sequence.cpp @@ -34,13 +34,20 @@ ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("test"s) == "test"s); ASSERT_COMPILES_CONSTEXPR(return cpp_constexpr("\\r\\n"s) == "\r\n"s); #endif // __cpp_lib_constexpr_string +#ifdef JESSILIB_CHAR_AS_UTF8 using char_types = ::testing::Types; -using utf8_char_types = ::testing::Types; using char_type_combos = ::testing::Types< std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair, std::pair>; +#else // JESSILIB_CHAR_AS_UTF8 +using char_types = ::testing::Types; +using char_type_combos = 
::testing::Types< + std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair>; +#endif // JESSILIB_CHAR_AS_UTF8 template class UnicodeSequenceTest : public ::testing::Test { @@ -51,8 +58,8 @@ TYPED_TEST_SUITE(UnicodeSequenceTest, char_types); constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing #define TEST_CPP_SEQUENCE(expr) \ - { auto parsed_string = jessilib::string_cast(#expr); \ - auto normal_string = jessilib::string_cast(expr); \ + { auto parsed_string = jessilib::string_cast(reinterpret_cast(#expr)); \ + auto normal_string = jessilib::string_cast(reinterpret_cast(expr)); \ parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \ jessilib::apply_cpp_escape_sequences(parsed_string); \ EXPECT_EQ(parsed_string, normal_string); } @@ -130,7 +137,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) { for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) { std::basic_string parsed_string; for (size_t min_length = 0; min_length <= 2; ++min_length) { - parsed_string = jessilib::string_cast("\\x"); + parsed_string = jessilib::string_cast(u8"\\x"); parsed_string += make_hex_string(codepoint, min_length); jessilib::apply_cpp_escape_sequences(parsed_string); EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); @@ -145,7 +152,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) { std::basic_string parsed_string; for (size_t min_length = 0; min_length <= 4; ++min_length) { // "\x0" -> "\xffff" - parsed_string = jessilib::string_cast("\\x"); + parsed_string = jessilib::string_cast(u8"\\x"); parsed_string += make_hex_string(codepoint, min_length); jessilib::apply_cpp_escape_sequences(parsed_string); EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); @@ -161,7 +168,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) { std::basic_string parsed_string; for (size_t min_length = 0; min_length <= 8; ++min_length) { // "\x0" -> "\x0010ffff" - parsed_string = jessilib::string_cast("\\x"); + 
parsed_string = jessilib::string_cast(u8"\\x"); parsed_string += make_hex_string(codepoint, min_length); jessilib::apply_cpp_escape_sequences(parsed_string); EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); @@ -175,7 +182,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_hex) { TYPED_TEST(UnicodeSequenceTest, cpp_u16) { // "u0000" -> "uffff" with & without leading zeroes for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { - std::basic_string parsed_string = jessilib::string_cast("\\u"); + std::basic_string parsed_string = jessilib::string_cast(u8"\\u"); parsed_string += make_hex_string(codepoint, 4); jessilib::apply_cpp_escape_sequences(parsed_string); @@ -188,7 +195,7 @@ TYPED_TEST(UnicodeSequenceTest, cpp_u16) { TYPED_TEST(UnicodeSequenceTest, cpp_u32) { // "U00000000" -> "U000100FF" with & without leading zeroes for (unsigned int codepoint = 0; codepoint <= MAX_LOOP_CODEPOINT; ++codepoint) { - std::basic_string parsed_string = jessilib::string_cast("\\U"); + std::basic_string parsed_string = jessilib::string_cast(u8"\\U"); parsed_string += make_hex_string(codepoint, 8); jessilib::apply_cpp_escape_sequences(parsed_string);