From 0ff8722bde8eb994e8cb083afe1b90b257b127a3 Mon Sep 17 00:00:00 2001 From: Jessica James Date: Fri, 18 Feb 2022 06:30:29 -0600 Subject: [PATCH] Some endianness related changes which were never pushed --- src/common/parsers/json.cpp | 36 ++++++++++- src/include/jessilib/parser.hpp | 2 +- src/include/jessilib/util.hpp | 104 +++++++++++++++++++++++++++++--- src/test/parsers/json.cpp | 51 +++++++++++++++- src/test/util.cpp | 58 +++++++++++++++++- 5 files changed, 234 insertions(+), 17 deletions(-) diff --git a/src/common/parsers/json.cpp b/src/common/parsers/json.cpp index ffc03f1..a136b08 100644 --- a/src/common/parsers/json.cpp +++ b/src/common/parsers/json.cpp @@ -40,11 +40,27 @@ object json_parser::deserialize_bytes(bytes_view_type in_data, text_encoding in_ deserialize_json(result, data_view); } else if (in_write_encoding == text_encoding::multibyte) { - // TODO: support without copying... somehow + // TODO: support without copying auto u8_data = mbstring_to_ustring(jessilib::string_view_cast(in_data)); std::u8string_view data_view = u8_data.second; deserialize_json(result, data_view); } + else if (in_write_encoding == text_encoding::utf_16_foreign) { + // TODO: support without copying + std::u16string u16_data{ jessilib::string_view_cast(in_data) }; + array_byteswap(u16_data.data(), u16_data.data() + u16_data.size()); + std::u16string_view data_view = u16_data; + + deserialize_json(result, data_view); + } + else if (in_write_encoding == text_encoding::utf_32_foreign) { + // TODO: support without copying + std::u32string u32_data{ jessilib::string_view_cast(in_data) }; + array_byteswap(u32_data.data(), u32_data.data() + u32_data.size()); + std::u32string_view data_view = u32_data; + + deserialize_json(result, data_view); + } return result; } @@ -56,9 +72,23 @@ std::string json_parser::serialize_bytes(const object& in_object, text_encoding case text_encoding::utf_16: return serialize_impl(in_object); case text_encoding::utf_32: - return serialize_impl(in_object); 
+ return serialize_impl(in_object); case text_encoding::wchar: - return serialize_impl(in_object); + return serialize_impl(in_object); + case text_encoding::multibyte: + return ustring_to_mbstring(serialize_impl(in_object)).second; + + // Other-endianness + case text_encoding::utf_16_foreign: { + std::string result = serialize_impl(in_object); + string_byteswap(result); + return result; + } + case text_encoding::utf_32_foreign: { + std::string result = serialize_impl(in_object); + string_byteswap(result); + return result; + } default: break; } diff --git a/src/include/jessilib/parser.hpp b/src/include/jessilib/parser.hpp index 82cdc63..2580eb5 100644 --- a/src/include/jessilib/parser.hpp +++ b/src/include/jessilib/parser.hpp @@ -28,7 +28,7 @@ namespace jessilib { class parser { public: virtual ~parser() = default; - using byte_type = uint8_t; + using byte_type = char; using bytes_view_type = std::basic_string_view; /** Interface methods */ diff --git a/src/include/jessilib/util.hpp b/src/include/jessilib/util.hpp index 9979888..e4f0ec7 100644 --- a/src/include/jessilib/util.hpp +++ b/src/include/jessilib/util.hpp @@ -20,14 +20,25 @@ #include #include +#include /** Macros */ -#define JESSILIB_FILENAME \ - (::jessilib::impl::filename_from_string(__FILE__)) +#define JESSILIB_IMPL_TYPE_LITERAL(IN, ...) __VA_ARGS__ ## IN +#define JESSILIB_TYPE_LITERAL(IN, ...) JESSILIB_IMPL_TYPE_LITERAL(IN, __VA_ARGS__) + +#define JESSILIB_FILENAME(...) \ + (::jessilib::impl::filename_from_string(JESSILIB_TYPE_LITERAL(__FILE__, __VA_ARGS__))) + +#define JESSILIB_FILENAME_VIEW(...) 
(::jessilib::make_string_view(JESSILIB_FILENAME(__VA_ARGS__))) namespace jessilib { +template +std::basic_string_view make_string_view(const CharT* in_ntstring) { + return in_ntstring; +} + template const CharT* parse_decimal_part(const CharT* in_str, const CharT* in_str_end, NumberT& out_value) { int denominator = 10; @@ -129,19 +140,71 @@ constexpr T square(T in_value) { return in_value * in_value; } -/** Implementation details */ +template +constexpr IntegerT byteswap(IntegerT in_integer) { // TODO: Remove w/ C++23 + static_assert(sizeof(IntegerT) > 1, "byteswap on single byte does nothing"); + if constexpr (sizeof(IntegerT) == 2) { + return (in_integer << 8) | (in_integer >> 8); + } + else if constexpr (sizeof(IntegerT) == 4) { + return (in_integer << 24) + | ((in_integer & 0xFF00) << 8) + | ((in_integer & 0xFF0000) >> 8) + | (in_integer >> 24); + } + else if constexpr (sizeof(IntegerT) == 8) { + return (in_integer << 56) + | ((in_integer & 0xFF00) << 40) + | ((in_integer & 0xFF0000) << 24) + | ((in_integer & 0xFF000000) << 8) + | ((in_integer & 0xFF00000000) >> 8) + | ((in_integer & 0xFF0000000000) >> 24) + | ((in_integer & 0xFF000000000000) >> 40) + | (in_integer >> 56); + } + else { + // Fallback to std::reverse for exotic integer types + std::reverse(reinterpret_cast(&in_integer), + reinterpret_cast(&in_integer) + sizeof(IntegerT)); + return in_integer; + } +} + +template +void array_byteswap(CharT* begin, CharT* end) { + while (begin != end) { + *begin = byteswap(*begin); + ++begin; + } +} + +template +void string_byteswap(std::string& in_string) { + // Ensure correct number of bytes for underlying representation type + if (in_string.size() % sizeof(CharT) != 0) { + return; + } + + for (auto itr = in_string.begin(); itr != in_string.end();) { + auto unit_end = itr + sizeof(CharT); + std::reverse(itr, unit_end); + itr = unit_end; + } +} + +/** Implementation details */ namespace impl { -template -constexpr const char* filename_from_string(const char 
(&in_filename)[in_filename_length]) { - const char* filename_itr = in_filename; - const char* filename_end = filename_itr + in_filename_length; - const char* result = filename_itr; +template +constexpr const CharT* filename_from_string(const CharT (&in_filename)[in_filename_length]) { + const CharT* filename_itr = in_filename; + const CharT* filename_end = filename_itr + in_filename_length; + const CharT* result = filename_itr; while (filename_itr != filename_end) { - if (*filename_itr == '/' || *filename_itr == '\\') { + if (*filename_itr == U'/' || *filename_itr == U'\\') { ++filename_itr; result = filename_itr; } @@ -153,5 +216,28 @@ constexpr const char* filename_from_string(const char (&in_filename)[in_filename return result; } +template +constexpr const CharT* file_extension_from_string(const CharT (&in_filename)[in_filename_length]) { + const CharT* filename_itr = in_filename; + const CharT* filename_end = filename_itr + in_filename_length; + const CharT* result = filename_end; + + while (filename_itr != filename_end) { + if (*filename_itr == U'/' || *filename_itr == U'\\') { + ++filename_itr; + result = filename_end; + } + else { + if (*filename_itr == '.') { + result = filename_itr; + } + + ++filename_itr; + } + } + + return result; +} + } // namespace impl } // namespace jessilib diff --git a/src/test/parsers/json.cpp b/src/test/parsers/json.cpp index c3189dd..28f934b 100644 --- a/src/test/parsers/json.cpp +++ b/src/test/parsers/json.cpp @@ -137,12 +137,12 @@ TEST(JsonParser, deserialize_string) { std::u16string_view u16text = uR"json("text")json"sv; EXPECT_TRUE(deserialize_json(obj, u16text)); EXPECT_EQ(obj, u8"text"sv); - EXPECT_TRUE(u8text.empty()); + EXPECT_TRUE(u16text.empty()); std::u32string_view u32text = UR"json("text")json"sv; EXPECT_TRUE(deserialize_json(obj, u32text)); EXPECT_EQ(obj, u8"text"sv); - EXPECT_TRUE(u8text.empty()); + EXPECT_TRUE(u32text.empty()); } TEST(JsonParser, deserialize_array) { @@ -273,3 +273,50 @@ TEST(JsonParser, 
deserialize_map_nested) { EXPECT_EQ(obj[u8"some_other_object"][u8"beans"][u8"magical"], true); EXPECT_EQ(obj[u8"some_other_object"][u8"beans"][u8"makes toot"], true); } + +/** Some basic foreign-encoding tests */ + +template +std::string make_foreign_string(std::basic_string_view in_string) { + std::string result{ reinterpret_cast(in_string.data()), + reinterpret_cast(in_string.data() + in_string.size()) }; + jessilib::string_byteswap(result); + + return result; +} + +TEST(JsonParser, deserialize_fu16_string) { + json_parser parser; + std::string fu16text = make_foreign_string(uR"json("text")json"sv); + + // Deserialize foreign utf-16 text + object obj = parser.deserialize_bytes(fu16text, text_encoding::utf_16_foreign); + EXPECT_EQ(obj, u8"text"sv); +} + +TEST(JsonParser, deserialize_fu32_string) { + json_parser parser; + std::string fu32text = make_foreign_string(UR"json("text")json"sv); + + // Deserialize foreign utf-32 text + object obj = parser.deserialize_bytes(fu32text, text_encoding::utf_32_foreign); + EXPECT_EQ(obj, u8"text"sv); +} + +TEST(JsonParser, serialize_fu16_string) { + json_parser parser; + std::string fu16text = make_foreign_string(uR"json("\"text\"")json"sv); + + // Serialize to foreign utf-16 text + std::string serialized_bytes = parser.serialize_bytes(u8R"json("text")json"sv, text_encoding::utf_16_foreign); + EXPECT_EQ(serialized_bytes, fu16text); +} + +TEST(JsonParser, serialize_fu32_string) { + json_parser parser; + std::string fu32text = make_foreign_string(UR"json("\"text\"")json"sv); + + // Serialize to foreign utf-32 text + std::string serialized_bytes = parser.serialize_bytes(u8R"json("text")json"sv, text_encoding::utf_32_foreign); + EXPECT_EQ(serialized_bytes, fu32text); +} diff --git a/src/test/util.cpp b/src/test/util.cpp index 256ef97..39cfe8a 100644 --- a/src/test/util.cpp +++ b/src/test/util.cpp @@ -23,6 +23,60 @@ using namespace jessilib; using namespace std::literals; TEST(UtilTest, filename) { - constexpr const char* filename = 
JESSILIB_FILENAME; - EXPECT_STREQ(filename, "util.cpp"); + EXPECT_EQ(JESSILIB_FILENAME_VIEW(), "util.cpp"); + EXPECT_EQ(JESSILIB_FILENAME_VIEW(u8), u8"util.cpp"sv); + EXPECT_EQ(JESSILIB_FILENAME_VIEW(u), u"util.cpp"sv); + EXPECT_EQ(JESSILIB_FILENAME_VIEW(U), U"util.cpp"sv); + EXPECT_EQ(JESSILIB_FILENAME_VIEW(L), L"util.cpp"sv); + //EXPECT_EQ(JESSILIB_FILENAME(u8), u8".cpp"sv); +} + +TEST(UtilTest, byteswap) { + EXPECT_EQ(byteswap(uint16_t{0x1234}), uint16_t{0x3412}) + << "hex: " << std::hex << byteswap(uint16_t{0x1234}); + EXPECT_EQ(byteswap(uint32_t{0x12345678UL}), uint32_t{0x78563412UL}) + << "hex: " << std::hex << byteswap(uint32_t{0x12345678UL}); + EXPECT_EQ(byteswap(uint64_t{0x123456789ABCDEF0ULL}), uint64_t{0xF0DEBC9A78563412ULL}) + << "hex: " << std::hex << byteswap(uint64_t{0x123456789ABCDEF0ULL}); +} + +TEST(UtilTest, array_byteswap) { + uint16_t numbers[]{ + 0x1234, + 0x5678, + 0x9ABC, + 0xDEF0 + }; + + uint16_t byteswapped_numbers[]{ + 0x3412, + 0x7856, + 0xBC9A, + 0xF0DE + }; + + array_byteswap(std::begin(numbers), std::end(numbers)); + EXPECT_EQ(numbers[0], byteswapped_numbers[0]); + EXPECT_EQ(numbers[1], byteswapped_numbers[1]); + EXPECT_EQ(numbers[2], byteswapped_numbers[2]); + EXPECT_EQ(numbers[3], byteswapped_numbers[3]); +} + +TEST(UtilTest, string_byteswap) { + std::string numbers{ + 0x12, 0x34, + 0x56, 0x78, + static_cast(0x9A), static_cast(0xBC), + static_cast(0xDE), static_cast(0xF0) + }; + + std::string u16byteswapped_numbers{ + 0x34, 0x12, + 0x78, 0x56, + static_cast(0xBC), static_cast(0x9A), + static_cast(0xF0), static_cast(0xDE) + }; + + string_byteswap(numbers); + EXPECT_EQ(numbers, u16byteswapped_numbers); }