From 0093f0f00e4f0b5405f0b2b8fda588580023f0c3 Mon Sep 17 00:00:00 2001 From: Jessica James Date: Sun, 30 Sep 2018 22:13:46 -0500 Subject: [PATCH] Added initial JSON parser Added `unicode.hpp`, providing various unicode-related utilities Moved template type helpers from `util.hpp` to `type_traits.hpp` Added Boost.Asio as submodule for upcoming use --- .gitmodules | 3 + src/CMakeLists.txt | 1 - src/common/CMakeLists.txt | 9 +- src/common/parsers/json.cpp | 542 +++++++++++++++++++++++++++++++++++ src/common/unicode.cpp | 257 +++++++++++++++++ src/include/asio.hpp | 27 ++ src/include/impl/asio | 1 + src/include/object.hpp | 18 +- src/include/parsers/json.hpp | 32 +++ src/include/type_traits.hpp | 185 ++++++++++++ src/include/unicode.hpp | 79 +++++ src/include/util.hpp | 201 ++++--------- src/test/CMakeLists.txt | 7 +- src/test/parsers/json.cpp | 247 ++++++++++++++++ src/test/unicode.cpp | 120 ++++++++ src/test/util.cpp | 3 - 16 files changed, 1573 insertions(+), 159 deletions(-) create mode 100644 src/common/parsers/json.cpp create mode 100644 src/common/unicode.cpp create mode 100644 src/include/asio.hpp create mode 160000 src/include/impl/asio create mode 100644 src/include/parsers/json.hpp create mode 100644 src/include/type_traits.hpp create mode 100644 src/include/unicode.hpp create mode 100644 src/test/parsers/json.cpp create mode 100644 src/test/unicode.cpp diff --git a/.gitmodules b/.gitmodules index 49ff7da..f843000 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "googletest"] path = src/test/googletest url = https://github.com/google/googletest.git +[submodule "asio"] + path = src/include/impl/asio + url = https://github.com/boostorg/asio.git diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1672ee..f33c1bb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,5 @@ cmake_minimum_required(VERSION 3.8) # Setup source files -include_directories(include) add_subdirectory(common) add_subdirectory(test) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fd669b7..0e474fc 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,10 +1,15 @@ cmake_minimum_required(VERSION 3.8) # Setup source files -include_directories(../include) set(SOURCE_FILES - timer/timer.cpp timer/timer_manager.cpp thread_pool.cpp timer/timer_context.cpp timer/cancel_token.cpp timer/synchronized_timer.cpp object.cpp parser/parser.cpp parser/parser_manager.cpp config.cpp serialize.cpp) + timer/timer.cpp timer/timer_manager.cpp thread_pool.cpp timer/timer_context.cpp timer/cancel_token.cpp timer/synchronized_timer.cpp object.cpp parser/parser.cpp parser/parser_manager.cpp config.cpp serialize.cpp parsers/json.cpp unicode.cpp) +# Setup library build target add_library(jessilib ${SOURCE_FILES}) +# Setup include directories +target_include_directories(jessilib PUBLIC ../include) +target_include_directories(jessilib PRIVATE ../include/impl/asio/include) + +# Setup additionally needed libs target_link_libraries(jessilib ${JESSILIB_ADDITOINAL_LIBS}) diff --git a/src/common/parsers/json.cpp b/src/common/parsers/json.cpp new file mode 100644 index 0000000..a6419d1 --- /dev/null +++ b/src/common/parsers/json.cpp @@ -0,0 +1,542 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#include "parsers/json.hpp" +#include +#include "unicode.hpp" +#include "util.hpp" + +using namespace std::literals; + +namespace jessilib { + +std::string make_json_string(std::string_view in_string) { + std::string result; + result.reserve(in_string.size() + 2); + result = '\"'; + + while (!in_string.empty()) { + if (in_string.front() == '\\') { // backslash + result += '\\'; + result += '\\'; + } + else if (in_string.front() == '\"') { // quotation + result += '\\'; + result += '\"'; + } + else if (in_string.front() < 0x20) { // control characters + result += "\\u0000"sv; + + // overwrite last 2 zeroes with correct hexadecimal sequence + char* data_end = result.data() + result.size(); + char* data = data_end - 2; + std::to_chars(data, data_end, in_string.front(), 16); + } + else if ((in_string.front() & 0x80) != 0) { // UTF-8 sequence; copy to bypass above processing + if ((in_string.front() & 0x40) != 0) { + // this is a 2+ byte sequence + + if ((in_string.front() & 0x20) != 0) { + // this is a 3+ byte sequence + + if ((in_string.front() & 0x10) != 0) { + // this is a 4 byte sequence + if (in_string.size() < 4) { + // Invalid sequence encountered (first byte indicates 4 bytes, but less than 4 available) + break; + } + + // This is a 4-byte sequence + result += in_string.substr(0, 4); + in_string.remove_prefix(4); + continue; + } + + if (in_string.size() < 3) { + // Invalid sequence encountered (first byte indicates 3 bytes, but less than 3 available) + break; + } + + // This is a 3-byte sequence + result += in_string.substr(0, 3); + in_string.remove_prefix(3); + continue; + } + + if (in_string.size() < 2) { + // Invalid sequence encountered (first byte indicates 2 bytes, but less than 2 available) + break; + } + + // This is a 2-byte sequence + result += in_string.substr(0, 2); + in_string.remove_prefix(2); + continue; + } + + // Invalid sequence encountered (first bit is 1, but not second) + break; + } + else { + // Character in standard ASCII table + result += in_string.front(); + } + + in_string.remove_prefix(1); + } + + result += '\"'; + return result; +} + +void advance_whitespace(std::string_view& in_data) { + while (!in_data.empty()) { + switch (in_data.front()) { + case ' ': + case '\t': + case '\r': + case '\n': + in_data.remove_prefix(1); + break; + + default: + return; + } + } +} + +uint16_t get_codepoint_from_hex(const std::string_view& in_data) { + uint16_t value{}; + auto data = in_data.data(); + auto end = in_data.data() + 4; + data = std::from_chars(data, end, value, 16).ptr; + if (data != end) { + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + *data + "' when parsing unicode escape sequence" }; + }; + + return value; +} + +std::string read_json_string(std::string_view& in_data) { + std::string result; + + // Remove leading quotation + in_data.remove_prefix(1); + + // Iterate over view until we reach the ending quotation, or the end of the view + while (!in_data.empty()) { + switch (in_data.front()) { + // Escape sequence + case '\\': + // strip '\' + in_data.remove_prefix(1); + if (in_data.empty()) { + throw std::invalid_argument{ "Invalid JSON data; unexpected end of data when parsing escape sequence" }; + } + + // Parse escape type + switch (in_data.front()) { + // Quote + case '\"': + in_data.remove_prefix(1); + result += '\"'; + break; + + // Backslash + case '\\': + in_data.remove_prefix(1); + result += '\\'; + break; + + // Forward slash + case '/': + in_data.remove_prefix(1); + result += '/'; + break; + + // Backspace + case 'b': + in_data.remove_prefix(1); + result += '\b'; + break; + + // Formfeed + case 'f': + in_data.remove_prefix(1); + result += '\f'; + break; + + // Newline + case 'n': + in_data.remove_prefix(1); + result += '\n'; + break; + + // Carriage return + case 'r': + in_data.remove_prefix(1); + result += '\r'; + break; + + // Horizontal tab + case 't': + in_data.remove_prefix(1); + result += '\t'; + break; + + // Unicode codepoint + case 'u': { + in_data.remove_prefix(1); // strip 'u' + if (in_data.size() < 4) { + throw std::invalid_argument{ + "Invalid JSON data; unexpected end of data when parsing unicode escape sequence" }; + } + + char16_t codepoint = get_codepoint_from_hex(in_data); + in_data.remove_prefix(4); // strip codepoint hex + + if (is_high_surrogate(codepoint) // If we have a high surrogate... + && in_data.size() >= 6) { // And we have enough room for "\uXXXX"... + // Special case: we just parsed a high surrogate. Handle this with the low surrogate, if there is one + if (in_data.substr(0, 2) == "\\u"sv) { + // Another codepoint follows; read it in + in_data.remove_prefix(2); // strip "\u" + char16_t second_codepoint = get_codepoint_from_hex(in_data); + in_data.remove_prefix(4); // strip codepoint hex + + if (is_low_surrogate(second_codepoint)) { + // We've got a valid surrogate pair; serialize the represented codepoint; decode it + codepoint = decode_surrogate_pair(codepoint, second_codepoint).codepoint; + encode_codepoint(result, codepoint); // serialize the real codepoint + } + else { + // This is not a valid surrogate pair; serialize the codepoints directly + encode_codepoint(result, codepoint); + encode_codepoint(result, second_codepoint); + } + continue; + } + } + + encode_codepoint(result, codepoint); + continue; + } + + default: + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing escape sequence" }; + } + + break; + + // End of string + case '\"': + in_data.remove_prefix(1); // strip trailing quotation + advance_whitespace(in_data); // strip trailing spaces + return result; + + // Unicode sequence + default: { + auto codepoint = decode_codepoint(in_data); + if (codepoint.units == 0) { + // Invalid unicode sequence + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing string" }; + } + + // Valid unicode sequence + result += in_data.substr(0, codepoint.units); + in_data.remove_prefix(codepoint.units); + break; + } + } + } + + // We reached the end of the string_view before encountering an ending quote + throw std::invalid_argument{ "Invalid JSON data; missing ending quote (\") when parsing string" }; +} + +object read_json_number(std::string_view& in_data) { + // parse integer + intmax_t integer_value{}; + const char* from_chars_end = std::from_chars(in_data.data(), in_data.data() + in_data.size(), integer_value).ptr; + if (in_data.data() == from_chars_end) { + // Failed to parse integer portion + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing number" }; + } + + // Strip integer portion and return if nothing remains + in_data.remove_prefix(from_chars_end - in_data.data()); + if (in_data.empty() || in_data.front() != '.') { + return integer_value; + } + + // Parse decimal portion + + /* + // std::from_chars method + long double decimal_value{}; + from_chars_end = std::from_chars(data, data_end, decimal_value).ptr; + return static_cast(integer_value) + decimal_value; + */ + + // parse_decimal_part method + in_data.remove_prefix(1); // strip leading '.' + long double decimal_value = integer_value; + from_chars_end = parse_decimal_part(in_data.data(), in_data.data() + in_data.size(), decimal_value); + // TODO: parse exponent + + // Strip decimal portion and return + in_data.remove_prefix(from_chars_end - in_data.data()); + return decimal_value; +} + +object read_json_object(std::string_view& in_data) { + while (!in_data.empty()) { + switch (in_data.front()) { + /** Start of null */ + case 'n': + if (in_data.substr(0, 4) != "null"sv) { + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + std::string{ in_data } + "' when parsing null" }; + } + + in_data.remove_prefix(4); + return {}; + + /** Start of boolean (true) */ + case 't': + if (in_data.substr(0, 4) != "true"sv) { + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + std::string{ in_data } + "' when parsing boolean" }; + } + + in_data.remove_prefix(4); + return true; + + /** Start of boolean (false) */ + case 'f': + if (in_data.substr(0, 5) != "false"sv) { + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + std::string{ in_data } + "' when parsing boolean" }; + } + + in_data.remove_prefix(5); + return false; + + /** Whitespace */ + case ' ': + case '\t': + case '\r': + case '\n': + in_data.remove_prefix(1); + break; + + /** Start of string */ + case '\"': + return read_json_string(in_data); + + /** Start of number */ + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + return read_json_number(in_data); + } + + /** Start of array */ + case '[': { + // Strip brace and leading whitespace + in_data.remove_prefix(1); + advance_whitespace(in_data); + + // Build and populate result + std::vector result; + while (true) { + if (in_data.empty()) { + throw std::invalid_argument{ "Invalid JSON data; unexpected end of data when parsing object array" }; + } + + if (in_data.front() == ']') { + // End of array + in_data.remove_prefix(1); + return result; + } + + // We've reached the start of an object; parse it into our array + result.push_back(read_json_object(in_data)); + + // Strip leading whitespace + advance_whitespace(in_data); + + if (in_data.empty()) { + throw std::invalid_argument{ "Invalid JSON data; unexpected end of data when parsing object array" }; + } + + if (in_data.front() == ',') { + // Strip comma and trailing whitespace + in_data.remove_prefix(1); + advance_whitespace(in_data); + } + } + } + + /** Start of map */ + case '{': { + // Strip brace and leading whitespace + in_data.remove_prefix(1); + advance_whitespace(in_data); + + // Build and populate result + object result{ object::map_t{} }; + while (true) { + if (in_data.empty()) { + throw std::invalid_argument{ "Invalid JSON data; unexpected end of data when parsing object map" }; + } + + if (in_data.front() == '}') { + // End of object + in_data.remove_prefix(1); + return result; + } + + // Assert that we've reached the start of a key + if (in_data.front() != '\"') { + throw std::invalid_argument{ + "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing object map" }; + } + + // Read in the key and build a value + auto& value = result[read_json_string(in_data)]; + + // Verify next character is ':' + if (in_data.empty()) { + throw std::invalid_argument{ + "Invalid JSON data; unexpected end of data after parsing map key; expected ':' followed by value" }; + } + if (in_data.front() != ':') { + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + + "' when parsing map key (expected ':' instead)" }; + } + in_data.remove_prefix(1); // strip ':' + + // We've reached an object value; parse it + value = read_json_object(in_data); + + // Advance through whitespace to ',' or '}' + advance_whitespace(in_data); + + if (in_data.empty()) { + throw std::invalid_argument{ + "Invalid JSON data; unexpected end of data after parsing map value; expected '}'" }; + } + + if (in_data.front() == ',') { + // Strip comma and trailing whitespace + in_data.remove_prefix(1); + advance_whitespace(in_data); + } + } + + // Unreachable; above code will always return on success or throw on failure + } + + default: + throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing JSON" }; + } + } + + // No non-whitespace data passed in; return a null object + return {}; +} + +object json_parser::deserialize(std::string_view in_data) { + return read_json_object(in_data); +} + +std::string json_parser::serialize(const object& in_object) { + static const object::array_t s_null_array; + static const object::map_t s_null_map; + + switch (in_object.type()) { + case object::type::null: + return "null"s; + + case object::type::boolean: + if (in_object.get()) { + return "true"s; + } + return "false"s; + + case object::type::integer: + return std::to_string(in_object.get()); + + case object::type::decimal: + return std::to_string(in_object.get()); + + case object::type::string: + return make_json_string(in_object.get()); + + case object::type::array: { + if (in_object.size() == 0) { + return "[]"s; + } + + std::string result; + result = '['; + + // Serialize all objects in array + for (auto& obj : in_object.get(s_null_array)) { + result += json_parser::serialize(obj); + result += ','; + } + + // Return result + result.back() = ']'; + return result; + } + + case object::type::map: { + if (in_object.size() == 0) { + return "{}"s; + } + + std::string result; + result = '{'; + + // Serialize all objects in map + for (auto& item : in_object.get(s_null_map)) { + result += make_json_string(item.first); + result += ":"sv; + result += json_parser::serialize(item.second); + result += ','; + } + + // Return result + result.back() = '}'; + return result; + } + + default: + throw std::invalid_argument{ "Invalid data type: " + std::to_string(static_cast(in_object.type())) }; + } +} + +} // namespace jessilib \ No newline at end of file diff --git a/src/common/unicode.cpp b/src/common/unicode.cpp new file mode 100644 index 0000000..9211f71 --- /dev/null +++ b/src/common/unicode.cpp @@ -0,0 +1,257 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#include "unicode.hpp" + +namespace jessilib { + +/** encode_codepoint */ + +template +void append_helper(std::basic_string& out_string, T in_value) { + out_string += in_value; +} + +template +void append_helper(std::basic_ostream& out_string, T in_value) { + out_string << in_value; +} + +template +size_t encode_codepoint_utf8(T& out_destination, char32_t in_codepoint) { + if (in_codepoint > 0x10FFFF) { + return 0; + } + + if (in_codepoint <= 0x007F) { + // 1-byte sequence (7 bits) + append_helper(out_destination, static_cast(in_codepoint)); + return 1; + } + + if (in_codepoint <= 0x07FF) { + // 2-byte sequence (11 bits; 5 + 6) + append_helper(out_destination, static_cast(0xC0 | ((in_codepoint >> 6) & 0x1F))); + append_helper(out_destination, static_cast(0x80 | (in_codepoint & 0x3F))); + return 2; + } + + if (in_codepoint <= 0xFFFF) { + // 3-byte sequence (16 bits; 4 + 6 + 6) + append_helper(out_destination, static_cast(0xE0 | ((in_codepoint >> 12) & 0x0F))); + append_helper(out_destination, static_cast(0x80 | ((in_codepoint >> 6) & 0x3F))); + append_helper(out_destination, static_cast(0x80 | (in_codepoint & 0x3F))); + return 3; + } + + // 4-byte sequence (21 bits; 3 + 6 + 6 + 6) + append_helper(out_destination, static_cast(0xF0 | ((in_codepoint >> 18) & 0x07))); + append_helper(out_destination, static_cast(0x80 | ((in_codepoint >> 12) & 0x3F))); + append_helper(out_destination, static_cast(0x80 | ((in_codepoint >> 6) & 0x3F))); + append_helper(out_destination, static_cast(0x80 | (in_codepoint & 0x3F))); + return 4; +} + +template +size_t encode_codepoint_utf16(T& out_destination, char32_t in_codepoint) { + if (in_codepoint > 0x10FFFF) { + return 0; + } + + if (in_codepoint <= 0xFFFF) { + // 1-unit sequence + append_helper(out_destination, static_cast(in_codepoint)); + return 1; + } + + // 2-unit sequence + in_codepoint -= 0x10000; + append_helper(out_destination, static_cast((in_codepoint >> 10) + 0xD800)); + append_helper(out_destination, static_cast((in_codepoint & 0x03FF) + 0xDC00)); + return 2; +} + +template +size_t encode_codepoint_utf32(T& out_destination, char32_t in_codepoint) { + if (in_codepoint > 0x10FFFF) { + return 0; + } + + append_helper(out_destination, in_codepoint); + return 1; +} + +size_t encode_codepoint(std::string& out_string, char32_t in_codepoint) { + return encode_codepoint_utf8(out_string, in_codepoint); +} + +size_t encode_codepoint(std::u16string& out_string, char32_t in_codepoint) { + return encode_codepoint_utf16(out_string, in_codepoint); +} + +size_t encode_codepoint(std::u32string& out_string, char32_t in_codepoint) { + return encode_codepoint_utf32(out_string, in_codepoint); +} + +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint) { + return encode_codepoint_utf8(out_stream, in_codepoint); +} + +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint) { + return encode_codepoint_utf16(out_stream, in_codepoint); +} + +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint) { + return encode_codepoint_utf32(out_stream, in_codepoint); +} + +std::string encode_codepoint_u8(char32_t in_codepoint) { + std::string result; + encode_codepoint(result, in_codepoint); + return result; +} + +std::u16string encode_codepoint_u16(char32_t in_codepoint) { + std::u16string result; + encode_codepoint(result, in_codepoint); + return result; +} + +std::u32string encode_codepoint_u32(char32_t in_codepoint) { + std::u32string result; + encode_codepoint(result, in_codepoint); + return result; +} + +/** decode_codepoint */ + +get_endpoint_result decode_codepoint(const std::string_view& in_string) { + get_endpoint_result result{ 0, 0 }; + + if (in_string.empty()) { + return result; + } + + if ((in_string.front() & 0x80) != 0) { // UTF-8 sequence{ + // Validity check + if (in_string.size() < 2 + || (in_string.front() & 0x40) == 0) { + // This is an invalid 1 byte sequence + return result; + } + + // get codepoint value + if ((in_string.front() & 0x20) != 0) { + // This is a 3+ byte sequence + if (in_string.size() < 3) { + // Invalid sequence; too few characters available + return result; + } + + if ((in_string.front() & 0x10) != 0) { + // This is a 4 byte sequence + if (in_string.size() < 4) { + // Invalid sequence; too few characters available + return result; + } + + result.codepoint = static_cast(in_string[0] & 0x0F) << 18; + result.codepoint += static_cast(in_string[1] & 0x3F) << 12; + result.codepoint += static_cast(in_string[2] & 0x3F) << 6; + result.codepoint += static_cast(in_string[3] & 0x3F); + result.units = 4; + return result; + } + + // this is a 3 byte sequence + result.codepoint = static_cast(in_string[0] & 0x0F) << 12; + result.codepoint += static_cast(in_string[1] & 0x3F) << 6; + result.codepoint += static_cast(in_string[2] & 0x3F); + result.units = 3; + return result; + } + + // This is a 2 byte sequence + result.codepoint = static_cast(in_string[0] & 0x1F) << 6; + result.codepoint += static_cast(in_string[1] & 0x3F); + result.units = 2; + return result; + } + + // This is a valid 1 byte sequence + result.codepoint = static_cast(in_string.front()); + result.units = 1; + + return result; +} + +get_endpoint_result decode_codepoint(const std::u16string_view& in_string) { + if (in_string.empty()) { + return { 0, 0 }; + } + + if (is_high_surrogate(in_string.front()) // If this is a high surrogate codepoint... + && in_string.size() > 1 // And a codepoint follows this surrogate.. + && is_low_surrogate(in_string[1])) { // And that codepoint is a low surrogate... + // We have a valid surrogate pair; decode it into a codepoint and return + char32_t codepoint { static_cast( + ((in_string.front() - 0xD800U) * 0x400U) // high surrogate magic + + (in_string[1] - 0xDC00U) // low surrogate magic + + 0x10000ULL // more magic + ) }; + + return { codepoint, 2 }; + } + + // Codepoint is a single char16_t; return codepoint directly + return { in_string.front(), 1 }; +} + +get_endpoint_result decode_codepoint(const std::u32string_view& in_string) { + if (in_string.empty()) { + return { 0, 0 }; + } + + return { in_string.front(), 1 }; +} + +bool is_high_surrogate(char32_t in_codepoint) { + return in_codepoint >= 0xD800 && in_codepoint <= 0xDBFF; +} + +bool is_low_surrogate(char32_t in_codepoint) { + return in_codepoint >= 0xDC00 && in_codepoint <= 0xDFFF; +} + +get_endpoint_result decode_surrogate_pair(char16_t in_high_surrogate, char16_t in_low_surrogate) { + if (is_high_surrogate(in_high_surrogate) + && is_low_surrogate((in_low_surrogate))) { + // We have a valid surrogate pair; decode it into a codepoint and return + char32_t codepoint { static_cast( + ((in_high_surrogate - 0xD800U) * 0x400U) // high surrogate magic + + (in_low_surrogate - 0xDC00U) // low surrogate magic + + 0x10000ULL // more magic + ) }; + + return { codepoint, 2 }; + } + + return { 0, 0 }; +} + +} // namespace jessilib \ No newline at end of file diff --git a/src/include/asio.hpp b/src/include/asio.hpp new file mode 100644 index 0000000..52c09dc --- /dev/null +++ b/src/include/asio.hpp @@ -0,0 +1,27 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#pragma once + +#include + +namespace jessilib { + +namespace asio = boost::asio; + +} // namespace jessilib diff --git a/src/include/impl/asio b/src/include/impl/asio new file mode 160000 index 0000000..fbe86d8 --- /dev/null +++ b/src/include/impl/asio @@ -0,0 +1 @@ +Subproject commit fbe86d86b1ac53e40444e5af03ca4a6c74c33bda diff --git a/src/include/object.hpp b/src/include/object.hpp index 7932484..66845e6 100644 --- a/src/include/object.hpp +++ b/src/include/object.hpp @@ -24,7 +24,7 @@ #include #include #include -#include "util.hpp" +#include "type_traits.hpp" namespace jessilib { @@ -76,6 +76,18 @@ public: using type = map_t; }; + /** type */ + + enum class type : size_t { + null = 0, + boolean, + integer, + decimal, + string, // TODO: consider separating into 'binary' (std::vector) and 'text' (std::string) types + array, + map + }; + // Standard constructors object() = default; object(const object& in_config); @@ -166,6 +178,10 @@ public: return std::holds_alternative(m_value); } + type type() const { + return static_cast(m_value.index()); + } + /** arithmetic types (numbers, bool) */ template + */ + +#pragma once + +#include "parser.hpp" + +namespace jessilib { + +class json_parser : public parser { +public: + /** deserialize/serialize overrides */ + virtual object deserialize(std::string_view in_data) override; + virtual std::string serialize(const object& in_object) override; +}; + +} // namespace jessilib \ No newline at end of file diff --git a/src/include/type_traits.hpp b/src/include/type_traits.hpp new file mode 100644 index 0000000..4239e08 --- /dev/null +++ b/src/include/type_traits.hpp @@ -0,0 +1,185 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace jessilib { + +/** is_vector */ + +template +struct is_vector : std::false_type {}; + +template +struct is_vector> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_list */ + +template +struct is_list : std::false_type {}; + +template +struct is_list> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_forward_list */ + +template +struct is_forward_list : std::false_type {}; + +template +struct is_forward_list> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_set */ + +template +struct is_set : std::false_type {}; + +template +struct is_set> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_multiset */ + +template +struct is_multiset : std::false_type {}; + +template +struct is_multiset> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_unordered_set */ + +template +struct is_unordered_set : std::false_type {}; + +template +struct is_unordered_set> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_unordered_set */ + +template +struct is_unordered_multiset : std::false_type {}; + +template +struct is_unordered_multiset> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +/** is_sequence_container */ + +template +struct is_sequence_container : std::false_type {}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_sequence_container> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +} // namespace jessilib diff --git a/src/include/unicode.hpp b/src/include/unicode.hpp new file mode 100644 index 0000000..7c80bc3 --- /dev/null +++ b/src/include/unicode.hpp @@ -0,0 +1,79 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#pragma once + +#include +#include +#include + +namespace jessilib { + +/** encode_codepoint */ + +size_t encode_codepoint(std::string& out_string, char32_t in_codepoint); +size_t encode_codepoint(std::u16string& out_string, char32_t in_codepoint); +size_t encode_codepoint(std::u32string& out_string, char32_t in_codepoint); +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint); +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint); +size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint); +std::string encode_codepoint_u8(char32_t in_codepoint); +std::u16string encode_codepoint_u16(char32_t in_codepoint); +std::u32string encode_codepoint_u32(char32_t in_codepoint); + +/** decode_codepoint */ + +struct get_endpoint_result { + char32_t codepoint{}; + size_t units{}; +}; + +get_endpoint_result decode_codepoint(const std::string_view& in_string); // UTF-8 +get_endpoint_result decode_codepoint(const std::u16string_view& in_string); // UTF-16 +get_endpoint_result decode_codepoint(const std::u32string_view& in_string); // UTF-32 + +/** advance_codepoint */ + +template +char32_t advance_codepoint(std::basic_string_view& in_string) { + auto result = decode_codepoint(in_string); + in_string.remove_prefix(result.units); + return result.codepoint; +} + +/** next_codepoint */ + +template +std::string_view next_codepoint(const std::basic_string_view& in_string) { + return in_string.substr(decode_codepoint(in_string).units); +} + +/** is_valid_codepoint */ + +template +bool is_valid_codepoint(const std::basic_string_view& in_string) { + return decode_codepoint(in_string).units != 0; +} + +/** utf-16 surrogate helpers */ + +bool is_high_surrogate(char32_t in_codepoint); +bool is_low_surrogate(char32_t in_codepoint); +get_endpoint_result decode_surrogate_pair(char16_t in_high_surrogate, char16_t in_low_surrogate); + +} // namespace jessilib diff --git a/src/include/util.hpp b/src/include/util.hpp index 294e367..654146b 100644 --- a/src/include/util.hpp +++ b/src/include/util.hpp @@ -19,14 +19,7 @@ #pragma once #include -#include -#include -#include -#include -#include -#include -#include -#include +#include /** Macros */ @@ -35,157 +28,65 @@ namespace jessilib { -/** is_vector */ - -template -struct is_vector : std::false_type {}; - -template -struct is_vector> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_list */ - -template -struct is_list : std::false_type {}; - -template -struct is_list> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_forward_list */ - -template -struct is_forward_list : std::false_type {}; - template -struct is_forward_list> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_set */ - -template -struct is_set : std::false_type {}; - -template -struct is_set> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_multiset */ - -template -struct is_multiset : std::false_type {}; - -template -struct is_multiset> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_unordered_set */ - -template -struct is_unordered_set : std::false_type {}; - -template -struct is_unordered_set> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_unordered_set */ - -template -struct is_unordered_multiset : std::false_type {}; - -template -struct is_unordered_multiset> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; - -/** is_sequence_container */ - -template -struct is_sequence_container : std::false_type {}; +const char* parse_decimal_part(const char* in_str, const char* in_str_end, T& out_value) { + int denominator = 10; + while (in_str != in_str_end) { + switch (*in_str) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (out_value >= 0.0) { + out_value += (static_cast(*in_str - '0') / denominator); + } + else { + out_value -= (static_cast(*in_str - '0') / denominator); + } + denominator *= 10; + break; + + default: + return in_str; + } -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; + ++in_str; + } -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; + return in_str; +} template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; +std::from_chars_result from_chars(const char* in_str, const char* in_str_end, T& out_value) { + // TODO: use std::from_chars when available for floating point types + if constexpr (std::is_floating_point::value) { + // Read integer portion + long long integer_value{}; + std::from_chars_result result{ std::from_chars(in_str, in_str_end, integer_value) }; + out_value = integer_value; -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; + // Read decimal portion (if one exists) + if (result.ptr != in_str_end && *result.ptr == '.') { + ++result.ptr; + result.ptr = parse_decimal_part(result.ptr, in_str_end, out_value); + result.ec = std::errc{}; + } -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; + // TODO: Read exponents -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; + return result; + } + else { + return std::from_chars(in_str, in_str_end, out_value); + } +} -template -struct is_sequence_container> { - using type = T; - static constexpr bool value{ true }; - constexpr operator bool() const noexcept { return true; } - constexpr bool operator()() const noexcept { return true; } -}; /** Implementation details */ diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 36a6f40..a5c8cb1 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1,16 +1,19 @@ cmake_minimum_required(VERSION 3.8) # Setup source files -include_directories(../include) set(SOURCE_FILES - timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp) + timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp) # Setup gtest add_subdirectory(googletest/googletest) #include_directories(googletest/googletest) #include_directories(googletest/googlemock) +# Setup executable build target add_executable(jessilib_tests ${SOURCE_FILES}) +# Setup include directories +target_include_directories(jessilib_tests PRIVATE .) + # Link with gtest target_link_libraries(jessilib_tests gtest gtest_main jessilib) diff --git a/src/test/parsers/json.cpp b/src/test/parsers/json.cpp new file mode 100644 index 0000000..9cd4cfd --- /dev/null +++ b/src/test/parsers/json.cpp @@ -0,0 +1,247 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#include "test.hpp" +#include "parsers/json.hpp" + +using namespace jessilib; +using namespace std::literals; + +TEST(JsonParser, serialize_null) { + json_parser parser; + + EXPECT_EQ(parser.serialize({}), "null"); +} + +TEST(JsonParser, serialize_boolean) { + json_parser parser; + + EXPECT_EQ(parser.serialize(true), "true"); + EXPECT_EQ(parser.serialize(false), "false"); +} + +TEST(JsonParser, serialize_integer) { + json_parser parser; + + EXPECT_EQ(parser.serialize(1234), "1234"); +} + +TEST(JsonParser, serialize_decimal) { + json_parser parser; + EXPECT_DOUBLE_EQ(std::atof(parser.serialize(12.34).c_str()), 12.34); + EXPECT_DOUBLE_EQ(std::atof(parser.serialize(1234.0).c_str()), 1234.0); +} + +TEST(JsonParser, serialize_string) { + json_parser parser; + + EXPECT_EQ(parser.serialize("text"), R"json("text")json"); + EXPECT_EQ(parser.serialize("\"text\""), R"json("\"text\"")json"); + EXPECT_EQ(parser.serialize("\"te\x10xt\""), R"json("\"te\u0010xt\"")json"); +} + +TEST(JsonParser, serialize_array) { + json_parser parser; + std::vector array { + true, + 1234, + "text", + object{} + }; + + EXPECT_EQ(parser.serialize(array), + R"json([true,1234,"text",null])json"); +} + +TEST(JsonParser, serialize_map) { + json_parser parser; + object obj; + + obj["some_bool"] = true; + obj["some_int"] = 1234; + obj["some_string"] = "text"; + obj["some_null"]; + + EXPECT_EQ(parser.serialize(obj), + R"json({"some_bool":true,"some_int":1234,"some_null":null,"some_string":"text"})json"); +} + +TEST(JsonParser, deserialize_null) { + json_parser parser; + + EXPECT_EQ(parser.deserialize("null"sv), object{}); +} + +TEST(JsonParser, deserialize_boolean) { + json_parser parser; + + EXPECT_EQ(parser.deserialize("true"sv), true); + EXPECT_EQ(parser.deserialize("false"sv), false); +} + +TEST(JsonParser, deserialize_integer) { + json_parser parser; + + EXPECT_EQ(parser.deserialize("1234"sv), 1234); + EXPECT_EQ(parser.deserialize("-1234"sv), -1234); +} + +TEST(JsonParser, deserialize_decimal) { + json_parser parser; + + EXPECT_DOUBLE_EQ(parser.deserialize("12.34"sv).get(), 12.34); + EXPECT_DOUBLE_EQ(parser.deserialize("1234."sv).get(), 1234.0); + EXPECT_DOUBLE_EQ(parser.deserialize("0.1234"sv).get(), 0.1234); + EXPECT_THROW(parser.deserialize(".1234"sv), std::invalid_argument); + EXPECT_DOUBLE_EQ(parser.deserialize("-12.34"sv).get(), -12.34); +} + +TEST(JsonParser, deserialize_string) { + json_parser parser; + + EXPECT_EQ(parser.deserialize(R"json("text")json"sv), "text"); +} + +TEST(JsonParser, deserialize_array) { + json_parser parser; + + constexpr const char* json_data = R"json([ + true, + false, + 1234, + 12.34, + 0.1234, + "text" + ])json"; + + auto array = parser.deserialize(json_data).get>(); + ASSERT_EQ(array.size(), 6U); + EXPECT_EQ(array[0], true); + EXPECT_EQ(array[1], false); + EXPECT_EQ(array[2], 1234); + EXPECT_DOUBLE_EQ(array[3].get(), 12.34); + EXPECT_DOUBLE_EQ(array[4].get(), 0.1234); + EXPECT_EQ(array[5], "text"); +} + +TEST(JsonParser, deserialize_array_nested) { + json_parser parser; + + constexpr const char* json_data = R"json([ + true, + false, + 1234 + , [ + 1,2,3, + null, + "text", + [5,6,7], + ] , [ ] , [ " text " ], + 12.34, + 0.1234, + "text" + ])json"; + + auto array = parser.deserialize(json_data).get>(); + ASSERT_EQ(array.size(), 9U); + EXPECT_EQ(array[0], true); + EXPECT_EQ(array[1], false); + EXPECT_EQ(array[2], 1234); + EXPECT_FALSE(array[3].null()); + EXPECT_EQ(array[3].size(), 6U); + EXPECT_FALSE(array[4].null()); + EXPECT_EQ(array[4].size(), 0U); + EXPECT_FALSE(array[5].null()); + ASSERT_EQ(array[5].size(), 1U); + EXPECT_EQ(array[5], std::vector{ " text " }); + EXPECT_EQ(array[5], std::vector{ " text " }); + EXPECT_DOUBLE_EQ(array[6].get(), 12.34); + EXPECT_DOUBLE_EQ(array[7].get(), 0.1234); + EXPECT_EQ(array[8], "text"); + + auto nested_array = array[3].get>(); + ASSERT_EQ(nested_array.size(), 6U); + EXPECT_EQ(nested_array[0], 1); + EXPECT_EQ(nested_array[1], 2); + EXPECT_EQ(nested_array[2], 3); + EXPECT_TRUE(nested_array[3].null()); + EXPECT_EQ(nested_array[4], "text"); + std::vector expected{ 5, 6, 7 }; + EXPECT_EQ(nested_array[5], expected); +} + +TEST(JsonParser, deserialize_map) { + json_parser parser; + + constexpr const char* json_data = R"json({ + "some_true":true, + "some_false" : false, + "some_int": 1234, + "some_double" : 12.34, + "some_other_double" :0.1234, + "some_text" : "text" + })json"; + + object obj = parser.deserialize(json_data); + EXPECT_EQ(obj.size(), 6U); + EXPECT_EQ(obj["some_true"], true); + EXPECT_EQ(obj["some_false"], false); + EXPECT_EQ(obj["some_int"], 1234); + EXPECT_DOUBLE_EQ(obj["some_double"].get(), 12.34); + EXPECT_DOUBLE_EQ(obj["some_other_double"].get(), 0.1234); + EXPECT_EQ(obj["some_text"], "text"); +} + +TEST(JsonParser, deserialize_map_nested) { + json_parser parser; + + constexpr const char* json_data = R"json({ + "some_text" : "text", + "some_object" : { + "some_null_object": {} + + } , + "some_other_object" : { + "beans": { + "fruit": true + , "magical": true , + "makes toot": true + }}, + "some other text":" asdf " + })json"; + + object obj = parser.deserialize(json_data); + EXPECT_EQ(obj.size(), 4U); + EXPECT_EQ(obj["some_text"], "text"); + EXPECT_EQ(obj["some other text"], " asdf "); + + // some_object + EXPECT_FALSE(obj["some_object"].null()); + EXPECT_EQ(obj["some_object"].size(), 1U); + EXPECT_FALSE(obj["some_object"]["some_null_object"].null()); + EXPECT_EQ(obj["some_object"]["some_null_object"].size(), 0U); + + // some_other_object + EXPECT_FALSE(obj["some_other_object"].null()); + EXPECT_EQ(obj["some_other_object"].size(), 1U); + EXPECT_FALSE(obj["some_other_object"]["beans"].null()); + EXPECT_EQ(obj["some_other_object"]["beans"].size(), 3U); + EXPECT_EQ(obj["some_other_object"]["beans"]["fruit"], true); + EXPECT_EQ(obj["some_other_object"]["beans"]["magical"], true); + EXPECT_EQ(obj["some_other_object"]["beans"]["makes toot"], true); +} diff --git a/src/test/unicode.cpp b/src/test/unicode.cpp new file mode 100644 index 0000000..d491c69 --- /dev/null +++ b/src/test/unicode.cpp @@ -0,0 +1,120 @@ +/** + * Copyright (C) 2018 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#include "unicode.hpp" +#include "test.hpp" + +using namespace jessilib; +using namespace std::literals; + +/** encode_codepoint */ + +TEST(UTF8Test, encode_codepoint) { + EXPECT_EQ(encode_codepoint_u8(U'\0'), u8"\0"sv); + EXPECT_EQ(encode_codepoint_u8(U'A'), u8"A"sv); + EXPECT_EQ(encode_codepoint_u8(U'z'), u8"z"sv); + EXPECT_EQ(encode_codepoint_u8(U'\u007F'), u8"\u007F"sv); + EXPECT_EQ(encode_codepoint_u8(U'\u0080'), u8"\u0080"sv); + EXPECT_EQ(encode_codepoint_u8(U'\u07FF'), u8"\u07FF"sv); + EXPECT_EQ(encode_codepoint_u8(U'\u0800'), u8"\u0800"sv); + EXPECT_EQ(encode_codepoint_u8(U'\uFFFF'), u8"\uFFFF"sv); + EXPECT_EQ(encode_codepoint_u8(U'\U00010000'), u8"\U00010000"sv); + EXPECT_EQ(encode_codepoint_u8(U'\U0010FFFF'), u8"\U0010FFFF"sv); + EXPECT_EQ(encode_codepoint_u8(U'\U0001F604'), u8"\U0001F604"sv); +} + +TEST(UTF16Test, encode_codepoint) { + EXPECT_EQ(encode_codepoint_u16(U'\0'), u"\0"sv); + EXPECT_EQ(encode_codepoint_u16(U'A'), u"A"sv); + EXPECT_EQ(encode_codepoint_u16(U'z'), u"z"sv); + EXPECT_EQ(encode_codepoint_u16(U'\u007F'), u"\u007F"sv); + EXPECT_EQ(encode_codepoint_u16(U'\u0080'), u"\u0080"sv); + EXPECT_EQ(encode_codepoint_u16(U'\u07FF'), u"\u07FF"sv); + EXPECT_EQ(encode_codepoint_u16(U'\u0800'), u"\u0800"sv); + EXPECT_EQ(encode_codepoint_u16(U'\uFFFF'), u"\uFFFF"sv); + EXPECT_EQ(encode_codepoint_u16(U'\U00010000'), u"\U00010000"sv); + EXPECT_EQ(encode_codepoint_u16(U'\U0010FFFF'), u"\U0010FFFF"sv); + EXPECT_EQ(encode_codepoint_u16(U'\U0001F604'), u"\U0001F604"sv); +} + +TEST(UTF32Test, encode_codepoint) { + EXPECT_EQ(encode_codepoint_u32(U'\0'), U"\0"sv); + EXPECT_EQ(encode_codepoint_u32(U'A'), U"A"sv); + EXPECT_EQ(encode_codepoint_u32(U'z'), U"z"sv); + EXPECT_EQ(encode_codepoint_u32(U'\u007F'), U"\u007F"sv); + EXPECT_EQ(encode_codepoint_u32(U'\u0080'), U"\u0080"sv); + EXPECT_EQ(encode_codepoint_u32(U'\u07FF'), U"\u07FF"sv); + EXPECT_EQ(encode_codepoint_u32(U'\u0800'), U"\u0800"sv); + EXPECT_EQ(encode_codepoint_u32(U'\uFFFF'), U"\uFFFF"sv); + EXPECT_EQ(encode_codepoint_u32(U'\U00010000'), U"\U00010000"sv); + EXPECT_EQ(encode_codepoint_u32(U'\U0010FFFF'), U"\U0010FFFF"sv); + EXPECT_EQ(encode_codepoint_u32(U'\U0001F604'), U"\U0001F604"sv); +} + +/** decode_codepoint */ + +#define DECODE_CODEPOINT_TEST(IN_STR, IN_CODEPOINT, IN_UNITS) \ + EXPECT_EQ(decode_codepoint( IN_STR ).codepoint, IN_CODEPOINT); \ + EXPECT_EQ(decode_codepoint( IN_STR ).units, IN_UNITS) + +TEST(UTF8Test, decode_codepoint) { + DECODE_CODEPOINT_TEST(u8""sv, U'\0', 0U); + DECODE_CODEPOINT_TEST(u8"\0"sv, U'\0', 1U); + DECODE_CODEPOINT_TEST(u8"A"sv, U'A', 1U); + DECODE_CODEPOINT_TEST(u8"z"sv, U'z', 1U); + DECODE_CODEPOINT_TEST(u8"\u007F"sv, U'\u007F', 1U); + DECODE_CODEPOINT_TEST(u8"\u0080"sv, U'\u0080', 2U); + DECODE_CODEPOINT_TEST(u8"\u07FF"sv, U'\u07FF', 2U); + DECODE_CODEPOINT_TEST(u8"\u0800"sv, U'\u0800', 3U); + DECODE_CODEPOINT_TEST(u8"\uFFFF"sv, U'\uFFFF', 3U); + DECODE_CODEPOINT_TEST(u8"\U00010000"sv, U'\U00010000', 4U); + DECODE_CODEPOINT_TEST(u8"\U0010FFFF"sv, U'\U0010FFFF', 4U); + DECODE_CODEPOINT_TEST(u8"\U0001F604"sv, U'\U0001F604', 4U); +} + +TEST(UTF16Test, decode_codepoint) { + DECODE_CODEPOINT_TEST(u""sv, U'\0', 0U); + DECODE_CODEPOINT_TEST(u"\0"sv, U'\0', 1U); + DECODE_CODEPOINT_TEST(u"A"sv, U'A', 1U); + DECODE_CODEPOINT_TEST(u"z"sv, U'z', 1U); + DECODE_CODEPOINT_TEST(u"\u007F"sv, U'\u007F', 1U); + DECODE_CODEPOINT_TEST(u"\u0080"sv, U'\u0080', 1U); + DECODE_CODEPOINT_TEST(u"\u07FF"sv, U'\u07FF', 1U); + DECODE_CODEPOINT_TEST(u"\u0800"sv, U'\u0800', 1U); + DECODE_CODEPOINT_TEST(u"\uD7FF"sv, U'\uD7FF', 1U); + DECODE_CODEPOINT_TEST(u"\uE000"sv, U'\uE000', 1U); + DECODE_CODEPOINT_TEST(u"\uFFFF"sv, U'\uFFFF', 1U); + DECODE_CODEPOINT_TEST(u"\U00010000"sv, U'\U00010000', 2U); + DECODE_CODEPOINT_TEST(u"\U0010FFFF"sv, U'\U0010FFFF', 2U); + DECODE_CODEPOINT_TEST(u"\U0001F604"sv, U'\U0001F604', 2U); +} + +TEST(UTF32Test, decode_codepoint) { + DECODE_CODEPOINT_TEST(U""sv, U'\0', 0U); + DECODE_CODEPOINT_TEST(U"\0"sv, U'\0', 1U); + DECODE_CODEPOINT_TEST(U"A"sv, U'A', 1U); + DECODE_CODEPOINT_TEST(U"z"sv, U'z', 1U); + DECODE_CODEPOINT_TEST(U"\u007F"sv, U'\u007F', 1U); + DECODE_CODEPOINT_TEST(U"\u0080"sv, U'\u0080', 1U); + DECODE_CODEPOINT_TEST(U"\u07FF"sv, U'\u07FF', 1U); + DECODE_CODEPOINT_TEST(U"\u0800"sv, U'\u0800', 1U); + DECODE_CODEPOINT_TEST(U"\uFFFF"sv, U'\uFFFF', 1U); + DECODE_CODEPOINT_TEST(U"\U00010000"sv, U'\U00010000', 1U); + DECODE_CODEPOINT_TEST(U"\U0010FFFF"sv, U'\U0010FFFF', 1U); + DECODE_CODEPOINT_TEST(U"\U0001F604"sv, U'\U0001F604', 1U); +} diff --git a/src/test/util.cpp b/src/test/util.cpp index 3933c2b..f22e7ee 100644 --- a/src/test/util.cpp +++ b/src/test/util.cpp @@ -27,6 +27,3 @@ TEST(UtilTest, filename) { constexpr const char* filename = JESSILIB_FILENAME; EXPECT_STREQ(filename, "util.cpp"); } - -// Non-virtual using variant: 48 -// \ No newline at end of file