From d7e4f337c786184a68b095449176261ce1700295 Mon Sep 17 00:00:00 2001 From: Jessica James Date: Wed, 1 Dec 2021 20:34:06 -0600 Subject: [PATCH] Add 'apply_cpp_escape_sequences', general cleanup --- src/common/unicode.cpp | 50 ++++ src/include/jessilib/unicode.hpp | 113 ++++++- src/include/jessilib/unicode_sequence.hpp | 343 ++++++++++++++++++++++ src/test/CMakeLists.txt | 2 +- src/test/unicode.cpp | 332 ++++++++++----------- src/test/unicode_sequence.cpp | 182 ++++++++++++ 6 files changed, 843 insertions(+), 179 deletions(-) create mode 100644 src/include/jessilib/unicode_sequence.hpp create mode 100644 src/test/unicode_sequence.cpp diff --git a/src/common/unicode.cpp b/src/common/unicode.cpp index 3ba1790..5e28030 100644 --- a/src/common/unicode.cpp +++ b/src/common/unicode.cpp @@ -32,6 +32,12 @@ void append_helper(std::basic_ostream& out_string, T in_value) { out_string << in_value; } +template +void append_helper(T*& out_string, T in_value) { + *out_string = in_value; + ++out_string; +} + template size_t encode_codepoint_utf8(T& out_destination, char32_t in_codepoint) { if (in_codepoint > 0x10FFFF) { @@ -96,6 +102,8 @@ size_t encode_codepoint_utf32(T& out_destination, char32_t in_codepoint) { return 1; } +/** Strings */ + size_t encode_codepoint(std::string& out_string, char32_t in_codepoint) { return encode_codepoint_utf8(out_string, in_codepoint); } @@ -112,6 +120,8 @@ size_t encode_codepoint(std::u32string& out_string, char32_t in_codepoint) { return encode_codepoint_utf32(out_string, in_codepoint); } +/** Streams */ + size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint) { return encode_codepoint_utf8, char>(out_stream, in_codepoint); } @@ -128,6 +138,26 @@ size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_co return encode_codepoint_utf32(out_stream, in_codepoint); } +/** Pointers */ + +size_t encode_codepoint(char* out_buffer, char32_t in_codepoint) { + return encode_codepoint_utf8(out_buffer, in_codepoint); +} + 
+size_t encode_codepoint(char8_t* out_buffer, char32_t in_codepoint) { + return encode_codepoint_utf8(out_buffer, in_codepoint); +} + +size_t encode_codepoint(char16_t* out_buffer, char32_t in_codepoint) { + return encode_codepoint_utf16(out_buffer, in_codepoint); +} + +size_t encode_codepoint(char32_t* out_buffer, char32_t in_codepoint) { + return encode_codepoint_utf32(out_buffer, in_codepoint); +} + +/** Allocating */ + std::u8string encode_codepoint_u8(char32_t in_codepoint) { std::u8string result; encode_codepoint(result, in_codepoint); @@ -519,4 +549,24 @@ char32_t fold(char32_t in_codepoint) { return match->fold(in_codepoint); } +const unsigned char base_table[]{ + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 127, 127, 127, 127, 127, 127, + 127, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 127, 127, 127, 127, 127, + 127, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 127, 127, 127, 127, 127, +}; + +int as_base(char32_t in_character, unsigned int base) { + if (in_character >= sizeof(base_table)) { + return -1; + } + + unsigned int result = base_table[in_character]; + if (result >= base) { + return -1; + } + + return base_table[in_character]; +} + } // namespace jessilib diff --git a/src/include/jessilib/unicode.hpp b/src/include/jessilib/unicode.hpp index 8f8cc33..34d45bb 100644 --- a/src/include/jessilib/unicode.hpp +++ b/src/include/jessilib/unicode.hpp @@ -1,5 +1,5 @@ /** - * Copyright (C) 2018 Jessica James. + * Copyright (C) 2018-2021 Jessica James. 
* * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -50,6 +50,23 @@ size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_cod size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint); size_t encode_codepoint(std::basic_ostream& out_stream, char32_t in_codepoint); +/** + * Encodes a codepoint directly to a character buffer + * Note: Do not use this without careful consideration; note the size requirements: + * 1) char8_t may write up to 4 elements + * 2) char16_t may write up to 2 elements + * 3) char32_t may write up to 1 element + * 4) char may write up to 4 elements; provided solely for compatibility/ease of use + * + * @param out_buffer Character buffer to write to + * @param in_codepoint Codepoint to encode + * @return Number of data elements written to out_buffer + */ +size_t encode_codepoint(char* out_buffer, char32_t in_codepoint); +size_t encode_codepoint(char8_t* out_buffer, char32_t in_codepoint); +size_t encode_codepoint(char16_t* out_buffer, char32_t in_codepoint); +size_t encode_codepoint(char32_t* out_buffer, char32_t in_codepoint); + /** * Encodes a codepoint and returns it as a string * @@ -109,9 +126,57 @@ get_endpoint_result decode_surrogate_pair(char16_t in_high_surrogate, char16_t i /** Utilities */ +namespace impl_unicode { + +// Add a narrower version in type_traits.hpp if this is needed elsewhere +template +struct is_string : std::false_type {}; + +template +struct is_string> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_string> { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_string { + using 
type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_string { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +template +struct is_string { + using type = T; + static constexpr bool value{ true }; + constexpr operator bool() const noexcept { return true; } + constexpr bool operator()() const noexcept { return true; } +}; + +} // namespace impl_unicode + template bool is_valid(const InT& in_string) { - using InCharT = typename InT::value_type; + using InCharT = typename impl_unicode::is_string::type; using InViewT = std::basic_string_view; InViewT in_string_view = static_cast(in_string); @@ -137,7 +202,7 @@ bool is_valid(const InT& in_string) { */ template std::basic_string_view string_view_cast(const InT& in_string) { - using InCharT = typename InT::value_type; + using InCharT = typename impl_unicode::is_string::type; size_t in_string_bytes = in_string.size() * sizeof(InCharT); if constexpr (sizeof(OutCharT) > sizeof(InCharT)) { // The output type is larger than the input type; verify no partial codepoints @@ -161,7 +226,8 @@ std::basic_string_view string_view_cast(const InT& in_string) { template std::basic_string string_cast(const InT& in_string) { - using InCharT = typename InT::value_type; + static_assert(impl_unicode::is_string::value == true); + using InCharT = typename impl_unicode::is_string::type; using InViewT = std::basic_string_view; std::basic_string result; @@ -196,11 +262,12 @@ std::basic_string string_cast(const InT& in_string) { return result; } -/** single-unit case folding utilities */ -char32_t fold(char32_t in_codepoint); // Folds codepoint for case insensitive checks (not for human output) +/** single-unit helper utilities */ +char32_t fold(char32_t in_codepoint); // 
Folds codepoint for case-insensitive checks (not for human output) +int as_base(char32_t in_character, unsigned int base); // The value represented by in_character in terms of base if valid, -1 otherwise /** - * Checks if two codepoints are equal to eachother (case insensitive) + * Checks if two codepoints are equal to each other (case insensitive) * * @param lhs First codepoint to compare * @param rhs Second codepoint to compare @@ -593,6 +660,38 @@ size_t findi(std::basic_string_view in_string, std::basic_string_view< ADAPT_BASIC_STRING(findi) +using find_if_predicate_type = bool(*)(char32_t, char*, size_t); +inline void find_if(std::basic_string& in_string, find_if_predicate_type in_predicate) { + using CharT = char; + CharT* ptr = in_string.data(); + std::basic_string_view in_string_view = in_string; + for (auto decode = decode_codepoint(in_string_view); decode.units != 0; decode = decode_codepoint(in_string_view)) { + if (in_predicate(decode.codepoint, ptr, decode.units)) { + // predicate indicates it's found what it's looking for, cool + return; + } + + in_string_view.remove_prefix(decode.units); + ptr += decode.units; + } +} + +using find_if_view_predicate_type = bool(*)(char32_t, const char*, size_t); +inline void find_if(std::basic_string_view& in_string, find_if_view_predicate_type in_predicate) { + using CharT = char; + const CharT* ptr = in_string.data(); + std::basic_string_view in_string_view = in_string; + for (auto decode = decode_codepoint(in_string_view); decode.units != 0; decode = decode_codepoint(in_string_view)) { + if (in_predicate(decode.codepoint, ptr, decode.units)) { + // predicate indicates it's found what it's looking for, cool + return; + } + + in_string_view.remove_prefix(decode.units); + ptr += decode.units; + } +} + /** to_lower / to_upper */ //char32_t to_lower(char32_t in_chr); // TODO: implement //char32_t to_upper(char32_t in_chr); // TODO: implement diff --git a/src/include/jessilib/unicode_sequence.hpp 
b/src/include/jessilib/unicode_sequence.hpp new file mode 100644 index 0000000..935a132 --- /dev/null +++ b/src/include/jessilib/unicode_sequence.hpp @@ -0,0 +1,343 @@ +/** + * Copyright (C) 2021 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +/** + * @file unicode_sequence.hpp + * @author Jessica James + * + * Unicode-aware escape sequence parsing utilities + */ + +#pragma once + +#include +#include "unicode.hpp" + +namespace jessilib { + +// CharT is the codepoint which has just been read, in_write_head is a writeable string buffer, read_view points to remainder +template +using shrink_sequence_tree_action = bool(*)(CharT*& in_write_head, std::basic_string_view& read_view); + +template +using shrink_sequence_tree = std::map>; + +template +using shrink_sequence_tree_member = std::pair>; + +// Only use for ASTs where each character process is guaranteed to write at most 1 character for each character consumed +template +bool apply_shrink_sequence_tree(std::basic_string& inout_string, const SequenceTreeT& in_tree) { + if (inout_string.empty()) { + // Nothing to parse + return true; + } + + std::basic_string_view read_view = inout_string; + CharT* write_head = inout_string.data(); + get_endpoint_result decode; + + while ((decode 
= decode_codepoint(read_view)).units != 0) { + auto parser = in_tree.find(decode.codepoint); + if (parser == in_tree.end()) { + // Just a normal character; write it over + while (decode.units != 0) { + *write_head = read_view.front(); + ++write_head; + --decode.units; + read_view.remove_prefix(1); + } + + continue; + } + + // This is a parsed sequence; pass it to the parser instead + read_view.remove_prefix(decode.units); + if (!(parser->second)(write_head, read_view)) { + // Bad input received; strip off whatever we haven't parsed + size_t length = write_head - inout_string.data(); + inout_string.erase(length); + return false; + } + } + + // We've finished parsing successfully; strip off the extraneous codepoints + size_t length = write_head - inout_string.data(); + inout_string.erase(length); + return true; +} + +// Only for codepoints representable w/ char8_t (i.e: \n) +template +shrink_sequence_tree_member make_simple_sequence_pair() { + return { + InCodepointV, + [](CharT*& in_write_head, std::basic_string_view&) { + *in_write_head = static_cast(OutCodepointV); + ++in_write_head; + return true; + } + }; +} + +// Skips a character (i.e: skipping/ignoring newlines) +template +shrink_sequence_tree_member make_noop_sequence_pair() { + return { + InCodepointV, + [](CharT*&, std::basic_string_view&) { + return true; + } + }; +} + +// Skips a character or two (i.e: skipping/ignoring newlines) +template +shrink_sequence_tree_member make_noop_sequence_pair() { + return { + InCodepointV, + [](CharT*&, std::basic_string_view& read_view) { + // Strip trailing 'InTrailing', if it's present + auto decode = decode_codepoint(read_view); + if (decode.units != 0 + && decode.codepoint == InOptionalTrailing) { + read_view.remove_prefix(decode.units); + } + + return true; + } + }; +} + +template +shrink_sequence_tree_member make_octal_sequence_pair() { + static_assert(MaxDigitsV > 0); // Use noop instead + static_assert((MaxDigitsV == 2 && InCodepointV >= U'0' && InCodepointV <= 
U'7') + || (MaxDigitsV == 3 && InCodepointV >= U'0' && InCodepointV <= U'3')); // Only currently support single-octet octal values + + // Must have at least 1 octal digit (this one), but may not have more than 3 (2 more). + return { + InCodepointV, + [](CharT*& in_write_head, std::basic_string_view& read_view) { + // Read in first octal character from InCodepointV + unsigned int out_value = InCodepointV - U'0'; // Set initial value + if (read_view.empty()) { + *in_write_head = out_value; + ++in_write_head; + return true; + } + + // Read in second octal unit from front; octal characters are always 1 unit + int octal_value = as_base(read_view.front(), 8); + if (octal_value < 0) { + if constexpr (ExactDigitsV) { + // Expected 2-3 digits, received 1 + return false; + } + + // Not an octal character; write & return + *in_write_head = out_value; + ++in_write_head; + return true; + } + + out_value <<= 3; + out_value |= octal_value; + read_view.remove_prefix(1); + + if constexpr (MaxDigitsV == 2) { + // We've read in both digits; go ahead and write & return + *in_write_head = out_value; + ++in_write_head; + return true; + } + + if (read_view.empty()) { + if constexpr (ExactDigitsV) { + // Expected 3 digits, received 2 + return false; + } + + *in_write_head = out_value; + ++in_write_head; + return true; + } + + // Read in third octal unit from front; octal characters are always 1 unit + octal_value = as_base(read_view.front(), 8); + if (octal_value >= 0) { + out_value <<= 3; + out_value |= octal_value; + read_view.remove_prefix(1); + } + else if constexpr (ExactDigitsV) { + // Expected 3 digits, received 2 + return false; + } + // Write & return; a non-octal third unit is left in read_view and the digits read so far are still emitted + *in_write_head = out_value; + ++in_write_head; + return true; + } + }; +} + +template +shrink_sequence_tree_member make_hex_sequence_pair() { + static_assert(MaxDigitsV > 0); + + return { + InCodepointV, + [](CharT*& in_write_head, std::basic_string_view& read_view) { + // Counts leading hex digits and accumulates their value; does not consume from read_view (in_view is a by-value copy) + auto read_hex = [](uint32_t& out_value, 
std::basic_string_view in_view, size_t max_digits) { + size_t result{}; + int hex_value; + out_value = 0; + while (result != max_digits + && !in_view.empty()) { + hex_value = as_base(in_view.front(), 16); // hexadecimal characters are always 1 unit + if (hex_value < 0) { + // Not a hexadecimal character; push what we have and handle this + return result; + } + + out_value <<= 4; + out_value |= hex_value; + + in_view.remove_prefix(1); + ++result; + } + + // Number of elements that are hexadecimal digits + return result; + }; + + // Read in hex value + uint32_t hex_value; + size_t units_read = read_hex(hex_value, read_view, MaxDigitsV); + + // Sanity check digits read + if constexpr(ExactDigitsV) { + if (units_read != MaxDigitsV) { + // We expected exactly MaxDigitsV digits; fail + return false; + } + } + else { + if (units_read == 0) { + // We didn't read any digits; fail + return false; + } + } + + // We read an acceptable number of digits; write the unit and call it a day + read_view.remove_prefix(units_read); + if constexpr (IsUnicode) { + in_write_head += encode_codepoint(in_write_head, hex_value); + } + else { + static_assert(MaxDigitsV <= sizeof(CharT) * 2); + *in_write_head = static_cast(hex_value); + ++in_write_head; + } + + return true; + } + }; +} + +// Calls into another tree with the next character +template& SubTreeR, bool FailNotFound = true> +shrink_sequence_tree_member make_tree_sequence_pair() { + return { InCodepointV, [](CharT*& in_write_head, std::basic_string_view& read_view) { + auto decode = decode_codepoint(read_view); + auto parser = SubTreeR.find(decode.codepoint); + if (parser == SubTreeR.end()) { + if constexpr (FailNotFound) { + // Code not found; fail + return false; + } + + // Just a normal character; write it over + while (decode.units != 0) { + *in_write_head = read_view.front(); + ++in_write_head; + --decode.units; + read_view.remove_prefix(1); + } + + return true; + } + + // This is a parsed sequence; pass it to the parser + 
read_view.remove_prefix(decode.units); + return (parser->second)(in_write_head, read_view); + } }; +} + +// Return true for valid sequences, false otherwise +template +bool apply_cpp_escape_sequences(std::basic_string& inout_string) { + // Handles parsing first character of escape sequence + static const shrink_sequence_tree main_tree{ + /** Newline skippers; not actually a C++ thing, but I want it */ + make_noop_sequence_pair(), + make_noop_sequence_pair(), + + /** Simple escape sequences */ + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + make_simple_sequence_pair(), + + /** Numeric escape sequences */ + // Octal (Single byte value only); should we support octal escapes in sequence? + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + make_octal_sequence_pair(), + + // Hex; should we support hex escapes in sequence? 
(i.e: \x00FF == \x00\xFF, which is only true for char/char8_t atm) + make_hex_sequence_pair(), + + /** Unicode escape sequences */ + make_hex_sequence_pair(), + make_hex_sequence_pair(), + }; + + // Only checks for '\' + static const shrink_sequence_tree root_tree{ + make_tree_sequence_pair() + }; + + return apply_shrink_sequence_tree(inout_string, root_tree); +} + +} // namespace jessilib diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 3c0ccdf..f6175e3 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1,6 +1,6 @@ # Setup source files set(SOURCE_FILES - timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp) + timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp unicode_sequence.cpp) # Setup gtest set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) diff --git a/src/test/unicode.cpp b/src/test/unicode.cpp index aa1796a..859374d 100644 --- a/src/test/unicode.cpp +++ b/src/test/unicode.cpp @@ -135,20 +135,10 @@ public: }; TYPED_TEST_SUITE(UnicodeFullTest, char_type_combos); -template -std::basic_string make_str(const char32_t (&in_str)[InLength]) { - std::basic_string result; - auto in_str_end = std::end(in_str) - 1; // ignore null terminator - for (auto itr = std::begin(in_str); itr != in_str_end; ++itr) { - jessilib::encode_codepoint(result, *itr); - } - return result; -} - /** string_cast */ TYPED_TEST(UnicodeFullTest, string_cast) { - auto abcd_str = make_str(U"ABCD"); + auto abcd_str = jessilib::string_cast(U"ABCD"); std::basic_string_view abcd_string_view = abcd_str; EXPECT_TRUE(equals(abcd_str, @@ -161,7 +151,7 @@ TYPED_TEST(UnicodeFullTest, string_cast) { } TEST(UTF8Test, string_view_cast) { - auto abcd_str = make_str(U"ABCD"); + auto abcd_str = 
jessilib::string_cast(U"ABCD"); auto view = string_view_cast(abcd_str); EXPECT_TRUE(equals(view, abcd_str)); } @@ -170,219 +160,219 @@ TEST(UTF8Test, string_view_cast) { TYPED_TEST(UnicodeFullTest, equals) { // TypeParam::first_type == TypeParam::second_type - EXPECT_TRUE(equals(make_str(U"ABCD"), - make_str(U"ABCD"))); - EXPECT_TRUE(equals(make_str(U"abcd"), - make_str(U"abcd"))); - EXPECT_FALSE(equals(make_str(U"ABCD"), - make_str(U"abcd"))); - EXPECT_FALSE(equals(make_str(U"abcd"), - make_str(U"ABCD"))); - EXPECT_FALSE(equals(make_str(U"ABcd"), - make_str(U"abCD"))); + EXPECT_TRUE(equals(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(equals(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(equals(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(equals(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCD"))); + EXPECT_FALSE(equals(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCD"))); } /** equalsi */ TYPED_TEST(UnicodeFullTest, equalsi) { // TypeParam::first_type == TypeParam::second_type - EXPECT_TRUE(equalsi(make_str(U"ABCD"), - make_str(U"ABCD"))); - EXPECT_TRUE(equalsi(make_str(U"abcd"), - make_str(U"abcd"))); - EXPECT_TRUE(equalsi(make_str(U"ABCD"), - make_str(U"abcd"))); - EXPECT_TRUE(equalsi(make_str(U"abcd"), - make_str(U"ABCD"))); - EXPECT_TRUE(equalsi(make_str(U"ABcd"), - make_str(U"abCD"))); + EXPECT_TRUE(equalsi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(equalsi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(equalsi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(equalsi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(equalsi(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCD"))); } /** starts_with */ TYPED_TEST(UnicodeFullTest, starts_with) { // TypeParam::first_type == 
TypeParam::second_type - EXPECT_TRUE(starts_with(make_str(U"ABCD"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_with(make_str(U"abcd"), - make_str(U"abcd"))); - EXPECT_FALSE(starts_with(make_str(U"ABCD"), - make_str(U"abcd"))); - EXPECT_FALSE(starts_with(make_str(U"abcd"), - make_str(U"ABCD"))); - EXPECT_FALSE(starts_with(make_str(U"ABcd"), - make_str(U"abCD"))); + EXPECT_TRUE(starts_with(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(starts_with(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCD"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCD"))); // TypeParam::first_type starts_with TypeParam::second_type... (always false) - EXPECT_FALSE(starts_with(make_str(U"ABCD"), - make_str(U"ABCDzz"))); - EXPECT_FALSE(starts_with(make_str(U"abcd"), - make_str(U"abcdzz"))); - EXPECT_FALSE(starts_with(make_str(U"ABCD"), - make_str(U"abcdzz"))); - EXPECT_FALSE(starts_with(make_str(U"abcd"), - make_str(U"ABCDzz"))); - EXPECT_FALSE(starts_with(make_str(U"ABcd"), - make_str(U"abCDzz"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCDzz"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcdzz"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcdzz"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCDzz"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCDzz"))); // TypeParam::first_type... 
starts_with TypeParam::second_type (always same results as first) - EXPECT_TRUE(starts_with(make_str(U"ABCDzz"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_with(make_str(U"abcdzz"), - make_str(U"abcd"))); - EXPECT_FALSE(starts_with(make_str(U"ABCDzz"), - make_str(U"abcd"))); - EXPECT_FALSE(starts_with(make_str(U"abcdzz"), - make_str(U"ABCD"))); - EXPECT_FALSE(starts_with(make_str(U"ABcdzz"), - make_str(U"abCD"))); + EXPECT_TRUE(starts_with(jessilib::string_cast(U"ABCDzz"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(starts_with(jessilib::string_cast(U"abcdzz"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABCDzz"), + jessilib::string_cast(U"abcd"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"abcdzz"), + jessilib::string_cast(U"ABCD"))); + EXPECT_FALSE(starts_with(jessilib::string_cast(U"ABcdzz"), + jessilib::string_cast(U"abCD"))); } /** starts_withi */ TYPED_TEST(UnicodeFullTest, starts_withi) { // TypeParam::first_type == TypeParam::second_type - EXPECT_TRUE(starts_withi(make_str(U"ABCD"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_withi(make_str(U"abcd"), - make_str(U"abcd"))); - EXPECT_TRUE(starts_withi(make_str(U"ABCD"), - make_str(U"abcd"))); - EXPECT_TRUE(starts_withi(make_str(U"abcd"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_withi(make_str(U"ABcd"), - make_str(U"abCD"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCD"))); // TypeParam::first_type starts_with TypeParam::second_type... 
(always false) - EXPECT_FALSE(starts_withi(make_str(U"ABCD"), - make_str(U"ABCDzz"))); - EXPECT_FALSE(starts_withi(make_str(U"abcd"), - make_str(U"abcdzz"))); - EXPECT_FALSE(starts_withi(make_str(U"ABCD"), - make_str(U"abcdzz"))); - EXPECT_FALSE(starts_withi(make_str(U"abcd"), - make_str(U"ABCDzz"))); - EXPECT_FALSE(starts_withi(make_str(U"ABcd"), - make_str(U"abCDzz"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"ABCDzz"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"abcdzz"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"ABCD"), + jessilib::string_cast(U"abcdzz"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"abcd"), + jessilib::string_cast(U"ABCDzz"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"ABcd"), + jessilib::string_cast(U"abCDzz"))); // TypeParam::first_type... starts_with TypeParam::second_type (always same results as first) - EXPECT_TRUE(starts_withi(make_str(U"ABCDzz"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_withi(make_str(U"abcdzz"), - make_str(U"abcd"))); - EXPECT_TRUE(starts_withi(make_str(U"ABCDzz"), - make_str(U"abcd"))); - EXPECT_TRUE(starts_withi(make_str(U"abcdzz"), - make_str(U"ABCD"))); - EXPECT_TRUE(starts_withi(make_str(U"ABcdzz"), - make_str(U"abCD"))); - - EXPECT_TRUE(starts_withi(make_str(U"Les Bean del Dallas"), - make_str(U"les"))); - EXPECT_TRUE(starts_withi(make_str(U"Les Bean del Dallas"), - make_str(U"les Bean"))); - EXPECT_FALSE(starts_withi(make_str(U"Les Bean del Dallas"), - make_str(U"del"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABCDzz"), + jessilib::string_cast(U"ABCD"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"abcdzz"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABCDzz"), + jessilib::string_cast(U"abcd"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"abcdzz"), + jessilib::string_cast(U"ABCD"))); + 
EXPECT_TRUE(starts_withi(jessilib::string_cast(U"ABcdzz"), + jessilib::string_cast(U"abCD"))); + + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"Les Bean del Dallas"), + jessilib::string_cast(U"les"))); + EXPECT_TRUE(starts_withi(jessilib::string_cast(U"Les Bean del Dallas"), + jessilib::string_cast(U"les Bean"))); + EXPECT_FALSE(starts_withi(jessilib::string_cast(U"Les Bean del Dallas"), + jessilib::string_cast(U"del"))); } TYPED_TEST(UnicodeFullTest, find) { - auto abcd_str = make_str(U"ABCD"); + auto abcd_str = jessilib::string_cast(U"ABCD"); // Empty substring - EXPECT_EQ(find(abcd_str, make_str(U"")), 0); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"")), 0); // Single-characters - EXPECT_EQ(find(abcd_str, make_str(U"A")), 0); - EXPECT_EQ(find(abcd_str, make_str(U"B")), 1); - EXPECT_EQ(find(abcd_str, make_str(U"C")), 2); - EXPECT_EQ(find(abcd_str, make_str(U"D")), 3); - EXPECT_EQ(find(abcd_str, make_str(U"E")), decltype(abcd_str)::npos); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"A")), 0); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"B")), 1); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"C")), 2); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"D")), 3); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"E")), decltype(abcd_str)::npos); // Two characters - EXPECT_EQ(find(abcd_str, make_str(U"AB")), 0); - EXPECT_EQ(find(abcd_str, make_str(U"BC")), 1); - EXPECT_EQ(find(abcd_str, make_str(U"CD")), 2); - EXPECT_EQ(find(abcd_str, make_str(U"DA")), decltype(abcd_str)::npos); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"AB")), 0); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"BC")), 1); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"CD")), 2); + EXPECT_EQ(find(abcd_str, jessilib::string_cast(U"DA")), decltype(abcd_str)::npos); - auto double_abcd_str = make_str(U"AABBCCDD"); + auto double_abcd_str = jessilib::string_cast(U"AABBCCDD"); // Single-characters - EXPECT_EQ(find(double_abcd_str, make_str(U"A")), 0); - 
EXPECT_EQ(find(double_abcd_str, make_str(U"B")), 2); - EXPECT_EQ(find(double_abcd_str, make_str(U"C")), 4); - EXPECT_EQ(find(double_abcd_str, make_str(U"D")), 6); - EXPECT_EQ(find(double_abcd_str, make_str(U"E")), decltype(double_abcd_str)::npos); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"A")), 0); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"B")), 2); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"C")), 4); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"D")), 6); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"E")), decltype(double_abcd_str)::npos); // Two characters - EXPECT_EQ(find(double_abcd_str, make_str(U"AA")), 0); - EXPECT_EQ(find(double_abcd_str, make_str(U"AB")), 1); - EXPECT_EQ(find(double_abcd_str, make_str(U"BB")), 2); - EXPECT_EQ(find(double_abcd_str, make_str(U"BC")), 3); - EXPECT_EQ(find(double_abcd_str, make_str(U"CC")), 4); - EXPECT_EQ(find(double_abcd_str, make_str(U"CD")), 5); - EXPECT_EQ(find(double_abcd_str, make_str(U"DD")), 6); - EXPECT_EQ(find(double_abcd_str, make_str(U"DA")), decltype(double_abcd_str)::npos); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"AA")), 0); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"AB")), 1); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"BB")), 2); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"BC")), 3); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"CC")), 4); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"CD")), 5); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"DD")), 6); + EXPECT_EQ(find(double_abcd_str, jessilib::string_cast(U"DA")), decltype(double_abcd_str)::npos); } TYPED_TEST(UnicodeFullTest, findi) { - auto abcd_str = make_str(U"ABCD"); + auto abcd_str = jessilib::string_cast(U"ABCD"); // Empty substring - EXPECT_EQ(findi(abcd_str, make_str(U"")), 0); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"")), 0); // Single-characters - EXPECT_EQ(findi(abcd_str, 
make_str(U"A")), 0); - EXPECT_EQ(findi(abcd_str, make_str(U"B")), 1); - EXPECT_EQ(findi(abcd_str, make_str(U"C")), 2); - EXPECT_EQ(findi(abcd_str, make_str(U"D")), 3); - EXPECT_EQ(findi(abcd_str, make_str(U"E")), decltype(abcd_str)::npos); - EXPECT_EQ(findi(abcd_str, make_str(U"a")), 0); - EXPECT_EQ(findi(abcd_str, make_str(U"b")), 1); - EXPECT_EQ(findi(abcd_str, make_str(U"c")), 2); - EXPECT_EQ(findi(abcd_str, make_str(U"d")), 3); - EXPECT_EQ(findi(abcd_str, make_str(U"e")), decltype(abcd_str)::npos); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"A")), 0); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"B")), 1); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"C")), 2); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"D")), 3); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"E")), decltype(abcd_str)::npos); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"a")), 0); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"b")), 1); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"c")), 2); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"d")), 3); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"e")), decltype(abcd_str)::npos); // Two characters - EXPECT_EQ(findi(abcd_str, make_str(U"AB")), 0); - EXPECT_EQ(findi(abcd_str, make_str(U"BC")), 1); - EXPECT_EQ(findi(abcd_str, make_str(U"CD")), 2); - EXPECT_EQ(findi(abcd_str, make_str(U"DA")), decltype(abcd_str)::npos); - EXPECT_EQ(findi(abcd_str, make_str(U"ab")), 0); - EXPECT_EQ(findi(abcd_str, make_str(U"bc")), 1); - EXPECT_EQ(findi(abcd_str, make_str(U"cd")), 2); - EXPECT_EQ(findi(abcd_str, make_str(U"da")), decltype(abcd_str)::npos); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"AB")), 0); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"BC")), 1); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"CD")), 2); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"DA")), decltype(abcd_str)::npos); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"ab")), 0); + 
EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"bc")), 1); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"cd")), 2); + EXPECT_EQ(findi(abcd_str, jessilib::string_cast(U"da")), decltype(abcd_str)::npos); - auto double_abcd_str = make_str(U"AABBCCDD"); + auto double_abcd_str = jessilib::string_cast(U"AABBCCDD"); // Single-characters - EXPECT_EQ(findi(double_abcd_str, make_str(U"A")), 0); - EXPECT_EQ(findi(double_abcd_str, make_str(U"B")), 2); - EXPECT_EQ(findi(double_abcd_str, make_str(U"C")), 4); - EXPECT_EQ(findi(double_abcd_str, make_str(U"D")), 6); - EXPECT_EQ(findi(double_abcd_str, make_str(U"E")), decltype(double_abcd_str)::npos); - EXPECT_EQ(findi(double_abcd_str, make_str(U"a")), 0); - EXPECT_EQ(findi(double_abcd_str, make_str(U"b")), 2); - EXPECT_EQ(findi(double_abcd_str, make_str(U"c")), 4); - EXPECT_EQ(findi(double_abcd_str, make_str(U"d")), 6); - EXPECT_EQ(findi(double_abcd_str, make_str(U"e")), decltype(double_abcd_str)::npos); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"A")), 0); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"B")), 2); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"C")), 4); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"D")), 6); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"E")), decltype(double_abcd_str)::npos); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"a")), 0); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"b")), 2); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"c")), 4); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"d")), 6); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"e")), decltype(double_abcd_str)::npos); // Two characters - EXPECT_EQ(findi(double_abcd_str, make_str(U"AA")), 0); - EXPECT_EQ(findi(double_abcd_str, make_str(U"AB")), 1); - EXPECT_EQ(findi(double_abcd_str, make_str(U"BB")), 2); - EXPECT_EQ(findi(double_abcd_str, make_str(U"BC")), 3); - EXPECT_EQ(findi(double_abcd_str, 
make_str(U"CC")), 4); - EXPECT_EQ(findi(double_abcd_str, make_str(U"CD")), 5); - EXPECT_EQ(findi(double_abcd_str, make_str(U"DD")), 6); - EXPECT_EQ(findi(double_abcd_str, make_str(U"DA")), decltype(double_abcd_str)::npos); - EXPECT_EQ(findi(double_abcd_str, make_str(U"aa")), 0); - EXPECT_EQ(findi(double_abcd_str, make_str(U"ab")), 1); - EXPECT_EQ(findi(double_abcd_str, make_str(U"bb")), 2); - EXPECT_EQ(findi(double_abcd_str, make_str(U"bc")), 3); - EXPECT_EQ(findi(double_abcd_str, make_str(U"cc")), 4); - EXPECT_EQ(findi(double_abcd_str, make_str(U"cd")), 5); - EXPECT_EQ(findi(double_abcd_str, make_str(U"dd")), 6); - EXPECT_EQ(findi(double_abcd_str, make_str(U"da")), decltype(double_abcd_str)::npos); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"AA")), 0); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"AB")), 1); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"BB")), 2); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"BC")), 3); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"CC")), 4); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"CD")), 5); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"DD")), 6); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"DA")), decltype(double_abcd_str)::npos); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"aa")), 0); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"ab")), 1); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"bb")), 2); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"bc")), 3); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"cc")), 4); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"cd")), 5); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"dd")), 6); + EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast(U"da")), decltype(double_abcd_str)::npos); } /** diff --git a/src/test/unicode_sequence.cpp b/src/test/unicode_sequence.cpp new file mode 100644 index 
0000000..5067714 --- /dev/null +++ b/src/test/unicode_sequence.cpp @@ -0,0 +1,182 @@ +/** + * Copyright (C) 2021 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Written by Jessica James + */ + +#include "jessilib/unicode_sequence.hpp" +#include +#include "test.hpp" + +using char_types = ::testing::Types; +using char_type_combos = ::testing::Types< + std::pair, std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair, std::pair, + std::pair, std::pair, std::pair, std::pair>; + +template +class UnicodeSequenceTest : public ::testing::Test { +public: +}; +TYPED_TEST_SUITE(UnicodeSequenceTest, char_types); + +#define TEST_CPP_SEQUENCE(expr) \ + { auto parsed_string = jessilib::string_cast(#expr); \ + auto normal_string = jessilib::string_cast(expr); \ + parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \ + jessilib::apply_cpp_escape_sequences(parsed_string); \ + EXPECT_EQ(parsed_string, normal_string); } + +TYPED_TEST(UnicodeSequenceTest, cpp_simple) { + // Most basic of tests + TEST_CPP_SEQUENCE("test") + TEST_CPP_SEQUENCE("\"test\"") + + // Do each character once + TEST_CPP_SEQUENCE("\'") + TEST_CPP_SEQUENCE("\"") + TEST_CPP_SEQUENCE("\?") + TEST_CPP_SEQUENCE("\\") + TEST_CPP_SEQUENCE("\a") 
+ TEST_CPP_SEQUENCE("\b") + TEST_CPP_SEQUENCE("\f") + TEST_CPP_SEQUENCE("\n") + TEST_CPP_SEQUENCE("\r") + TEST_CPP_SEQUENCE("\t") + TEST_CPP_SEQUENCE("\v") +} + +TYPED_TEST(UnicodeSequenceTest, cpp_octal) { + // "\0" -> "\177" with & without leading zeroes + std::basic_string parsed_string; + for (unsigned int codepoint = 0; codepoint <= 0377; ++codepoint) { + uint8_t front = (codepoint >> 6); + uint8_t middle = (codepoint & 0b00'111'000) >> 3; + uint8_t last = (codepoint & 0b00'000'111); + + // "\000" -> "\177" + parsed_string = static_cast('\\'); + parsed_string += static_cast('0' + front); + parsed_string += static_cast('0' + middle); + parsed_string += static_cast('0' + last); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + + if (front == 0) { + // "\00" -> "\77" + parsed_string = static_cast('\\'); + parsed_string += static_cast('0' + middle); + parsed_string += static_cast('0' + last); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + + if (middle == 0) { + // "\0" -> "\7" + parsed_string = static_cast('\\'); + parsed_string += static_cast('0' + last); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + } + } + } +} + +template +std::basic_string make_hex_string(IntegerT in_integer, size_t min_length = 0) { + char buffer[32]; + auto buffer_end = std::to_chars(buffer, std::end(buffer), in_integer, 16).ptr; + std::basic_string result{ buffer, buffer_end }; + + if (min_length > result.size()) { + result.insert(0, min_length - result.size(), static_cast('0')); + } + + return result; +} + +TYPED_TEST(UnicodeSequenceTest, cpp_hex) { + // "x0" -> "xff" with & without leading zeroes + if constexpr (sizeof(TypeParam) == 1) { + for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) { + std::basic_string parsed_string; + for (size_t min_length = 0; min_length <= 
2; ++min_length) { + parsed_string = jessilib::string_cast("\\x"); + parsed_string += make_hex_string(codepoint, min_length); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + } + } + return; + } + + // "x0" -> "xffff" with & without leading zeroes + if constexpr (sizeof(TypeParam) == 2) { + for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { + std::basic_string parsed_string; + for (size_t min_length = 0; min_length <= 4; ++min_length) { + // "\x0" -> "\xffff" + parsed_string = jessilib::string_cast("\\x"); + parsed_string += make_hex_string(codepoint, min_length); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + } + } + + return; + } + + // "x0" -> "x10ffff" with & without leading zeroes + if constexpr (sizeof(TypeParam) == 4) { + for (unsigned int codepoint = 0; codepoint <= 0x10000; ++codepoint) { + std::basic_string parsed_string; + for (size_t min_length = 0; min_length <= 8; ++min_length) { + // "\x0" -> "\x0010ffff" + parsed_string = jessilib::string_cast("\\x"); + parsed_string += make_hex_string(codepoint, min_length); + jessilib::apply_cpp_escape_sequences(parsed_string); + EXPECT_EQ(parsed_string.front(), static_cast(codepoint)); + } + } + + return; + } +} + +TYPED_TEST(UnicodeSequenceTest, cpp_u16) { + // "u000" -> "uffff" with & without leading zeroes + for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { + std::basic_string parsed_string = jessilib::string_cast("\\u"); + parsed_string += make_hex_string(codepoint, 4); + jessilib::apply_cpp_escape_sequences(parsed_string); + + auto decode = jessilib::decode_codepoint(parsed_string); + EXPECT_NE(decode.units, 0); + EXPECT_EQ(decode.codepoint, static_cast(codepoint)); + } +} + +TYPED_TEST(UnicodeSequenceTest, cpp_u32) { + // "u000" -> "uffff" with & without leading zeroes + for (unsigned int codepoint = 0; codepoint <= 0x100FF; 
++codepoint) { + std::basic_string parsed_string = jessilib::string_cast("\\U"); + parsed_string += make_hex_string(codepoint, 8); + jessilib::apply_cpp_escape_sequences(parsed_string); + + auto decode = jessilib::decode_codepoint(parsed_string); + EXPECT_NE(decode.units, 0); + EXPECT_EQ(decode.codepoint, static_cast(codepoint)); + } +}