mirror of https://github.com/JAJames/jessilib.git
Jessica James
3 years ago
6 changed files with 843 additions and 179 deletions
@ -0,0 +1,343 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
/**
|
||||
|
* @file unicode_sequence.hpp |
||||
|
* @author Jessica James |
||||
|
* |
||||
|
* Unicode-aware escape sequence parsing utilities |
||||
|
*/ |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <map> |
||||
|
#include "unicode.hpp" |
||||
|
|
||||
|
namespace jessilib { |
||||
|
|
||||
|
// CharT is the codepoint which has just been read, in_write_head is a writeable string buffer, read_view points to remainder
|
||||
|
template<typename CharT> |
||||
|
using shrink_sequence_tree_action = bool(*)(CharT*& in_write_head, std::basic_string_view<CharT>& read_view); |
||||
|
|
||||
|
template<typename CharT> |
||||
|
using shrink_sequence_tree = std::map<char32_t, shrink_sequence_tree_action<CharT>>; |
||||
|
|
||||
|
template<typename CharT> |
||||
|
using shrink_sequence_tree_member = std::pair<char32_t, shrink_sequence_tree_action<CharT>>; |
||||
|
|
||||
|
// Only use for ASTs where each character process is guaranteed to write at most 1 character for each character consumed
|
||||
|
template<typename CharT, typename SequenceTreeT> |
||||
|
bool apply_shrink_sequence_tree(std::basic_string<CharT>& inout_string, const SequenceTreeT& in_tree) { |
||||
|
if (inout_string.empty()) { |
||||
|
// Nothing to parse
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
std::basic_string_view<CharT> read_view = inout_string; |
||||
|
CharT* write_head = inout_string.data(); |
||||
|
get_endpoint_result decode; |
||||
|
|
||||
|
while ((decode = decode_codepoint(read_view)).units != 0) { |
||||
|
auto parser = in_tree.find(decode.codepoint); |
||||
|
if (parser == in_tree.end()) { |
||||
|
// Just a normal character; write it over
|
||||
|
while (decode.units != 0) { |
||||
|
*write_head = read_view.front(); |
||||
|
++write_head; |
||||
|
--decode.units; |
||||
|
read_view.remove_prefix(1); |
||||
|
} |
||||
|
|
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
// This is a parsed sequence; pass it to the parser instead
|
||||
|
read_view.remove_prefix(decode.units); |
||||
|
if (!(parser->second)(write_head, read_view)) { |
||||
|
// Bad input received; strip off whatever we haven't parsed
|
||||
|
size_t length = write_head - inout_string.data(); |
||||
|
inout_string.erase(length); |
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// We've finished parsing successfully; strip off the extraneous codepoints
|
||||
|
size_t length = write_head - inout_string.data(); |
||||
|
inout_string.erase(length); |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// Only for codepoints representable w/ char8_t (i.e: \n)
|
||||
|
template<typename CharT, char32_t InCodepointV, char8_t OutCodepointV> |
||||
|
shrink_sequence_tree_member<CharT> make_simple_sequence_pair() { |
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](CharT*& in_write_head, std::basic_string_view<CharT>&) { |
||||
|
*in_write_head = static_cast<CharT>(OutCodepointV); |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
// Skips a character (i.e: skipping/ignoring newlines)
|
||||
|
template<typename CharT, char32_t InCodepointV> |
||||
|
shrink_sequence_tree_member<CharT> make_noop_sequence_pair() { |
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](CharT*&, std::basic_string_view<CharT>&) { |
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
// Skips a character or two (i.e: skipping/ignoring newlines)
|
||||
|
template<typename CharT, char32_t InCodepointV, char32_t InOptionalTrailing> |
||||
|
shrink_sequence_tree_member<CharT> make_noop_sequence_pair() { |
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](CharT*&, std::basic_string_view<CharT>& read_view) { |
||||
|
// Strip trailing 'InTrailing', if it's present
|
||||
|
auto decode = decode_codepoint(read_view); |
||||
|
if (decode.units != 0 |
||||
|
&& decode.codepoint == InOptionalTrailing) { |
||||
|
read_view.remove_prefix(decode.units); |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode> |
||||
|
shrink_sequence_tree_member<CharT> make_octal_sequence_pair() { |
||||
|
static_assert(MaxDigitsV > 0); // Use noop instead
|
||||
|
static_assert((MaxDigitsV == 2 && InCodepointV >= U'0' && InCodepointV <= U'7') |
||||
|
|| (MaxDigitsV == 3 && InCodepointV >= U'0' && InCodepointV <= U'3')); // Only currently support single-octet octal values
|
||||
|
|
||||
|
// Must have at least 1 octal digit (this one), but may not have more than 3 (2 more).
|
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) { |
||||
|
// Read in first octal character from InCodepointV
|
||||
|
unsigned int out_value = InCodepointV - U'0'; // Set initial value
|
||||
|
if (read_view.empty()) { |
||||
|
*in_write_head = out_value; |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// Read is second octal unit from front; octal characters are always 1 unit
|
||||
|
int octal_value = as_base(read_view.front(), 8); |
||||
|
if (octal_value < 0) { |
||||
|
if constexpr (ExactDigitsV) { |
||||
|
// Expected 2-3 digits, received 1
|
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// Not an octal character; write & return
|
||||
|
*in_write_head = out_value; |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
out_value <<= 3; |
||||
|
out_value |= octal_value; |
||||
|
read_view.remove_prefix(1); |
||||
|
|
||||
|
if constexpr (MaxDigitsV == 2) { |
||||
|
// We've read in both digits; go ahead and write & return
|
||||
|
*in_write_head = out_value; |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
if (read_view.empty()) { |
||||
|
if constexpr (ExactDigitsV) { |
||||
|
// Expected 3 digits, received 2
|
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
*in_write_head = out_value; |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// Read in third octal unit from front; octal characters are always 1 unit
|
||||
|
octal_value = as_base(read_view.front(), 8); |
||||
|
if (octal_value < 0) { |
||||
|
// Not an octal character; push what we have and handle this
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
out_value <<= 3; |
||||
|
out_value |= octal_value; |
||||
|
read_view.remove_prefix(1); |
||||
|
|
||||
|
// Write & return
|
||||
|
*in_write_head = out_value; |
||||
|
++in_write_head; |
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode> |
||||
|
shrink_sequence_tree_member<CharT> make_hex_sequence_pair() { |
||||
|
static_assert(MaxDigitsV > 0); |
||||
|
|
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) { |
||||
|
// Does not modify
|
||||
|
auto read_hex = [](uint32_t& out_value, std::basic_string_view<CharT> in_view, size_t max_digits) { |
||||
|
size_t result{}; |
||||
|
int hex_value; |
||||
|
out_value = 0; |
||||
|
while (result != max_digits |
||||
|
&& !in_view.empty()) { |
||||
|
hex_value = as_base(in_view.front(), 16); // hexadecimal characters are always 1 unit
|
||||
|
if (hex_value < 0) { |
||||
|
// Not a hexadecimal character; push what we have and handle this
|
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
out_value <<= 4; |
||||
|
out_value |= hex_value; |
||||
|
|
||||
|
in_view.remove_prefix(1); |
||||
|
++result; |
||||
|
} |
||||
|
|
||||
|
// Number of elements that are hexadecimal digits
|
||||
|
return result; |
||||
|
}; |
||||
|
|
||||
|
// Read in hex value
|
||||
|
uint32_t hex_value; |
||||
|
size_t units_read = read_hex(hex_value, read_view, MaxDigitsV); |
||||
|
|
||||
|
// Sanity check digits read
|
||||
|
if constexpr(ExactDigitsV) { |
||||
|
if (units_read != MaxDigitsV) { |
||||
|
// We expected example MaxDigitsV digits; fail
|
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
else { |
||||
|
if (units_read == 0) { |
||||
|
// We didn't read any digits; fail
|
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// We read an acceptable number of digits; write the unit and call it a day
|
||||
|
read_view.remove_prefix(units_read); |
||||
|
if constexpr (IsUnicode) { |
||||
|
in_write_head += encode_codepoint(in_write_head, hex_value); |
||||
|
} |
||||
|
else { |
||||
|
static_assert(MaxDigitsV <= sizeof(CharT) * 2); |
||||
|
*in_write_head = static_cast<CharT>(hex_value); |
||||
|
++in_write_head; |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
// Calls into another tree with the next character
|
||||
|
template<typename CharT, char32_t InCodepointV, const shrink_sequence_tree<CharT>& SubTreeR, bool FailNotFound = true> |
||||
|
shrink_sequence_tree_member<CharT> make_tree_sequence_pair() { |
||||
|
return { InCodepointV, [](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) { |
||||
|
auto decode = decode_codepoint(read_view); |
||||
|
auto parser = SubTreeR.find(decode.codepoint); |
||||
|
if (parser == SubTreeR.end()) { |
||||
|
if constexpr (FailNotFound) { |
||||
|
// Code not found; fail
|
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// Just a normal character; write it over
|
||||
|
while (decode.units != 0) { |
||||
|
*in_write_head = read_view.front(); |
||||
|
++in_write_head; |
||||
|
--decode.units; |
||||
|
read_view.remove_prefix(1); |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// This is a parsed sequence; pass it to the parser
|
||||
|
read_view.remove_prefix(decode.units); |
||||
|
return (parser->second)(in_write_head, read_view); |
||||
|
} }; |
||||
|
} |
||||
|
|
||||
|
// Return true for valid sequences, false otherwise
|
||||
|
template<typename CharT> |
||||
|
bool apply_cpp_escape_sequences(std::basic_string<CharT>& inout_string) { |
||||
|
// Handles parsing first character of escape sequence
|
||||
|
static const shrink_sequence_tree<CharT> main_tree{ |
||||
|
/** Newline skippers; not actually a C++ thing, but I want it */ |
||||
|
make_noop_sequence_pair<CharT, U'\n', U'\r'>(), |
||||
|
make_noop_sequence_pair<CharT, U'\r', U'\n'>(), |
||||
|
|
||||
|
/** Simple escape sequences */ |
||||
|
make_simple_sequence_pair<CharT, U'\'', '\''>(), |
||||
|
make_simple_sequence_pair<CharT, U'\"', '\"'>(), |
||||
|
make_simple_sequence_pair<CharT, U'?', '\?'>(), |
||||
|
make_simple_sequence_pair<CharT, U'\\', '\\'>(), |
||||
|
make_simple_sequence_pair<CharT, U'a', '\a'>(), |
||||
|
make_simple_sequence_pair<CharT, U'b', '\b'>(), |
||||
|
make_simple_sequence_pair<CharT, U'f', '\f'>(), |
||||
|
make_simple_sequence_pair<CharT, U'n', '\n'>(), |
||||
|
make_simple_sequence_pair<CharT, U'r', '\r'>(), |
||||
|
make_simple_sequence_pair<CharT, U't', '\t'>(), |
||||
|
make_simple_sequence_pair<CharT, U'v', '\v'>(), |
||||
|
|
||||
|
/** Numeric escape sequences */ |
||||
|
// Octal (Single byte value only); should we support octal escapes in sequence?
|
||||
|
make_octal_sequence_pair<CharT, U'0', 3, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'1', 3, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'2', 3, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'3', 3, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'4', 2, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'5', 2, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'6', 2, false, false>(), |
||||
|
make_octal_sequence_pair<CharT, U'7', 2, false, false>(), |
||||
|
|
||||
|
// Hex; should we support hex escapes in sequence? (i.e: \x00FF == \x00\xFF, which is only true for char/char8_t atm)
|
||||
|
make_hex_sequence_pair<CharT, U'x', sizeof(CharT) * 2, false, false>(), |
||||
|
|
||||
|
/** Unicode escape sequences */ |
||||
|
make_hex_sequence_pair<CharT, U'u', 4, true, true>(), |
||||
|
make_hex_sequence_pair<CharT, U'U', 8, true, true>(), |
||||
|
}; |
||||
|
|
||||
|
// Only checks for '\'
|
||||
|
static const shrink_sequence_tree<CharT> root_tree{ |
||||
|
make_tree_sequence_pair<CharT, U'\\', main_tree>() |
||||
|
}; |
||||
|
|
||||
|
return apply_shrink_sequence_tree(inout_string, root_tree); |
||||
|
} |
||||
|
|
||||
|
} // namespace jessilib
|
@ -0,0 +1,182 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
#include "jessilib/unicode_sequence.hpp" |
||||
|
#include <charconv> |
||||
|
#include "test.hpp" |
||||
|
|
||||
|
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>; |
||||
|
using char_type_combos = ::testing::Types< |
||||
|
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>, |
||||
|
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>, |
||||
|
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>, |
||||
|
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>; |
||||
|
|
||||
|
template<typename T> |
||||
|
class UnicodeSequenceTest : public ::testing::Test { |
||||
|
public: |
||||
|
}; |
||||
|
TYPED_TEST_SUITE(UnicodeSequenceTest, char_types); |
||||
|
|
||||
|
#define TEST_CPP_SEQUENCE(expr) \ |
||||
|
{ auto parsed_string = jessilib::string_cast<TypeParam>(#expr); \ |
||||
|
auto normal_string = jessilib::string_cast<TypeParam>(expr); \ |
||||
|
parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \ |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); \ |
||||
|
EXPECT_EQ(parsed_string, normal_string); } |
||||
|
|
||||
|
TYPED_TEST(UnicodeSequenceTest, cpp_simple) { |
||||
|
// Most basic of tests
|
||||
|
TEST_CPP_SEQUENCE("test") |
||||
|
TEST_CPP_SEQUENCE("\"test\"") |
||||
|
|
||||
|
// Do each character once
|
||||
|
TEST_CPP_SEQUENCE("\'") |
||||
|
TEST_CPP_SEQUENCE("\"") |
||||
|
TEST_CPP_SEQUENCE("\?") |
||||
|
TEST_CPP_SEQUENCE("\\") |
||||
|
TEST_CPP_SEQUENCE("\a") |
||||
|
TEST_CPP_SEQUENCE("\b") |
||||
|
TEST_CPP_SEQUENCE("\f") |
||||
|
TEST_CPP_SEQUENCE("\n") |
||||
|
TEST_CPP_SEQUENCE("\r") |
||||
|
TEST_CPP_SEQUENCE("\t") |
||||
|
TEST_CPP_SEQUENCE("\v") |
||||
|
} |
||||
|
|
||||
|
TYPED_TEST(UnicodeSequenceTest, cpp_octal) { |
||||
|
// "\0" -> "\177" with & without leading zeroes
|
||||
|
std::basic_string<TypeParam> parsed_string; |
||||
|
for (unsigned int codepoint = 0; codepoint <= 0377; ++codepoint) { |
||||
|
uint8_t front = (codepoint >> 6); |
||||
|
uint8_t middle = (codepoint & 0b00'111'000) >> 3; |
||||
|
uint8_t last = (codepoint & 0b00'000'111); |
||||
|
|
||||
|
// "\000" -> "\177"
|
||||
|
parsed_string = static_cast<TypeParam>('\\'); |
||||
|
parsed_string += static_cast<TypeParam>('0' + front); |
||||
|
parsed_string += static_cast<TypeParam>('0' + middle); |
||||
|
parsed_string += static_cast<TypeParam>('0' + last); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
|
||||
|
if (front == 0) { |
||||
|
// "\00" -> "\77"
|
||||
|
parsed_string = static_cast<TypeParam>('\\'); |
||||
|
parsed_string += static_cast<TypeParam>('0' + middle); |
||||
|
parsed_string += static_cast<TypeParam>('0' + last); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
|
||||
|
if (middle == 0) { |
||||
|
// "\0" -> "\7"
|
||||
|
parsed_string = static_cast<TypeParam>('\\'); |
||||
|
parsed_string += static_cast<TypeParam>('0' + last); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename IntegerT> |
||||
|
std::basic_string<CharT> make_hex_string(IntegerT in_integer, size_t min_length = 0) { |
||||
|
char buffer[32]; |
||||
|
auto buffer_end = std::to_chars(buffer, std::end(buffer), in_integer, 16).ptr; |
||||
|
std::basic_string<CharT> result{ buffer, buffer_end }; |
||||
|
|
||||
|
if (min_length > result.size()) { |
||||
|
result.insert(0, min_length - result.size(), static_cast<CharT>('0')); |
||||
|
} |
||||
|
|
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
TYPED_TEST(UnicodeSequenceTest, cpp_hex) { |
||||
|
// "x0" -> "xff" with & without leading zeroes
|
||||
|
if constexpr (sizeof(TypeParam) == 1) { |
||||
|
for (unsigned int codepoint = 0; codepoint <= 0xFF; ++codepoint) { |
||||
|
std::basic_string<TypeParam> parsed_string; |
||||
|
for (size_t min_length = 0; min_length <= 2; ++min_length) { |
||||
|
parsed_string = jessilib::string_cast<TypeParam>("\\x"); |
||||
|
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
} |
||||
|
} |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// "x0" -> "xffff" with & without leading zeroes
|
||||
|
if constexpr (sizeof(TypeParam) == 2) { |
||||
|
for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { |
||||
|
std::basic_string<TypeParam> parsed_string; |
||||
|
for (size_t min_length = 0; min_length <= 4; ++min_length) { |
||||
|
// "\x0" -> "\xffff"
|
||||
|
parsed_string = jessilib::string_cast<TypeParam>("\\x"); |
||||
|
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// "x0" -> "x10ffff" with & without leading zeroes
|
||||
|
if constexpr (sizeof(TypeParam) == 4) { |
||||
|
for (unsigned int codepoint = 0; codepoint <= 0x10000; ++codepoint) { |
||||
|
std::basic_string<TypeParam> parsed_string; |
||||
|
for (size_t min_length = 0; min_length <= 8; ++min_length) { |
||||
|
// "\x0" -> "\x0010ffff"
|
||||
|
parsed_string = jessilib::string_cast<TypeParam>("\\x"); |
||||
|
parsed_string += make_hex_string<TypeParam>(codepoint, min_length); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
TYPED_TEST(UnicodeSequenceTest, cpp_u16) { |
||||
|
// "u000" -> "uffff" with & without leading zeroes
|
||||
|
for (unsigned int codepoint = 0; codepoint <= 0xFFFF; ++codepoint) { |
||||
|
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\u"); |
||||
|
parsed_string += make_hex_string<TypeParam>(codepoint, 4); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
|
||||
|
auto decode = jessilib::decode_codepoint(parsed_string); |
||||
|
EXPECT_NE(decode.units, 0); |
||||
|
EXPECT_EQ(decode.codepoint, static_cast<char32_t>(codepoint)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
TYPED_TEST(UnicodeSequenceTest, cpp_u32) { |
||||
|
// "u000" -> "uffff" with & without leading zeroes
|
||||
|
for (unsigned int codepoint = 0; codepoint <= 0x100FF; ++codepoint) { |
||||
|
std::basic_string<TypeParam> parsed_string = jessilib::string_cast<TypeParam>("\\U"); |
||||
|
parsed_string += make_hex_string<TypeParam>(codepoint, 8); |
||||
|
jessilib::apply_cpp_escape_sequences(parsed_string); |
||||
|
|
||||
|
auto decode = jessilib::decode_codepoint(parsed_string); |
||||
|
EXPECT_NE(decode.units, 0); |
||||
|
EXPECT_EQ(decode.codepoint, static_cast<char32_t>(codepoint)); |
||||
|
} |
||||
|
} |
Loading…
Reference in new issue