Browse Source

Add 'apply_cpp_escape_sequences', general cleanup

master
Jessica James 3 years ago
parent
commit
d7e4f337c7
  1. 50
      src/common/unicode.cpp
  2. 113
      src/include/jessilib/unicode.hpp
  3. 343
      src/include/jessilib/unicode_sequence.hpp
  4. 2
      src/test/CMakeLists.txt
  5. 332
      src/test/unicode.cpp
  6. 182
      src/test/unicode_sequence.cpp

50
src/common/unicode.cpp

@ -32,6 +32,12 @@ void append_helper(std::basic_ostream<T>& out_string, T in_value) {
out_string << in_value; out_string << in_value;
} }
/**
 * Raw-buffer flavour of append_helper: stores in_value at the position out_string
 * currently refers to, then moves out_string forward by one element.
 *
 * @param out_string Write cursor into a caller-owned buffer; advanced past the stored element
 * @param in_value Element to store
 */
template<typename T>
void append_helper(T*& out_string, T in_value) {
	*out_string++ = in_value;
}
template<typename T, typename CharT> template<typename T, typename CharT>
size_t encode_codepoint_utf8(T& out_destination, char32_t in_codepoint) { size_t encode_codepoint_utf8(T& out_destination, char32_t in_codepoint) {
if (in_codepoint > 0x10FFFF) { if (in_codepoint > 0x10FFFF) {
@ -96,6 +102,8 @@ size_t encode_codepoint_utf32(T& out_destination, char32_t in_codepoint) {
return 1; return 1;
} }
/** Strings */
size_t encode_codepoint(std::string& out_string, char32_t in_codepoint) { size_t encode_codepoint(std::string& out_string, char32_t in_codepoint) {
return encode_codepoint_utf8<std::string, char>(out_string, in_codepoint); return encode_codepoint_utf8<std::string, char>(out_string, in_codepoint);
} }
@ -112,6 +120,8 @@ size_t encode_codepoint(std::u32string& out_string, char32_t in_codepoint) {
return encode_codepoint_utf32(out_string, in_codepoint); return encode_codepoint_utf32(out_string, in_codepoint);
} }
/** Streams */
size_t encode_codepoint(std::basic_ostream<char>& out_stream, char32_t in_codepoint) { size_t encode_codepoint(std::basic_ostream<char>& out_stream, char32_t in_codepoint) {
return encode_codepoint_utf8<std::basic_ostream<char>, char>(out_stream, in_codepoint); return encode_codepoint_utf8<std::basic_ostream<char>, char>(out_stream, in_codepoint);
} }
@ -128,6 +138,26 @@ size_t encode_codepoint(std::basic_ostream<char32_t>& out_stream, char32_t in_co
return encode_codepoint_utf32(out_stream, in_codepoint); return encode_codepoint_utf32(out_stream, in_codepoint);
} }
/** Pointers */
size_t encode_codepoint(char* out_buffer, char32_t in_codepoint) {
return encode_codepoint_utf8<decltype(out_buffer), char>(out_buffer, in_codepoint);
}
// Encodes in_codepoint into a caller-provided char8_t buffer by forwarding to encode_codepoint_utf8.
// out_buffer is taken by value, so the caller's own pointer is not advanced; use the return value.
size_t encode_codepoint(char8_t* out_buffer, char32_t in_codepoint) {
	using buffer_type = char8_t*;
	return encode_codepoint_utf8<buffer_type, char8_t>(out_buffer, in_codepoint);
}
// Encodes in_codepoint into a caller-provided char16_t buffer by forwarding to encode_codepoint_utf16.
// out_buffer is taken by value, so the caller's own pointer is not advanced; use the return value.
size_t encode_codepoint(char16_t* out_buffer, char32_t in_codepoint) {
	const size_t units_written = encode_codepoint_utf16(out_buffer, in_codepoint);
	return units_written;
}
// Encodes in_codepoint into a caller-provided char32_t buffer by forwarding to encode_codepoint_utf32.
// out_buffer is taken by value, so the caller's own pointer is not advanced; use the return value.
size_t encode_codepoint(char32_t* out_buffer, char32_t in_codepoint) {
	const size_t units_written = encode_codepoint_utf32(out_buffer, in_codepoint);
	return units_written;
}
/** Allocating */
std::u8string encode_codepoint_u8(char32_t in_codepoint) { std::u8string encode_codepoint_u8(char32_t in_codepoint) {
std::u8string result; std::u8string result;
encode_codepoint(result, in_codepoint); encode_codepoint(result, in_codepoint);
@ -519,4 +549,24 @@ char32_t fold(char32_t in_codepoint) {
return match->fold(in_codepoint); return match->fold(in_codepoint);
} }
// Digit value of each of the first 128 codepoints: '0'-'9' map to 0-9, and 'A'-'Z' / 'a'-'z' both map
// to 10-35. Every other entry is 127, which is larger than any digit value stored in the table.
const unsigned char base_table[]{
	// 0x00 - 0x1F
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
	// 0x20 - 0x3F (contains '0'-'9')
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 127, 127, 127, 127, 127, 127,
	// 0x40 - 0x5F (contains 'A'-'Z')
	127, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 127, 127, 127, 127, 127,
	// 0x60 - 0x7F (contains 'a'-'z')
	127, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 127, 127, 127, 127, 127,
};

/**
 * Interprets a single character as a digit of the given base.
 *
 * @param in_character Character to interpret
 * @param base Numeric base; a digit is accepted only when its value is strictly below this
 * @return The digit's value, or -1 if in_character lies outside the table or is not a digit of that base
 */
int as_base(char32_t in_character, unsigned int base) {
	if (in_character < sizeof(base_table)) {
		const unsigned int digit = base_table[in_character];
		if (digit < base) {
			return static_cast<int>(digit);
		}
	}

	return -1;
}
} // namespace jessilib } // namespace jessilib

113
src/include/jessilib/unicode.hpp

@ -1,5 +1,5 @@
/** /**
* Copyright (C) 2018 Jessica James. * Copyright (C) 2018-2021 Jessica James.
* *
* Permission to use, copy, modify, and/or distribute this software for any * Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above * purpose with or without fee is hereby granted, provided that the above
@ -50,6 +50,23 @@ size_t encode_codepoint(std::basic_ostream<char8_t>& out_stream, char32_t in_cod
size_t encode_codepoint(std::basic_ostream<char16_t>& out_stream, char32_t in_codepoint); size_t encode_codepoint(std::basic_ostream<char16_t>& out_stream, char32_t in_codepoint);
size_t encode_codepoint(std::basic_ostream<char32_t>& out_stream, char32_t in_codepoint); size_t encode_codepoint(std::basic_ostream<char32_t>& out_stream, char32_t in_codepoint);
/**
* Encodes a codepoint directly to a character buffer
* Note: Do not use this without careful consideration; note the size requirements:
* 1) char8_t may write up to 4 elements
* 2) char16_t may write up to 2 elements
* 3) char32_t may write up to 1 element
* 4) char may write up to 4 elements; provided solely for compatibility/ease of use
*
* @param out_buffer Character buffer to write to
* @param in_codepoint Codepoint to encode
* @return Number of data elements written to out_buffer
*/
size_t encode_codepoint(char* out_buffer, char32_t in_codepoint);
size_t encode_codepoint(char8_t* out_buffer, char32_t in_codepoint);
size_t encode_codepoint(char16_t* out_buffer, char32_t in_codepoint);
size_t encode_codepoint(char32_t* out_buffer, char32_t in_codepoint);
/** /**
* Encodes a codepoint and returns it as a string * Encodes a codepoint and returns it as a string
* *
@ -109,9 +126,57 @@ get_endpoint_result decode_surrogate_pair(char16_t in_high_surrogate, char16_t i
/** Utilities */ /** Utilities */
namespace impl_unicode {

// Add a narrower version in type_traits.hpp if this is needed elsewhere

/**
 * Members shared by every positive is_string match.
 *
 * @tparam CharT Character type reported through `type`
 */
template<typename CharT>
struct is_string_match {
	using type = CharT;
	static constexpr bool value{ true };
	constexpr operator bool() const noexcept { return true; }
	constexpr bool operator()() const noexcept { return true; }
};

/**
 * Trait identifying string-like types and exposing their character type as `type`.
 * The primary template is the negative case: `value` is false and no `type` member exists.
 */
template<typename T>
struct is_string : std::false_type {};

/** Owning strings */
template<typename T>
struct is_string<std::basic_string<T>> : is_string_match<T> {};

/** String views */
template<typename T>
struct is_string<std::basic_string_view<T>> : is_string_match<T> {};

// For pointers and arrays the element type may carry cv-qualifiers (e.g. `const char*`). They are stripped
// so that `type` is always a plain character type usable with std::basic_string_view / std::basic_string.

/** Pointers to characters */
template<typename T>
struct is_string<T*> : is_string_match<std::remove_cv_t<T>> {};

/** Character arrays of unknown bound */
template<typename T>
struct is_string<T[]> : is_string_match<std::remove_cv_t<T>> {};

/** Character arrays of known bound (e.g. string literals) */
template<typename T, size_t N>
struct is_string<T[N]> : is_string_match<std::remove_cv_t<T>> {};

} // namespace impl_unicode
template<typename InT> template<typename InT>
bool is_valid(const InT& in_string) { bool is_valid(const InT& in_string) {
using InCharT = typename InT::value_type; using InCharT = typename impl_unicode::is_string<InT>::type;
using InViewT = std::basic_string_view<InCharT>; using InViewT = std::basic_string_view<InCharT>;
InViewT in_string_view = static_cast<InViewT>(in_string); InViewT in_string_view = static_cast<InViewT>(in_string);
@ -137,7 +202,7 @@ bool is_valid(const InT& in_string) {
*/ */
template<typename OutCharT, typename InT> template<typename OutCharT, typename InT>
std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) { std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) {
using InCharT = typename InT::value_type; using InCharT = typename impl_unicode::is_string<InT>::type;
size_t in_string_bytes = in_string.size() * sizeof(InCharT); size_t in_string_bytes = in_string.size() * sizeof(InCharT);
if constexpr (sizeof(OutCharT) > sizeof(InCharT)) { if constexpr (sizeof(OutCharT) > sizeof(InCharT)) {
// The output type is larger than the input type; verify no partial codepoints // The output type is larger than the input type; verify no partial codepoints
@ -161,7 +226,8 @@ std::basic_string_view<OutCharT> string_view_cast(const InT& in_string) {
template<typename OutCharT, typename InT> template<typename OutCharT, typename InT>
std::basic_string<OutCharT> string_cast(const InT& in_string) { std::basic_string<OutCharT> string_cast(const InT& in_string) {
using InCharT = typename InT::value_type; static_assert(impl_unicode::is_string<InT>::value == true);
using InCharT = typename impl_unicode::is_string<InT>::type;
using InViewT = std::basic_string_view<InCharT>; using InViewT = std::basic_string_view<InCharT>;
std::basic_string<OutCharT> result; std::basic_string<OutCharT> result;
@ -196,11 +262,12 @@ std::basic_string<OutCharT> string_cast(const InT& in_string) {
return result; return result;
} }
/** single-unit case folding utilities */ /** single-unit helper utilities */
char32_t fold(char32_t in_codepoint); // Folds codepoint for case insensitive checks (not for human output) char32_t fold(char32_t in_codepoint); // Folds codepoint for case-insensitive checks (not for human output)
int as_base(char32_t in_character, unsigned int base); // The value represented by in_character in terms of base if valid, -1 otherwise
/** /**
* Checks if two codepoints are equal to eachother (case insensitive) * Checks if two codepoints are equal to each-other (case insensitive)
* *
* @param lhs First codepoint to compare * @param lhs First codepoint to compare
* @param rhs Second codepoint to compare * @param rhs Second codepoint to compare
@ -593,6 +660,38 @@ size_t findi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<
ADAPT_BASIC_STRING(findi) ADAPT_BASIC_STRING(findi)
// Predicate signature: (decoded codepoint, pointer to its first code unit, number of units it occupies)
using find_if_predicate_type = bool(*)(char32_t, char*, size_t);

/**
 * Walks in_string one codepoint at a time, handing each decoded codepoint to in_predicate together with a
 * mutable pointer to where that codepoint starts in the string.
 * Iteration ends when in_predicate returns true, or when decode_codepoint reports zero units.
 *
 * @param in_string String to iterate over
 * @param in_predicate Callback invoked per codepoint; return true to stop iterating
 */
inline void find_if(std::basic_string<char>& in_string, find_if_predicate_type in_predicate) {
	char* unit_cursor = in_string.data();
	std::basic_string_view<char> remaining = in_string;

	while (true) {
		const auto decoded = decode_codepoint(remaining);
		if (decoded.units == 0) {
			// Nothing further could be decoded
			return;
		}

		if (in_predicate(decoded.codepoint, unit_cursor, decoded.units)) {
			// The predicate found what it was after
			return;
		}

		remaining.remove_prefix(decoded.units);
		unit_cursor += decoded.units;
	}
}
// Predicate signature: (decoded codepoint, pointer to its first code unit, number of units it occupies)
using find_if_view_predicate_type = bool(*)(char32_t, const char*, size_t);

/**
 * Walks in_string one codepoint at a time, handing each decoded codepoint to in_predicate together with a
 * read-only pointer to where that codepoint starts in the view. in_string itself is left untouched.
 * Iteration ends when in_predicate returns true, or when decode_codepoint reports zero units.
 *
 * @param in_string View to iterate over
 * @param in_predicate Callback invoked per codepoint; return true to stop iterating
 */
inline void find_if(std::basic_string_view<char>& in_string, find_if_view_predicate_type in_predicate) {
	const char* unit_cursor = in_string.data();
	std::basic_string_view<char> remaining = in_string;

	while (true) {
		const auto decoded = decode_codepoint(remaining);
		if (decoded.units == 0) {
			// Nothing further could be decoded
			return;
		}

		if (in_predicate(decoded.codepoint, unit_cursor, decoded.units)) {
			// The predicate found what it was after
			return;
		}

		remaining.remove_prefix(decoded.units);
		unit_cursor += decoded.units;
	}
}
/** to_lower / to_upper */ /** to_lower / to_upper */
//char32_t to_lower(char32_t in_chr); // TODO: implement //char32_t to_lower(char32_t in_chr); // TODO: implement
//char32_t to_upper(char32_t in_chr); // TODO: implement //char32_t to_upper(char32_t in_chr); // TODO: implement

343
src/include/jessilib/unicode_sequence.hpp

@ -0,0 +1,343 @@
/**
* Copyright (C) 2021 Jessica James.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Written by Jessica James <jessica.aj@outlook.com>
*/
/**
* @file unicode_sequence.hpp
* @author Jessica James
*
* Unicode-aware escape sequence parsing utilities
*/
#pragma once
#include <map>
#include "unicode.hpp"
namespace jessilib {
// Handler invoked after the codepoint it is registered for has been read and removed from read_view.
// CharT is the code unit type of the string being processed; in_write_head is the writeable position in the
// string buffer (the handler advances it past whatever it writes); read_view is the not-yet-consumed remainder
// (the handler removes whatever it consumes). Returns false to signal bad input, true otherwise.
template<typename CharT>
using shrink_sequence_tree_action = bool(*)(CharT*& in_write_head, std::basic_string_view<CharT>& read_view);
// Lookup table from a codepoint to the handler which processes the sequence it introduces
template<typename CharT>
using shrink_sequence_tree = std::map<char32_t, shrink_sequence_tree_action<CharT>>;
// A single codepoint/handler entry, as returned by the make_*_sequence_pair factories
template<typename CharT>
using shrink_sequence_tree_member = std::pair<char32_t, shrink_sequence_tree_action<CharT>>;
/**
 * Rewrites inout_string in place: codepoints found in in_tree are handed to their handler, all other
 * codepoints are copied through unchanged. The string is then truncated to what has been written.
 *
 * Only use for trees where each handler is guaranteed to write at most 1 character for each character
 * consumed; the write position shares the buffer being read and must never overtake the read position.
 *
 * @param inout_string String to process; holds the processed result afterwards
 * @param in_tree Codepoint -> handler lookup
 * @return true if decoding ran to its end without a handler failing; false as soon as a handler
 * rejects its input, in which case everything not yet processed is discarded
 */
template<typename CharT, typename SequenceTreeT>
bool apply_shrink_sequence_tree(std::basic_string<CharT>& inout_string, const SequenceTreeT& in_tree) {
	if (inout_string.empty()) {
		// Nothing to parse
		return true;
	}

	CharT* const buffer_begin = inout_string.data();
	CharT* write_head = buffer_begin;
	std::basic_string_view<CharT> read_view = inout_string;
	bool all_sequences_valid = true;

	for (get_endpoint_result decode = decode_codepoint(read_view);
		decode.units != 0;
		decode = decode_codepoint(read_view)) {
		const auto handler = in_tree.find(decode.codepoint);
		if (handler != in_tree.end()) {
			// Start of a sequence; drop the introducing codepoint and let the handler take over
			read_view.remove_prefix(decode.units);
			if (!(handler->second)(write_head, read_view)) {
				// Bad input received; stop here and keep only what has been written so far
				all_sequences_valid = false;
				break;
			}
		}
		else {
			// Ordinary codepoint; move its units down to the write position
			for (auto units_left = decode.units; units_left != 0; --units_left) {
				*write_head = read_view.front();
				++write_head;
				read_view.remove_prefix(1);
			}
		}
	}

	// Strip everything past the last unit written
	inout_string.erase(static_cast<size_t>(write_head - buffer_begin));
	return all_sequences_valid;
}
/**
 * Builds a tree entry which replaces InCodepointV with the single unit OutCodepointV (i.e: 'n' -> '\n').
 * Only for output codepoints representable w/ char8_t.
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV Codepoint which selects this entry
 * @tparam OutCodepointV Unit written in its place
 */
template<typename CharT, char32_t InCodepointV, char8_t OutCodepointV>
shrink_sequence_tree_member<CharT> make_simple_sequence_pair() {
	auto write_replacement = [](CharT*& in_write_head, std::basic_string_view<CharT>&) {
		*in_write_head++ = static_cast<CharT>(OutCodepointV);
		return true;
	};

	return { InCodepointV, write_replacement };
}
/**
 * Builds a tree entry which silently drops InCodepointV (i.e: skipping/ignoring newlines).
 * The handler writes nothing and consumes nothing further.
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV Codepoint to drop
 */
template<typename CharT, char32_t InCodepointV>
shrink_sequence_tree_member<CharT> make_noop_sequence_pair() {
	auto discard = [](CharT*&, std::basic_string_view<CharT>&) {
		return true;
	};

	return { InCodepointV, discard };
}
/**
 * Builds a tree entry which silently drops InCodepointV, plus InOptionalTrailing when it immediately
 * follows (i.e: skipping/ignoring a newline made up of one or two characters).
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV Codepoint to drop
 * @tparam InOptionalTrailing Codepoint which is also dropped, if it is the next codepoint in the input
 */
template<typename CharT, char32_t InCodepointV, char32_t InOptionalTrailing>
shrink_sequence_tree_member<CharT> make_noop_sequence_pair() {
	auto discard_with_trailing = [](CharT*&, std::basic_string_view<CharT>& read_view) {
		// Peek at what follows; only swallow it when it decodes to InOptionalTrailing
		const auto next = decode_codepoint(read_view);
		const bool has_trailing = next.units != 0 && next.codepoint == InOptionalTrailing;
		if (has_trailing) {
			read_view.remove_prefix(next.units);
		}

		return true;
	};

	return { InCodepointV, discard_with_trailing };
}
/**
 * Builds a tree entry which parses an octal escape whose first digit is InCodepointV. That first digit
 * has already been consumed when the handler runs; the handler consumes up to MaxDigitsV - 1 further
 * octal digits from the input and writes the resulting value as a single unit.
 *
 * Only single-octet values are currently supported, hence MaxDigitsV must be 2 (first digit '0'-'7') or
 * 3 (first digit '0'-'3').
 *
 * Every accepted sequence now writes its value. Previously two digits followed by a non-octal character
 * were consumed without anything being written, and ExactDigitsV was not enforced on every path.
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV First octal digit of the sequence
 * @tparam MaxDigitsV Maximum number of digits in the sequence, including the first
 * @tparam ExactDigitsV When true, the handler fails unless exactly MaxDigitsV digits are present
 * @tparam IsUnicode Currently unused by the handler
 */
template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
shrink_sequence_tree_member<CharT> make_octal_sequence_pair() {
	static_assert(MaxDigitsV > 0); // Use noop instead
	static_assert((MaxDigitsV == 2 && InCodepointV >= U'0' && InCodepointV <= U'7')
		|| (MaxDigitsV == 3 && InCodepointV >= U'0' && InCodepointV <= U'3')); // Only currently support single-octet octal values

	// Must have at least 1 octal digit (this one), but may not have more than 3 (2 more).
	return {
		InCodepointV,
		[](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) {
			// The first digit comes from InCodepointV itself
			unsigned int out_value = InCodepointV - U'0';
			size_t digits_read = 1;

			// Pull in the remaining digits; octal characters are always 1 unit
			while (digits_read != MaxDigitsV && !read_view.empty()) {
				const int octal_value = as_base(read_view.front(), 8);
				if (octal_value < 0) {
					// Not an octal character; it belongs to whatever follows this sequence
					break;
				}

				out_value = (out_value << 3) | static_cast<unsigned int>(octal_value);
				read_view.remove_prefix(1);
				++digits_read;
			}

			if constexpr (ExactDigitsV) {
				if (digits_read != MaxDigitsV) {
					// Expected exactly MaxDigitsV digits, received fewer
					return false;
				}
			}

			// Write whatever was accumulated, regardless of how the digits ended
			*in_write_head = static_cast<CharT>(out_value);
			++in_write_head;
			return true;
		}
	};
}
/**
 * Builds a tree entry which parses the hexadecimal digits following InCodepointV (i.e: the 'x' of "\x41")
 * and writes the value they represent.
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV Codepoint which introduces the hexadecimal digits
 * @tparam MaxDigitsV Maximum number of digits to consume
 * @tparam ExactDigitsV When true, exactly MaxDigitsV digits are required; otherwise at least one is
 * @tparam IsUnicode When true, the value is written through encode_codepoint; otherwise it is written as a
 * single unit cast to CharT
 */
template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
shrink_sequence_tree_member<CharT> make_hex_sequence_pair() {
	static_assert(MaxDigitsV > 0);

	return {
		InCodepointV,
		[](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) {
			// Accumulate leading hexadecimal digits without touching read_view yet;
			// hexadecimal characters are always 1 unit
			uint32_t value = 0;
			size_t digit_count = 0;
			for (; digit_count != MaxDigitsV && digit_count != read_view.size(); ++digit_count) {
				const int digit = as_base(read_view[digit_count], 16);
				if (digit < 0) {
					// Not a hexadecimal character; the digits end here
					break;
				}

				value = (value << 4) | static_cast<uint32_t>(digit);
			}

			// Sanity check the number of digits read
			if constexpr (ExactDigitsV) {
				if (digit_count != MaxDigitsV) {
					// We expected exactly MaxDigitsV digits; fail
					return false;
				}
			}
			else {
				if (digit_count == 0) {
					// We didn't read any digits; fail
					return false;
				}
			}

			// An acceptable number of digits was read; consume them and write the value
			read_view.remove_prefix(digit_count);
			if constexpr (IsUnicode) {
				in_write_head += encode_codepoint(in_write_head, value);
			}
			else {
				static_assert(MaxDigitsV <= sizeof(CharT) * 2);
				*in_write_head = static_cast<CharT>(value);
				++in_write_head;
			}

			return true;
		}
	};
}
/**
 * Builds a tree entry which, after InCodepointV, looks up the next codepoint in SubTreeR and hands control
 * to the handler registered for it.
 *
 * @tparam CharT Code unit type of the string being processed
 * @tparam InCodepointV Codepoint which selects this entry
 * @tparam SubTreeR Tree consulted for the codepoint following InCodepointV
 * @tparam FailNotFound When true, a following codepoint which is missing from SubTreeR is treated as bad
 * input; when false, that codepoint is copied through unchanged
 */
template<typename CharT, char32_t InCodepointV, const shrink_sequence_tree<CharT>& SubTreeR, bool FailNotFound = true>
shrink_sequence_tree_member<CharT> make_tree_sequence_pair() {
	auto dispatch = [](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) {
		const auto next = decode_codepoint(read_view);
		const auto entry = SubTreeR.find(next.codepoint);

		if (entry != SubTreeR.end()) {
			// This is a parsed sequence; consume its selector and pass the rest to its handler
			read_view.remove_prefix(next.units);
			return (entry->second)(in_write_head, read_view);
		}

		if constexpr (FailNotFound) {
			// Code not found; fail
			return false;
		}
		else {
			// Just a normal character; write it over
			for (auto units_left = next.units; units_left != 0; --units_left) {
				*in_write_head = read_view.front();
				++in_write_head;
				read_view.remove_prefix(1);
			}

			return true;
		}
	};

	return { InCodepointV, dispatch };
}
// Replaces C++-style escape sequences in inout_string (in place) with the characters they represent.
// A backslash is looked up in root_tree, which defers to main_tree for the character following it; since
// root_tree's entry leaves FailNotFound at its default of true, a backslash followed by anything not listed
// in main_tree is reported as invalid.
// Return true for valid sequences, false otherwise
template<typename CharT>
bool apply_cpp_escape_sequences(std::basic_string<CharT>& inout_string) {
// Handles parsing first character of escape sequence
static const shrink_sequence_tree<CharT> main_tree{
/** Newline skippers; not actually a C++ thing, but I want it */
// A backslash directly before a line break drops the break; "\n\r" and "\r\n" pairs are dropped whole
make_noop_sequence_pair<CharT, U'\n', U'\r'>(),
make_noop_sequence_pair<CharT, U'\r', U'\n'>(),
/** Simple escape sequences */
make_simple_sequence_pair<CharT, U'\'', '\''>(),
make_simple_sequence_pair<CharT, U'\"', '\"'>(),
make_simple_sequence_pair<CharT, U'?', '\?'>(),
make_simple_sequence_pair<CharT, U'\\', '\\'>(),
make_simple_sequence_pair<CharT, U'a', '\a'>(),
make_simple_sequence_pair<CharT, U'b', '\b'>(),
make_simple_sequence_pair<CharT, U'f', '\f'>(),
make_simple_sequence_pair<CharT, U'n', '\n'>(),
make_simple_sequence_pair<CharT, U'r', '\r'>(),
make_simple_sequence_pair<CharT, U't', '\t'>(),
make_simple_sequence_pair<CharT, U'v', '\v'>(),
/** Numeric escape sequences */
// Octal (Single byte value only); should we support octal escapes in sequence?
// A leading '0'-'3' allows up to 3 digits, a leading '4'-'7' only 2, keeping the value within one octet
make_octal_sequence_pair<CharT, U'0', 3, false, false>(),
make_octal_sequence_pair<CharT, U'1', 3, false, false>(),
make_octal_sequence_pair<CharT, U'2', 3, false, false>(),
make_octal_sequence_pair<CharT, U'3', 3, false, false>(),
make_octal_sequence_pair<CharT, U'4', 2, false, false>(),
make_octal_sequence_pair<CharT, U'5', 2, false, false>(),
make_octal_sequence_pair<CharT, U'6', 2, false, false>(),
make_octal_sequence_pair<CharT, U'7', 2, false, false>(),
// Hex; should we support hex escapes in sequence? (i.e: \x00FF == \x00\xFF, which is only true for char/char8_t atm)
// Reads at most two hexadecimal digits per byte of CharT and writes the value as a single unit
make_hex_sequence_pair<CharT, U'x', sizeof(CharT) * 2, false, false>(),
/** Unicode escape sequences */
// \u requires exactly 4 digits and \U exactly 8; the value is written via encode_codepoint
make_hex_sequence_pair<CharT, U'u', 4, true, true>(),
make_hex_sequence_pair<CharT, U'U', 8, true, true>(),
};
// Only checks for '\'
static const shrink_sequence_tree<CharT> root_tree{
make_tree_sequence_pair<CharT, U'\\', main_tree>()
};
return apply_shrink_sequence_tree(inout_string, root_tree);
}
} // namespace jessilib

2
src/test/CMakeLists.txt

@ -1,6 +1,6 @@
# Setup source files # Setup source files
set(SOURCE_FILES set(SOURCE_FILES
timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp) timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp unicode_sequence.cpp)
# Setup gtest # Setup gtest
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

332
src/test/unicode.cpp

@ -135,20 +135,10 @@ public:
}; };
TYPED_TEST_SUITE(UnicodeFullTest, char_type_combos); TYPED_TEST_SUITE(UnicodeFullTest, char_type_combos);
/**
 * Test helper: builds a std::basic_string<CharT> from a UTF-32 literal by passing each of its codepoints
 * to jessilib::encode_codepoint.
 *
 * @tparam CharT Character type of the resulting string
 * @param in_str UTF-32 literal; its final element (the null terminator) is not encoded
 * @return The encoded string
 */
template<typename CharT, size_t InLength>
std::basic_string<CharT> make_str(const char32_t (&in_str)[InLength]) {
	std::basic_string<CharT> encoded;

	// Stop one element short of InLength so the null terminator is ignored
	for (size_t index = 0; index + 1 < InLength; ++index) {
		jessilib::encode_codepoint(encoded, in_str[index]);
	}

	return encoded;
}
/** string_cast */ /** string_cast */
TYPED_TEST(UnicodeFullTest, string_cast) { TYPED_TEST(UnicodeFullTest, string_cast) {
auto abcd_str = make_str<typename TypeParam::first_type>(U"ABCD"); auto abcd_str = jessilib::string_cast<typename TypeParam::first_type>(U"ABCD");
std::basic_string_view<typename TypeParam::first_type> abcd_string_view = abcd_str; std::basic_string_view<typename TypeParam::first_type> abcd_string_view = abcd_str;
EXPECT_TRUE(equals(abcd_str, EXPECT_TRUE(equals(abcd_str,
@ -161,7 +151,7 @@ TYPED_TEST(UnicodeFullTest, string_cast) {
} }
TEST(UTF8Test, string_view_cast) { TEST(UTF8Test, string_view_cast) {
auto abcd_str = make_str<char8_t>(U"ABCD"); auto abcd_str = jessilib::string_cast<char8_t>(U"ABCD");
auto view = string_view_cast<char>(abcd_str); auto view = string_view_cast<char>(abcd_str);
EXPECT_TRUE(equals(view, abcd_str)); EXPECT_TRUE(equals(view, abcd_str));
} }
@ -170,219 +160,219 @@ TEST(UTF8Test, string_view_cast) {
TYPED_TEST(UnicodeFullTest, equals) { TYPED_TEST(UnicodeFullTest, equals) {
// TypeParam::first_type == TypeParam::second_type // TypeParam::first_type == TypeParam::second_type
EXPECT_TRUE(equals(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_TRUE(equals(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_TRUE(equals(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_TRUE(equals(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(equals(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_FALSE(equals(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(equals(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_FALSE(equals(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_FALSE(equals(make_str<typename TypeParam::first_type>(U"ABcd"), EXPECT_FALSE(equals(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
make_str<typename TypeParam::second_type>(U"abCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));
} }
/** equalsi */ /** equalsi */
TYPED_TEST(UnicodeFullTest, equalsi) { TYPED_TEST(UnicodeFullTest, equalsi) {
// TypeParam::first_type == TypeParam::second_type // TypeParam::first_type == TypeParam::second_type
EXPECT_TRUE(equalsi(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_TRUE(equalsi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_TRUE(equalsi(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_TRUE(equalsi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_TRUE(equalsi(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_TRUE(equalsi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_TRUE(equalsi(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_TRUE(equalsi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_TRUE(equalsi(make_str<typename TypeParam::first_type>(U"ABcd"), EXPECT_TRUE(equalsi(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
make_str<typename TypeParam::second_type>(U"abCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));
} }
/** starts_with */ /** starts_with */
TYPED_TEST(UnicodeFullTest, starts_with) { TYPED_TEST(UnicodeFullTest, starts_with) {
// TypeParam::first_type == TypeParam::second_type // TypeParam::first_type == TypeParam::second_type
EXPECT_TRUE(starts_with(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_TRUE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_TRUE(starts_with(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_TRUE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABcd"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
make_str<typename TypeParam::second_type>(U"abCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));
// TypeParam::first_type starts_with TypeParam::second_type... (always false) // TypeParam::first_type starts_with TypeParam::second_type... (always false)
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"ABCDzz"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCDzz")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"abcdzz"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcdzz")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABCD"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
make_str<typename TypeParam::second_type>(U"abcdzz"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcdzz")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"abcd"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
make_str<typename TypeParam::second_type>(U"ABCDzz"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCDzz")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABcd"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
make_str<typename TypeParam::second_type>(U"abCDzz"))); jessilib::string_cast<typename TypeParam::second_type>(U"abCDzz")));
// TypeParam::first_type... starts_with TypeParam::second_type (always same results as first) // TypeParam::first_type... starts_with TypeParam::second_type (always same results as first)
EXPECT_TRUE(starts_with(make_str<typename TypeParam::first_type>(U"ABCDzz"), EXPECT_TRUE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCDzz"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_TRUE(starts_with(make_str<typename TypeParam::first_type>(U"abcdzz"), EXPECT_TRUE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcdzz"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABCDzz"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABCDzz"),
make_str<typename TypeParam::second_type>(U"abcd"))); jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"abcdzz"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"abcdzz"),
make_str<typename TypeParam::second_type>(U"ABCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
EXPECT_FALSE(starts_with(make_str<typename TypeParam::first_type>(U"ABcdzz"), EXPECT_FALSE(starts_with(jessilib::string_cast<typename TypeParam::first_type>(U"ABcdzz"),
make_str<typename TypeParam::second_type>(U"abCD"))); jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));
} }
/** starts_withi */
// Case-insensitive prefix checks; both operands are produced from the same UTF-32 literals via
// jessilib::string_cast so every first_type/second_type combination sees the same text.
TYPED_TEST(UnicodeFullTest, starts_withi) {
	// Same text in both operands, differing at most by case (always true)
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));

	// Prefix operand is longer than the string operand (always false)
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCDzz")));
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcdzz")));
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCD"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcdzz")));
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCDzz")));
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABcd"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abCDzz")));

	// String operand has trailing text after the prefix (same results as the equal-text cases)
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCDzz"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcdzz"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABCDzz"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abcd")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"abcdzz"),
		jessilib::string_cast<typename TypeParam::second_type>(U"ABCD")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"ABcdzz"),
		jessilib::string_cast<typename TypeParam::second_type>(U"abCD")));

	// Multi-word text: leading words match regardless of case, a word from the middle does not
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"Les Bean del Dallas"),
		jessilib::string_cast<typename TypeParam::second_type>(U"les")));
	EXPECT_TRUE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"Les Bean del Dallas"),
		jessilib::string_cast<typename TypeParam::second_type>(U"les Bean")));
	EXPECT_FALSE(starts_withi(jessilib::string_cast<typename TypeParam::first_type>(U"Les Bean del Dallas"),
		jessilib::string_cast<typename TypeParam::second_type>(U"del")));
}
// Case-sensitive substring search: checks the index returned by find() for single- and two-character
// needles, and npos when the needle is absent.
TYPED_TEST(UnicodeFullTest, find) {
	auto abcd_str = jessilib::string_cast<typename TypeParam::first_type>(U"ABCD");

	// Empty substring
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"")), 0);

	// Single-characters
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"A")), 0);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"B")), 1);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"C")), 2);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"D")), 3);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"E")), decltype(abcd_str)::npos);

	// Two characters
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AB")), 0);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BC")), 1);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CD")), 2);
	EXPECT_EQ(find(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DA")), decltype(abcd_str)::npos);

	// Repeated characters: the first occurrence must be reported
	auto double_abcd_str = jessilib::string_cast<typename TypeParam::first_type>(U"AABBCCDD");

	// Single-characters
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"A")), 0);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"B")), 2);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"C")), 4);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"D")), 6);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"E")), decltype(double_abcd_str)::npos);

	// Two characters
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AA")), 0);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AB")), 1);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BB")), 2);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BC")), 3);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CC")), 4);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CD")), 5);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DD")), 6);
	EXPECT_EQ(find(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DA")), decltype(double_abcd_str)::npos);
}
// Case-insensitive substring search: every needle is tried in upper and lower case and must yield
// the same index (or npos) from findi().
TYPED_TEST(UnicodeFullTest, findi) {
	auto abcd_str = jessilib::string_cast<typename TypeParam::first_type>(U"ABCD");

	// Empty substring
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"")), 0);

	// Single-characters
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"A")), 0);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"B")), 1);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"C")), 2);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"D")), 3);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"E")), decltype(abcd_str)::npos);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"a")), 0);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"b")), 1);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"c")), 2);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"d")), 3);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"e")), decltype(abcd_str)::npos);

	// Two characters
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AB")), 0);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BC")), 1);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CD")), 2);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DA")), decltype(abcd_str)::npos);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"ab")), 0);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"bc")), 1);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"cd")), 2);
	EXPECT_EQ(findi(abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"da")), decltype(abcd_str)::npos);

	// Repeated characters: the first occurrence must be reported
	auto double_abcd_str = jessilib::string_cast<typename TypeParam::first_type>(U"AABBCCDD");

	// Single-characters
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"A")), 0);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"B")), 2);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"C")), 4);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"D")), 6);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"E")), decltype(double_abcd_str)::npos);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"a")), 0);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"b")), 2);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"c")), 4);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"d")), 6);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"e")), decltype(double_abcd_str)::npos);

	// Two characters
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AA")), 0);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"AB")), 1);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BB")), 2);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"BC")), 3);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CC")), 4);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"CD")), 5);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DD")), 6);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"DA")), decltype(double_abcd_str)::npos);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"aa")), 0);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"ab")), 1);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"bb")), 2);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"bc")), 3);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"cc")), 4);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"cd")), 5);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"dd")), 6);
	EXPECT_EQ(findi(double_abcd_str, jessilib::string_cast<typename TypeParam::second_type>(U"da")), decltype(double_abcd_str)::npos);
}
/** /**

182
src/test/unicode_sequence.cpp

@ -0,0 +1,182 @@
/**
* Copyright (C) 2021 Jessica James.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Written by Jessica James <jessica.aj@outlook.com>
*/
#include "jessilib/unicode_sequence.hpp"
#include <charconv>
#include "test.hpp"
// Character types the typed test suite below is instantiated for.
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
// Every ordered pairing of the four character types above (4 x 4 = 16 entries); one row per first type.
// NOTE(review): no test visible in this file references char_type_combos - confirm whether it is still needed.
using char_type_combos = ::testing::Types<
std::pair<char, char>, std::pair<char, char8_t>, std::pair<char, char16_t>, std::pair<char, char32_t>,
std::pair<char8_t, char>, std::pair<char8_t, char8_t>, std::pair<char8_t, char16_t>, std::pair<char8_t, char32_t>,
std::pair<char16_t, char>, std::pair<char16_t, char8_t>, std::pair<char16_t, char16_t>, std::pair<char16_t, char32_t>,
std::pair<char32_t, char>, std::pair<char32_t, char8_t>, std::pair<char32_t, char16_t>, std::pair<char32_t, char32_t>>;
// Stateless typed-test fixture; each TYPED_TEST below runs once per character type in char_types.
template<typename CharT>
struct UnicodeSequenceTest : ::testing::Test {};

TYPED_TEST_SUITE(UnicodeSequenceTest, char_types);
// Compares the escape handling of jessilib::apply_cpp_escape_sequences against the compiler's own.
// `expr` must be a narrow string literal and `TypeParam` must be in scope (i.e. use inside TYPED_TEST):
//  - parsed_string starts as the stringized source spelling of the literal (#expr), including its
//    surrounding quotes; substr(1, size - 2) drops the first and last character (those quotes), and the
//    remainder is run through apply_cpp_escape_sequences
//  - normal_string is the literal as the compiler evaluated it
// The two are then expected to be equal. The macro expands to a braced block, so call sites do not
// need a trailing semicolon.
#define TEST_CPP_SEQUENCE(expr) \
{ auto parsed_string = jessilib::string_cast<TypeParam>(#expr); \
auto normal_string = jessilib::string_cast<TypeParam>(expr); \
parsed_string = parsed_string.substr(1, parsed_string.size() - 2); \
jessilib::apply_cpp_escape_sequences(parsed_string); \
EXPECT_EQ(parsed_string, normal_string); }
// Checks the single-character escape sequences. TEST_CPP_SEQUENCE stringizes its argument, so the
// exact source spelling of each literal below is the input under test - do not respell them.
TYPED_TEST(UnicodeSequenceTest, cpp_simple) {
// Most basic of tests: no escapes at all, then escaped quotes around plain text
TEST_CPP_SEQUENCE("test")
TEST_CPP_SEQUENCE("\"test\"")
// Each single-character escape once: quote characters, question mark and backslash first...
TEST_CPP_SEQUENCE("\'")
TEST_CPP_SEQUENCE("\"")
TEST_CPP_SEQUENCE("\?")
TEST_CPP_SEQUENCE("\\")
// ...then the control-character escapes
TEST_CPP_SEQUENCE("\a")
TEST_CPP_SEQUENCE("\b")
TEST_CPP_SEQUENCE("\f")
TEST_CPP_SEQUENCE("\n")
TEST_CPP_SEQUENCE("\r")
TEST_CPP_SEQUENCE("\t")
TEST_CPP_SEQUENCE("\v")
}
// Exercises every octal escape from "\0" through "\377", with and without leading zeroes, and checks
// that the first code unit left in the string equals the escaped value.
TYPED_TEST(UnicodeSequenceTest, cpp_octal) {
	// Builds a backslash followed by the given octal digits, applies the escape sequences, then checks
	// the first resulting code unit against `expected`.
	const auto expect_octal = [](std::initializer_list<unsigned int> digits, unsigned int expected) {
		std::basic_string<TypeParam> parsed_string(1, static_cast<TypeParam>('\\'));
		for (const unsigned int digit : digits) {
			parsed_string += static_cast<TypeParam>('0' + digit);
		}

		jessilib::apply_cpp_escape_sequences(parsed_string);

		// front() on an empty string is undefined behavior; report a failure instead
		ASSERT_FALSE(parsed_string.empty());
		EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(expected));
	};

	for (unsigned int codepoint = 0; codepoint <= 0377; ++codepoint) {
		// Split the value into its three octal digits
		const unsigned int front = codepoint >> 6;
		const unsigned int middle = (codepoint >> 3) & 07;
		const unsigned int last = codepoint & 07;

		// Three digits: "\000" -> "\377"
		expect_octal({ front, middle, last }, codepoint);

		if (front == 0) {
			// Two digits: "\00" -> "\77"
			expect_octal({ middle, last }, codepoint);

			if (middle == 0) {
				// One digit: "\0" -> "\7"
				expect_octal({ last }, codepoint);
			}
		}
	}
}
/**
 * Formats an integer as lowercase hexadecimal text in the requested character type.
 *
 * @tparam CharT Character type of the returned string
 * @tparam IntegerT Integer type accepted by std::to_chars
 * @param in_integer Value to format
 * @param min_length Minimum length of the result; shorter output is left-padded with '0'
 * @return Hexadecimal representation of in_integer, at least min_length characters long
 */
template<typename CharT, typename IntegerT>
std::basic_string<CharT> make_hex_string(IntegerT in_integer, size_t min_length = 0) {
	char digits[32];
	char* const digits_begin = digits;
	char* const digits_end = std::to_chars(digits_begin, std::end(digits), in_integer, 16).ptr;
	const size_t digit_count = static_cast<size_t>(digits_end - digits_begin);

	// Emit the padding first, then widen each narrow digit into CharT while appending
	std::basic_string<CharT> result;
	if (digit_count < min_length) {
		result.assign(min_length - digit_count, static_cast<CharT>('0'));
	}
	result.append(digits_begin, digits_end);
	return result;
}
// Exercises "\x" escapes with zero-padding from none up to two hex digits per byte of TypeParam, and
// checks that the first code unit left in the string equals the escaped value.
// Ranges covered: 1-byte types "\x0" -> "\xff", 2-byte types "\x0" -> "\xffff",
// 4-byte types "\x0" -> "\x00010000".
// NOTE(review): the 4-byte range stops at 0x10000 rather than 0x10ffff; the bound is kept as-is,
// confirm whether the shorter range is intentional.
TYPED_TEST(UnicodeSequenceTest, cpp_hex) {
	static_assert(sizeof(TypeParam) == 1 || sizeof(TypeParam) == 2 || sizeof(TypeParam) == 4,
		"cpp_hex only knows value ranges for 1-, 2- and 4-byte character types");

	// Largest value to escape, and the widest zero-padded form to try, for this character type
	constexpr unsigned int last_value =
		sizeof(TypeParam) == 1 ? 0xFFu : (sizeof(TypeParam) == 2 ? 0xFFFFu : 0x10000u);
	constexpr size_t max_digits = sizeof(TypeParam) * 2;

	const auto prefix = jessilib::string_cast<TypeParam>("\\x");
	for (unsigned int codepoint = 0; codepoint <= last_value; ++codepoint) {
		for (size_t min_length = 0; min_length <= max_digits; ++min_length) {
			std::basic_string<TypeParam> parsed_string = prefix;
			parsed_string += make_hex_string<TypeParam>(codepoint, min_length);
			jessilib::apply_cpp_escape_sequences(parsed_string);

			// front() on an empty string is undefined behavior; stop with a failure instead
			ASSERT_FALSE(parsed_string.empty());
			EXPECT_EQ(parsed_string.front(), static_cast<TypeParam>(codepoint));
		}
	}
}
// Exercises "\u0000" -> "\uffff" (always exactly four hex digits) and checks that the result decodes
// back to the escaped codepoint.
TYPED_TEST(UnicodeSequenceTest, cpp_u16) {
	const auto prefix = jessilib::string_cast<TypeParam>("\\u");
	for (unsigned int value = 0; value <= 0xFFFF; ++value) {
		std::basic_string<TypeParam> escaped = prefix;
		escaped += make_hex_string<TypeParam>(value, 4);
		jessilib::apply_cpp_escape_sequences(escaped);

		// A unit count of zero is expected never to occur for these inputs
		auto decoded = jessilib::decode_codepoint(escaped);
		EXPECT_NE(decoded.units, 0);
		EXPECT_EQ(decoded.codepoint, static_cast<char32_t>(value));
	}
}
// Exercises "\U00000000" -> "\U000100ff" (always exactly eight hex digits) and checks that the result
// decodes back to the escaped codepoint.
TYPED_TEST(UnicodeSequenceTest, cpp_u32) {
	const auto prefix = jessilib::string_cast<TypeParam>("\\U");
	for (unsigned int value = 0; value <= 0x100FF; ++value) {
		std::basic_string<TypeParam> escaped = prefix;
		escaped += make_hex_string<TypeParam>(value, 8);
		jessilib::apply_cpp_escape_sequences(escaped);

		// A unit count of zero is expected never to occur for these inputs
		auto decoded = jessilib::decode_codepoint(escaped);
		EXPECT_NE(decoded.units, 0);
		EXPECT_EQ(decoded.codepoint, static_cast<char32_t>(value));
	}
}
Loading…
Cancel
Save