From 157a3cac036b28322e164cb17eeab39c3d202e4b Mon Sep 17 00:00:00 2001 From: Jessica James Date: Wed, 10 Nov 2021 23:48:27 -0600 Subject: [PATCH] Add word_split, word_split_once, and word_split_n_once, taking in whitespace instead of a delimiter --- src/include/jessilib/split.hpp | 1 - src/include/jessilib/word_split.hpp | 519 ++++++++++++++++++++++++++++ src/test/CMakeLists.txt | 2 +- src/test/test_split.hpp | 101 +++++- src/test/word_split.cpp | 280 +++++++++++++++ 5 files changed, 893 insertions(+), 10 deletions(-) create mode 100644 src/include/jessilib/word_split.hpp create mode 100644 src/test/word_split.cpp diff --git a/src/include/jessilib/split.hpp b/src/include/jessilib/split.hpp index d91289d..695ae45 100644 --- a/src/include/jessilib/split.hpp +++ b/src/include/jessilib/split.hpp @@ -336,7 +336,6 @@ constexpr auto split_n(ItrT begin, EndT end, ElementT in_delim, size_t in_limit) * @tparam ContainerArgsT Optional template parameters for ContainerT * @param begin Start of range of elements to split * @param end End of range of elements to split - * @param in_delim Delimiter to split upon * @param in_delim_begin Start of range containing the delimiter * @param in_delim_end End of range containing the delimiter * @return Container containing to up `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder diff --git a/src/include/jessilib/word_split.hpp b/src/include/jessilib/word_split.hpp new file mode 100644 index 0000000..b4c3d3e --- /dev/null +++ b/src/include/jessilib/word_split.hpp @@ -0,0 +1,519 @@ +/** + * Copyright (C) 2021 Jessica James. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James
+ */
+
+/**
+ * @file word_split.hpp
+ * @author Jessica James
+ *
+ * Over-engineered and over-genericized versions of word_split, word_split_once, and word_split_n, with lots of syntactical sugar
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace jessilib {
+
+/**
+ * Resolves the member and container types used by the word_split family when the caller names no member type:
+ * members default to std::basic_string<ElementT>, stored in ContainerT<member_type>
+ */
+template<template<typename...> typename ContainerT, typename ElementT, typename...>
+struct word_split_defaults {
+	using member_type = std::basic_string<ElementT>;
+	using container_type = ContainerT<member_type>;
+};
+
+/** Specialization used when the caller names the member type (and optionally further container arguments) */
+template<template<typename...> typename ContainerT, typename ElementT, typename FirstOptional, typename... ContainerArgsT>
+struct word_split_defaults<ContainerT, ElementT, FirstOptional, ContainerArgsT...> {
+	using member_type = FirstOptional;
+	using container_type = ContainerT<FirstOptional, ContainerArgsT...>;
+};
+
+/**
+ * Builds a result member from [in_itr, in_end) for member types which cannot be constructed from an iterator
+ * pair; the member is constructed from a pointer to the first element and an element count instead
+ */
+template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<!std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
+MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
+	// Intended for string_view
+	if constexpr (std::is_pointer_v<ItrT>) {
+		return { in_itr, static_cast<size_t>(in_end - in_itr) };
+	}
+
+	// An empty range has no element whose address could be taken
+	if (in_itr == in_end) {
+		return {};
+	}
+
+	return { &*in_itr, static_cast<size_t>(in_end - in_itr) };
+}
+
+/** Builds a result member from [in_itr, in_end) for member types constructible from an iterator pair */
+template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
+MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
+	// Can construct with iterators, so construct with iterators
+	return { in_itr, in_end };
+}
+
+/**
+ * Splits an input string into substrings based on words
+ *
+ * Leading whitespace is skipped, every run of whitespace counts as a single separator, and an empty trailing
+ * word is never added, so the result contains no empty members.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Whitespace to split upon
+ * @return Container populated with the words found in [begin, end)
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split(ItrT begin, EndT end, ElementT in_whitespace) {
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	container_type result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& *begin == in_whitespace) {
+		++begin;
+	}
+
+	for (auto itr = begin; itr != end;) {
+		if (*itr == in_whitespace) {
+			// End of word reached; push token to result and skip over any whitespace
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			++itr;
+			while (itr != end
+				&& *itr == in_whitespace) {
+				++itr;
+			}
+
+			begin = itr;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits an input string into substrings based on words, where any element of a whitespace set is a separator
+ *
+ * A whitespace set holding exactly one element is forwarded to the single-element overload; an empty whitespace
+ * set can never match, so a non-empty input is returned as a single token.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @return Container populated with the words found in [begin, end)
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) {
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin);
+	}
+
+	container_type result{};
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Absent whitespace, therefore no match, therefore return input as single token
+		result.push_back(make_word_split_member<member_type>(begin, end));
+		return result;
+	}
+
+	auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& is_whitespace(*begin)) {
+		++begin;
+	}
+
+	for (auto itr = begin; itr < end;) {
+		if (is_whitespace(*itr)) {
+			// Push token to result
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			++itr;
+			while (itr != end
+				&& is_whitespace(*itr)) {
+				++itr;
+			}
+
+			begin = itr;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits an input string into substrings based on words
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Whitespace to split upon
+ * @return Container populated with the words found in `in_string`
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
+constexpr auto word_split(const InputT& in_string, typename InputT::value_type in_whitespace) {
+	return word_split<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace);
+}
+
+/** Same as above, but `in_whitespace` is a container whose elements are each treated as whitespace */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
+	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
+constexpr auto word_split(const InputT& in_string, const SpaceT& in_whitespace) {
+	return word_split<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end());
+}
+
+/**
+ * Splits an input string into 2 substrings at and omitting any input whitespace. Returns:
+ * An empty pair if the input is empty,
+ * otherwise if whitespace is not present, a pair whose `second` member is empty and whose `first` member is the input,
+ * otherwise, a pair split at first instance of a string of whitespace
+ *
+ * Whitespace preceding the first word is skipped and never becomes part of `first`.
+ *
+ * @tparam OptionalMemberT String type used to populate the result (at most one; defaults to std::basic_string)
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Whitespace to split on
+ * @return A pair representing the input split at some whitespace, with first word stored in `first` and rest of sentence in `second`
+ */
+template<typename... OptionalMemberT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split_once(ItrT begin, EndT end, ElementT in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	std::pair<MemberT, MemberT> result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& *begin == in_whitespace) {
+		++begin;
+	}
+
+	for (auto itr = begin; itr != end; ++itr) {
+		if (*itr == in_whitespace) {
+			// in_whitespace found; word_split upon it
+			result.first = make_word_split_member<MemberT>(begin, itr);
+
+			++itr;
+			while (itr != end
+				&& *itr == in_whitespace) {
+				++itr;
+			}
+
+			result.second = make_word_split_member<MemberT>(itr, end);
+			return result;
+		}
+	}
+
+	// in_whitespace not found
+	result.first = make_word_split_member<MemberT>(begin, end);
+	return result;
+}
+
+/**
+ * Splits an input string into 2 substrings at and omitting any input whitespace. Returns:
+ * An empty pair if the input is empty,
+ * otherwise if whitespace is not present, a pair whose `second` member is empty and whose `first` member is the input,
+ * otherwise, a pair split at first instance of any whitespace
+ *
+ * Whitespace preceding the first word is skipped and never becomes part of `first`.
+ *
+ * @tparam OptionalMemberT String type used to populate the result (at most one; defaults to std::basic_string)
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @return A pair representing the input split at some whitespace, with first word stored in `first` and rest of sentence in `second`
+ */
+template<typename... OptionalMemberT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split_once(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split_once<MemberT>(begin, end, *in_whitespace_begin);
+	}
+
+	std::pair<MemberT, MemberT> result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Absent whitespace, therefore no match, therefore return input as single token
+		result.first = make_word_split_member<MemberT>(begin, end);
+		return result;
+	}
+
+	auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Skip over preceding whitespace
+	while (begin != end
+		&& is_whitespace(*begin)) {
+		++begin;
+	}
+
+	// The element (not the iterator) is what gets tested, and the loop has to advance on every
+	// non-whitespace element or it would never terminate
+	for (auto itr = begin; itr != end; ++itr) {
+		if (is_whitespace(*itr)) {
+			// in_whitespace found; word_split upon it
+			result.first = make_word_split_member<MemberT>(begin, itr);
+
+			++itr;
+			while (itr != end
+				&& is_whitespace(*itr)) {
+				++itr;
+			}
+
+			result.second = make_word_split_member<MemberT>(itr, end);
+			return result;
+		}
+	}
+
+	// in_whitespace not found
+	result.first = make_word_split_member<MemberT>(begin, end);
+	return result;
+}
+
+/**
+ * Splits an input string into 2 substrings at and omitting any input whitespace. Returns:
+ * An empty pair if in_string is empty,
+ * otherwise if whitespace is not present, a pair whose `second` member is empty and whose `first` member is equal to `in_string`,
+ * otherwise, a pair split at first instance of any whitespace
+ *
+ * @tparam OptionalMemberT String type used to populate the result (at most one; defaults to std::basic_string)
+ * @tparam InputT String type being passed into word_split_once
+ * @param in_string string to split
+ * @param in_whitespace Whitespace to split on
+ * @return A pair representing `in_string` split at some whitespace, with first word stored in `first` and rest of sentence in `second`
+ */
+template<typename... OptionalMemberT, typename InputT>
+constexpr auto word_split_once(const InputT& in_string, typename InputT::value_type in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using ElementT = typename InputT::value_type;
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	return word_split_once<MemberT>(in_string.begin(), in_string.end(), in_whitespace);
+}
+
+/** Same as above, but `in_whitespace` is a container whose elements are each treated as whitespace */
+template<typename... OptionalMemberT, typename InputT, typename SpaceT,
+	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
+constexpr auto word_split_once(const InputT& in_string, const SpaceT& in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	return word_split_once<OptionalMemberT...>(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end());
+}
+
+/**
+ * Splits a range of elements up to a specified number of times and returns the result
+ *
+ * Leading whitespace is skipped and every run of whitespace counts as a single split. Once `in_limit` splits
+ * have been made, whatever is left of the input is appended untouched, provided it is not empty.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Whitespace to split upon
+ * @param in_limit Maximum number of times to split
+ * @return Container containing up to `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split_n(ItrT begin, EndT end, ElementT in_whitespace, size_t in_limit) {
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	container_type result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& *begin == in_whitespace) {
+		++begin;
+	}
+
+	// itr is advanced inside the body only: after a run of whitespace it already points at the next word
+	// (or at end), so incrementing it again in the loop header would step past end on trailing whitespace
+	for (auto itr = begin; itr != end && in_limit != 0;) {
+		if (*itr == in_whitespace) {
+			// Push token to result
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			++itr;
+			while (itr != end
+				&& *itr == in_whitespace) {
+				++itr;
+			}
+
+			begin = itr;
+			--in_limit;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits a range of elements up to a specified number of times and returns the result
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @param in_limit Maximum number of times to split
+ * @return Container containing up to `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split_n(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end, size_t in_limit) {
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	// A one-element whitespace set is handled by the single-element overload
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split_n<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin, in_limit);
+	}
+
+	container_type result{};
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Absent whitespace, therefore no match, therefore return input as single token
+		result.push_back(make_word_split_member<member_type>(begin, end));
+		return result;
+	}
+
+	auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& is_whitespace(*begin)) {
+		++begin;
+	}
+
+	for (auto itr = begin; itr != end && in_limit != 0;) {
+		// Membership test on the current element: the whitespace range is a set of separators, not a
+		// sequence to be matched against the input (which could also read beyond `end`)
+		if (is_whitespace(*itr)) {
+			// Push token to result
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			++itr;
+			while (itr != end
+				&& is_whitespace(*itr)) {
+				++itr;
+			}
+
+			begin = itr;
+			--in_limit;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty, matching the single-element overload delegated to above
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits a string up to a specified number of times and returns the result
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Whitespace to split upon
+
* @param in_limit Maximum number of times to split
+ * @return Container containing up to `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder
+ */
+template typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
+constexpr auto word_split_n(const InputT& in_string, typename InputT::value_type in_whitespace, size_t in_limit) {
+	return word_split_n(in_string.begin(), in_string.end(), in_whitespace, in_limit);
+}
+
+/** Same as above, but `in_whitespace` is a container; its begin()/end() are forwarded as the whitespace range */
+template typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
+	typename std::enable_if::value>::type* = nullptr>
+constexpr auto word_split_n(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) {
+	return word_split_n(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end(), in_limit);
+}
+
+/** Splits an input string into view substrings; cannot specify element return type */
+template typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT>
+constexpr auto word_split_view(const InputT& in_string, const SpaceT& in_whitespace) {
+	using MemberT = std::basic_string_view;
+	return word_split(in_string, in_whitespace);
+}
+
+/** Splits an input string into view substring pair */
+template
+constexpr auto word_split_once_view(const InputT& in_string, const SpaceT& in_whitespace) {
+	// Using a static assertion instead of `auto` for inputs, because this is the only place it would be used, and it'd
+	// require -fconcepts which isn't currently used. Replace inputs with `auto` later and remove template parameters
+	// if -fconcepts is ever added.
+	static_assert(sizeof...(NothingT) == 0, "word_split_once_view does not accept template parameters");
+	using MemberT = std::basic_string_view;
+	return word_split_once(in_string, in_whitespace);
+}
+
+/** Splits an input string into a specified number of view substrings */
+template typename ContainerT = std::vector, typename... ContainerArgsT,
+	typename InputT, typename SpaceT>
+constexpr auto word_split_n_view(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) {
+	using MemberT = std::basic_string_view;
+	return word_split_n(in_string, in_whitespace, in_limit);
+}
+
+} // namespace jessilib
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 19c7dae..54c966a 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Setup source files
 set(SOURCE_FILES
-	timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp)
+	timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp)
 
 # Setup gtest
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
diff --git a/src/test/test_split.hpp b/src/test/test_split.hpp
index 8900cfa..3dccf1b 100644
--- a/src/test/test_split.hpp
+++ b/src/test/test_split.hpp
@@ -25,18 +25,31 @@ template
 constexpr T default_delim{};
 
-template>
-ResultT make_word(size_t length = 8, T delim = default_delim) {
+// Builds a word of `length` characters in which no character is contained in `delim`
+template, typename DelimT,
+	typename std::enable_if_t>* = nullptr>
+ResultT make_word(DelimT delim, size_t length = 8) {
 	ResultT result;
 
 	if (length == 0) {
-		return {};
+		return result;
+	}
+
+	// With no delimiter given, fall back to default_delim so that delim.back() below is valid
+	if (delim.size() == 0) {
+		delim.push_back(default_delim);
+	}
+
+	// Add initial character
+	{
+		auto chr = delim.back() + 1;
+		// Keep incrementing until chr is not one of the delimiter characters
+		while (std::find(delim.begin(), delim.end(), chr) != delim.end()) {
+			++chr;
+		}
+		result.push_back(chr);
 	}
 
-	result.push_back(delim + 1);
 	while (result.size() < length) {
 		auto chr = result.back() + 1;
-		if (chr == delim) {
+		while (std::find(delim.begin(), delim.end(), chr) != delim.end()) {
 			++chr;
 		}
 		result.push_back(chr);
@@ -50,6 +63,11 @@ ResultT make_word(size_t length = 8, T delim = default_delim) {
 	return result;
 }
 
+// Single-delimiter convenience overload; wraps `delim` in a string and forwards to the overload above
+template>
+ResultT make_word(size_t length = 8, T delim = default_delim) {
+	return make_word(std::basic_string{ delim }, length);
+}
+
 template>
 ResultT make_delim_long(size_t length = 8, T in_delim = default_delim) {
 	// in this context, in_delim should be whatever was previously passed to make_word
@@ -74,17 +92,17 @@ struct RandomTestData {
 			: m_fixed_word_count{ in_fixed_word_count },
 			m_fixed_word_length{ in_fixed_word_length } {
 		m_delim.insert(m_delim.end(), in_delim);
-		operator()();
+		populate();
 	}
 
 	RandomTestData(StringT in_delim, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
 			: m_delim{ in_delim },
 			m_fixed_word_count{ in_fixed_word_count },
 			m_fixed_word_length{ in_fixed_word_length } {
-		operator()();
+		populate();
 	}
 
-	void operator()() {
+	void populate() {
 		m_tokens.clear();
 		m_str.clear();
 
@@ -133,3 +151,70 @@ struct RandomTestData {
 	StringT m_str;
 	std::vector m_tokens;
 };
+
+// Test data for the word_split tests: a generated string of words, each followed by m_delim, together with
+// the expected words and the offset within m_str at which each word starts
+template>
+struct RandomWordTestData {
+	RandomWordTestData(T in_delim = default_delim, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
+		: m_fixed_word_count{ in_fixed_word_count },
+		m_fixed_word_length{ in_fixed_word_length } {
+		m_delim.insert(m_delim.end(), in_delim);
+		populate();
+	}
+
+	RandomWordTestData(StringT in_delim, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
+		: m_delim{ in_delim },
+		m_fixed_word_count{ in_fixed_word_count },
+		m_fixed_word_length{ in_fixed_word_length } {
+		populate();
+	}
+
+	// Regenerates m_str, m_tokens and m_token_indexes; a fixed count/length of 0 means "pick one at random"
+	void populate() {
+		m_tokens.clear();
+		m_str.clear();
+
+		// Seeded from the system clock, so the generated data differs from run to run
+		std::mt19937 randgen(static_cast(std::chrono::system_clock::now().time_since_epoch().count()));
+		std::uniform_int_distribution word_count_distribution(5, 64);
+		std::uniform_int_distribution word_length_distribution(0, 16);
+
+		auto word_count = m_fixed_word_count;
+		if (word_count == 0) {
+			word_count = word_count_distribution(randgen);
+		}
+
+		while (m_tokens.size() < word_count) {
+			auto word_length = m_fixed_word_length;
+			if (word_length == 0) {
+				word_length = word_length_distribution(randgen);
+			}
+			m_tokens.push_back(make_word(m_delim, word_length));
+			if (m_tokens.back().empty()) {
+				// Empty words are not expected tokens; only the delimiter below is appended for them
+				m_tokens.pop_back();
+			}
+			else {
+				// Record where this word starts before appending it
+				m_token_indexes.push_back(m_str.size());
+				m_str.insert(m_str.end(), m_tokens.back().begin(), m_tokens.back().end());
+			}
+			m_str.insert(m_str.end(), m_delim.begin(), m_delim.end());
+		}
+	}
+
+	// Returns m_str from the start of token number `in_times_split` through the end of m_str, or an empty
+	// string when there is no such token
+	StringT get_remainder(size_t in_times_split) {
+		StringT result;
+		if (in_times_split < m_tokens.size()) {
+			auto index = m_token_indexes[in_times_split];
+			result.insert(result.end(), m_str.begin() + index, m_str.end());
+		}
+
+		return result;
+	}
+
+	// Inputs
+	StringT m_delim;
+	size_t m_fixed_word_count{};
+	size_t m_fixed_word_length{};
+
+	// Outputs
+	StringT m_str;
+	std::vector m_tokens;
+	std::vector m_token_indexes;
+};
diff --git a/src/test/word_split.cpp b/src/test/word_split.cpp
new file mode 100644
index 0000000..1e8e9de
--- /dev/null
+++ b/src/test/word_split.cpp
@@ -0,0 +1,280 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James
+ */
+
+#include "jessilib/word_split.hpp"
+#include "test_split.hpp"
+
+using namespace jessilib;
+using namespace std::literals;
+
+// Splitting a space-padded sentence on ' ' yields exactly its four words
+TEST(jessi, lazy) {
+	std::string sentence = " this is a sentence ";
+	auto split_result = word_split(sentence, ' ');
+	EXPECT_EQ(split_result.size(), 4);
+	EXPECT_EQ(split_result[0], "this");
+	EXPECT_EQ(split_result[1], "is");
+	EXPECT_EQ(split_result[2], "a");
+	EXPECT_EQ(split_result[3], "sentence");
+}
+
+// Character types every typed test suite below is instantiated with
+using char_types = ::testing::Types;
+
+// Empty fixtures; they exist only to give each typed test suite its own name
+template
+class WordSplitSVTest : public ::testing::Test {
+public:
+};
+TYPED_TEST_SUITE(WordSplitSVTest, char_types);
+
+template
+class WordSplitStringTest : public ::testing::Test {
+public:
+};
+TYPED_TEST_SUITE(WordSplitStringTest, char_types);
+
+template
+class WordSplitOnceTest : public ::testing::Test {
+public:
+};
+TYPED_TEST_SUITE(WordSplitOnceTest, char_types);
+
+template
+class WordSplitNTest : public ::testing::Test {
+public:
+};
+TYPED_TEST_SUITE(WordSplitNTest, char_types);
+
+/** WordSplitSVTest: word_split on string_view inputs */
+
+// An empty input produces an empty result
+TYPED_TEST(WordSplitSVTest, empty) {
+	std::basic_string_view empty;
+	std::vector> split_result = word_split(empty, default_delim);
+	EXPECT_TRUE(split_result.empty());
+}
+
+// A single word without any delimiter is returned as one 8-element token
+TYPED_TEST(WordSplitSVTest, single_word) {
+	std::basic_string_view single_word = make_word_view();
+	std::vector> split_result = word_split(single_word, default_delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+// A trailing delimiter does not add a token
+TYPED_TEST(WordSplitSVTest, single_word_trailing_delim) {
+	auto word = make_word();
+	word += default_delim;
+	std::basic_string_view single_word = word;
+	std::vector> split_result = word_split(single_word, default_delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+// A leading delimiter does not add a token
+TYPED_TEST(WordSplitSVTest, single_word_prefix_delim) {
+	std::basic_string word;
+	word += default_delim;
+	word += make_word();
+	std::basic_string_view single_word = word;
+	std::vector> split_result = word_split(single_word, default_delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+// Delimiters on both sides of the word do not add tokens
+TYPED_TEST(WordSplitSVTest, single_word_surround_delim) {
+	std::basic_string word;
+	word += default_delim;
+	word += make_word();
+	word += default_delim;
+	std::basic_string_view single_word = word;
+	std::vector> split_result = word_split(single_word, default_delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+// Two words separated by one delimiter produce two tokens
+TYPED_TEST(WordSplitSVTest, two_words) {
+	auto word = make_word();
+	word += default_delim;
+	word += make_word();
+	std::basic_string_view words = word;
+	std::vector> split_result = word_split(words, default_delim);
+	EXPECT_EQ(split_result.size(), 2);
+	EXPECT_EQ(split_result[0].size(), 8);
+	EXPECT_EQ(split_result[1].size(), 8);
+}
+
+// Words of length 3, 5 and 9; the doubled delimiter between the last two produces no empty token
+TYPED_TEST(WordSplitSVTest, three_words) {
+	auto word = make_word(3);
+	word += default_delim;
+	word += make_word(5);
+	word += default_delim;
+	word += default_delim;
+	word += make_word(9);
+	std::basic_string_view words = word;
+	std::vector> split_result = word_split(words, default_delim);
+	EXPECT_EQ(split_result.size(), 3);
+	EXPECT_EQ(split_result[0].size(), 3);
+	EXPECT_EQ(split_result[1].size(), 5);
+	EXPECT_EQ(split_result[2].size(), 9);
+}
+
+/** WordSplitOnceTest */
+
+// `first` is the first generated word, `second` is everything from the second word to the end of the input
+TYPED_TEST(WordSplitOnceTest, random) {
+	RandomWordTestData data{};
+	std::pair, std::basic_string> split_result = word_split_once(data.m_str, default_delim);
+
+	EXPECT_EQ(split_result.first, data.m_tokens[0]);
+	EXPECT_EQ(split_result.second, data.get_remainder(1));
+}
+
+// Same expectations as above, with the result pair declared as a pair of vector_type
+TYPED_TEST(WordSplitOnceTest, random_vector) {
+	using vector_type = std::vector;
+	RandomWordTestData data{};
+	std::pair split_result = word_split_once(data.m_str, default_delim);
+
+	EXPECT_EQ(split_result.first, data.m_tokens[0]);
+	EXPECT_EQ(split_result.second, data.get_remainder(1));
+}
+
+// Same expectations as above, going through word_split_once_view
+TYPED_TEST(WordSplitOnceTest, random_view) {
+	RandomWordTestData data{};
+	std::pair, std::basic_string_view> split_result = word_split_once_view(data.m_str, default_delim);
+
+	EXPECT_EQ(split_result.first, data.m_tokens[0]);
+	EXPECT_EQ(split_result.second, data.get_remainder(1));
+}
+
+/** WordSplitNTest */
+
+TYPED_TEST(WordSplitNTest, random) {
+	RandomWordTestData data{};
+	constexpr size_t n = 4;
+	std::vector> split_result = word_split_n(data.m_str, default_delim, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	EXPECT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	// The last entry is the unsplit remainder of the input
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+TYPED_TEST(WordSplitNTest, random_vector) {
+	RandomWordTestData> data{};
+	constexpr size_t n = 4;
+	std::vector> split_result = word_split_n>(data.m_str, default_delim, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	EXPECT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	// The last entry is the unsplit remainder of the input
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+TYPED_TEST(WordSplitNTest, random_view) {
+	RandomWordTestData data{};
+	constexpr size_t n = 4;
+	std::vector> split_result = word_split_n_view(data.m_str, default_delim, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	EXPECT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	// The last entry is the unsplit remainder of the input
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+/** std::string word_split test, really just testing compilation and returned types */
+
+TYPED_TEST(WordSplitStringTest, empty) {
+	std::basic_string empty;
+	std::vector split_result = word_split(empty, default_delim);
+	EXPECT_TRUE(split_result.empty());
+}
+
+// Same as `empty`, with the whitespace passed as the value returned by make_delim_long(8)
+TYPED_TEST(WordSplitStringTest, empty_long) {
+	std::basic_string empty;
+	auto delim = make_delim_long(8);
+	std::vector split_result = word_split(empty, delim);
+	EXPECT_TRUE(split_result.empty());
+}
+
+TYPED_TEST(WordSplitStringTest, single_word) {
+	std::basic_string single_word = make_word();
+	std::vector split_result = word_split(single_word, default_delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+TYPED_TEST(WordSplitStringTest, single_word_long) {
+	auto delim = make_delim_long(8);
+	std::basic_string single_word = make_word(delim);
+	std::vector split_result = word_split(single_word, delim);
+	EXPECT_EQ(split_result.size(), 1);
+	EXPECT_EQ(split_result[0].size(), 8);
+}
+
+// The split result must equal the list of generated words
+TYPED_TEST(WordSplitStringTest, random) {
+	RandomWordTestData data{};
+	std::vector> split_result = word_split(data.m_str, default_delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+TYPED_TEST(WordSplitStringTest, random_long) {
+	auto delim = make_delim_long(8);
+	RandomWordTestData data{ delim };
+	std::vector> split_result = word_split(data.m_str, delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+TYPED_TEST(WordSplitStringTest, random_vector) {
+	RandomWordTestData> data{};
+	std::vector> split_result = word_split>(data.m_str, default_delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+// Extra delimiters appended to the input must not change the tokens
+TYPED_TEST(WordSplitStringTest, random_long_trailing_delim) {
+	auto delim = make_delim_long(8);
+	RandomWordTestData data{ delim };
+	data.m_str += delim;
+	std::vector> split_result = word_split(data.m_str, delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+// Extra delimiters prepended to the input must not change the tokens
+TYPED_TEST(WordSplitStringTest, random_long_prefix_delim) {
+	auto delim = make_delim_long(8);
+	RandomWordTestData data{ delim };
+	data.m_str = delim + data.m_str;
+	std::vector> split_result = word_split(data.m_str, delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+TYPED_TEST(WordSplitStringTest, random_long_trailing_two_delim) {
+	auto delim = make_delim_long(8);
+	RandomWordTestData data{ delim };
+	data.m_str += delim + delim;
+	std::vector> split_result = word_split(data.m_str, delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}
+
+TYPED_TEST(WordSplitStringTest, random_long_prefix_two_delim) {
+	auto delim = make_delim_long(8);
+	RandomWordTestData data{ delim };
+	data.m_str = delim + delim + data.m_str;
+	std::vector> split_result = word_split(data.m_str, delim);
+	EXPECT_EQ(split_result, data.m_tokens);
+}