From 91bd82777fb6d7412dd7e98847ba984cfd319d5b Mon Sep 17 00:00:00 2001 From: Jessica James Date: Tue, 9 Nov 2021 20:35:37 -0600 Subject: [PATCH] Add split_n & split_once; some tests --- src/include/split.hpp | 128 +++++++++++++++++++++++++++++++++++---- src/test/split.cpp | 138 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 243 insertions(+), 23 deletions(-) diff --git a/src/include/split.hpp b/src/include/split.hpp index c79bbce..3e93b1e 100644 --- a/src/include/split.hpp +++ b/src/include/split.hpp @@ -22,17 +22,42 @@ #include namespace jessilib { +namespace impl { + +template +struct first_arg { + using first_type = FirstArgT; +}; + +// Using a function's return type because both `using` and classes require parameter packs be at the end +template typename ContainerT, typename DefaultMemberT, typename... ArgsT, + typename std::enable_if::type* = nullptr> +constexpr auto split_container_helper_f() -> ContainerT { + return {}; +}; + +template typename ContainerT, typename DefaultMemberT, typename... ArgsT, + typename std::enable_if::type* = nullptr> +constexpr auto split_container_helper_f() -> ContainerT { + return {}; +}; + +template typename ContainerT, typename... ArgsT> +using split_container_helper_t = decltype(split_container_helper_f()); + +} // namespace impl /** Splits an input string into substrings */ -template> -constexpr ResultT split(const InStringT& in_string, typename InStringT::value_type in_delim) { - ResultT result; +template typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT> +constexpr auto split(const InputT& in_string, typename InputT::value_type in_delim) { + using MemberT = typename impl::first_arg>::first_type; + using container_type = impl::split_container_helper_t; + container_type result; // Nothing to return if (in_string.empty()) { return result; } - // If only MSVC didn't suck, we could use begin() and end()... 
auto begin = in_string.data(); auto end = in_string.data() + in_string.size(); for (auto itr = begin; itr != end; ++itr) { @@ -43,21 +68,100 @@ constexpr ResultT split(const InStringT& in_string, typename InStringT::value_ty } } - // begin == end only if last character was in_delim - if (begin == end) { - result.emplace_back(); + // Push final token to the end; may be empty + result.emplace_back(begin, end - begin); + + return result; +} + +/** + * Splits an input string into 2 substrings at and omitting an input delimiter. Returns: + * An empty pair if in_string is empty, + * otherwise if the delimiter is not present, a pair whose `second` member is empty and `first` member is equal to `in_string`, + * otherwise, a pair split at first instance of the delimiter + * Complexity: O(in_string.size()) + * + * @param in_string string to split + * @param in_delim delimiter to split on + * @return A pair representing `in_string` split at a delimiter, with first half stored in `first` and second in `second` + */ +template +constexpr std::pair split_once(const InStringT& in_string, typename InStringT::value_type in_delim) { + // Nothing in, nothing out + std::pair result; + if (in_string.empty()) { + return result; } - else { - result.emplace_back(begin, end - begin); + + auto begin = in_string.data(); + auto end = in_string.data() + in_string.size(); + for (auto itr = begin; itr != end; ++itr) { + if (*itr == in_delim) { + // in_delim found; split upon it + result.first = ResultMemberT{ begin, itr }; + result.second = ResultMemberT{ itr + 1, end }; + return result; + } } + // in_delim not found + result.first = in_string; return result; } +/** + * Splits a string up to a specified number of times and returns the result + * + * @tparam ContainerT Container type to store the results in + * @tparam ContainerArgsT Optional template parameters for ContainerT + * @param in_string String to split + * @param in_delim Delimiter to split upon + * @param in_limit Maximum number of times 
to split + * @return ResultT containing up to `in_limit` + 1 substrings + */ +template typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT> +constexpr auto split_n(const InputT& in_string, typename InputT::value_type in_delim, size_t in_limit) { + using MemberT = typename impl::first_arg>::first_type; + using container_type = impl::split_container_helper_t; + + container_type result; + if (in_string.empty()) { + return result; + } + + auto begin = in_string.data(); + auto end = in_string.data() + in_string.size(); + for (auto itr = begin; itr != end && in_limit != 0; ++itr) { + if (*itr == in_delim) { + // Push token to result + result.emplace_back(begin, itr - begin); + begin = itr + 1; + --in_limit; + } + } + + // Push final token to the end; may be empty + result.emplace_back(begin, end - begin); + + return result; +} + +/** Splits an input string into view substrings; cannot specify element return type */ +template typename ContainerT = std::vector, typename... ContainerArgsT, typename InStringT, typename MemberT = std::basic_string_view> +constexpr ContainerT split_view(const InStringT& in_string, typename InStringT::value_type in_delim) { + return split(in_string, in_delim); +} + /** Splits an input string into view substrings (same as split, but different default return type) */ -template>> -constexpr ResultT split_view(const InStringT& in_string, typename InStringT::value_type in_delim) { - return split(in_string, in_delim); +template> +constexpr std::pair split_once_view(const InStringT& in_string, typename InStringT::value_type in_delim) { + return split_once(in_string, in_delim); +} + +/** Splits an input string into a specified number of view substrings */ +template typename ContainerT = std::vector, typename... 
ContainerArgsT, typename InStringT, typename MemberT = std::basic_string_view> +constexpr auto split_n_view(const InStringT& in_string, typename InStringT::value_type in_delim, size_t in_limit) { + return split_n(in_string, in_delim, in_limit); } } // namespace jessilib diff --git a/src/test/split.cpp b/src/test/split.cpp index ac367fc..1d59a0f 100644 --- a/src/test/split.cpp +++ b/src/test/split.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include "test.hpp" using namespace jessilib; @@ -54,12 +55,24 @@ public: }; TYPED_TEST_SUITE(SplitViewStringTest, char_types); +template +class SplitOnceTest : public ::testing::Test { +public: +}; +TYPED_TEST_SUITE(SplitOnceTest, char_types); + +template +class SplitNTest : public ::testing::Test { +public: +}; +TYPED_TEST_SUITE(SplitNTest, char_types); + template std::basic_string make_word(size_t length = 8, T delim = static_cast(0)) { std::basic_string result; if (length == 0) { - throw std::runtime_error{ "length == 0" }; + return {}; } result.push_back(delim + 1); @@ -86,15 +99,58 @@ std::basic_string_view make_word_view(size_t length = 8, T delim = static_cas return s_result; } +template +struct RandomTestData { + RandomTestData(T in_delim) + : m_delim{ in_delim } { + operator()(); + } + + void operator()() { + m_tokens.clear(); + m_str.clear(); + + std::mt19937 randgen(static_cast(std::chrono::system_clock::now().time_since_epoch().count())); + std::uniform_int_distribution word_count_distribution(5, 64); + std::uniform_int_distribution word_length_distribution(0, 16); + + auto random_words = word_count_distribution(randgen); + while (m_tokens.size() < random_words) { + m_tokens.push_back(make_word(word_length_distribution(randgen))); + m_str += m_tokens.back(); + if (m_tokens.size() < random_words) { + m_str += m_delim; + } + } + } + + std::basic_string get_remainder(size_t in_times_split) { + std::basic_string result; + while (in_times_split < m_tokens.size()) { + result += m_tokens[in_times_split]; + 
++in_times_split; + if (in_times_split < m_tokens.size()) { + result += m_delim; + } + } + + return result; + } + + T m_delim; + std::basic_string m_str; + std::vector> m_tokens; +}; + TYPED_TEST(SplitSVTest, empty) { std::basic_string_view empty; - std::vector split_result = split(empty, static_cast(0)); + std::vector> split_result = split(empty, static_cast(0)); EXPECT_TRUE(split_result.empty()); } TYPED_TEST(SplitSVTest, single_word) { std::basic_string_view single_word = make_word_view(); - std::vector split_result = split(single_word, static_cast(0)); + std::vector> split_result = split(single_word, static_cast(0)); EXPECT_EQ(split_result.size(), 1); EXPECT_EQ(split_result[0].size(), 8); } @@ -103,7 +159,7 @@ TYPED_TEST(SplitSVTest, single_word_trailing_delim) { auto word = make_word(); word += static_cast(0); std::basic_string_view single_word = word; - std::vector split_result = split(single_word, static_cast(0)); + std::vector> split_result = split(single_word, static_cast(0)); EXPECT_EQ(split_result.size(), 2); EXPECT_EQ(split_result[0].size(), 8); EXPECT_EQ(split_result[1].size(), 0); @@ -114,7 +170,7 @@ TYPED_TEST(SplitSVTest, single_word_prefix_delim) { word += static_cast(0); word += make_word(); std::basic_string_view single_word = word; - std::vector split_result = split(single_word, static_cast(0)); + std::vector> split_result = split(single_word, static_cast(0)); EXPECT_EQ(split_result.size(), 2); EXPECT_EQ(split_result[0].size(), 0); EXPECT_EQ(split_result[1].size(), 8); @@ -126,7 +182,7 @@ TYPED_TEST(SplitSVTest, single_word_surround_delim) { word += make_word(); word += static_cast(0); std::basic_string_view single_word = word; - std::vector split_result = split(single_word, static_cast(0)); + std::vector> split_result = split(single_word, static_cast(0)); EXPECT_EQ(split_result.size(), 3); EXPECT_EQ(split_result[0].size(), 0); EXPECT_EQ(split_result[1].size(), 8); @@ -138,7 +194,7 @@ TYPED_TEST(SplitSVTest, two_words) { word += static_cast(0); 
word += make_word(); std::basic_string_view words = word; - std::vector split_result = split(words, static_cast(0)); + std::vector> split_result = split(words, static_cast(0)); EXPECT_EQ(split_result.size(), 2); EXPECT_EQ(split_result[0].size(), 8); EXPECT_EQ(split_result[1].size(), 8); @@ -151,14 +207,62 @@ TYPED_TEST(SplitSVTest, three_words) { word += static_cast(0); word += make_word(9); std::basic_string_view words = word; - std::vector split_result = split(words, static_cast(0)); + std::vector> split_result = split(words, static_cast(0)); EXPECT_EQ(split_result.size(), 3); EXPECT_EQ(split_result[0].size(), 3); EXPECT_EQ(split_result[1].size(), 5); EXPECT_EQ(split_result[2].size(), 9); } -/** std::string split test, really just testing compilation */ +/** SplitOnceTest */ + +TYPED_TEST(SplitOnceTest, random) { + RandomTestData data{ static_cast(0) }; + std::pair, std::basic_string> split_result = split_once(data.m_str, data.m_delim); + + EXPECT_EQ(split_result.first, data.m_tokens[0]); + EXPECT_EQ(split_result.second, data.get_remainder(1)); +} + +TYPED_TEST(SplitOnceTest, random_view) { + RandomTestData data{ static_cast(0) }; + std::pair, std::basic_string_view> split_result = split_once_view(data.m_str, data.m_delim); + + EXPECT_EQ(split_result.first, data.m_tokens[0]); + EXPECT_EQ(split_result.second, data.get_remainder(1)); +} + +/** SplitNTest */ + +TYPED_TEST(SplitNTest, random) { + RandomTestData data{ static_cast(0) }; + constexpr size_t n = 4; + std::vector> split_result = split_n(data.m_str, data.m_delim, n); + + // Tokens shall be same up until last one (n + 1) + EXPECT_EQ(split_result.size(), n + 1); + for (size_t index = 0; index != n; ++index) { + EXPECT_EQ(split_result[index], data.m_tokens[index]); + } + + EXPECT_EQ(split_result[n], data.get_remainder(n)); +} + +TYPED_TEST(SplitNTest, random_view) { + RandomTestData data{ static_cast(0) }; + constexpr size_t n = 4; + std::vector> split_result = split_n_view(data.m_str, data.m_delim, n); + + // 
Tokens shall be same up until last one (n + 1) + EXPECT_EQ(split_result.size(), n + 1); + for (size_t index = 0; index != n; ++index) { + EXPECT_EQ(split_result[index], data.m_tokens[index]); + } + + EXPECT_EQ(split_result[n], data.get_remainder(n)); +} + +/** std::string split test, really just testing compilation and returned types */ TYPED_TEST(SplitSringTest, empty) { std::basic_string empty; @@ -173,17 +277,23 @@ TYPED_TEST(SplitSringTest, single_word) { EXPECT_EQ(split_result[0].size(), 8); } +TYPED_TEST(SplitSringTest, random) { + RandomTestData data{ static_cast(0) }; + auto split_result = split(data.m_str, data.m_delim); + EXPECT_EQ(split_result, data.m_tokens); +} + /** Some basic tests for compiling with different containers */ TYPED_TEST(SplitSVTest, empty_deque) { std::basic_string_view empty; - std::deque split_result = split>(empty, static_cast(0)); + std::deque> split_result = split(empty, static_cast(0)); EXPECT_TRUE(split_result.empty()); } TYPED_TEST(SplitSVTest, empty_list) { std::basic_string_view empty; - std::list split_result = split>(empty, static_cast(0)); + std::list> split_result = split(empty, static_cast(0)); EXPECT_TRUE(split_result.empty()); } @@ -212,3 +322,9 @@ TYPED_TEST(SplitViewStringTest, single_word) { std::vector> split_result = split_view(single_word, static_cast(0)); EXPECT_EQ(split_result.size(), 1); } + +TYPED_TEST(SplitViewStringTest, random) { + RandomTestData data{ static_cast(0) }; + auto split_result = split_view(data.m_str, data.m_delim); + EXPECT_EQ(split_result.size(), data.m_tokens.size()); +}