Browse Source

Add split_n & split_once; some tests

master
Jessica James 3 years ago
parent
commit
91bd82777f
  1. 128
      src/include/split.hpp
  2. 138
      src/test/split.cpp

128
src/include/split.hpp

@ -22,17 +22,42 @@
#include <vector>
namespace jessilib {
namespace impl {

/** Exposes the leading type of a template argument list as `first_type`; any further types are ignored */
template<typename HeadT, typename...>
struct first_arg {
	using first_type = HeadT;
};

/**
 * Type-computation helpers, intended to be used through decltype (see split_container_helper_t).
 * They are functions rather than alias/class templates because both `using` and classes require
 * parameter packs to be at the end of the parameter list.
 *
 * This overload is selected when no explicit container arguments were given:
 * the container is instantiated with the default member type.
 */
template<template<typename...> typename ContainerT, typename DefaultMemberT, typename... ArgsT,
	std::enable_if_t<sizeof...(ArgsT) == 0>* = nullptr>
constexpr ContainerT<DefaultMemberT> split_container_helper_f() {
	return {};
}

/**
 * Selected when explicit container arguments were given: the container is instantiated
 * with exactly those arguments and the default member type is discarded.
 */
template<template<typename...> typename ContainerT, typename DefaultMemberT, typename... ArgsT,
	std::enable_if_t<sizeof...(ArgsT) != 0>* = nullptr>
constexpr ContainerT<ArgsT...> split_container_helper_f() {
	return {};
}

/** Container type produced by split_container_helper_f for the given container template and arguments */
template<template<typename...> typename ContainerT, typename... ArgsT>
using split_container_helper_t = decltype(split_container_helper_f<ContainerT, ArgsT...>());

} // namespace impl
/** Splits an input string into substrings */
template<typename InStringT, typename ResultT = std::vector<InStringT>>
constexpr ResultT split(const InStringT& in_string, typename InStringT::value_type in_delim) {
ResultT result;
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
constexpr auto split(const InputT& in_string, typename InputT::value_type in_delim) {
using MemberT = typename impl::first_arg<ContainerArgsT..., std::basic_string<typename InputT::value_type>>::first_type;
using container_type = impl::split_container_helper_t<ContainerT, MemberT, ContainerArgsT...>;
container_type result;
// Nothing to return
if (in_string.empty()) {
return result;
}
// If only MSVC didn't suck, we could use begin() and end()...
auto begin = in_string.data();
auto end = in_string.data() + in_string.size();
for (auto itr = begin; itr != end; ++itr) {
@ -43,21 +68,100 @@ constexpr ResultT split(const InStringT& in_string, typename InStringT::value_ty
}
}
// begin == end only if last character was in_delim
if (begin == end) {
result.emplace_back();
// Push final token to the end; may be empty
result.emplace_back(begin, end - begin);
return result;
}
/**
 * Splits an input string into 2 substrings at the first occurrence of a delimiter; the delimiter
 * itself is omitted from both halves. Returns:
 * - a pair of default-constructed members if `in_string` is empty,
 * - otherwise, if the delimiter is not present, a pair whose `first` member is equal to `in_string`
 *   and whose `second` member is empty,
 * - otherwise, a pair split at the first instance of the delimiter.
 * Complexity: O(in_string.size())
 *
 * @tparam InStringT String type to split; must provide empty(), data(), size() and value_type
 * @tparam ResultMemberT Type of each half; must be constructible from (pointer, length)
 * @param in_string string to split
 * @param in_delim delimiter to split on
 * @return A pair representing `in_string` split at a delimiter, with the first half stored in
 * `first` and the second half stored in `second`
 */
template<typename InStringT, typename ResultMemberT = InStringT>
constexpr std::pair<ResultMemberT, ResultMemberT> split_once(const InStringT& in_string, typename InStringT::value_type in_delim) {
	std::pair<ResultMemberT, ResultMemberT> result;

	// Nothing in, nothing out
	if (in_string.empty()) {
		return result;
	}

	auto begin = in_string.data();
	auto end = begin + in_string.size();
	for (auto itr = begin; itr != end; ++itr) {
		if (*itr == in_delim) {
			// in_delim found; split upon it. Members are built from (pointer, length), the same form
			// split and split_n use, so view members work without needing an iterator-pair constructor.
			auto tail = itr + 1;
			result.first = ResultMemberT(begin, static_cast<std::size_t>(itr - begin));
			result.second = ResultMemberT(tail, static_cast<std::size_t>(end - tail));
			return result;
		}
	}

	// in_delim not found; the whole input goes to `first`, `second` stays empty
	result.first = ResultMemberT(begin, static_cast<std::size_t>(end - begin));
	return result;
}
/**
* Splits a string up to a specified number of times and returns the result
*
* @tparam ContainerT Container type to store the results in
* @tparam ContainerArgsT Optional template parameters for ContainerT
* @param in_string String to split
* @param in_delim Delimiter to split upon
* @param in_limit Maximum number of times to split
* @return ResultT containing to up `in_limit` + 1 substrings
*/
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
constexpr auto split_n(const InputT& in_string, typename InputT::value_type in_delim, size_t in_limit) {
using MemberT = typename impl::first_arg<ContainerArgsT..., std::basic_string<typename InputT::value_type>>::first_type;
using container_type = impl::split_container_helper_t<ContainerT, MemberT, ContainerArgsT...>;
container_type result;
if (in_string.empty()) {
return result;
}
auto begin = in_string.data();
auto end = in_string.data() + in_string.size();
for (auto itr = begin; itr != end && in_limit != 0; ++itr) {
if (*itr == in_delim) {
// Push token to result
result.emplace_back(begin, itr - begin);
begin = itr + 1;
--in_limit;
}
}
// Push final token to the end; may be empty
result.emplace_back(begin, end - begin);
return result;
}
/**
 * Splits an input string into view substrings; cannot specify element return type.
 *
 * Thin forwarder to `split` that passes `MemberT` (by default a std::basic_string_view over the
 * input's character type) as the container's element type.
 * NOTE(review): the returned views presumably point into `in_string`'s storage, so the caller
 * would have to keep it alive while the result is in use -- confirm against `split`.
 *
 * @tparam ContainerT Container template to store the results in
 * @tparam ContainerArgsT Additional template arguments placed after the element type in ContainerT
 * @param in_string String to split
 * @param in_delim Delimiter to split upon
 * @return ContainerT<MemberT, ContainerArgsT...> holding the substrings
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InStringT, typename MemberT = std::basic_string_view<typename InStringT::value_type>>
constexpr ContainerT<MemberT, ContainerArgsT...> split_view(const InStringT& in_string, typename InStringT::value_type in_delim) {
return split<ContainerT, MemberT, ContainerArgsT...>(in_string, in_delim);
}
/** Splits an input string into view substrings (same as split, but different default return type) */
template<typename InStringT, typename ResultT = std::vector<std::basic_string_view<typename InStringT::value_type>>>
constexpr ResultT split_view(const InStringT& in_string, typename InStringT::value_type in_delim) {
return split<InStringT, ResultT>(in_string, in_delim);
template<typename InStringT, typename ResultT = std::basic_string_view<typename InStringT::value_type>>
constexpr std::pair<ResultT, ResultT> split_once_view(const InStringT& in_string, typename InStringT::value_type in_delim) {
return split_once<InStringT, ResultT>(in_string, in_delim);
}
/**
 * Splits an input string into a specified number of view substrings.
 *
 * Thin forwarder to `split_n` that passes `MemberT` (by default a std::basic_string_view over the
 * input's character type) as the container's element type.
 *
 * @tparam ContainerT Container template to store the results in
 * @tparam ContainerArgsT Additional template arguments forwarded after the element type
 * @param in_string String to split
 * @param in_delim Delimiter to split upon
 * @param in_limit Maximum number of times to split
 * @return Whatever `split_n<ContainerT, MemberT, ContainerArgsT...>` returns for these arguments
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InStringT, typename MemberT = std::basic_string_view<typename InStringT::value_type>>
constexpr auto split_n_view(const InStringT& in_string, typename InStringT::value_type in_delim, size_t in_limit) {
return split_n<ContainerT, MemberT, ContainerArgsT...>(in_string, in_delim, in_limit);
}
} // namespace jessilib

138
src/test/split.cpp

@ -20,6 +20,7 @@
#include <cassert>
#include <deque>
#include <list>
#include <random>
#include "test.hpp"
using namespace jessilib;
@ -54,12 +55,24 @@ public:
};
TYPED_TEST_SUITE(SplitViewStringTest, char_types);
/** Empty typed-test fixture for the split_once tests; registered for every type in `char_types` */
template<typename T>
class SplitOnceTest : public ::testing::Test {
public:
};
TYPED_TEST_SUITE(SplitOnceTest, char_types);
/** Empty typed-test fixture for the split_n tests; registered for every type in `char_types` */
template<typename T>
class SplitNTest : public ::testing::Test {
public:
};
TYPED_TEST_SUITE(SplitNTest, char_types);
template<typename T>
std::basic_string<T> make_word(size_t length = 8, T delim = static_cast<T>(0)) {
std::basic_string<T> result;
if (length == 0) {
throw std::runtime_error{ "length == 0" };
return {};
}
result.push_back(delim + 1);
@ -86,15 +99,58 @@ std::basic_string_view<T> make_word_view(size_t length = 8, T delim = static_cas
return s_result;
}
/**
 * Randomly generated input for the split tests: a list of random words (`m_tokens`) and the same
 * words joined with `m_delim` (`m_str`). Data is generated on construction and can be
 * regenerated by invoking the object.
 *
 * The generator is seeded from the system clock, so each run produces different data.
 */
template<typename T>
struct RandomTestData {
	/** @param in_delim delimiter placed between the generated words */
	RandomTestData(T in_delim)
		: m_delim{ in_delim } {
		operator()();
	}

	/** Discards the current data and generates 5 to 64 new words of 0 to 16 characters each */
	void operator()() {
		m_tokens.clear();
		m_str.clear();

		std::mt19937 randgen(static_cast<unsigned int>(std::chrono::system_clock::now().time_since_epoch().count()));
		std::uniform_int_distribution<uint32_t> word_count_distribution(5, 64);
		std::uniform_int_distribution<uint32_t> word_length_distribution(0, 16);

		const auto random_words = word_count_distribution(randgen);
		m_tokens.reserve(random_words);
		while (m_tokens.size() < random_words) {
			// Delimiter goes between words only; never before the first or after the last
			if (!m_tokens.empty()) {
				m_str += m_delim;
			}

			// Hand make_word the delimiter actually in use rather than its default.
			// NOTE(review): assumes make_word uses its delim argument to keep that character out
			// of the generated word -- its full body is not visible here, confirm.
			m_tokens.push_back(make_word<T>(word_length_distribution(randgen), m_delim));
			m_str += m_tokens.back();
		}
	}

	/**
	 * Re-joins the tokens from index `in_times_split` onwards with the delimiter.
	 *
	 * @param in_times_split index of the first token to include
	 * @return the joined tokens; empty if `in_times_split` is not a valid token index
	 */
	std::basic_string<T> get_remainder(size_t in_times_split) const {
		std::basic_string<T> result;
		for (size_t index = in_times_split; index < m_tokens.size(); ++index) {
			if (index != in_times_split) {
				result += m_delim;
			}
			result += m_tokens[index];
		}

		return result;
	}

	T m_delim;
	std::basic_string<T> m_str;
	std::vector<std::basic_string<T>> m_tokens;
};
// Splitting an empty string_view yields an empty container; split's default element type is
// an owning std::basic_string, which is what the declared result type checks.
TYPED_TEST(SplitSVTest, empty) {
	std::basic_string_view<TypeParam> empty;
	std::vector<std::basic_string<TypeParam>> split_result = split(empty, static_cast<TypeParam>(0));
	EXPECT_TRUE(split_result.empty());
}
// A single word without any delimiter comes back as exactly one token of the same length.
TYPED_TEST(SplitSVTest, single_word) {
	std::basic_string_view<TypeParam> single_word = make_word_view<TypeParam>();
	std::vector<std::basic_string<TypeParam>> split_result = split(single_word, static_cast<TypeParam>(0));

	// Fatal assertion: the element access below is only valid if a token was returned
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
@ -103,7 +159,7 @@ TYPED_TEST(SplitSVTest, single_word_trailing_delim) {
auto word = make_word<TypeParam>();
word += static_cast<TypeParam>(0);
std::basic_string_view<TypeParam> single_word = word;
std::vector<decltype(single_word)> split_result = split(single_word, static_cast<TypeParam>(0));
std::vector<std::basic_string<TypeParam>> split_result = split(single_word, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 2);
EXPECT_EQ(split_result[0].size(), 8);
EXPECT_EQ(split_result[1].size(), 0);
@ -114,7 +170,7 @@ TYPED_TEST(SplitSVTest, single_word_prefix_delim) {
word += static_cast<TypeParam>(0);
word += make_word<TypeParam>();
std::basic_string_view<TypeParam> single_word = word;
std::vector<decltype(single_word)> split_result = split(single_word, static_cast<TypeParam>(0));
std::vector<std::basic_string<TypeParam>> split_result = split(single_word, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 2);
EXPECT_EQ(split_result[0].size(), 0);
EXPECT_EQ(split_result[1].size(), 8);
@ -126,7 +182,7 @@ TYPED_TEST(SplitSVTest, single_word_surround_delim) {
word += make_word<TypeParam>();
word += static_cast<TypeParam>(0);
std::basic_string_view<TypeParam> single_word = word;
std::vector<decltype(single_word)> split_result = split(single_word, static_cast<TypeParam>(0));
std::vector<std::basic_string<TypeParam>> split_result = split(single_word, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 3);
EXPECT_EQ(split_result[0].size(), 0);
EXPECT_EQ(split_result[1].size(), 8);
@ -138,7 +194,7 @@ TYPED_TEST(SplitSVTest, two_words) {
word += static_cast<TypeParam>(0);
word += make_word<TypeParam>();
std::basic_string_view<TypeParam> words = word;
std::vector<decltype(words)> split_result = split(words, static_cast<TypeParam>(0));
std::vector<std::basic_string<TypeParam>> split_result = split(words, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 2);
EXPECT_EQ(split_result[0].size(), 8);
EXPECT_EQ(split_result[1].size(), 8);
@ -151,14 +207,62 @@ TYPED_TEST(SplitSVTest, three_words) {
word += static_cast<TypeParam>(0);
word += make_word<TypeParam>(9);
std::basic_string_view<TypeParam> words = word;
std::vector<decltype(words)> split_result = split(words, static_cast<TypeParam>(0));
std::vector<std::basic_string<TypeParam>> split_result = split(words, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 3);
EXPECT_EQ(split_result[0].size(), 3);
EXPECT_EQ(split_result[1].size(), 5);
EXPECT_EQ(split_result[2].size(), 9);
}
/** std::string split test, really just testing compilation */
/** SplitOnceTest */
// split_once on random data with owning results: the first half must equal the first generated
// token, and the second half must equal the remaining tokens re-joined with the delimiter.
TYPED_TEST(SplitOnceTest, random) {
RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
std::pair<std::basic_string<TypeParam>, std::basic_string<TypeParam>> split_result = split_once(data.m_str, data.m_delim);
EXPECT_EQ(split_result.first, data.m_tokens[0]);
EXPECT_EQ(split_result.second, data.get_remainder(1));
}
// Same expectations as the owning split_once test, but through split_once_view; the declared
// result type checks that a pair of std::basic_string_view is returned.
TYPED_TEST(SplitOnceTest, random_view) {
RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
std::pair<std::basic_string_view<TypeParam>, std::basic_string_view<TypeParam>> split_result = split_once_view(data.m_str, data.m_delim);
EXPECT_EQ(split_result.first, data.m_tokens[0]);
EXPECT_EQ(split_result.second, data.get_remainder(1));
}
/** SplitNTest */
// split_n with a limit of n on random data: the first n results must equal the first n generated
// tokens, and the last result must be the remaining tokens re-joined with the delimiter.
TYPED_TEST(SplitNTest, random) {
	RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
	constexpr size_t n = 4;
	std::vector<std::basic_string<TypeParam>> split_result = split_n(data.m_str, data.m_delim, n);

	// Tokens shall be same up until last one (n + 1).
	// Fatal assertion: the indexing below is out of bounds if fewer elements came back.
	ASSERT_EQ(split_result.size(), n + 1);
	for (size_t index = 0; index != n; ++index) {
		EXPECT_EQ(split_result[index], data.m_tokens[index]);
	}

	EXPECT_EQ(split_result[n], data.get_remainder(n));
}
// Same expectations as the owning split_n test, but through split_n_view; the declared result
// type checks that a vector of std::basic_string_view is returned.
TYPED_TEST(SplitNTest, random_view) {
	RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
	constexpr size_t n = 4;
	std::vector<std::basic_string_view<TypeParam>> split_result = split_n_view(data.m_str, data.m_delim, n);

	// Tokens shall be same up until last one (n + 1).
	// Fatal assertion: the indexing below is out of bounds if fewer elements came back.
	ASSERT_EQ(split_result.size(), n + 1);
	for (size_t index = 0; index != n; ++index) {
		EXPECT_EQ(split_result[index], data.m_tokens[index]);
	}

	EXPECT_EQ(split_result[n], data.get_remainder(n));
}
/** std::string split test, really just testing compilation and returned types */
TYPED_TEST(SplitSringTest, empty) {
std::basic_string<TypeParam> empty;
@ -173,17 +277,23 @@ TYPED_TEST(SplitSringTest, single_word) {
EXPECT_EQ(split_result[0].size(), 8);
}
// Splitting the randomly generated string on its delimiter must reproduce the generated token
// list exactly (compared as whole containers).
TYPED_TEST(SplitSringTest, random) {
RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
auto split_result = split(data.m_str, data.m_delim);
EXPECT_EQ(split_result, data.m_tokens);
}
/** Some basic tests for compiling with different containers */
// Compilation check for a non-default container: split<std::deque> must return a std::deque of
// owning strings, and an empty input must leave it empty.
TYPED_TEST(SplitSVTest, empty_deque) {
	std::basic_string_view<TypeParam> empty;
	std::deque<std::basic_string<TypeParam>> split_result = split<std::deque>(empty, static_cast<TypeParam>(0));
	EXPECT_TRUE(split_result.empty());
}
// Compilation check for a non-default container: split<std::list> must return a std::list of
// owning strings, and an empty input must leave it empty.
TYPED_TEST(SplitSVTest, empty_list) {
	std::basic_string_view<TypeParam> empty;
	std::list<std::basic_string<TypeParam>> split_result = split<std::list>(empty, static_cast<TypeParam>(0));
	EXPECT_TRUE(split_result.empty());
}
@ -212,3 +322,9 @@ TYPED_TEST(SplitViewStringTest, single_word) {
std::vector<std::basic_string_view<TypeParam>> split_result = split_view(single_word, static_cast<TypeParam>(0));
EXPECT_EQ(split_result.size(), 1);
}
// split_view on random data: only the number of returned substrings is checked against the
// number of generated tokens; the substring contents are not compared here.
TYPED_TEST(SplitViewStringTest, random) {
RandomTestData<TypeParam> data{ static_cast<TypeParam>(0) };
auto split_result = split_view(data.m_str, data.m_delim);
EXPECT_EQ(split_result.size(), data.m_tokens.size());
}

Loading…
Cancel
Save