mirror of https://github.com/JAJames/jessilib.git
Jessica James
3 years ago
5 changed files with 893 additions and 10 deletions
@ -0,0 +1,519 @@ |
|||
/**
|
|||
* Copyright (C) 2021 Jessica James. |
|||
* |
|||
* Permission to use, copy, modify, and/or distribute this software for any |
|||
* purpose with or without fee is hereby granted, provided that the above |
|||
* copyright notice and this permission notice appear in all copies. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
|||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
|||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|||
* |
|||
* Written by Jessica James <jessica.aj@outlook.com> |
|||
*/ |
|||
|
|||
/**
|
|||
* @file word_split.hpp |
|||
* @author Jessica James |
|||
* |
|||
* Over-engineered and over-genericized versions of word_split, word_split_once, and word_split_n, with lots of syntactical sugar |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
|||
|
|||
namespace jessilib { |
|||
|
|||
/**
 * Resolves the word type and container type used by the word_split family
 *
 * Primary template, selected when no optional container arguments are given: each word is stored as a
 * std::basic_string of the element type, and the container is instantiated with that word type alone.
 */
template<template<typename...> typename ResultT, typename CharT, typename...>
struct word_split_defaults {
	using member_type = std::basic_string<CharT>;
	using container_type = ResultT<member_type>;
};

/**
 * Partial specialization, selected when at least one optional container argument is given: the first argument is
 * the word type, and it is forwarded to the container together with any remaining arguments.
 */
template<template<typename...> typename ResultT, typename CharT, typename WordT, typename... ExtraArgsT>
struct word_split_defaults<ResultT, CharT, WordT, ExtraArgsT...> {
	using member_type = WordT;
	using container_type = ResultT<WordT, ExtraArgsT...>;
};
|||
|
|||
/**
 * Builds a result member from a range when MemberT cannot be constructed from an iterator pair
 *
 * Intended for pointer + length types such as std::basic_string_view. The member is constructed from the address
 * of the first element and the element count, so this assumes the range is contiguous in memory.
 *
 * @tparam MemberT Type to build; must be constructible from { pointer, size_t } and from {}
 * @param in_itr Start of the range
 * @param in_end End of the range
 * @return MemberT covering [in_itr, in_end); for a non-pointer iterator an empty range yields a
 * default-constructed MemberT
 */
template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<!std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
constexpr MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
	if constexpr (std::is_pointer_v<ItrT>) {
		// The iterator is already an address, so nothing needs to be dereferenced (even for an empty range)
		return { in_itr, static_cast<size_t>(in_end - in_itr) };
	}
	else {
		if (in_itr == in_end) {
			// An end iterator cannot be dereferenced to obtain an address
			return {};
		}

		return { &*in_itr, static_cast<size_t>(in_end - in_itr) };
	}
}
|||
|
|||
/**
 * Builds a result member from a range when MemberT can be constructed from an iterator pair
 *
 * Uses direct-initialization, which is exactly what the std::is_constructible guard verifies. Braced
 * initialization would reject explicit constructors and would prefer an initializer_list constructor whenever the
 * iterators themselves convert to the member's element type.
 *
 * @tparam MemberT Type to build; must be constructible from (ItrT, EndT)
 * @param in_itr Start of the range
 * @param in_end End of the range
 * @return MemberT constructed from the pair (in_itr, in_end)
 */
template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
constexpr MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
	return MemberT(in_itr, in_end);
}
|||
|
|||
/**
|
|||
* Splits an input string into substrings based on words |
|||
* |
|||
* @tparam ContainerT Container type to store the results in |
|||
* @tparam ContainerArgsT Optional template parameters for ContainerT |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace Whitespace to split upon |
|||
* @return Container populated with |
|||
*/ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT> |
|||
constexpr auto word_split(ItrT begin, EndT end, ElementT in_whitespace) { |
|||
using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>; |
|||
using member_type = typename word_split_defaults_type::member_type; |
|||
using container_type = typename word_split_defaults_type::container_type; |
|||
|
|||
container_type result; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
// Skip over any preceeding whitespace
|
|||
while (begin != end |
|||
&& *begin == in_whitespace) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr != end;) { |
|||
if (*itr == in_whitespace) { |
|||
// End of word reached; push token to result and skip over any whitespace
|
|||
result.push_back(make_word_split_member<member_type>(begin, itr)); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& *itr == in_whitespace) { |
|||
++itr; |
|||
} |
|||
|
|||
begin = itr; |
|||
continue; |
|||
} |
|||
|
|||
++itr; |
|||
} |
|||
|
|||
// Push final token to the end if not empty
|
|||
if (begin != end) { |
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
} |
|||
|
|||
return result; |
|||
} |
|||
|
|||
/**
|
|||
* Splits an input string into substrings |
|||
* |
|||
* @tparam ContainerT Container type to store the results in |
|||
* @tparam ContainerArgsT Optional template parameters for ContainerT |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace_begin Start of range containing the whitespace values |
|||
* @param in_whitespace_end End of range containing the whitespace values |
|||
* @return Container populated with |
|||
*/ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT> |
|||
constexpr auto word_split(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) { |
|||
using ElementT = std::remove_cvref_t<decltype(*begin)>; |
|||
using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>; |
|||
using member_type = typename word_split_defaults_type::member_type; |
|||
using container_type = typename word_split_defaults_type::container_type; |
|||
|
|||
if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) { |
|||
return word_split<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin); |
|||
} |
|||
|
|||
container_type result{}; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
if (in_whitespace_begin >= in_whitespace_end) { |
|||
// Absent whitespace, therefore no match, therefore return input as single token
|
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
return result; |
|||
} |
|||
|
|||
auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) { |
|||
return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end; |
|||
}; |
|||
|
|||
while (begin != end |
|||
&& is_whitespace(*begin)) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr < end;) { |
|||
if (is_whitespace(*itr)) { |
|||
// Push token to result
|
|||
result.push_back(make_word_split_member<member_type>(begin, itr)); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& is_whitespace(*itr)) { |
|||
++itr; |
|||
} |
|||
|
|||
begin = itr; |
|||
continue; |
|||
} |
|||
|
|||
++itr; |
|||
} |
|||
|
|||
// Push final token to the end if not empty
|
|||
if (begin != end) { |
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
} |
|||
|
|||
return result; |
|||
} |
|||
|
|||
/**
 * Splits a string into words, treating one element value as whitespace
 *
 * Convenience wrapper which forwards the string's begin/end iterators to the iterator-based overload.
 *
 * @tparam ContainerT Container type to store the results in
 * @tparam ContainerArgsT Optional template parameters for ContainerT
 * @param in_string String to split
 * @param in_whitespace Element value which separates words
 * @return Container populated with each word found in `in_string`
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
constexpr auto word_split(const InputT& in_string, typename InputT::value_type in_whitespace) {
	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	return word_split<ContainerT, ContainerArgsT...>(text_begin, text_end, in_whitespace);
}
|||
|
|||
/**
 * Splits a string into words, treating every element of `in_whitespace` as whitespace
 *
 * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
 * element argument selects the single-element overload instead.
 *
 * @param in_string String to split
 * @param in_whitespace Range of whitespace values to split upon
 * @return Container populated with each word found in `in_string`
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
constexpr auto word_split(const InputT& in_string, const SpaceT& in_whitespace) {
	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	auto space_begin = in_whitespace.begin();
	auto space_end = in_whitespace.end();
	return word_split<ContainerT, ContainerArgsT...>(text_begin, text_end, space_begin, space_end);
}
|||
|
|||
/**
|
|||
* Splits an input string into 2 substrings at and omitting any input whitespace. Returns: |
|||
* An empty pair if in_string is empty, |
|||
* otherwise if whitespace is not present, a pair who's `second` member is empty and `first` member is equal to `in_string`, |
|||
* otherwise, a pair split at first instance of a string of whitespace |
|||
* |
|||
* @tparam ResultMemberT String type used to populate the result |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace Whitespace to split on |
|||
* @return A pair representing `in_string` split at some whitespace, with first word stored in `first` and rest of sentence in `last` |
|||
*/ |
|||
template<typename... OptionalMemberT, typename ItrT, typename EndT, typename ElementT> |
|||
constexpr auto word_split_once(ItrT begin, EndT end, ElementT in_whitespace) { |
|||
static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT"); |
|||
using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>; |
|||
|
|||
std::pair<MemberT, MemberT> result; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
while (begin != end |
|||
&& *begin == in_whitespace) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr != end; ++itr) { |
|||
if (*itr == in_whitespace) { |
|||
// in_whitespace found; word_split upon it
|
|||
result.first = make_word_split_member<MemberT>(begin, itr); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& *itr == in_whitespace) { |
|||
++itr; |
|||
} |
|||
|
|||
result.second = make_word_split_member<MemberT>(itr, end); |
|||
return result; |
|||
} |
|||
} |
|||
|
|||
// in_whitespace not found
|
|||
result.first = make_word_split_member<MemberT>(begin, end); |
|||
return result; |
|||
} |
|||
|
|||
/**
|
|||
* Splits an input string into 2 substrings at and omitting any input whitespace. Returns: |
|||
* An empty pair if in_string is empty, |
|||
* otherwise if whitespace is not present, a pair who's `second` member is empty and `first` member is equal to `in_string`, |
|||
* otherwise, a pair split at first instance of any whitespace |
|||
* |
|||
* @tparam ResultMemberT String type used to populate the result |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace_begin Start of range containing the whitespace values |
|||
* @param in_whitespace_end End of range containing the whitespace values |
|||
* @return A pair representing `in_string` split at some whitespace, with first word stored in `first` and rest of sentence in `last` |
|||
*/ |
|||
template<typename... OptionalMemberT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT> |
|||
constexpr auto word_split_once(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) { |
|||
static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT"); |
|||
using ElementT = std::remove_cvref_t<decltype(*begin)>; |
|||
using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>; |
|||
|
|||
if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) { |
|||
return word_split_once<OptionalMemberT...>(begin, end, *in_whitespace_begin); |
|||
} |
|||
|
|||
std::pair<MemberT, MemberT> result; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
if (in_whitespace_begin >= in_whitespace_end) { |
|||
// Absent whitespace, therefore no match, therefore return input as single token
|
|||
result.first = make_word_split_member<MemberT>(begin, end); |
|||
return result; |
|||
} |
|||
|
|||
auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) { |
|||
return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end; |
|||
}; |
|||
|
|||
// Skip over preceeding whitespace
|
|||
while (begin != end |
|||
&& is_whitespace(*begin)) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr < end;) { |
|||
if (is_whitespace(itr)) { |
|||
// in_whitespace found; word_split upon it
|
|||
result.first = make_word_split_member<MemberT>(begin, itr); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& is_whitespace(*itr)) { |
|||
++itr; |
|||
} |
|||
|
|||
result.second = make_word_split_member<MemberT>(itr, end); |
|||
return result; |
|||
} |
|||
} |
|||
|
|||
// in_whitespace not found
|
|||
result.first = make_word_split_member<MemberT>(begin, end); |
|||
return result; |
|||
} |
|||
|
|||
/**
 * Splits a string into its first word and the remainder, treating one element value as whitespace. Returns:
 * an empty pair if `in_string` is empty,
 * otherwise if no whitespace follows the first word, a pair whose `second` member is empty,
 * otherwise, a pair split at the first run of whitespace following the first word
 *
 * @tparam OptionalMemberT At most one type, used for both members of the result; defaults to std::basic_string
 * @tparam InputT String type being passed in
 * @param in_string String to split
 * @param in_whitespace Whitespace to split on
 * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
 */
template<typename... OptionalMemberT, typename InputT>
constexpr auto word_split_once(const InputT& in_string, typename InputT::value_type in_whitespace) {
	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
	using char_t = typename InputT::value_type;
	using part_t = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<char_t>>>;

	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	return word_split_once<part_t>(text_begin, text_end, in_whitespace);
}
|||
|
|||
/**
 * Splits a string into its first word and the remainder, treating every element of `in_whitespace` as whitespace
 *
 * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
 * element argument selects the single-element overload instead.
 *
 * @param in_string String to split
 * @param in_whitespace Range of whitespace values to split on
 * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
 */
template<typename... OptionalMemberT, typename InputT, typename SpaceT,
	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
constexpr auto word_split_once(const InputT& in_string, const SpaceT& in_whitespace) {
	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");

	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	auto space_begin = in_whitespace.begin();
	auto space_end = in_whitespace.end();
	return word_split_once<OptionalMemberT...>(text_begin, text_end, space_begin, space_end);
}
|||
|
|||
/**
|
|||
* Splits a range of elements up to a specified number of times and returns the result |
|||
* |
|||
* @tparam ContainerT Container type to store the results in |
|||
* @tparam ContainerArgsT Optional template parameters for ContainerT |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace Whitespace to split upon |
|||
* @param in_limit Maximum number of times to split |
|||
* @return Container containing to up `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder |
|||
*/ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT> |
|||
constexpr auto word_split_n(ItrT begin, EndT end, ElementT in_whitespace, size_t in_limit) { |
|||
using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>; |
|||
using member_type = typename word_split_defaults_type::member_type; |
|||
using container_type = typename word_split_defaults_type::container_type; |
|||
|
|||
container_type result; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
while (begin != end |
|||
&& *begin == in_whitespace) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr != end && in_limit != 0; ++itr) { |
|||
if (*itr == in_whitespace) { |
|||
// Push token to result
|
|||
result.push_back(make_word_split_member<member_type>(begin, itr)); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& *itr == in_whitespace) { |
|||
++itr; |
|||
} |
|||
|
|||
begin = itr; |
|||
--in_limit; |
|||
} |
|||
} |
|||
|
|||
// Push final token to the end if not empty
|
|||
if (begin != end) { |
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
} |
|||
|
|||
return result; |
|||
} |
|||
|
|||
/**
|
|||
* Splits a range of elements up to a specified number of times and returns the result |
|||
* |
|||
* @tparam ContainerT Container type to store the results in |
|||
* @tparam ContainerArgsT Optional template parameters for ContainerT |
|||
* @param begin Start of range of elements to split |
|||
* @param end End of range of elements to split |
|||
* @param in_whitespace_begin Start of range containing the whitespace values |
|||
* @param in_whitespace_end End of range containing the whitespace values |
|||
* @return Container containing to up `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder |
|||
*/ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT> |
|||
constexpr auto word_split_n(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end, size_t in_limit) { |
|||
using ElementT = std::remove_cvref_t<decltype(*begin)>; |
|||
using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>; |
|||
using member_type = typename word_split_defaults_type::member_type; |
|||
using container_type = typename word_split_defaults_type::container_type; |
|||
|
|||
if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) { |
|||
return word_split_n<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin, in_limit); |
|||
} |
|||
|
|||
container_type result{}; |
|||
if (begin >= end) { |
|||
// Nothing to word_split
|
|||
return result; |
|||
} |
|||
|
|||
if (in_whitespace_begin >= in_whitespace_end) { |
|||
// Absent whitespace, therefore no match, therefore return input as single token
|
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
return result; |
|||
} |
|||
|
|||
auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) { |
|||
return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end; |
|||
}; |
|||
|
|||
while (begin != end |
|||
&& is_whitespace(*begin)) { |
|||
++begin; |
|||
} |
|||
|
|||
for (auto itr = begin; itr != end && in_limit != 0;) { |
|||
if (std::equal(in_whitespace_begin, in_whitespace_end, itr)) { |
|||
// Push token to result
|
|||
result.push_back(make_word_split_member<member_type>(begin, itr)); |
|||
|
|||
++itr; |
|||
while (itr != end |
|||
&& is_whitespace(*itr)) { |
|||
++itr; |
|||
} |
|||
|
|||
begin = itr; |
|||
--in_limit; |
|||
continue; |
|||
} |
|||
|
|||
++itr; |
|||
} |
|||
|
|||
// Push final token to the end; may be empty
|
|||
result.push_back(make_word_split_member<member_type>(begin, end)); |
|||
|
|||
return result; |
|||
} |
|||
|
|||
/**
 * Splits a string into words up to a specified number of times, treating one element value as whitespace
 *
 * Convenience wrapper which forwards the string's begin/end iterators to the iterator-based overload.
 *
 * @tparam ContainerT Container type to store the results in
 * @tparam ContainerArgsT Optional template parameters for ContainerT
 * @param in_string String to split
 * @param in_whitespace Whitespace to split upon
 * @param in_limit Maximum number of times to split
 * @return Container containing up to `in_limit` + 1 substrings, as produced by the iterator-based overload
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
constexpr auto word_split_n(const InputT& in_string, typename InputT::value_type in_whitespace, size_t in_limit) {
	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	return word_split_n<ContainerT, ContainerArgsT...>(text_begin, text_end, in_whitespace, in_limit);
}
|||
|
|||
/**
 * Splits a string into words up to a specified number of times, treating every element of `in_whitespace` as
 * whitespace
 *
 * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
 * element argument selects the single-element overload instead.
 *
 * @param in_string String to split
 * @param in_whitespace Range of whitespace values to split upon
 * @param in_limit Maximum number of times to split
 * @return Container containing up to `in_limit` + 1 substrings, as produced by the iterator-based overload
 */
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
constexpr auto word_split_n(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) {
	auto text_begin = in_string.begin();
	auto text_end = in_string.end();
	auto space_begin = in_whitespace.begin();
	auto space_end = in_whitespace.end();
	return word_split_n<ContainerT, ContainerArgsT...>(text_begin, text_end, space_begin, space_end, in_limit);
}
|||
|
|||
/** Splits an input string into view substrings; cannot specify element return type */ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT> |
|||
constexpr auto word_split_view(const InputT& in_string, const SpaceT& in_whitespace) { |
|||
using MemberT = std::basic_string_view<typename InputT::value_type>; |
|||
return word_split<ContainerT, MemberT, ContainerArgsT...>(in_string, in_whitespace); |
|||
} |
|||
|
|||
/** Splits an input string into view substring pair */ |
|||
template<typename... NothingT, typename InputT, typename SpaceT> |
|||
constexpr auto word_split_once_view(const InputT& in_string, const SpaceT& in_whitespace) { |
|||
// Using a static assertion instead of `auto` for inputs, because this is the only place it would be used, and it'd
|
|||
// require -fconcepts which isn't currently used. Replace inputs with `auto` later and remove template parameters
|
|||
// if -fconcepts is ever added.
|
|||
static_assert(sizeof...(NothingT) == 0, "word_split_once_view does not accept template parameters"); |
|||
using MemberT = std::basic_string_view<typename InputT::value_type>; |
|||
return word_split_once<MemberT>(in_string, in_whitespace); |
|||
} |
|||
|
|||
/** Splits an input string into a specified number of view substrings */ |
|||
template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, |
|||
typename InputT, typename SpaceT> |
|||
constexpr auto word_split_n_view(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) { |
|||
using MemberT = std::basic_string_view<typename InputT::value_type>; |
|||
return word_split_n<ContainerT, MemberT, ContainerArgsT...>(in_string, in_whitespace, in_limit); |
|||
} |
|||
|
|||
} // namespace jessilib
|
@ -0,0 +1,280 @@ |
|||
/**
|
|||
* Copyright (C) 2021 Jessica James. |
|||
* |
|||
* Permission to use, copy, modify, and/or distribute this software for any |
|||
* purpose with or without fee is hereby granted, provided that the above |
|||
* copyright notice and this permission notice appear in all copies. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
|||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
|||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|||
* |
|||
* Written by Jessica James <jessica.aj@outlook.com> |
|||
*/ |
|||
|
|||
#include "jessilib/word_split.hpp" |
|||
#include "test_split.hpp" |
|||
|
|||
using namespace jessilib; |
|||
using namespace std::literals; |
|||
|
|||
/** Smoke test: a sentence with a leading and a trailing space splits into exactly its four words */
TEST(jessi, lazy) {
	std::string sentence = " this is a sentence ";
	auto split_result = word_split(sentence, ' ');

	// Fatal assertion: the checks below index into the result, so a wrong size must stop the test here
	ASSERT_EQ(split_result.size(), 4U);
	EXPECT_EQ(split_result[0], "this");
	EXPECT_EQ(split_result[1], "is");
	EXPECT_EQ(split_result[2], "a");
	EXPECT_EQ(split_result[3], "sentence");
}
|||
|
|||
// Character types which every typed test suite in this file is instantiated with
using char_types = ::testing::Types<
	char, unsigned char, signed char,
	wchar_t, char8_t, char16_t, char32_t>;
|||
|
|||
template<typename T> |
|||
class WordSplitSVTest : public ::testing::Test { |
|||
public: |
|||
}; |
|||
TYPED_TEST_SUITE(WordSplitSVTest, char_types); |
|||
|
|||
template<typename T> |
|||
class WordSplitStringTest : public ::testing::Test { |
|||
public: |
|||
}; |
|||
TYPED_TEST_SUITE(WordSplitStringTest, char_types); |
|||
|
|||
template<typename T> |
|||
class WordSplitOnceTest : public ::testing::Test { |
|||
public: |
|||
}; |
|||
TYPED_TEST_SUITE(WordSplitOnceTest, char_types); |
|||
|
|||
template<typename T> |
|||
class WordSplitNTest : public ::testing::Test { |
|||
public: |
|||
}; |
|||
TYPED_TEST_SUITE(WordSplitNTest, char_types); |
|||
|
|||
/** An empty view splits into no words */
TYPED_TEST(WordSplitSVTest, empty) {
	using string_type = std::basic_string<TypeParam>;

	std::basic_string_view<TypeParam> input;
	std::vector<string_type> words = word_split(input, default_delim<TypeParam>);
	EXPECT_TRUE(words.empty());
}
|||
|
|||
/** A view holding one word and no delimiter splits into that one word */
TYPED_TEST(WordSplitSVTest, single_word) {
	std::basic_string_view<TypeParam> single_word = make_word_view<TypeParam>();
	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	// presumably make_word_view's default word length is 8 -- confirm against test_split.hpp
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** A delimiter after the only word does not produce an extra (empty) word */
TYPED_TEST(WordSplitSVTest, single_word_trailing_delim) {
	auto word = make_word<TypeParam>();
	word += default_delim<TypeParam>;
	std::basic_string_view<TypeParam> single_word = word;
	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** A delimiter before the only word does not produce an extra (empty) word */
TYPED_TEST(WordSplitSVTest, single_word_prefix_delim) {
	std::basic_string<TypeParam> word;
	word += default_delim<TypeParam>;
	word += make_word<TypeParam>();
	std::basic_string_view<TypeParam> single_word = word;
	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** Delimiters on both sides of the only word do not produce extra (empty) words */
TYPED_TEST(WordSplitSVTest, single_word_surround_delim) {
	std::basic_string<TypeParam> word;
	word += default_delim<TypeParam>;
	word += make_word<TypeParam>();
	word += default_delim<TypeParam>;
	std::basic_string_view<TypeParam> single_word = word;
	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** Two words separated by one delimiter split into two words */
TYPED_TEST(WordSplitSVTest, two_words) {
	auto word = make_word<TypeParam>();
	word += default_delim<TypeParam>;
	word += make_word<TypeParam>();
	std::basic_string_view<TypeParam> words = word;
	std::vector<std::basic_string<TypeParam>> split_result = word_split(words, default_delim<TypeParam>);

	// Fatal assertion: the checks below index into the result
	ASSERT_EQ(split_result.size(), 2U);
	EXPECT_EQ(split_result[0].size(), 8U);
	EXPECT_EQ(split_result[1].size(), 8U);
}
|||
|
|||
/** Three words of lengths 3, 5 and 9 are recovered even when two delimiters appear back to back */
TYPED_TEST(WordSplitSVTest, three_words) {
	auto word = make_word<TypeParam>(3);
	word += default_delim<TypeParam>;
	word += make_word<TypeParam>(5);
	// Doubled delimiter: must be treated as a single word boundary
	word += default_delim<TypeParam>;
	word += default_delim<TypeParam>;
	word += make_word<TypeParam>(9);
	std::basic_string_view<TypeParam> words = word;
	std::vector<std::basic_string<TypeParam>> split_result = word_split(words, default_delim<TypeParam>);

	// Fatal assertion: the checks below index into the result
	ASSERT_EQ(split_result.size(), 3U);
	EXPECT_EQ(split_result[0].size(), 3U);
	EXPECT_EQ(split_result[1].size(), 5U);
	EXPECT_EQ(split_result[2].size(), 9U);
}
|||
|
|||
/** WordSplitOnceTest */ |
|||
|
|||
/** word_split_once on random data yields the first token and the remainder after it, as strings */
TYPED_TEST(WordSplitOnceTest, random) {
	using string_type = std::basic_string<TypeParam>;

	RandomWordTestData<TypeParam> fixture{};
	std::pair<string_type, string_type> halves = word_split_once(fixture.m_str, default_delim<TypeParam>);

	EXPECT_EQ(halves.first, fixture.m_tokens[0]);
	EXPECT_EQ(halves.second, fixture.get_remainder(1));
}
|||
|
|||
/** word_split_once can populate std::vector members when the member type is given explicitly */
TYPED_TEST(WordSplitOnceTest, random_vector) {
	using vector_type = std::vector<TypeParam>;

	RandomWordTestData<TypeParam, vector_type> fixture{};
	std::pair<vector_type, vector_type> halves = word_split_once<vector_type>(fixture.m_str, default_delim<TypeParam>);

	EXPECT_EQ(halves.first, fixture.m_tokens[0]);
	EXPECT_EQ(halves.second, fixture.get_remainder(1));
}
|||
|
|||
/** word_split_once_view yields the first token and the remainder after it, as string views */
TYPED_TEST(WordSplitOnceTest, random_view) {
	using view_type = std::basic_string_view<TypeParam>;

	RandomWordTestData<TypeParam> fixture{};
	std::pair<view_type, view_type> halves = word_split_once_view(fixture.m_str, default_delim<TypeParam>);

	EXPECT_EQ(halves.first, fixture.m_tokens[0]);
	EXPECT_EQ(halves.second, fixture.get_remainder(1));
}
|||
|
|||
/** WordSplitNTest */
|||
|
|||
/** word_split_n with a limit of 4 yields the first 4 tokens followed by the unprocessed remainder, as strings */
TYPED_TEST(WordSplitNTest, random) {
	RandomWordTestData<TypeParam> data{};
	constexpr size_t n = 4;
	std::vector<std::basic_string<TypeParam>> split_result = word_split_n(data.m_str, default_delim<TypeParam>, n);

	// Tokens shall be same up until last one (n + 1)
	// Fatal assertion: the checks below index into the result up to and including [n]
	ASSERT_EQ(split_result.size(), n + 1);
	for (size_t index = 0; index != n; ++index) {
		EXPECT_EQ(split_result[index], data.m_tokens[index]);
	}

	EXPECT_EQ(split_result[n], data.get_remainder(n));
}
|||
|
|||
/** word_split_n can populate std::vector members when the member type is given explicitly */
TYPED_TEST(WordSplitNTest, random_vector) {
	RandomWordTestData<TypeParam, std::vector<TypeParam>> data{};
	constexpr size_t n = 4;
	std::vector<std::vector<TypeParam>> split_result = word_split_n<std::vector, std::vector<TypeParam>>(data.m_str, default_delim<TypeParam>, n);

	// Tokens shall be same up until last one (n + 1)
	// Fatal assertion: the checks below index into the result up to and including [n]
	ASSERT_EQ(split_result.size(), n + 1);
	for (size_t index = 0; index != n; ++index) {
		EXPECT_EQ(split_result[index], data.m_tokens[index]);
	}

	EXPECT_EQ(split_result[n], data.get_remainder(n));
}
|||
|
|||
/** word_split_n_view with a limit of 4 yields the first 4 tokens followed by the remainder, as string views */
TYPED_TEST(WordSplitNTest, random_view) {
	RandomWordTestData<TypeParam> data{};
	constexpr size_t n = 4;
	std::vector<std::basic_string_view<TypeParam>> split_result = word_split_n_view(data.m_str, default_delim<TypeParam>, n);

	// Tokens shall be same up until last one (n + 1)
	// Fatal assertion: the checks below index into the result up to and including [n]
	ASSERT_EQ(split_result.size(), n + 1);
	for (size_t index = 0; index != n; ++index) {
		EXPECT_EQ(split_result[index], data.m_tokens[index]);
	}

	EXPECT_EQ(split_result[n], data.get_remainder(n));
}
|||
|
|||
/** std::string word_split test, really just testing compilation and returned types */ |
|||
|
|||
/** An empty string splits into no words */
TYPED_TEST(WordSplitStringTest, empty) {
	using string_type = std::basic_string<TypeParam>;

	string_type input;
	std::vector<string_type> words = word_split(input, default_delim<TypeParam>);
	EXPECT_TRUE(words.empty());
}
|||
|
|||
/** An empty string splits into no words when the whitespace is passed as a multi-element delimiter */
TYPED_TEST(WordSplitStringTest, empty_long) {
	using string_type = std::basic_string<TypeParam>;

	string_type input;
	auto long_delim = make_delim_long<TypeParam>(8);
	std::vector<string_type> words = word_split(input, long_delim);
	EXPECT_TRUE(words.empty());
}
|||
|
|||
/** A string holding one word and no delimiter splits into that one word */
TYPED_TEST(WordSplitStringTest, single_word) {
	std::basic_string<TypeParam> single_word = make_word<TypeParam>();
	std::vector<decltype(single_word)> split_result = word_split(single_word, default_delim<TypeParam>);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** A string holding one word splits into that one word when the whitespace is a multi-element delimiter */
TYPED_TEST(WordSplitStringTest, single_word_long) {
	auto delim = make_delim_long<TypeParam>(8);
	std::basic_string<TypeParam> single_word = make_word<TypeParam>(delim);
	std::vector<decltype(single_word)> split_result = word_split(single_word, delim);

	// Fatal assertion: the check below indexes into the result
	ASSERT_EQ(split_result.size(), 1U);
	EXPECT_EQ(split_result[0].size(), 8U);
}
|||
|
|||
/** Splitting random data on the default delimiter reproduces the fixture's token list */
TYPED_TEST(WordSplitStringTest, random) {
	using string_type = std::basic_string<TypeParam>;

	RandomWordTestData<TypeParam> fixture{};
	std::vector<string_type> words = word_split(fixture.m_str, default_delim<TypeParam>);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** Splitting random data on a multi-element delimiter reproduces the fixture's token list */
TYPED_TEST(WordSplitStringTest, random_long) {
	using string_type = std::basic_string<TypeParam>;

	auto long_delim = make_delim_long<TypeParam>(8);
	RandomWordTestData<TypeParam> fixture{ long_delim };
	std::vector<string_type> words = word_split(fixture.m_str, long_delim);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** word_split can populate std::vector members when the member type is given explicitly */
TYPED_TEST(WordSplitStringTest, random_vector) {
	using word_type = std::vector<TypeParam>;

	RandomWordTestData<TypeParam, word_type> fixture{};
	std::vector<word_type> words = word_split<std::vector, word_type>(fixture.m_str, default_delim<TypeParam>);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** One multi-element delimiter appended to the input does not change the resulting tokens */
TYPED_TEST(WordSplitStringTest, random_long_trailing_delim) {
	using string_type = std::basic_string<TypeParam>;

	auto long_delim = make_delim_long<TypeParam>(8);
	RandomWordTestData<TypeParam> fixture{ long_delim };
	fixture.m_str += long_delim;
	std::vector<string_type> words = word_split(fixture.m_str, long_delim);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** One multi-element delimiter prepended to the input does not change the resulting tokens */
TYPED_TEST(WordSplitStringTest, random_long_prefix_delim) {
	using string_type = std::basic_string<TypeParam>;

	auto long_delim = make_delim_long<TypeParam>(8);
	RandomWordTestData<TypeParam> fixture{ long_delim };
	fixture.m_str = long_delim + fixture.m_str;
	std::vector<string_type> words = word_split(fixture.m_str, long_delim);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** Two multi-element delimiters appended to the input do not change the resulting tokens */
TYPED_TEST(WordSplitStringTest, random_long_trailing_two_delim) {
	using string_type = std::basic_string<TypeParam>;

	auto long_delim = make_delim_long<TypeParam>(8);
	RandomWordTestData<TypeParam> fixture{ long_delim };
	fixture.m_str += long_delim + long_delim;
	std::vector<string_type> words = word_split(fixture.m_str, long_delim);
	EXPECT_EQ(words, fixture.m_tokens);
}
|||
|
|||
/** Two multi-element delimiters prepended to the input do not change the resulting tokens */
TYPED_TEST(WordSplitStringTest, random_long_prefix_two_delim) {
	using string_type = std::basic_string<TypeParam>;

	auto long_delim = make_delim_long<TypeParam>(8);
	RandomWordTestData<TypeParam> fixture{ long_delim };
	fixture.m_str = long_delim + long_delim + fixture.m_str;
	std::vector<string_type> words = word_split(fixture.m_str, long_delim);
	EXPECT_EQ(words, fixture.m_tokens);
}
Loading…
Reference in new issue