Add word_split, word_split_once, and word_split_n, which split on whitespace instead of a delimiter

3 years ago · 157a3cac03
5 changed files with 893 additions and 10 deletions
--- a/src/include/jessilib/split.hpp
+++ b/src/include/jessilib/split.hpp
@ -336,7 +336,6 @@ constexpr auto split_n(ItrT begin, EndT end, ElementT in_delim, size_t in_limit)
 * @tparam ContainerArgsT Optional template parameters for ContainerT
 * @param begin Start of range of elements to split
 * @param end End of range of elements to split
- * @param in_delim Delimiter to split upon
 * @param in_delim_begin Start of range containing the delimiter
 * @param in_delim_end End of range containing the delimiter
 * @return Container containing to up `in_limit` + 1 substrings; result[in_limit] is the unprocessed remainder
--- a/src/include/jessilib/word_split.hpp
+++ b/src/include/jessilib/word_split.hpp
@ -0,0 +1,519 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+/**
+ * @file word_split.hpp
+ * @author Jessica James
+ *
+ * Over-engineered and over-genericized versions of word_split, word_split_once, and word_split_n, with lots of syntactical sugar
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace jessilib {
+
+/**
+ * Resolves the member (word) type and container type used by the word_split functions
+ *
+ * Primary template: chosen when no optional arguments are supplied, in which case each word is stored as a
+ * std::basic_string<ElementT> inside ContainerT
+ */
+template<template<typename...> typename ContainerT, typename ElementT, typename...>
+struct word_split_defaults {
+	using member_type = std::basic_string<ElementT>;
+	using container_type = ContainerT<member_type>;
+};
+
+/**
+ * Specialization chosen when at least one optional argument is supplied: the first optional argument becomes the
+ * member type, and it is forwarded to ContainerT together with any remaining arguments
+ */
+template<template<typename...> typename ContainerT, typename ElementT, typename FirstOptional, typename... ContainerArgsT>
+struct word_split_defaults<ContainerT, ElementT, FirstOptional, ContainerArgsT...> {
+	using member_type = FirstOptional;
+	using container_type = ContainerT<FirstOptional, ContainerArgsT...>;
+};
+
+/**
+ * Builds a result member from a range when MemberT cannot be constructed from an iterator pair; the member is
+ * constructed from a pointer and a length instead (intended for string_view-like types)
+ *
+ * @tparam MemberT Type to construct; must be constructible from { pointer, size_t }
+ * @param in_itr Start of the range; the pointer/length construction assumes the elements are stored contiguously
+ * @param in_end End of the range
+ * @return MemberT covering [in_itr, in_end); a default-constructed MemberT for an empty non-pointer range
+ */
+template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<!std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
+MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
+	if constexpr (std::is_pointer_v<ItrT>) {
+		// Already a raw pointer: nothing is dereferenced, so even an empty range can be forwarded as-is
+		return { in_itr, static_cast<size_t>(in_end - in_itr) };
+	}
+	else {
+		if (in_itr == in_end) {
+			// Never dereference the end iterator just to take its address
+			return {};
+		}
+
+		return { &*in_itr, static_cast<size_t>(in_end - in_itr) };
+	}
+}
+
+/**
+ * Builds a result member from a range when MemberT can be constructed directly from the iterator pair
+ *
+ * @tparam MemberT Type to construct
+ * @param in_itr Start of the range
+ * @param in_end End of the range
+ * @return MemberT constructed from { in_itr, in_end }
+ */
+template<typename MemberT, typename ItrT, typename EndT, typename std::enable_if<std::is_constructible<MemberT, ItrT, EndT>::value>::type* = nullptr>
+MemberT make_word_split_member(ItrT in_itr, EndT in_end) {
+	// Can construct with iterators, so construct with iterators
+	return { in_itr, in_end };
+}
+
+/**
+ * Splits a range of elements into words, treating every run of `in_whitespace` as a single separator
+ *
+ * Leading and trailing whitespace is discarded, so the result never contains an empty word.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT; the first, if present, is the word type
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @return Container populated with each word found, in order of appearance
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split(ItrT begin, EndT end, ElementT in_whitespace) {
+	using defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using word_type = typename defaults_type::member_type;
+
+	typename defaults_type::container_type words;
+	if (begin >= end) {
+		// Empty input; there are no words to find
+		return words;
+	}
+
+	// Drop leading whitespace so that `begin` marks the start of the first word
+	for (; begin != end && *begin == in_whitespace; ++begin) {
+	}
+
+	auto cursor = begin;
+	while (cursor != end) {
+		if (!(*cursor == in_whitespace)) {
+			// Still inside the current word
+			++cursor;
+			continue;
+		}
+
+		// End of a word: store it, then consume the entire whitespace run that follows
+		words.push_back(make_word_split_member<word_type>(begin, cursor));
+		do {
+			++cursor;
+		} while (cursor != end && *cursor == in_whitespace);
+
+		begin = cursor;
+	}
+
+	// Anything after the last whitespace run is the final word
+	if (begin != end) {
+		words.push_back(make_word_split_member<word_type>(begin, end));
+	}
+
+	return words;
+}
+
+/**
+ * Splits a range of elements into words, treating every run of elements found in the whitespace range as a
+ * single separator
+ *
+ * Leading and trailing whitespace is discarded. If the whitespace range is empty, the whole input is returned
+ * as one word.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT; the first, if present, is the word type
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @return Container populated with each word found, in order of appearance
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) {
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using word_type = typename defaults_type::member_type;
+
+	// Exactly one whitespace value: delegate to the single-element overload
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin);
+	}
+
+	typename defaults_type::container_type words{};
+	if (begin >= end) {
+		// Empty input; there are no words to find
+		return words;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Nothing counts as whitespace, so the entire input is a single word
+		words.push_back(make_word_split_member<word_type>(begin, end));
+		return words;
+	}
+
+	// True when the element appears anywhere in the whitespace range
+	const auto matches_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Drop leading whitespace so that `begin` marks the start of the first word
+	for (; begin != end && matches_whitespace(*begin); ++begin) {
+	}
+
+	auto cursor = begin;
+	while (cursor < end) {
+		if (!matches_whitespace(*cursor)) {
+			// Still inside the current word
+			++cursor;
+			continue;
+		}
+
+		// End of a word: store it, then consume the entire whitespace run that follows
+		words.push_back(make_word_split_member<word_type>(begin, cursor));
+		do {
+			++cursor;
+		} while (cursor != end && matches_whitespace(*cursor));
+
+		begin = cursor;
+	}
+
+	// Anything after the last whitespace run is the final word
+	if (begin != end) {
+		words.push_back(make_word_split_member<word_type>(begin, end));
+	}
+
+	return words;
+}
+
+/**
+ * Splits an input string into words separated by runs of a single whitespace value
+ *
+ * Forwards `in_string.begin()` / `in_string.end()` to the iterator-based overload.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @return Container populated with the words returned by the iterator-based overload
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
+constexpr auto word_split(const InputT& in_string, typename InputT::value_type in_whitespace) {
+	return word_split<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace);
+}
+
+/**
+ * Splits an input string into words separated by runs of any value contained in `in_whitespace`
+ *
+ * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
+ * element is handled by the overload above.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Range (providing begin()/end()) of values considered to be whitespace
+ * @return Container populated with the words returned by the iterator-based overload
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
+	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
+constexpr auto word_split(const InputT& in_string, const SpaceT& in_whitespace) {
+	return word_split<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end());
+}
+
+/**
+ * Splits a range of elements into its first word and the rest, discarding the whitespace between them. Returns:
+ * An empty pair if the input range is empty,
+ * otherwise if no whitespace follows the first word, a pair whose `second` member is empty and whose `first`
+ * member is the input with leading whitespace removed,
+ * otherwise, a pair split at the first run of whitespace following the first word
+ *
+ * @tparam OptionalMemberT At most one type: the string type used to populate the result (defaults to
+ * std::basic_string<ElementT>)
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
+ */
+template<typename... OptionalMemberT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split_once(ItrT begin, EndT end, ElementT in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	std::pair<MemberT, MemberT> halves;
+	if (begin >= end) {
+		// Empty input; both halves stay empty
+		return halves;
+	}
+
+	// Drop leading whitespace so that `begin` marks the start of the first word
+	for (; begin != end && *begin == in_whitespace; ++begin) {
+	}
+
+	// Walk to the end of the first word
+	auto word_end = begin;
+	while (word_end != end && !(*word_end == in_whitespace)) {
+		++word_end;
+	}
+
+	if (word_end == end) {
+		// No whitespace after the first word; there is no second half
+		halves.first = make_word_split_member<MemberT>(begin, end);
+		return halves;
+	}
+
+	halves.first = make_word_split_member<MemberT>(begin, word_end);
+
+	// Consume the entire whitespace run; everything after it is the second half
+	auto rest = word_end;
+	do {
+		++rest;
+	} while (rest != end && *rest == in_whitespace);
+
+	halves.second = make_word_split_member<MemberT>(rest, end);
+	return halves;
+}
+
+/**
+ * Splits a range of elements into its first word and the rest, discarding the whitespace between them. Returns:
+ * An empty pair if the input range is empty,
+ * otherwise if no whitespace follows the first word, a pair whose `second` member is empty and whose `first`
+ * member is the input with leading whitespace removed,
+ * otherwise, a pair split at the first run of whitespace following the first word
+ *
+ * @tparam OptionalMemberT At most one type: the string type used to populate the result (defaults to
+ * std::basic_string of the input's element type)
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
+ */
+template<typename... OptionalMemberT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split_once(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	// Exactly one whitespace value: delegate to the single-element overload
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split_once<OptionalMemberT...>(begin, end, *in_whitespace_begin);
+	}
+
+	std::pair<MemberT, MemberT> result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Absent whitespace, therefore no match, therefore return input as single token
+		result.first = make_word_split_member<MemberT>(begin, end);
+		return result;
+	}
+
+	// True when the element appears anywhere in the whitespace range
+	auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Skip over preceding whitespace
+	while (begin != end
+		&& is_whitespace(*begin)) {
+		++begin;
+	}
+
+	// The iterator must be dereferenced for the membership test and advanced on every pass, otherwise the
+	// search can neither compile for ordinary element types nor terminate
+	for (auto itr = begin; itr != end; ++itr) {
+		if (is_whitespace(*itr)) {
+			// Whitespace found; the first word ends here
+			result.first = make_word_split_member<MemberT>(begin, itr);
+
+			// Consume the entire whitespace run; everything after it is the second half
+			++itr;
+			while (itr != end
+				&& is_whitespace(*itr)) {
+				++itr;
+			}
+
+			result.second = make_word_split_member<MemberT>(itr, end);
+			return result;
+		}
+	}
+
+	// No whitespace found after the first word
+	result.first = make_word_split_member<MemberT>(begin, end);
+	return result;
+}
+
+/**
+ * Splits an input string into its first word and the rest, using a single whitespace value
+ *
+ * Resolves the result member type, then forwards `in_string.begin()` / `in_string.end()` to the iterator-based
+ * overload.
+ *
+ * @tparam OptionalMemberT At most one type: the string type used to populate the result (defaults to
+ * std::basic_string of the input's value_type)
+ * @tparam InputT String type being passed into word_split_once
+ * @param in_string String to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
+ */
+template<typename... OptionalMemberT, typename InputT>
+constexpr auto word_split_once(const InputT& in_string, typename InputT::value_type in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	using ElementT = typename InputT::value_type;
+	using MemberT = std::tuple_element_t<0, std::tuple<OptionalMemberT..., std::basic_string<ElementT>>>;
+
+	return word_split_once<MemberT>(in_string.begin(), in_string.end(), in_whitespace);
+}
+
+/**
+ * Splits an input string into its first word and the rest, using any value contained in `in_whitespace`
+ *
+ * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
+ * element is handled by the overload above.
+ *
+ * @tparam OptionalMemberT At most one type: the string type used to populate the result
+ * @param in_string String to split
+ * @param in_whitespace Range (providing begin()/end()) of values considered to be whitespace
+ * @return A pair with the first word stored in `first` and the rest of the input stored in `second`
+ */
+template<typename... OptionalMemberT, typename InputT, typename SpaceT,
+	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
+constexpr auto word_split_once(const InputT& in_string, const SpaceT& in_whitespace) {
+	static_assert(sizeof...(OptionalMemberT) <= 1, "Too many member types specified for OptionalMemberT");
+	return word_split_once<OptionalMemberT...>(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end());
+}
+
+/**
+ * Splits a range of elements into words up to a specified number of times and returns the result
+ *
+ * Leading whitespace is discarded and each run of `in_whitespace` counts as a single split. Once `in_limit`
+ * splits have been made, the rest of the input is stored untouched as the final element.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT; the first, if present, is the word type
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @param in_limit Maximum number of times to split
+ * @return Container containing up to `in_limit` + 1 substrings; result[in_limit], if present, is the unprocessed
+ * remainder
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename ElementT>
+constexpr auto word_split_n(ItrT begin, EndT end, ElementT in_whitespace, size_t in_limit) {
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	container_type result;
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& *begin == in_whitespace) {
+		++begin;
+	}
+
+	// The iterator is advanced only inside the body: after a whitespace run is consumed it may already equal
+	// `end`, and an unconditional increment in the loop header would step past the end of the range
+	for (auto itr = begin; itr != end && in_limit != 0;) {
+		if (*itr == in_whitespace) {
+			// Push token to result
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			// Consume the entire whitespace run
+			++itr;
+			while (itr != end
+				&& *itr == in_whitespace) {
+				++itr;
+			}
+
+			begin = itr;
+			--in_limit;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits a range of elements into words up to a specified number of times and returns the result
+ *
+ * Leading whitespace is discarded and each run of elements found in the whitespace range counts as a single
+ * split. Once `in_limit` splits have been made, the rest of the input is stored untouched as the final element.
+ * If the whitespace range is empty, the whole input is returned as one element.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT; the first, if present, is the word type
+ * @param begin Start of range of elements to split
+ * @param end End of range of elements to split
+ * @param in_whitespace_begin Start of range containing the whitespace values
+ * @param in_whitespace_end End of range containing the whitespace values
+ * @param in_limit Maximum number of times to split
+ * @return Container containing up to `in_limit` + 1 substrings; result[in_limit], if present, is the unprocessed
+ * remainder
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename ItrT, typename EndT, typename SpaceItrT, typename SpaceEndT>
+constexpr auto word_split_n(ItrT begin, EndT end, SpaceItrT in_whitespace_begin, SpaceEndT in_whitespace_end, size_t in_limit) {
+	using ElementT = std::remove_cvref_t<decltype(*begin)>;
+	using word_split_defaults_type = word_split_defaults<ContainerT, ElementT, ContainerArgsT...>;
+	using member_type = typename word_split_defaults_type::member_type;
+	using container_type = typename word_split_defaults_type::container_type;
+
+	// Exactly one whitespace value: delegate to the single-element overload
+	if (std::distance(in_whitespace_begin, in_whitespace_end) == 1) {
+		return word_split_n<ContainerT, ContainerArgsT...>(begin, end, *in_whitespace_begin, in_limit);
+	}
+
+	container_type result{};
+	if (begin >= end) {
+		// Nothing to word_split
+		return result;
+	}
+
+	if (in_whitespace_begin >= in_whitespace_end) {
+		// Absent whitespace, therefore no match, therefore return input as single token
+		result.push_back(make_word_split_member<member_type>(begin, end));
+		return result;
+	}
+
+	// True when the element appears anywhere in the whitespace range
+	auto is_whitespace = [in_whitespace_begin, in_whitespace_end](ElementT in_element) {
+		return std::find(in_whitespace_begin, in_whitespace_end, in_element) != in_whitespace_end;
+	};
+
+	// Skip over any preceding whitespace
+	while (begin != end
+		&& is_whitespace(*begin)) {
+		++begin;
+	}
+
+	for (auto itr = begin; itr != end && in_limit != 0;) {
+		// Membership test on the current element; the whitespace range is a set of values, not a sequence to
+		// be matched in order against the input
+		if (is_whitespace(*itr)) {
+			// Push token to result
+			result.push_back(make_word_split_member<member_type>(begin, itr));
+
+			// Consume the entire whitespace run
+			++itr;
+			while (itr != end
+				&& is_whitespace(*itr)) {
+				++itr;
+			}
+
+			begin = itr;
+			--in_limit;
+			continue;
+		}
+
+		++itr;
+	}
+
+	// Push final token to the end if not empty, matching the single-element overload this function delegates to
+	if (begin != end) {
+		result.push_back(make_word_split_member<member_type>(begin, end));
+	}
+
+	return result;
+}
+
+/**
+ * Splits a string into words up to a specified number of times, using a single whitespace value
+ *
+ * Forwards `in_string.begin()` / `in_string.end()` to the iterator-based overload.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Element value considered to be whitespace
+ * @param in_limit Maximum number of times to split
+ * @return Container returned by the iterator-based overload
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT>
+constexpr auto word_split_n(const InputT& in_string, typename InputT::value_type in_whitespace, size_t in_limit) {
+	return word_split_n<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace, in_limit);
+}
+
+/**
+ * Splits a string into words up to a specified number of times, using any value contained in `in_whitespace`
+ *
+ * Only participates in overload resolution when SpaceT is not the string's own element type, so that a single
+ * element is handled by the overload above.
+ *
+ * @tparam ContainerT Container type to store the results in
+ * @tparam ContainerArgsT Optional template parameters for ContainerT
+ * @param in_string String to split
+ * @param in_whitespace Range (providing begin()/end()) of values considered to be whitespace
+ * @param in_limit Maximum number of times to split
+ * @return Container returned by the iterator-based overload
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT,
+	typename std::enable_if<!std::is_same<SpaceT, typename InputT::value_type>::value>::type* = nullptr>
+constexpr auto word_split_n(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) {
+	return word_split_n<ContainerT, ContainerArgsT...>(in_string.begin(), in_string.end(), in_whitespace.begin(), in_whitespace.end(), in_limit);
+}
+
+/**
+ * Splits an input string into view substrings; cannot specify element return type
+ *
+ * Calls word_split with std::basic_string_view<InputT::value_type> as the member type.
+ * NOTE(review): the returned views presumably refer to `in_string`'s storage, so `in_string` would need to
+ * outlive the result - confirm against make_word_split_member.
+ *
+ * @param in_string String to split
+ * @param in_whitespace Whitespace value, or range of whitespace values, to split upon
+ * @return Container of views returned by word_split
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT, typename InputT, typename SpaceT>
+constexpr auto word_split_view(const InputT& in_string, const SpaceT& in_whitespace) {
+	using MemberT = std::basic_string_view<typename InputT::value_type>;
+	return word_split<ContainerT, MemberT, ContainerArgsT...>(in_string, in_whitespace);
+}
+
+/**
+ * Splits an input string into view substring pair
+ *
+ * Calls word_split_once with std::basic_string_view<InputT::value_type> as the member type.
+ *
+ * @tparam NothingT Must be empty; exists only so that explicitly supplied template arguments are rejected
+ * @param in_string String to split
+ * @param in_whitespace Whitespace value, or range of whitespace values, to split upon
+ * @return Pair of views returned by word_split_once
+ */
+template<typename... NothingT, typename InputT, typename SpaceT>
+constexpr auto word_split_once_view(const InputT& in_string, const SpaceT& in_whitespace) {
+	// Using a static assertion instead of `auto` for inputs, because this is the only place it would be used, and it'd
+	// require -fconcepts which isn't currently used. Replace inputs with `auto` later and remove template parameters
+	// if -fconcepts is ever added.
+	static_assert(sizeof...(NothingT) == 0, "word_split_once_view does not accept template parameters");
+	using MemberT = std::basic_string_view<typename InputT::value_type>;
+	return word_split_once<MemberT>(in_string, in_whitespace);
+}
+
+/**
+ * Splits an input string into a specified number of view substrings
+ *
+ * Calls word_split_n with std::basic_string_view<InputT::value_type> as the member type.
+ *
+ * @param in_string String to split
+ * @param in_whitespace Whitespace value, or range of whitespace values, to split upon
+ * @param in_limit Maximum number of times to split
+ * @return Container of views returned by word_split_n
+ */
+template<template<typename...> typename ContainerT = std::vector, typename... ContainerArgsT,
+	typename InputT, typename SpaceT>
+constexpr auto word_split_n_view(const InputT& in_string, const SpaceT& in_whitespace, size_t in_limit) {
+	using MemberT = std::basic_string_view<typename InputT::value_type>;
+	return word_split_n<ContainerT, MemberT, ContainerArgsT...>(in_string, in_whitespace, in_limit);
+}
+
+} // namespace jessilib
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -1,6 +1,6 @@
 # Setup source files
 set(SOURCE_FILES
-        timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp)
+        timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp)

 # Setup gtest
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
--- a/src/test/test_split.hpp
+++ b/src/test/test_split.hpp
@ -25,18 +25,31 @@
 template<typename T>
 constexpr T default_delim{};

-template<typename T, typename ResultT = std::basic_string<T>>
-ResultT make_word(size_t length = 8, T delim = default_delim<T>) {
+template<typename T, typename ResultT = std::basic_string<T>, typename DelimT,
+	typename std::enable_if_t<!std::is_scalar_v<DelimT>>* = nullptr>
+ResultT make_word(DelimT delim, size_t length = 8) {
 	ResultT result;

 	if (length == 0) {
-		return {};
+		return result;
+	}
+
+	if (delim.size() == 0) {
+		delim.push_back(default_delim<T>);
+	}
+
+	// Add initial character
+	{
+		auto chr = delim.back() + 1;
+		while (std::find(delim.begin(), delim.end(), chr) != delim.end()) {
+			++chr;
+		}
+		result.push_back(chr);
 	}

-	result.push_back(delim + 1);
 	while (result.size() < length) {
 		auto chr = result.back() + 1;
-		if (chr == delim) {
+		while (std::find(delim.begin(), delim.end(), chr) != delim.end()) {
 			++chr;
 		}
 		result.push_back(chr);
@ -50,6 +63,11 @@ ResultT make_word(size_t length = 8, T delim = default_delim<T>) {
 	return result;
 }

+// Single-delimiter convenience overload: wraps `delim` in a one-element string and forwards to the make_word
+// overload that takes a collection of delimiters (note the swapped argument order in that overload)
+template<typename T, typename ResultT = std::basic_string<T>>
+ResultT make_word(size_t length = 8, T delim = default_delim<T>) {
+	return make_word<T, ResultT>(std::basic_string<T>{ delim }, length);
+}
+
 template<typename T, typename ResultT = std::basic_string<T>>
 ResultT make_delim_long(size_t length = 8, T in_delim = default_delim<T>) {
 	// in this context, in_delim should be whatever was previously passed to make_word
@ -74,17 +92,17 @@ struct RandomTestData {
 		: m_fixed_word_count{ in_fixed_word_count },
 		m_fixed_word_length{ in_fixed_word_length } {
 		m_delim.insert(m_delim.end(), in_delim);
-		operator()();
+		populate();
 	}

 	RandomTestData(StringT in_delim, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
 		: m_delim{ in_delim },
 		m_fixed_word_count{ in_fixed_word_count },
 		m_fixed_word_length{ in_fixed_word_length } {
-		operator()();
+		populate();
 	}

-	void operator()() {
+	void populate() {
 		m_tokens.clear();
 		m_str.clear();

@ -133,3 +151,70 @@ struct RandomTestData {
 	StringT m_str;
 	std::vector<StringT> m_tokens;
 };
+
+/**
+ * Randomly generated input for the word_split tests, together with the tokens that input was built from
+ *
+ * `m_str` is built by appending each generated word followed by the full contents of `m_delim`. Zero-length
+ * words contribute only the delimiter, so `m_str` may begin with, and may contain consecutive runs of,
+ * delimiter elements; it always ends with the delimiter.
+ */
+template<typename T, typename StringT = std::basic_string<T>>
+struct RandomWordTestData {
+	/** Generates data using a single delimiter element; a count/length of 0 means "pick randomly" */
+	RandomWordTestData(T in_delim = default_delim<T>, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
+		: m_fixed_word_count{ in_fixed_word_count },
+		m_fixed_word_length{ in_fixed_word_length } {
+		m_delim.insert(m_delim.end(), in_delim);
+		populate();
+	}
+
+	/** Generates data using every element of `in_delim` as the separator; a count/length of 0 means "pick randomly" */
+	RandomWordTestData(StringT in_delim, size_t in_fixed_word_count = 0, size_t in_fixed_word_length = 0)
+		: m_delim{ in_delim },
+		m_fixed_word_count{ in_fixed_word_count },
+		m_fixed_word_length{ in_fixed_word_length } {
+		populate();
+	}
+
+	/** Discards any previously generated outputs and generates a fresh string and token list */
+	void populate() {
+		// Reset every output; the token offsets must be cleared too, otherwise a repeated call would leave
+		// stale offsets at the front of m_token_indexes and get_remainder() would slice the wrong position
+		m_tokens.clear();
+		m_token_indexes.clear();
+		m_str.clear();
+
+		// Seeded from the clock, so each run generates different data
+		std::mt19937 randgen(static_cast<unsigned int>(std::chrono::system_clock::now().time_since_epoch().count()));
+		std::uniform_int_distribution<uint32_t> word_count_distribution(5, 64);
+		std::uniform_int_distribution<uint32_t> word_length_distribution(0, 16);
+
+		auto word_count = m_fixed_word_count;
+		if (word_count == 0) {
+			word_count = word_count_distribution(randgen);
+		}
+
+		while (m_tokens.size() < word_count) {
+			auto word_length = m_fixed_word_length;
+			if (word_length == 0) {
+				word_length = word_length_distribution(randgen);
+			}
+			m_tokens.push_back(make_word<T, StringT>(m_delim, word_length));
+			if (m_tokens.back().empty()) {
+				// Empty words are not tokens; only their trailing delimiter ends up in m_str
+				m_tokens.pop_back();
+			}
+			else {
+				// Remember where this token starts within m_str
+				m_token_indexes.push_back(m_str.size());
+				m_str.insert(m_str.end(), m_tokens.back().begin(), m_tokens.back().end());
+			}
+			m_str.insert(m_str.end(), m_delim.begin(), m_delim.end());
+		}
+	}
+
+	/**
+	 * @param in_times_split Index of the token at which the remainder starts
+	 * @return Everything in m_str from the start of that token onwards, or an empty string if there is no such token
+	 */
+	StringT get_remainder(size_t in_times_split) {
+		StringT result;
+		if (in_times_split < m_tokens.size()) {
+			auto index = m_token_indexes[in_times_split];
+			result.insert(result.end(), m_str.begin() + index, m_str.end());
+		}
+
+		return result;
+	}
+
+	// Inputs
+	StringT m_delim;
+	size_t m_fixed_word_count{};
+	size_t m_fixed_word_length{};
+
+	// Outputs
+	StringT m_str; // Generated text: tokens separated (and followed) by m_delim
+	std::vector<StringT> m_tokens; // Non-empty words, in order of appearance in m_str
+	std::vector<size_t> m_token_indexes; // Offset into m_str at which each entry of m_tokens begins
+};
--- a/src/test/word_split.cpp
+++ b/src/test/word_split.cpp
@ -0,0 +1,280 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+#include "jessilib/word_split.hpp"
+#include "test_split.hpp"
+
+using namespace jessilib;
+using namespace std::literals;
+
+/** Checks that leading, trailing, and repeated spaces produce no empty words. */
+TEST(jessi, lazy) {
+	std::string sentence = "   this           is a    sentence ";
+	auto split_result = word_split(sentence, ' ');
+	// Fatal on mismatch: the checks below index into split_result and must not run on a shorter result
+	ASSERT_EQ(split_result.size(), 4U);
+	EXPECT_EQ(split_result[0], "this");
+	EXPECT_EQ(split_result[1], "is");
+	EXPECT_EQ(split_result[2], "a");
+	EXPECT_EQ(split_result[3], "sentence");
+}
+
+using char_types = ::testing::Types<char, unsigned char, signed char, wchar_t, char8_t, char16_t, char32_t>; // element types each typed suite below is instantiated with
+
+/** Typed fixture for the word_split tests that take a string view as input; carries no state. */
+template<typename T>
+class WordSplitSVTest : public ::testing::Test {};
+TYPED_TEST_SUITE(WordSplitSVTest, char_types);
+
+/** Typed fixture for the word_split tests that take an owning string as input; carries no state. */
+template<typename T>
+class WordSplitStringTest : public ::testing::Test {};
+TYPED_TEST_SUITE(WordSplitStringTest, char_types);
+
+/** Typed fixture for the word_split_once tests; carries no state. */
+template<typename T>
+class WordSplitOnceTest : public ::testing::Test {};
+TYPED_TEST_SUITE(WordSplitOnceTest, char_types);
+
+/** Typed fixture for the word_split_n tests; carries no state. */
+template<typename T>
+class WordSplitNTest : public ::testing::Test {};
+TYPED_TEST_SUITE(WordSplitNTest, char_types);
+
+/** An empty view must split into no words. */
+TYPED_TEST(WordSplitSVTest, empty) {
+	using view_type = std::basic_string_view<TypeParam>;
+	using string_type = std::basic_string<TypeParam>;
+
+	view_type no_input;
+	std::vector<string_type> words = word_split(no_input, default_delim<TypeParam>);
+	EXPECT_TRUE(words.empty());
+}
+
+/** A view holding one word and no delimiter must split into exactly that word. */
+TYPED_TEST(WordSplitSVTest, single_word) {
+	std::basic_string_view<TypeParam> single_word = make_word_view<TypeParam>();
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word_view -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** A delimiter after the only word must not produce an additional (empty) word. */
+TYPED_TEST(WordSplitSVTest, single_word_trailing_delim) {
+	auto word = make_word<TypeParam>();
+	word += default_delim<TypeParam>;
+	std::basic_string_view<TypeParam> single_word = word;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** A delimiter before the only word must not produce an additional (empty) word. */
+TYPED_TEST(WordSplitSVTest, single_word_prefix_delim) {
+	std::basic_string<TypeParam> word;
+	word += default_delim<TypeParam>;
+	word += make_word<TypeParam>();
+	std::basic_string_view<TypeParam> single_word = word;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** Delimiters on both sides of the only word must not produce additional (empty) words. */
+TYPED_TEST(WordSplitSVTest, single_word_surround_delim) {
+	std::basic_string<TypeParam> word;
+	word += default_delim<TypeParam>;
+	word += make_word<TypeParam>();
+	word += default_delim<TypeParam>;
+	std::basic_string_view<TypeParam> single_word = word;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(single_word, default_delim<TypeParam>);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** Two words separated by one delimiter must split into exactly those two words. */
+TYPED_TEST(WordSplitSVTest, two_words) {
+	auto word = make_word<TypeParam>();
+	word += default_delim<TypeParam>;
+	word += make_word<TypeParam>();
+	std::basic_string_view<TypeParam> words = word;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(words, default_delim<TypeParam>);
+	// Fatal on mismatch: the element checks below index into split_result
+	ASSERT_EQ(split_result.size(), 2U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+	EXPECT_EQ(split_result[1].size(), 8U);
+}
+
+/** Three words of lengths 3, 5, and 9, with a doubled delimiter before the last, must split into those three words. */
+TYPED_TEST(WordSplitSVTest, three_words) {
+	auto word = make_word<TypeParam>(3);
+	word += default_delim<TypeParam>;
+	word += make_word<TypeParam>(5);
+	// Two consecutive delimiters: no empty word may appear between them
+	word += default_delim<TypeParam>;
+	word += default_delim<TypeParam>;
+	word += make_word<TypeParam>(9);
+	std::basic_string_view<TypeParam> words = word;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split(words, default_delim<TypeParam>);
+	// Fatal on mismatch: the element checks below index into split_result
+	ASSERT_EQ(split_result.size(), 3U);
+	EXPECT_EQ(split_result[0].size(), 3U);
+	EXPECT_EQ(split_result[1].size(), 5U);
+	EXPECT_EQ(split_result[2].size(), 9U);
+}
+
+/** WordSplitOnceTest */
+
+/** word_split_once on random data: first is the first token, second is everything from the second token on. */
+TYPED_TEST(WordSplitOnceTest, random) {
+	using string_type = std::basic_string<TypeParam>;
+
+	RandomWordTestData<TypeParam> generated{};
+	std::pair<string_type, string_type> head_and_rest = word_split_once(generated.m_str, default_delim<TypeParam>);
+
+	EXPECT_EQ(head_and_rest.first, generated.m_tokens[0]);
+	EXPECT_EQ(head_and_rest.second, generated.get_remainder(1));
+}
+
+/** Same as the random test, but with std::vector as both the input and the result container. */
+TYPED_TEST(WordSplitOnceTest, random_vector) {
+	using element_vector = std::vector<TypeParam>;
+
+	RandomWordTestData<TypeParam, element_vector> generated{};
+	std::pair<element_vector, element_vector> head_and_rest = word_split_once<element_vector>(generated.m_str, default_delim<TypeParam>);
+
+	EXPECT_EQ(head_and_rest.first, generated.m_tokens[0]);
+	EXPECT_EQ(head_and_rest.second, generated.get_remainder(1));
+}
+
+/** Same as the random test, but using word_split_once_view, whose result is a pair of string views. */
+TYPED_TEST(WordSplitOnceTest, random_view) {
+	using view_type = std::basic_string_view<TypeParam>;
+
+	RandomWordTestData<TypeParam> generated{};
+	std::pair<view_type, view_type> head_and_rest = word_split_once_view(generated.m_str, default_delim<TypeParam>);
+
+	EXPECT_EQ(head_and_rest.first, generated.m_tokens[0]);
+	EXPECT_EQ(head_and_rest.second, generated.get_remainder(1));
+}
+
+/** WordSplitNTest */
+
+/** word_split_n on random data: the first n results are the first n tokens, result[n] is the remainder. */
+TYPED_TEST(WordSplitNTest, random) {
+	RandomWordTestData<TypeParam> data{};
+	constexpr size_t n = 4;
+	std::vector<std::basic_string<TypeParam>> split_result = word_split_n(data.m_str, default_delim<TypeParam>, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	// Fatal on mismatch: everything below indexes split_result up to and including n
+	ASSERT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+/** Same as the random test, but with std::vector as both the input and the per-token container. */
+TYPED_TEST(WordSplitNTest, random_vector) {
+	RandomWordTestData<TypeParam, std::vector<TypeParam>> data{};
+	constexpr size_t n = 4;
+	std::vector<std::vector<TypeParam>> split_result = word_split_n<std::vector, std::vector<TypeParam>>(data.m_str, default_delim<TypeParam>, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	// Fatal on mismatch: everything below indexes split_result up to and including n
+	ASSERT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+/** Same as the random test, but using word_split_n_view, whose results are string views. */
+TYPED_TEST(WordSplitNTest, random_view) {
+	RandomWordTestData<TypeParam> data{};
+	constexpr size_t n = 4;
+	std::vector<std::basic_string_view<TypeParam>> split_result = word_split_n_view(data.m_str, default_delim<TypeParam>, n);
+
+	// Tokens shall be same up until last one (n + 1)
+	// Fatal on mismatch: everything below indexes split_result up to and including n
+	ASSERT_EQ(split_result.size(), n + 1);
+	for (size_t index = 0; index != n; ++index) {
+		EXPECT_EQ(split_result[index], data.m_tokens[index]);
+	}
+
+	EXPECT_EQ(split_result[n], data.get_remainder(n));
+}
+
+/** std::string word_split test, really just testing compilation and returned types */
+
+/** An empty string must split into no words. */
+TYPED_TEST(WordSplitStringTest, empty) {
+	using string_type = std::basic_string<TypeParam>;
+
+	string_type no_input;
+	std::vector<string_type> words = word_split(no_input, default_delim<TypeParam>);
+	EXPECT_TRUE(words.empty());
+}
+
+/** An empty string must split into no words when the delimiter comes from make_delim_long(8). */
+TYPED_TEST(WordSplitStringTest, empty_long) {
+	using string_type = std::basic_string<TypeParam>;
+
+	string_type no_input;
+	auto long_delim = make_delim_long<TypeParam>(8);
+	std::vector<string_type> words = word_split(no_input, long_delim);
+	EXPECT_TRUE(words.empty());
+}
+
+/** A string holding one word and no delimiter must split into exactly that word. */
+TYPED_TEST(WordSplitStringTest, single_word) {
+	std::basic_string<TypeParam> single_word = make_word<TypeParam>();
+	std::vector<decltype(single_word)> split_result = word_split(single_word, default_delim<TypeParam>);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** A single word must come back unchanged when the delimiter comes from make_delim_long(8). */
+TYPED_TEST(WordSplitStringTest, single_word_long) {
+	auto delim = make_delim_long<TypeParam>(8);
+	std::basic_string<TypeParam> single_word = make_word<TypeParam>(delim);
+	std::vector<decltype(single_word)> split_result = word_split(single_word, delim);
+	// Fatal on mismatch: split_result[0] below must not be evaluated on an empty result
+	ASSERT_EQ(split_result.size(), 1U);
+	// NOTE(review): 8 is presumably the default word length of make_word -- confirm in test_split.hpp
+	EXPECT_EQ(split_result[0].size(), 8U);
+}
+
+/** Splitting randomly generated text must reproduce exactly the generated tokens. */
+TYPED_TEST(WordSplitStringTest, random) {
+	using string_type = std::basic_string<TypeParam>;
+
+	RandomWordTestData<TypeParam> generated{};
+	std::vector<string_type> words = word_split(generated.m_str, default_delim<TypeParam>);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** Random-data round trip using the delimiter produced by make_delim_long(8). */
+TYPED_TEST(WordSplitStringTest, random_long) {
+	using string_type = std::basic_string<TypeParam>;
+
+	auto long_delim = make_delim_long<TypeParam>(8);
+	RandomWordTestData<TypeParam> generated{ long_delim };
+	std::vector<string_type> words = word_split(generated.m_str, long_delim);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** Random-data round trip with std::vector as both the input and the per-token container. */
+TYPED_TEST(WordSplitStringTest, random_vector) {
+	using element_vector = std::vector<TypeParam>;
+
+	RandomWordTestData<TypeParam, element_vector> generated{};
+	std::vector<element_vector> words = word_split<std::vector, element_vector>(generated.m_str, default_delim<TypeParam>);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** An extra delimiter appended to the generated text must not change the resulting tokens. */
+TYPED_TEST(WordSplitStringTest, random_long_trailing_delim) {
+	using string_type = std::basic_string<TypeParam>;
+
+	auto long_delim = make_delim_long<TypeParam>(8);
+	RandomWordTestData<TypeParam> generated{ long_delim };
+	generated.m_str += long_delim;
+	std::vector<string_type> words = word_split(generated.m_str, long_delim);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** An extra delimiter prepended to the generated text must not change the resulting tokens. */
+TYPED_TEST(WordSplitStringTest, random_long_prefix_delim) {
+	using string_type = std::basic_string<TypeParam>;
+
+	auto long_delim = make_delim_long<TypeParam>(8);
+	RandomWordTestData<TypeParam> generated{ long_delim };
+	generated.m_str = long_delim + generated.m_str;
+	std::vector<string_type> words = word_split(generated.m_str, long_delim);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** Two extra delimiters appended to the generated text must not change the resulting tokens. */
+TYPED_TEST(WordSplitStringTest, random_long_trailing_two_delim) {
+	using string_type = std::basic_string<TypeParam>;
+
+	auto long_delim = make_delim_long<TypeParam>(8);
+	RandomWordTestData<TypeParam> generated{ long_delim };
+	generated.m_str += long_delim + long_delim;
+	std::vector<string_type> words = word_split(generated.m_str, long_delim);
+	EXPECT_EQ(words, generated.m_tokens);
+}
+
+/** Two extra delimiters prepended to the generated text must not change the resulting tokens. */
+TYPED_TEST(WordSplitStringTest, random_long_prefix_two_delim) {
+	using string_type = std::basic_string<TypeParam>;
+
+	auto long_delim = make_delim_long<TypeParam>(8);
+	RandomWordTestData<TypeParam> generated{ long_delim };
+	generated.m_str = long_delim + long_delim + generated.m_str;
+	std::vector<string_type> words = word_split(generated.m_str, long_delim);
+	EXPECT_EQ(words, generated.m_tokens);
+}