From e3142da3298c735a4d339de671d230f9263c5dcd Mon Sep 17 00:00:00 2001
From: Jessica James <jessica.aj@outlook.com>
Date: Sun, 5 Dec 2021 16:56:45 -0600
Subject: [PATCH] Initial implementation for syntax_tree, with
 deserialize_html_form as a prototype; add container::push; split query_string
 stuff to http_query.hpp

---
 src/include/jessilib/http_query.hpp       | 190 ++++++++++
 src/include/jessilib/object.hpp           |  29 +-
 src/include/jessilib/type_traits.hpp      | 143 ++++++-
 src/include/jessilib/unicode.hpp          | 415 +--------------------
 src/include/jessilib/unicode_base.hpp     |  56 +++
 src/include/jessilib/unicode_compare.hpp  | 430 ++++++++++++++++++++++
 src/include/jessilib/unicode_sequence.hpp | 147 +++-----
 src/include/jessilib/unicode_syntax.hpp   | 139 +++++++
 src/test/CMakeLists.txt                   |   2 +-
 src/test/http_query.cpp                   | 238 ++++++++++++
 src/test/unicode.cpp                      |   6 +
 src/test/unicode_sequence.cpp             | 137 +------
 12 files changed, 1281 insertions(+), 651 deletions(-)
 create mode 100644 src/include/jessilib/http_query.hpp
 create mode 100644 src/include/jessilib/unicode_compare.hpp
 create mode 100644 src/include/jessilib/unicode_syntax.hpp
 create mode 100644 src/test/http_query.cpp
diff --git a/src/include/jessilib/http_query.hpp b/src/include/jessilib/http_query.hpp
new file mode 100644
index 0000000..b60f5c3
--- /dev/null
+++ b/src/include/jessilib/http_query.hpp
@@ -0,0 +1,190 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+/**
+ * @file http_query.hpp
+ * @author Jessica James
+ *
+ * HTTP query string and HTML form deserialization utilities
+ */
+
+#pragma once
+
+#include "unicode_syntax.hpp"
+#include "unicode_sequence.hpp"
+#include "type_traits.hpp"
+
+namespace jessilib {
+
+/**
+ * Query string escape sequence parser
+ */
+
+template<typename CharT,
+	std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // make_hex_sequence_pair isn't going to play well with other types
+static constexpr shrink_sequence_tree<CharT> http_query_escapes_root_tree{
+	make_hex_sequence_pair<CharT, U'%', 2, true, false>(), // '%' escape; NOTE(review): arguments presumably mean max 2 hex digits, exact count, non-unicode -- confirm
+	make_simple_sequence_pair<CharT, U'+', ' '>() // NOTE(review): presumably maps '+' to a space -- confirm against make_simple_sequence_pair
+};
+static_assert(is_sorted<char, http_query_escapes_root_tree<char>, std::size(http_query_escapes_root_tree<char>)>(), "Tree must be pre-sorted");
+static_assert(is_sorted<char8_t, http_query_escapes_root_tree<char8_t>, std::size(http_query_escapes_root_tree<char8_t>)>(), "Tree must be pre-sorted");
+
+template<typename CharT, std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // single-byte code unit types only
+constexpr bool deserialize_http_query(std::basic_string<CharT>& inout_string) {
+	constexpr auto& escapes_tree = http_query_escapes_root_tree<CharT>; // escape table applied to inout_string in place
+	return apply_shrink_sequence_tree<CharT, escapes_tree, std::size(escapes_tree)>(inout_string);
+}
+
+// TODO: decide whether to take this approach, where query strings are assumed to represent UTF-8 text data, OR implement
+// such that calling deserialize_http_query will assume the relevant encoding (i.e: calling with char16_t would read in
+// escaped query values as bytes in codepoint char16_t, rather than utf-8 encoding sequence)
+/*template<typename CharT,
+	std::enable_if_t<sizeof(CharT) != 1>* = nullptr>
+bool deserialize_http_query(std::basic_string<CharT>& inout_string) {
+	//TODO: optimize this?
+	std::basic_string<char8_t> u8query_string = string_cast<char8_t>(inout_string);
+	bool result = deserialize_http_query<char8_t>(u8query_string);
+	inout_string = string_cast<CharT>(u8query_string);
+	return result;
+}*/
+
+/**
+ * HTML form parser
+ */
+
+template<typename CharT, typename ContainerT>
+struct HTMLFormContext { // parser state shared by the HTML form syntax tree actions
+	using container_type = ContainerT;
+	container_type& out_container; // receives each completed key/value pair via container::push
+	CharT* write_head; // next position that decoded output is written to
+	const CharT* key_start = write_head; // start of the key currently being read; defaults to the initial write_head
+	const CharT* value_start{}; // value_start is also key_end
+};
+
+template<typename CharT, typename ContextT, char32_t InCodepointV>
+constexpr syntax_tree_member<CharT, ContextT> make_value_start_pair() {
+	// '='
+	return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>&) constexpr {
+		if (inout_context.value_start == nullptr) {
+			// No value pending yet: the value begins at the current write position
+			inout_context.value_start = inout_context.write_head;
+		}
+		else {
+			// A value is already pending, so this codepoint is just value text; write it back out
+			inout_context.write_head += encode_codepoint(inout_context.write_head, InCodepointV);
+		}
+		return true;
+	} };
+}
+
+template<typename CharT, typename ContextT>
+constexpr bool value_end_action(ContextT& inout_context, std::basic_string_view<CharT>&) {
+	// Hand the key/value pair written so far to the output container, then start a new key
+	using view_type = std::basic_string_view<CharT>;
+	const CharT* const pair_begin = inout_context.key_start;
+	const CharT* const pair_split = inout_context.value_start; // null when no value was started
+	const CharT* const pair_end = inout_context.write_head;
+
+	if (pair_split == nullptr) {
+		// Valueless key: everything written since key_start is the key; push it with an empty value
+		view_type key{ pair_begin, static_cast<size_t>(pair_end - pair_begin) };
+		jessilib::container::push(inout_context.out_container, key, view_type{});
+	}
+	else {
+		// The key runs up to the split point; the value runs from the split point to the write head
+		view_type key{ pair_begin, static_cast<size_t>(pair_split - pair_begin) };
+		view_type value{ pair_split, static_cast<size_t>(pair_end - pair_split) };
+		jessilib::container::push(inout_context.out_container, key, value);
+		inout_context.value_start = nullptr;
+	}
+
+	// The next key starts where this pair ended
+	inout_context.key_start = pair_end;
+	return true;
+}
+
+template<typename CharT, typename ContextT, char32_t InCodepointV>
+constexpr syntax_tree_member<CharT, ContextT> make_value_end_pair() {
+	// Pair separator ('&' in html_form_root_tree): ends the current key/value pair via value_end_action
+	return { InCodepointV, value_end_action<CharT, ContextT> };
+}
+
+template<typename CharT, typename ContextT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
+constexpr syntax_tree_member<CharT, ContextT> make_hex_syntax_shrink_pair() {
+	return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) constexpr {
+		return hex_shrink_sequence_action<CharT, MaxDigitsV, ExactDigitsV, IsUnicode>(inout_context.write_head, inout_read_view); // forwards to the hex action using the context's write head
+	} };
+}
+
+template<typename CharT, typename ContextT, char32_t InCodepointV, char8_t OutCodepointV>
+constexpr syntax_tree_member<CharT, ContextT> make_simple_shrink_pair() {
+	return {
+		InCodepointV,
+		[](ContextT& inout_context, std::basic_string_view<CharT>&) constexpr {
+			// Emit the single-unit replacement and step the write head past it
+			*inout_context.write_head++ = static_cast<CharT>(OutCodepointV);
+			return true;
+		}
+	};
+}
+
+template<typename CharT, typename ContextT>
+bool html_form_default_action(get_endpoint_result decode, ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) {
+	// Ordinary text: move each code unit of the decoded codepoint from the front of the
+	// read view to the write head, advancing both as we go
+	CharT*& out = inout_context.write_head;
+	for (auto remaining = decode.units; remaining != 0; --remaining) {
+		*out = inout_read_view.front();
+		++out;
+		inout_read_view.remove_prefix(1);
+	}
+
+	return true;
+}
+
+template<typename CharT, typename ContainerT,
+	std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // make_hex_sequence_pair isn't going to play well with other types
+static constexpr syntax_tree<CharT, HTMLFormContext<CharT, ContainerT>> html_form_root_tree{
+	make_hex_syntax_shrink_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'%', 2, true, false>(), // '%': hex escape (MaxDigitsV = 2, ExactDigitsV = true, IsUnicode = false)
+	make_value_end_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'&'>(), // '&': ends the current key/value pair
+	make_simple_shrink_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'+', ' '>(), // '+': written out as a space
+	make_value_start_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'='>() // '=': starts the value, unless one is already pending
+};
+
+template<typename CharT, typename ContainerT,
+	std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // single-byte code unit types only, matching html_form_root_tree
+constexpr bool deserialize_html_form(ContainerT& out_container, std::basic_string<CharT>& inout_string) {
+	if (inout_string.empty()) {
+		return true; // even though apply_syntax_tree checks for this, check it here anyways so we don't call value_end_action
+	}
+
+	HTMLFormContext<CharT, ContainerT> context{ out_container, inout_string.data() }; // decoded output overwrites inout_string's own buffer; pushed views point into it
+	constexpr auto& html_form_tree = html_form_root_tree<CharT, ContainerT>;
+	static_assert(is_sorted<CharT, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted"); // CharT rather than a hard-coded char, so each instantiation checks its own tree
+
+	std::basic_string_view<CharT> read_view{ inout_string };
+	if (apply_syntax_tree<CharT, decltype(context), html_form_tree, std::size(html_form_tree), html_form_default_action>
+	    (context, read_view)) {
+		value_end_action<CharT, decltype(context)>(context, read_view); // flush the final pair, which has no trailing separator
+		return true;
+	}
+
+	return false; // parsing failed; pairs pushed before the failure remain in out_container
+}
+
+} // namespace jessilib
diff --git a/src/include/jessilib/object.hpp b/src/include/jessilib/object.hpp
index 3cf625f..32cce7c 100644
--- a/src/include/jessilib/object.hpp
+++ b/src/include/jessilib/object.hpp
@@ -101,7 +101,7 @@ public:
 	template<typename T,
 		typename std::enable_if<is_backing<typename std::decay<T>::type>::value
 		&& !is_sequence_container<typename std::decay<T>::type>::value
-		&& (!is_associative_container<typename std::decay<T>::type>::value || std::is_same<typename remove_cvref<T>::type, map_type>::value)>::type* = nullptr>
+		&& (!is_associative_container<typename std::decay<T>::type>::value || std::is_same<typename std::remove_cvref<T>::type, map_type>::value)>::type* = nullptr>
 	object(T&& in_value)
 		: m_value{ typename is_backing<typename std::decay<T>::type>::type{ std::forward<T>(in_value) } } {
 		// Empty ctor body
@@ -140,10 +140,10 @@ public:
 
 	// Non-map_type associative containers (container<string_type, T>)
 	template<typename T,
-		typename std::enable_if<is_associative_container<typename remove_cvref<T>::type>::value
-			&& (std::is_convertible<typename is_associative_container<typename remove_cvref<T>::type>::key_type, string_type>::value
-			|| std::is_convertible<typename is_associative_container<typename remove_cvref<T>::type>::key_type, string_view_type>::value)
-			&& !std::is_same<typename is_associative_container<typename remove_cvref<T>::type>::value_type, object>::value>::type* = nullptr>
+		typename std::enable_if<is_associative_container<typename std::remove_cvref<T>::type>::value
+			&& (std::is_convertible<typename is_associative_container<typename std::remove_cvref<T>::type>::key_type, string_type>::value
+			|| std::is_convertible<typename is_associative_container<typename std::remove_cvref<T>::type>::key_type, string_view_type>::value)
+			&& !std::is_same<typename is_associative_container<typename std::remove_cvref<T>::type>::value_type, object>::value>::type* = nullptr>
 	object(T&& in_value)
 		: m_value{ map_type{} } {
 		auto& map = std::get<map_type>(m_value);
@@ -474,6 +474,25 @@ private:
 	// represent as a map, whereas an actual xml document is sequenced
 }; // object
 
+namespace container {
+
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<std::is_same_v<ContainerT, object>>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	auto object_type = inout_container.type();
+	if (object_type == object::type::null || object_type == object::type::map) {
+		// Null or map: assign the value under the key (NOTE(review): presumably keyed access turns a null object into a map -- confirm)
+		inout_container[in_key] = in_value;
+	}
+	else if (object_type == object::type::array) {
+		// Array: index at the current size and set the key on that element (NOTE(review): assumes indexing at size() appends -- confirm)
+		inout_container[inout_container.size()][in_key] = in_value;
+	}
+	// else // do nothing; pushing a key/value pair isn't valid here
+}
+
+} // namespace container
+
 } // namespace jessilib
 
 
diff --git a/src/include/jessilib/type_traits.hpp b/src/include/jessilib/type_traits.hpp
index 85fd80d..223deea 100644
--- a/src/include/jessilib/type_traits.hpp
+++ b/src/include/jessilib/type_traits.hpp
@@ -20,9 +20,14 @@
 
 #include <cstddef>
 #include <type_traits>
+
+// Container types we're using, more or less purely because we can't forward declare these at all
 #include <vector>
 #include <list>
 #include <forward_list>
+#include <stack>
+#include <queue>
+#include <deque>
 #include <set>
 #include <unordered_set>
 #include <map>
@@ -32,13 +37,6 @@
 
 namespace jessilib {
 
-/** remove_cvref (can be replaced with C++20) */
-
-template<class T>
-struct remove_cvref {
-	typedef std::remove_cv_t<std::remove_reference_t<T>> type;
-};
-
 /** is_basic_string */
 
 template<typename T>
@@ -65,6 +63,20 @@ struct is_basic_string_view<std::basic_string_view<T>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+/** is_pair */
+
+template<typename T>
+struct is_pair : std::false_type {}; // primary template: T is not a std::pair
+
+template<typename LeftT, typename RightT>
+struct is_pair<std::pair<LeftT, RightT>> { // specialization: exposes the pair's two member types
+	using first_type = LeftT;
+	using second_type = RightT;
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
 /** is_vector */
 
 template<typename T>
@@ -104,6 +116,45 @@ struct is_forward_list<std::forward_list<T>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+/** is_stack */
+
+template<typename T>
+struct is_stack : std::false_type {}; // primary template: T is not a std::stack
+
+template<typename T>
+struct is_stack<std::stack<T>> { // only matches std::stack with its default underlying container
+	using type = T; // element type
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
+/** is_queue */
+
+template<typename T>
+struct is_queue : std::false_type {}; // primary template: T is not a std::queue
+
+template<typename T>
+struct is_queue<std::queue<T>> { // only matches std::queue with its default underlying container
+	using type = T; // element type
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
+/** is_deque */
+
+template<typename T>
+struct is_deque : std::false_type {}; // primary template: T is not a std::deque
+
+template<typename T>
+struct is_deque<std::deque<T>> { // only matches std::deque with the default allocator
+	using type = T; // element type
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
 /** is_set */
 
 template<typename T>
@@ -170,6 +221,20 @@ struct is_map<std::map<KeyT, ValueT>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+/** is_multimap */
+
+template<typename T>
+struct is_multimap : std::false_type {}; // primary template: T is not a std::multimap
+
+template<typename KeyT, typename ValueT>
+struct is_multimap<std::multimap<KeyT, ValueT>> { // only matches the default comparator and allocator
+	using key_type = KeyT;
+	using value_type = ValueT; // the mapped type, not the container's pair value_type
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
 /** is_unordered_map */
 
 template<typename T>
@@ -184,6 +249,18 @@ struct is_unordered_map<std::unordered_map<KeyT, ValueT>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+template<typename T>
+struct is_unordered_multimap : std::false_type {}; // primary template: T is not a std::unordered_multimap
+
+template<typename KeyT, typename ValueT>
+struct is_unordered_multimap<std::unordered_multimap<KeyT, ValueT>> { // only matches the default hash, equality and allocator
+	using key_type = KeyT;
+	using value_type = ValueT; // the mapped type, not the container's pair value_type
+	static constexpr bool value{ true };
+	constexpr operator bool() const noexcept { return true; }
+	constexpr bool operator()() const noexcept { return true; }
+};
+
 /** is_associative_container */
 
 template<typename T>
@@ -236,6 +313,7 @@ struct is_sequence_container<std::forward_list<T>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+// Sets are really associative containers, not sequence...
 template<typename T>
 struct is_sequence_container<std::set<T>> {
 	using type = T;
@@ -268,4 +346,55 @@ struct is_sequence_container<std::unordered_multiset<T>> {
 	constexpr bool operator()() const noexcept { return true; }
 };
 
+/**
+ * Push helper for pushing key/value pairs to arbitrary container types
+ *
+ * If ContainerT is associative: set key/value
+ * If ContainerT is multi-associative: add key/value
+ * If ContainerT is sequential: push key/value pair to back
+ */
+namespace container {
+/** Pushing to associative containers */
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_map<ContainerT>::value || is_unordered_map<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	inout_container[in_key] = in_value; // inserts the key if absent, otherwise overwrites the existing value
+}
+
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_set<ContainerT>::value || is_unordered_set<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	auto insert_result = inout_container.insert({in_key, in_value}); // pair of { iterator, whether insertion happened }
+	if (!insert_result.second) { // an equivalent element already exists; set elements are immutable, so replace it instead of assigning
+		inout_container.insert(inout_container.erase(insert_result.first), {in_key, in_value}); // erase returns the following iterator, used as the insertion hint
+	}
+}
+
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_multimap<ContainerT>::value || is_unordered_multimap<ContainerT>::value
+	|| is_multiset<ContainerT>::value || is_unordered_multiset<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	inout_container.insert({in_key, in_value}); // multi-containers keep duplicates, so every push adds an element
+}
+
+/** Pushing to sequential containers */
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_vector<ContainerT>::value || is_list<ContainerT>::value || is_deque<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	inout_container.push_back({in_key, in_value}); // appended as one braced { key, value } element
+}
+
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_queue<ContainerT>::value || is_stack<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	inout_container.push({in_key, in_value}); // container adaptors expose push() rather than push_back()
+}
+
+template<typename ContainerT, typename LeftT, typename RightT,
+	typename std::enable_if_t<is_forward_list<ContainerT>::value>* = nullptr>
+constexpr void push(ContainerT& inout_container, LeftT&& in_key, RightT&& in_value) {
+	inout_container.push_front({in_key, in_value}); // forward_list has no push_back, so elements end up in reverse push order
+}
+
+} // namespace container
 } // namespace jessilib
diff --git a/src/include/jessilib/unicode.hpp b/src/include/jessilib/unicode.hpp
index 3ab4818..25aadab 100644
--- a/src/include/jessilib/unicode.hpp
+++ b/src/include/jessilib/unicode.hpp
@@ -18,10 +18,7 @@
 
 #pragma once
 
-#include <string>
-#include <string_view>
-#include <ostream>
-#include "unicode_base.hpp"
+#include "unicode_compare.hpp"
 
 namespace jessilib {
 
@@ -179,246 +176,6 @@ std::basic_string<OutCharT> string_cast(const InT& in_string) {
 	}
 }
 
-/**
- * Checks if two codepoints are equal to each-other (case insensitive)
- *
- * @param lhs First codepoint to compare
- * @param rhs Second codepoint to compare
- * @return True if the characters are equal, false otherwise
- */
-inline bool equalsi(char32_t lhs, char32_t rhs) {
-	return lhs == rhs
-		|| fold(lhs) == fold(rhs);
-}
-
-// Should just make these methods container-type agnostic rather than this mess...
-#define ADAPT_BASIC_STRING(method) \
-	template<typename LhsCharT, typename RhsCharT> \
-	auto method(const std::basic_string<LhsCharT>& lhs, std::basic_string_view<RhsCharT> rhs) { \
-		return method(static_cast<std::basic_string_view<LhsCharT>>(lhs), rhs); } \
-	template<typename LhsCharT, typename RhsCharT> \
-	auto method(std::basic_string_view<LhsCharT> lhs, const std::basic_string<RhsCharT>& rhs) { \
-		return method(lhs, static_cast<std::basic_string_view<RhsCharT>>(rhs)); } \
-	template<typename LhsCharT, typename RhsCharT> \
-	auto method(const std::basic_string<LhsCharT>& lhs, const std::basic_string<RhsCharT>& rhs) { \
-		return method(static_cast<std::basic_string_view<LhsCharT>>(lhs), static_cast<std::basic_string_view<RhsCharT>>(rhs)); }
-
-/**
- * Checks if two strings are equal
- *
- * @tparam LhsCharT Character type for left-hand parameter
- * @tparam RhsCharT Character type for right-hand parameter
- * @param lhs First string to compare
- * @param rhs Second string to compare against
- * @return True if the strings are equal, false otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-bool equals(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
-	// If lhs and rhs are the same type, compare their sizes and quickly return if not same
-	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
-		return lhs == rhs;
-	}
-
-	while (!lhs.empty() && !rhs.empty()) {
-		auto lhs_front = decode_codepoint(lhs);
-		auto rhs_front = decode_codepoint(rhs);
-
-		if (lhs_front.units == 0
-			|| rhs_front.units == 0) {
-			// Failed to decode front codepoint; bad unicode sequence
-			return false;
-		}
-
-		if (lhs_front.codepoint != rhs_front.codepoint) {
-			// Codepoints aren't the same
-			return false;
-		}
-
-		// Codepoints are equal; trim off the fronts and continue
-		lhs.remove_prefix(lhs_front.units);
-		rhs.remove_prefix(rhs_front.units);
-	}
-
-	return lhs.empty() && rhs.empty();
-}
-
-ADAPT_BASIC_STRING(equals)
-
-/**
- * Checks if two strings are equal (case insensitive)
- *
- * @tparam LhsCharT Character type for left-hand parameter
- * @tparam RhsCharT Character type for right-hand parameter
- * @param lhs First string to compare
- * @param rhs Second string to compare against
- * @return True if the strings are equal, false otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-bool equalsi(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
-	// If lhs and rhs are the same type, compare their sizes and quickly return if not same
-	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
-		if (lhs.size() != rhs.size()) {
-			return false;
-		}
-	}
-
-	while (!lhs.empty() && !rhs.empty()) {
-		auto lhs_front = decode_codepoint(lhs);
-		auto rhs_front = decode_codepoint(rhs);
-
-		if (lhs_front.units == 0
-			|| rhs_front.units == 0) {
-			// Failed to decode front codepoint; bad unicode sequence
-			return false;
-		}
-
-		if (!equalsi(lhs_front.codepoint, rhs_front.codepoint)) {
-			// Codepoints don't fold to same value
-			return false;
-		}
-
-		// Codepoints are equal; trim off the fronts and continue
-		lhs.remove_prefix(lhs_front.units);
-		rhs.remove_prefix(rhs_front.units);
-	}
-
-	return lhs.empty() && rhs.empty();
-}
-
-ADAPT_BASIC_STRING(equalsi)
-
-/**
- * Checks if a string starts with a substring
- *
- * @tparam LhsCharT Character type for underlying string
- * @tparam RhsCharT Character type for prefix string
- * @param in_string String to check for prefix
- * @param in_prefix Substring prefix to check for
- * @return Data length of in_prefix in terms of LhsCharT if in_string starts with in_prefix, 0 otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-size_t starts_with_length(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
-	// If in_string and in_prefix are the same type, compare their sizes and quickly return if in_string is too small
-	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
-		if (in_string.size() < in_prefix.size()) {
-			return 0;
-		}
-	}
-
-	size_t codepoints_removed{};
-	while (!in_string.empty() && !in_prefix.empty()) {
-		get_endpoint_result string_front = decode_codepoint(in_string);
-		get_endpoint_result prefix_front = decode_codepoint(in_prefix);
-
-		if (string_front.units == 0
-			|| prefix_front.units == 0) {
-			// Failed to decode front codepoint; bad unicode sequence
-			return 0;
-		}
-
-		if (string_front.codepoint != prefix_front.codepoint) {
-			// Codepoints aren't the same
-			return 0;
-		}
-
-		// Codepoints are equal; trim off the fronts and continue
-		in_string.remove_prefix(string_front.units);
-		in_prefix.remove_prefix(prefix_front.units);
-		codepoints_removed += string_front.units;
-	}
-
-	if (!in_prefix.empty()) {
-		// We reached end of in_string before end of prefix
-		return 0;
-	}
-
-	return codepoints_removed;
-}
-
-ADAPT_BASIC_STRING(starts_with_length)
-
-/**
- * Checks if a string starts with a substring (case insensitive)
- *
- * @tparam LhsCharT Character type for underlying string
- * @tparam RhsCharT Character type for prefix string
- * @param in_string String to check for prefix
- * @param in_prefix Substring prefix to check for
- * @return Data length of in_prefix in terms of LhsCharT if in_string starts with in_prefix, 0 otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-size_t starts_with_lengthi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
-	// If in_string and in_prefix are the same type, skip decoding each point
-	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
-		if (in_string.size() < in_prefix.size()) {
-			return 0;
-		}
-	}
-
-	size_t codepoints_removed{};
-	while (!in_string.empty() && !in_prefix.empty()) {
-		get_endpoint_result string_front = decode_codepoint(in_string);
-		get_endpoint_result prefix_front = decode_codepoint(in_prefix);
-
-		if (string_front.units == 0
-			|| prefix_front.units == 0) {
-			// Failed to decode front codepoint; bad unicode sequence
-			return 0;
-		}
-
-		if (!equalsi(string_front.codepoint, prefix_front.codepoint)) {
-			// Codepoints don't fold to same value
-			return 0;
-		}
-
-		// Codepoints are equal; trim off the fronts and continue
-		in_string.remove_prefix(string_front.units);
-		in_prefix.remove_prefix(prefix_front.units);
-		codepoints_removed += string_front.units;
-	}
-
-	if (!in_prefix.empty()) {
-		// We reached end of in_string before end of prefix
-		return 0;
-	}
-
-	return codepoints_removed;
-}
-
-ADAPT_BASIC_STRING(starts_with_lengthi)
-
-/**
- * Checks if a string starts with a substring
- *
- * @tparam LhsCharT Character type for underlying string
- * @tparam RhsCharT Character type for prefix string
- * @param in_string String to check for prefix
- * @param in_prefix Prefix to check for
- * @return True if both strings are valid and in_string starts with in_prefix, false otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-bool starts_with(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
-	return starts_with_length<LhsCharT, RhsCharT>(in_string, in_prefix) != 0;
-}
-
-ADAPT_BASIC_STRING(starts_with)
-
-/**
- * Checks if a string starts with a substring (case insensitive)
- *
- * @tparam LhsCharT Character type for underlying string
- * @tparam RhsCharT Character type for prefix string
- * @param in_string String to check for prefix
- * @param in_prefix Prefix to check for
- * @return True if both strings are valid and in_string starts with in_prefix, false otherwise
- */
-template<typename LhsCharT, typename RhsCharT>
-bool starts_withi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
-	return starts_with_lengthi<LhsCharT, RhsCharT>(in_string, in_prefix) != 0;
-}
-
-ADAPT_BASIC_STRING(starts_withi)
-
 /**
  * Searches a string for a specified substring
  *
@@ -651,176 +408,6 @@ OutT join(ArgsT&&... args) {
 	return result;
 }
 
-/**
- * Calculates the hash of a string based on its codepoints, such that a unicode string will always produce the same hash
- * regardless of underlying encoding
- *
- * This is not intended for generating hashses of arbitrary data; it's specifically intended for strings of text
- */
-struct text_hash {
-	using is_transparent = std::true_type;
-
-	template<typename CharT>
-	static uint64_t hash(const CharT* data, const CharT* end) {
-		uint64_t hash = 14695981039346656037ULL;
-
-		get_endpoint_result decode;
-		while (data != end) {
-			decode = decode_codepoint(data, end);
-			if (decode.units == 0) {
-				return hash;
-			}
-
-			hash = hash ^ decode.codepoint;
-			hash = hash * 1099511628211ULL;
-			data += decode.units;
-		}
-
-		return hash;
-	}
-
-	auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char8_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char16_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char32_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-};
-
-struct text_equal {
-	using is_transparent = std::true_type;
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
-		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
-		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
-		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
-		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-};
-
-/**
- * Calculates the hash of a string based on its folded codepoints, such that a unicode string will always produce the
- * same hash regardless of underlying encoding or the casing of its values.
- *
- * This is not intended for generating hashses of arbitrary data; it's specifically intended for strings of text
- */
-struct text_hashi {
-	using is_transparent = std::true_type;
-
-	template<typename CharT>
-	static uint64_t hash(const CharT* data, const CharT* end) {
-		uint64_t hash = 14695981039346656037ULL;
-
-		get_endpoint_result decode;
-		while (data != end) {
-			decode = decode_codepoint(data, end - data);
-			if (decode.units == 0) {
-				return hash;
-			}
-
-			hash = hash ^ fold(decode.codepoint);
-			hash = hash * 1099511628211ULL;
-			data += decode.units;
-		}
-
-		return hash;
-	}
-
-	auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char8_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char16_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-8
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-
-	auto operator()(std::basic_string_view<char32_t> in_key) const noexcept {
-		return hash(in_key.data(), in_key.data() + in_key.size());
-	}
-};
-
-struct text_equali {
-	using is_transparent = std::true_type;
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
-		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
-		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
-		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-
-	template<typename LhsCharT, typename RhsCharT>
-	bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
-		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
-	}
-};
-
 /** to_lower / to_upper */
 //char32_t to_lower(char32_t in_chr); // TODO: implement
 //char32_t to_upper(char32_t in_chr); // TODO: implement
diff --git a/src/include/jessilib/unicode_base.hpp b/src/include/jessilib/unicode_base.hpp
index 30c1e3b..034c7a0 100644
--- a/src/include/jessilib/unicode_base.hpp
+++ b/src/include/jessilib/unicode_base.hpp
@@ -167,6 +167,62 @@ using encode_buffer_type = CharT[unicode_traits<CharT>::max_units_per_codepoint]
 char32_t fold(char32_t in_codepoint); // Folds codepoint for case-insensitive checks (not for human output)
 constexpr int as_base(char32_t in_character, unsigned int base); // The value represented by in_character in terms of base if valid, -1 otherwise
 
+/**
+ * Checks if two codepoints are equal to each-other (case insensitive)
+ *
+ * @param lhs First codepoint to compare
+ * @param rhs Second codepoint to compare
+ * @return True if the characters are equal, false otherwise
+ */
+inline bool equalsi(char32_t lhs, char32_t rhs) {
+	return lhs == rhs
+		|| fold(lhs) == fold(rhs);
+}
+
+template<char32_t InCodepointV>
+struct codepoint_info {
+private:
+	template<typename CharT>
+	static constexpr size_t encode_codepoint_length(char32_t in_codepoint) {
+		encode_buffer_type<CharT> buffer{};
+		return encode_codepoint(buffer, in_codepoint);
+	}
+
+public:
+	static constexpr char32_t value = InCodepointV;
+	template<typename CharT>
+	static constexpr size_t encode_length = encode_codepoint_length<CharT>(InCodepointV);
+
+	template<typename CharT>
+	using encode_buffer = CharT[encode_length<CharT>];
+
+	static constexpr size_t utf8_length = encode_length<char8_t>;
+	static constexpr size_t utf16_length = encode_length<char16_t>;
+	static constexpr size_t utf32_length = encode_length<char32_t>;
+	static constexpr size_t wchar_length = encode_length<wchar_t>;
+
+	using utf8_buffer = char8_t[utf8_length];
+	using utf16_buffer = char16_t[utf16_length];
+	using utf32_buffer = char32_t[utf32_length];
+	using wchar_buffer = wchar_t[wchar_length];
+
+	static constexpr void encode(utf8_buffer& buffer) {
+		encode_codepoint(buffer, InCodepointV);
+	}
+
+	static constexpr void encode(utf16_buffer& buffer) {
+		encode_codepoint(buffer, InCodepointV);
+	}
+
+	static constexpr void encode(utf32_buffer& buffer) {
+		encode_codepoint(buffer, InCodepointV);
+	}
+
+	static constexpr void encode(wchar_buffer& buffer) {
+		encode_codepoint(buffer, InCodepointV);
+	}
+};
+
 /**
  * Inline constexpr encode implementation
  */
diff --git a/src/include/jessilib/unicode_compare.hpp b/src/include/jessilib/unicode_compare.hpp
new file mode 100644
index 0000000..7a68e30
--- /dev/null
+++ b/src/include/jessilib/unicode_compare.hpp
@@ -0,0 +1,430 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+/**
+ * @file unicode_compare.hpp
+ * @author Jessica James
+ *
+ * Unicode-aware encoding-agnostic string comparison & hashing utilities
+ */
+
+#pragma once
+
+#include "unicode_base.hpp"
+
+namespace jessilib {
+
+// Should just make these methods container-type agnostic rather than this mess...
+#define ADAPT_BASIC_STRING(method) \
+	template<typename LhsCharT, typename RhsCharT> \
+	auto method(const std::basic_string<LhsCharT>& lhs, std::basic_string_view<RhsCharT> rhs) { \
+		return method(static_cast<std::basic_string_view<LhsCharT>>(lhs), rhs); } \
+	template<typename LhsCharT, typename RhsCharT> \
+	auto method(std::basic_string_view<LhsCharT> lhs, const std::basic_string<RhsCharT>& rhs) { \
+		return method(lhs, static_cast<std::basic_string_view<RhsCharT>>(rhs)); } \
+	template<typename LhsCharT, typename RhsCharT> \
+	auto method(const std::basic_string<LhsCharT>& lhs, const std::basic_string<RhsCharT>& rhs) { \
+		return method(static_cast<std::basic_string_view<LhsCharT>>(lhs), static_cast<std::basic_string_view<RhsCharT>>(rhs)); }
+
+/**
+ * Checks if two strings are equal
+ *
+ * @tparam LhsCharT Character type for left-hand parameter
+ * @tparam RhsCharT Character type for right-hand parameter
+ * @param lhs First string to compare
+ * @param rhs Second string to compare against
+ * @return True if the strings are equal, false otherwise
+ */
+template<typename LhsCharT, typename RhsCharT>
+bool equals(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
+	// If lhs and rhs are the same type, their code units can be compared directly without decoding
+	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
+		return lhs == rhs;
+	}
+
+	while (!lhs.empty() && !rhs.empty()) {
+		auto lhs_front = decode_codepoint(lhs);
+		auto rhs_front = decode_codepoint(rhs);
+
+		if (lhs_front.units == 0
+			|| rhs_front.units == 0) {
+			// Failed to decode front codepoint; bad unicode sequence
+			return false;
+		}
+
+		if (lhs_front.codepoint != rhs_front.codepoint) {
+			// Codepoints aren't the same
+			return false;
+		}
+
+		// Codepoints are equal; trim off the fronts and continue
+		lhs.remove_prefix(lhs_front.units);
+		rhs.remove_prefix(rhs_front.units);
+	}
+
+	return lhs.empty() && rhs.empty();
+}
+
+ADAPT_BASIC_STRING(equals)
+
+/**
+ * Checks if two strings are equal (case insensitive)
+ *
+ * @tparam LhsCharT Character type for left-hand parameter
+ * @tparam RhsCharT Character type for right-hand parameter
+ * @param lhs First string to compare
+ * @param rhs Second string to compare against
+ * @return True if the strings are equal, false otherwise
+ */
+template<typename LhsCharT, typename RhsCharT>
+bool equalsi(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
+	// A size mismatch only proves inequality when every codepoint is one unit; folding may pair codepoints of different encoded lengths (e.g. U+212A KELVIN SIGN and 'k' in UTF-8)
+	if constexpr (std::is_same_v<LhsCharT, RhsCharT> && sizeof(LhsCharT) == sizeof(char32_t)) {
+		if (lhs.size() != rhs.size()) {
+			return false;
+		}
+	}
+
+	while (!lhs.empty() && !rhs.empty()) {
+		auto lhs_front = decode_codepoint(lhs);
+		auto rhs_front = decode_codepoint(rhs);
+
+		if (lhs_front.units == 0
+			|| rhs_front.units == 0) {
+			// Failed to decode front codepoint; bad unicode sequence
+			return false;
+		}
+
+		if (!equalsi(lhs_front.codepoint, rhs_front.codepoint)) {
+			// Codepoints don't fold to same value
+			return false;
+		}
+
+		// Codepoints are equal; trim off the fronts and continue
+		lhs.remove_prefix(lhs_front.units);
+		rhs.remove_prefix(rhs_front.units);
+	}
+
+	return lhs.empty() && rhs.empty();
+}
+
+ADAPT_BASIC_STRING(equalsi)
+
+/**
+ * Checks if a string starts with a substring
+ *
+ * @tparam LhsCharT Character type for underlying string
+ * @tparam RhsCharT Character type for prefix string
+ * @param in_string String to check for prefix
+ * @param in_prefix Substring prefix to check for
+ * @return Data length of in_prefix in terms of LhsCharT if in_string starts with in_prefix, 0 otherwise (including when in_prefix is empty)
+ */
+template<typename LhsCharT, typename RhsCharT>
+size_t starts_with_length(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
+	// If in_string and in_prefix are the same type, compare their sizes and quickly return if in_string is too small
+	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
+		if (in_string.size() < in_prefix.size()) {
+			return 0;
+		}
+	}
+
+	size_t codepoints_removed{}; // Despite the name, this accumulates the code units of in_string consumed so far
+	while (!in_string.empty() && !in_prefix.empty()) {
+		get_endpoint_result string_front = decode_codepoint(in_string);
+		get_endpoint_result prefix_front = decode_codepoint(in_prefix);
+
+		if (string_front.units == 0
+			|| prefix_front.units == 0) {
+			// Failed to decode front codepoint; bad unicode sequence
+			return 0;
+		}
+
+		if (string_front.codepoint != prefix_front.codepoint) {
+			// Codepoints aren't the same
+			return 0;
+		}
+
+		// Codepoints are equal; trim off the fronts and continue
+		in_string.remove_prefix(string_front.units);
+		in_prefix.remove_prefix(prefix_front.units);
+		codepoints_removed += string_front.units;
+	}
+
+	if (!in_prefix.empty()) {
+		// We reached end of in_string before end of prefix
+		return 0;
+	}
+
+	return codepoints_removed;
+}
+
+ADAPT_BASIC_STRING(starts_with_length)
+
+/**
+ * Checks if a string starts with a substring (case insensitive)
+ *
+ * @tparam LhsCharT Character type for underlying string
+ * @tparam RhsCharT Character type for prefix string
+ * @param in_string String to check for prefix
+ * @param in_prefix Substring prefix to check for
+ * @return Length of the matched prefix in units of LhsCharT if in_string starts with in_prefix, 0 otherwise (including when in_prefix is empty)
+ */
+template<typename LhsCharT, typename RhsCharT>
+size_t starts_with_lengthi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
+	// A shorter in_string only rules out a match when every codepoint is one unit; folding may pair codepoints of different encoded lengths (e.g. U+212A KELVIN SIGN and 'k' in UTF-8)
+	if constexpr (std::is_same_v<LhsCharT, RhsCharT> && sizeof(LhsCharT) == sizeof(char32_t)) {
+		if (in_string.size() < in_prefix.size()) {
+			return 0;
+		}
+	}
+
+	size_t units_removed{}; // Code units of in_string consumed by the match so far
+	while (!in_string.empty() && !in_prefix.empty()) {
+		get_endpoint_result string_front = decode_codepoint(in_string);
+		get_endpoint_result prefix_front = decode_codepoint(in_prefix);
+
+		if (string_front.units == 0
+			|| prefix_front.units == 0) {
+			// Failed to decode front codepoint; bad unicode sequence
+			return 0;
+		}
+
+		if (!equalsi(string_front.codepoint, prefix_front.codepoint)) {
+			// Codepoints don't fold to same value
+			return 0;
+		}
+
+		// Codepoints are equal; trim off the fronts and continue
+		in_string.remove_prefix(string_front.units);
+		in_prefix.remove_prefix(prefix_front.units);
+		units_removed += string_front.units;
+	}
+
+	if (!in_prefix.empty()) {
+		// We reached end of in_string before end of prefix
+		return 0;
+	}
+
+	return units_removed;
+}
+
+ADAPT_BASIC_STRING(starts_with_lengthi)
+
+/**
+ * Checks if a string starts with a substring
+ *
+ * @tparam LhsCharT Character type for underlying string
+ * @tparam RhsCharT Character type for prefix string
+ * @param in_string String to check for prefix
+ * @param in_prefix Prefix to check for
+ * @return True if both strings are valid and in_string starts with in_prefix, false otherwise
+ */
+template<typename LhsCharT, typename RhsCharT>
+bool starts_with(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
+	return starts_with_length<LhsCharT, RhsCharT>(in_string, in_prefix) != 0;
+}
+
+ADAPT_BASIC_STRING(starts_with)
+
+/**
+ * Checks if a string starts with a substring (case insensitive)
+ *
+ * @tparam LhsCharT Character type for underlying string
+ * @tparam RhsCharT Character type for prefix string
+ * @param in_string String to check for prefix
+ * @param in_prefix Prefix to check for
+ * @return True if both strings are valid and in_string starts with in_prefix, false otherwise
+ */
+template<typename LhsCharT, typename RhsCharT>
+bool starts_withi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) {
+	return starts_with_lengthi<LhsCharT, RhsCharT>(in_string, in_prefix) != 0;
+}
+
+ADAPT_BASIC_STRING(starts_withi)
+
+/**
+ * Calculates the hash of a string based on its codepoints, such that a unicode string will always produce the same hash
+ * regardless of underlying encoding
+ *
+ * This is not intended for generating hashes of arbitrary data; it's specifically intended for strings of text
+ */
+struct text_hash {
+	using is_transparent = std::true_type;
+
+	template<typename CharT>
+	static uint64_t hash(const CharT* data, const CharT* end) {
+		uint64_t hash = 14695981039346656037ULL; // 64-bit FNV offset basis
+
+		get_endpoint_result decode;
+		while (data != end) {
+			decode = decode_codepoint(data, end);
+			if (decode.units == 0) {
+				return hash; // Undecodable sequence; the hash only covers the codepoints decoded so far
+			}
+
+			hash = hash ^ decode.codepoint; // XOR-then-multiply (FNV-1a order), one step per codepoint rather than per byte
+			hash = hash * 1099511628211ULL; // 64-bit FNV prime
+			data += decode.units;
+		}
+
+		return hash;
+	}
+
+	auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char8_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-16
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char16_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-32
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char32_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+};
+
+struct text_equal {
+	using is_transparent = std::true_type;
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
+		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
+		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
+		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
+		return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+};
+
+/**
+ * Calculates the hash of a string based on its folded codepoints, such that a unicode string will always produce the
+ * same hash regardless of underlying encoding or the casing of its values.
+ *
+ * This is not intended for generating hashes of arbitrary data; it's specifically intended for strings of text
+ */
+struct text_hashi {
+	using is_transparent = std::true_type;
+
+	template<typename CharT>
+	static uint64_t hash(const CharT* data, const CharT* end) {
+		uint64_t hash = 14695981039346656037ULL; // 64-bit FNV offset basis
+
+		get_endpoint_result decode;
+		while (data != end) {
+			decode = decode_codepoint(data, end - data);
+			if (decode.units == 0) {
+				return hash; // Undecodable sequence; the hash only covers the codepoints decoded so far
+			}
+
+			hash = hash ^ fold(decode.codepoint); // Fold first so differently-cased text hashes identically
+			hash = hash * 1099511628211ULL; // 64-bit FNV prime
+			data += decode.units;
+		}
+
+		return hash;
+	}
+
+	auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char8_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-16
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char16_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-32
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+
+	auto operator()(std::basic_string_view<char32_t> in_key) const noexcept {
+		return hash(in_key.data(), in_key.data() + in_key.size());
+	}
+};
+
+struct text_equali {
+	using is_transparent = std::true_type;
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
+		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
+		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept {
+		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+
+	template<typename LhsCharT, typename RhsCharT>
+	bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept {
+		return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs);
+	}
+};
+
+} // namespace jessilib
diff --git a/src/include/jessilib/unicode_sequence.hpp b/src/include/jessilib/unicode_sequence.hpp
index a912e66..34bc600 100644
--- a/src/include/jessilib/unicode_sequence.hpp
+++ b/src/include/jessilib/unicode_sequence.hpp
@@ -237,68 +237,71 @@ constexpr shrink_sequence_tree_member<CharT> make_octal_sequence_pair() {
 	};
 }
 
-template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
-constexpr shrink_sequence_tree_member<CharT> make_hex_sequence_pair() {
-	static_assert(MaxDigitsV > 0);
-
-	return {
-		InCodepointV,
-		[](CharT*& in_write_head, std::basic_string_view<CharT>& read_view) constexpr {
-			// Does not modify
-			auto read_hex = [](uint32_t& out_value, std::basic_string_view<CharT> in_view, size_t max_digits) {
-				size_t result{};
-				int hex_value;
-				out_value = 0;
-				while (result != max_digits
-					&& !in_view.empty()) {
-					hex_value = as_base(in_view.front(), 16); // hexadecimal characters are always 1 unit
-					if (hex_value < 0) {
-						// Not a hexadecimal character; push what we have and handle this
-						return result;
-					}
-
-					out_value <<= 4;
-					out_value |= hex_value;
-
-					in_view.remove_prefix(1);
-					++result;
-				}
-
-				// Number of elements that are hexadecimal digits
+template<typename CharT, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
+constexpr bool hex_shrink_sequence_action(CharT*& in_write_head, std::basic_string_view<CharT>& read_view) {
+	// Does not modify
+	auto read_hex = [](uint32_t& out_value, std::basic_string_view<CharT> in_view, size_t max_digits) constexpr {
+		size_t result{};
+		int hex_value;
+		out_value = 0;
+		while (result != max_digits
+			&& !in_view.empty()) {
+			hex_value = as_base(in_view.front(), 16); // hexadecimal characters are always 1 unit
+			if (hex_value < 0) {
+				// Not a hexadecimal character; push what we have and handle this
 				return result;
-			};
+			}
 
-			// Read in hex value
-			uint32_t hex_value;
-			size_t units_read = read_hex(hex_value, read_view, MaxDigitsV);
+			out_value <<= 4;
+			out_value |= hex_value;
 
-			// Sanity check digits read
-			if constexpr(ExactDigitsV) {
-				if (units_read != MaxDigitsV) {
-					// We expected example MaxDigitsV digits; fail
-					return false;
-				}
-			}
-			else {
-				if (units_read == 0) {
-					// We didn't read any digits; fail
-					return false;
-				}
-			}
+			in_view.remove_prefix(1);
+			++result;
+		}
 
-			// We read an acceptable number of digits; write the unit and call it a day
-			read_view.remove_prefix(units_read);
-			if constexpr (IsUnicode) {
-				in_write_head += encode_codepoint(in_write_head, hex_value);
-			}
-			else {
-				static_assert(MaxDigitsV <= sizeof(CharT) * 2);
-				*in_write_head = static_cast<CharT>(hex_value);
-				++in_write_head;
-			}
+		// Number of elements that are hexadecimal digits
+		return result;
+	};
 
-			return true;
+	// Read in hex value
+	uint32_t hex_value;
+	size_t units_read = read_hex(hex_value, read_view, MaxDigitsV);
+
+	// Sanity check digits read
+	if constexpr(ExactDigitsV) {
+		if (units_read != MaxDigitsV) {
+			// We expected exactly MaxDigitsV digits; fail
+			return false;
 		}
+	}
+	else {
+		if (units_read == 0) {
+			// We didn't read any digits; fail
+			return false;
+		}
+	}
+
+	// We read an acceptable number of digits; write the unit and call it a day
+	read_view.remove_prefix(units_read);
+	if constexpr (IsUnicode) {
+		in_write_head += encode_codepoint(in_write_head, hex_value);
+	}
+	else {
+		static_assert(MaxDigitsV <= sizeof(CharT) * 2);
+		*in_write_head = static_cast<CharT>(hex_value);
+		++in_write_head;
+	}
+
+	return true;
+}
+
+template<typename CharT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode>
+constexpr shrink_sequence_tree_member<CharT> make_hex_sequence_pair() {
+	static_assert(MaxDigitsV > 0);
+
+	return {
+		InCodepointV,
+		hex_shrink_sequence_action<CharT, MaxDigitsV, ExactDigitsV, IsUnicode>
 	};
 }
 
@@ -394,36 +397,4 @@ constexpr bool apply_cpp_escape_sequences(std::basic_string<CharT>& inout_string
 	return apply_shrink_sequence_tree<CharT, cpp_escapes_root_tree<CharT>, std::size(cpp_escapes_root_tree<CharT>)>(inout_string);
 }
 
-/**
- * Query string escape sequence parser
- */
-
-template<typename CharT,
-	std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // make_hex_sequence_pair isn't going to play well with other types
-static constexpr shrink_sequence_tree<CharT> http_query_escapes_root_tree{
-	make_hex_sequence_pair<CharT, U'%', 2, true, false>(),
-	make_simple_sequence_pair<CharT, U'+', ' '>()
-};
-static_assert(is_sorted<char, http_query_escapes_root_tree<char>, std::size(http_query_escapes_root_tree<char>)>(), "Tree must be pre-sorted");
-static_assert(is_sorted<char8_t, http_query_escapes_root_tree<char8_t>, std::size(http_query_escapes_root_tree<char8_t>)>(), "Tree must be pre-sorted");
-
-template<typename CharT,
-    std::enable_if_t<sizeof(CharT) == 1>* = nullptr>
-constexpr bool deserialize_http_query(std::basic_string<CharT>& inout_string) {
-	return apply_shrink_sequence_tree<CharT, http_query_escapes_root_tree<CharT>, std::size(http_query_escapes_root_tree<CharT>)>(inout_string);
-}
-
-// TODO: decide whether to take this approach, where query strings are assumed to represent UTF-8 text data, OR implement
-// such that calling deserialize_http_query will assume the relevant encoding (i.e: calling with char16_t would read in
-// escaped query values as bytes in codepoint char16_t, rather than utf-8 encoding sequence)
-/*template<typename CharT,
-	std::enable_if_t<sizeof(CharT) != 1>* = nullptr>
-bool deserialize_http_query(std::basic_string<CharT>& inout_string) {
-	//TODO: optimize this?
-	std::basic_string<char8_t> u8query_string = string_cast<char8_t>(inout_string);
-	bool result = deserialize_http_query<char8_t>(u8query_string);
-	inout_string = string_cast<CharT>(u8query_string);
-	return result;
-}*/
-
 } // namespace jessilib
diff --git a/src/include/jessilib/unicode_syntax.hpp b/src/include/jessilib/unicode_syntax.hpp
new file mode 100644
index 0000000..a37a7cd
--- /dev/null
+++ b/src/include/jessilib/unicode_syntax.hpp
@@ -0,0 +1,139 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+/**
+ * @file unicode_syntax.hpp
+ * @author Jessica James
+ *
+ * Unicode-aware syntax tree parsing utilities
+ */
+
+#pragma once
+
+#include "unicode_base.hpp"
+
+namespace jessilib {
+
+/**
+ * Syntax tree; move this to another file later
+ */
+
+template<typename CharT, typename ContextT>
+using syntax_tree_action = bool(*)(ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view);
+
+template<typename CharT, typename ContextT>
+using default_syntax_tree_action = bool(*)(get_endpoint_result in_codepoint, ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view);
+
+template<typename CharT, typename ContextT>
+using syntax_tree = const std::pair<char32_t, syntax_tree_action<CharT, ContextT>>[];
+
+template<typename CharT, typename ContextT>
+using syntax_tree_member = const std::pair<char32_t, syntax_tree_action<CharT, ContextT>>;
+
+template<typename CharT, typename ContextT>
+constexpr bool syntax_tree_member_compare(const syntax_tree_member<CharT, ContextT>& in_lhs, const char32_t in_rhs) {
+	return in_lhs.first < in_rhs;
+}
+
+// Lessers on left
+template<typename CharT, typename ContextT, const syntax_tree<CharT, ContextT> TreeBegin, size_t TreeSize>
+constexpr bool is_sorted() {
+	auto head = TreeBegin;
+	constexpr auto end = TreeBegin + TreeSize;
+
+	if (head == end) {
+		return true;
+	}
+
+	while (head + 1 != end) {
+		const auto next = head + 1;
+		if (head->first > next->first) {
+			return false;
+		}
+
+		++head;
+	}
+
+	return true;
+}
+
+template<typename CharT, typename ContextT>
+bool fail_action(get_endpoint_result, ContextT&, std::basic_string_view<CharT>&) {
+	return false;
+}
+
+template<typename CharT, typename ContextT>
+bool noop_action(get_endpoint_result decode, ContextT&, std::basic_string_view<CharT>& inout_read_view) {
+	inout_read_view.remove_prefix(decode.units);
+	return true;
+}
+
+template<typename CharT, typename ContextT, char32_t InCodepointV, const syntax_tree<CharT, ContextT> SubTreeBegin, size_t SubTreeSize, default_syntax_tree_action<CharT, ContextT> DefaultActionF = fail_action<CharT, ContextT>>
+constexpr syntax_tree_member<CharT, ContextT> make_tree_pair() { // Member for InCodepointV: dispatches the next codepoint through the subtree, or to DefaultActionF when it has no entry
+	return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) constexpr {
+		auto decode = decode_codepoint(inout_read_view);
+		if (decode.units == 0) {
+			return false; // Failed to decode the next codepoint
+		}
+		// Binary search for this codepoint's handler; std::lower_bound requires the subtree to be sorted by codepoint
+		constexpr syntax_tree_member<CharT, ContextT>* SubTreeEnd = SubTreeBegin + SubTreeSize;
+		auto parser = std::lower_bound(SubTreeBegin, SubTreeEnd, decode.codepoint, &syntax_tree_member_compare<CharT, ContextT>);
+		if (parser == SubTreeEnd || parser->first != decode.codepoint) {
+			return DefaultActionF(decode, inout_context, inout_read_view);
+		}
+
+		// This is a parsed sequence; consume the codepoint and pass the remainder to its parser
+		inout_read_view.remove_prefix(decode.units);
+		return (parser->second)(inout_context, inout_read_view);
+	} };
+}
+
+template<typename CharT, typename ContextT, const syntax_tree<CharT, ContextT> SequenceTreeBegin, size_t SequenceTreeSize,
+	default_syntax_tree_action<CharT, ContextT> DefaultActionF = noop_action<CharT, ContextT>>
+constexpr bool apply_syntax_tree(ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) {
+	if (inout_read_view.empty()) {
+		// Nothing to parse
+		return true;
+	}
+
+	get_endpoint_result decode;
+	constexpr auto SubTreeEnd = SequenceTreeBegin + SequenceTreeSize;
+	while ((decode = decode_codepoint(inout_read_view)).units != 0) {
+		auto parser = std::lower_bound(SequenceTreeBegin, SubTreeEnd, decode.codepoint, &syntax_tree_member_compare<CharT, ContextT>);
+		if (parser == SubTreeEnd || parser->first != decode.codepoint) {
+			// Just a normal character; pass it to the default handler
+			if (!DefaultActionF(decode, inout_context, inout_read_view)) {
+				return false;
+			}
+
+			continue;
+		}
+
+		// This is a parsed sequence; pass it to the parser instead
+		inout_read_view.remove_prefix(decode.units);
+		if (!(parser->second)(inout_context, inout_read_view)) {
+			// Bad input received; give up
+			return false;
+		}
+	}
+
+	// We've finished parsing successfully
+	return true;
+}
+
+} // namespace jessilib
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index f6175e3..9aad6ee 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Setup source files
 set(SOURCE_FILES
-        timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp unicode_sequence.cpp)
+        timer.cpp thread_pool.cpp util.cpp object.cpp parser.cpp config.cpp parsers/json.cpp unicode.cpp app_parameters.cpp io/color.cpp duration.cpp split.cpp split_compilation.cpp word_split.cpp unicode_sequence.cpp http_query.cpp)
 
 # Setup gtest
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
diff --git a/src/test/http_query.cpp b/src/test/http_query.cpp
new file mode 100644
index 0000000..fd51b67
--- /dev/null
+++ b/src/test/http_query.cpp
@@ -0,0 +1,238 @@
+/**
+ * Copyright (C) 2021 Jessica James.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Written by Jessica James <jessica.aj@outlook.com>
+ */
+
+#include "jessilib/http_query.hpp"
+#include <charconv>
+#include "test.hpp"
+
+using namespace std::literals;
+
+// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
+#ifdef __cpp_lib_constexpr_string
+constexpr std::string query_constexpr(std::string_view in_expression) { // runs deserialize_http_query on a private copy and returns that copy
+	std::string decoded(in_expression);
+	jessilib::deserialize_http_query(decoded);
+	return decoded;
+}
+static_assert(query_constexpr("test"s) == "test"s);
+static_assert(query_constexpr("first+second"s) == "first second"s);
+static_assert(query_constexpr("first%20second"s) == "first second"s);
+#endif // __cpp_lib_constexpr_string
+
+using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
+using utf8_char_types = ::testing::Types<char, char8_t>;
+
+template<typename T>
+class QuerySequenceTest : public ::testing::Test { // empty fixture; only gives the typed tests below a suite to attach to
+public:
+};
+TYPED_TEST_SUITE(QuerySequenceTest, utf8_char_types); // instantiated for char and char8_t (see utf8_char_types)
+
+constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing
+
+TYPED_TEST(QuerySequenceTest, single_chars) { // percent-encodes every unit of each codepoint as %XX and expects decoding to restore the raw units
+	// [U+0000, U+100FF)
+	for (char32_t codepoint = 0; codepoint < MAX_LOOP_CODEPOINT; ++codepoint) {
+		std::basic_string<TypeParam> expected; // raw encoded units of the codepoint; doubles as the expected decode result
+		size_t units = jessilib::encode_codepoint(expected, codepoint);
+		EXPECT_NE(units, 0);
+		EXPECT_EQ(units, expected.size());
+
+		// Construct the query string
+		std::basic_string<TypeParam> query_string;
+		for (auto& unit : expected) {
+			char encoded[3] { '%', 0, 0 }; // '%' plus room for two hex digits; zeros mark "not written yet"
+			char* encoded_end = encoded + sizeof(encoded); // sizeof is the element count here because the array is plain char
+			auto to_chars_result = std::to_chars(encoded + 1, encoded_end, static_cast<unsigned char>(unit), 16);
+			ASSERT_EQ(to_chars_result.ec, std::errc{}) // assertion will fail when `unit` is signed type
+							<< "For unit " << static_cast<int>(unit) << " in codepoint " << static_cast<int>(codepoint) << std::endl;
+
+			if (to_chars_result.ptr != encoded_end) {
+				// Only wrote one hex; shift it
+				encoded[2] = encoded[1];
+				encoded[1] = '0'; // left-pad with '0' so the escape is always two digits
+			}
+
+			EXPECT_EQ(encoded[0], '%');
+			EXPECT_NE(encoded[1], 0);
+			EXPECT_NE(encoded[2], 0);
+			query_string.insert(query_string.end(), encoded, encoded_end);
+		}
+		EXPECT_EQ(query_string.size(), expected.size() * 3); // every unit became exactly three characters
+
+		// Decode & check the query string
+		jessilib::deserialize_http_query(query_string);
+		EXPECT_EQ(query_string, expected);
+	}
+}
+
+TYPED_TEST(QuerySequenceTest, invalids) { // '%' followed by only one unit is expected to decode to an empty string
+	std::basic_string<TypeParam> query_string, long_query_string;
+	for (size_t unit = 0; unit <= 0xFF; ++unit) {
+		TypeParam encoded[2] { '%', static_cast<TypeParam>(unit) };
+		TypeParam* encoded_end = encoded + sizeof(encoded) / sizeof(encoded[0]); // element count, not byte count: stays in bounds for any TypeParam width
+		query_string.insert(query_string.end(), encoded, encoded_end);
+
+		long_query_string += query_string; // collect every sequence for the combined check after the loop
+		jessilib::deserialize_http_query(query_string);
+		EXPECT_TRUE(query_string.empty())
+						<< "in unit: " << unit << std::endl;
+	}
+
+	jessilib::deserialize_http_query(long_query_string);
+	EXPECT_TRUE(long_query_string.empty());
+}
+
+TYPED_TEST(QuerySequenceTest, invalids_2len) { // '%' followed by two units, at least one rejected by as_base, is expected to decode to empty
+	std::basic_string<TypeParam> query_string, long_query_string;
+	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
+		TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
+		TypeParam second = static_cast<TypeParam>(unit & 0xFF);
+		if (jessilib::as_base(first, 16) >= 0
+			&& jessilib::as_base(second, 16) >= 0) {
+			continue; // presumably both are hex digits, i.e. a well-formed escape; not an invalid case
+		}
+		TypeParam encoded[3] { '%', first, second };
+		TypeParam* encoded_end = encoded + sizeof(encoded) / sizeof(encoded[0]); // element count, not byte count: stays in bounds for any TypeParam width
+		query_string.insert(query_string.end(), encoded, encoded_end);
+
+		long_query_string += query_string; // collect every sequence for the combined check after the loop
+		jessilib::deserialize_http_query(query_string);
+		EXPECT_TRUE(query_string.empty())
+						<< "in unit: " << unit << std::endl;
+	}
+
+	jessilib::deserialize_http_query(long_query_string);
+	EXPECT_TRUE(long_query_string.empty());
+}
+
+TYPED_TEST(QuerySequenceTest, invalids_trailing) { // '%', one unit, then another '%' is expected to decode to an empty string
+	std::basic_string<TypeParam> query_string, long_query_string;
+	for (size_t unit = 0; unit <= 0xFF; ++unit) {
+		TypeParam encoded[3] { '%', static_cast<TypeParam>(unit), '%' };
+		TypeParam* encoded_end = encoded + sizeof(encoded) / sizeof(encoded[0]); // element count, not byte count: stays in bounds for any TypeParam width
+		query_string.insert(query_string.end(), encoded, encoded_end);
+
+		long_query_string += query_string; // collect every sequence for the combined check after the loop
+		jessilib::deserialize_http_query(query_string);
+		EXPECT_TRUE(query_string.empty())
+						<< "in unit: " << unit << std::endl;
+	}
+
+	jessilib::deserialize_http_query(long_query_string);
+	EXPECT_TRUE(long_query_string.empty());
+}
+
+TYPED_TEST(QuerySequenceTest, invalids_2len_trailing) { // like invalids_2len, but each sequence also ends with a dangling '%'
+	std::basic_string<TypeParam> query_string, long_query_string;
+	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
+		TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
+		TypeParam second = static_cast<TypeParam>(unit & 0xFF);
+		if (jessilib::as_base(first, 16) >= 0
+			&& jessilib::as_base(second, 16) >= 0) {
+			continue; // presumably both are hex digits, i.e. a well-formed escape; not an invalid case
+		}
+		TypeParam encoded[4] { '%', first, second, '%' };
+		TypeParam* encoded_end = encoded + sizeof(encoded) / sizeof(encoded[0]); // element count, not byte count: stays in bounds for any TypeParam width
+		query_string.insert(query_string.end(), encoded, encoded_end);
+
+		long_query_string += query_string; // collect every sequence for the combined check after the loop
+		jessilib::deserialize_http_query(query_string);
+		EXPECT_TRUE(query_string.empty())
+						<< "in unit: " << unit << std::endl;
+	}
+
+	jessilib::deserialize_http_query(long_query_string);
+	EXPECT_TRUE(long_query_string.empty());
+}
+
+TEST(HtmlFormParser, empty) { // empty input is expected to parse successfully and yield no pairs
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text;
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.empty());
+	EXPECT_TRUE(parsed_result.empty());
+}
+
+TEST(HtmlFormParser, one_key) { // a lone key with no '=' is expected to yield one pair with an empty value
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "key";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_EQ(query_text, "key"); // nothing to decode, so the buffer is expected to be unchanged
+	EXPECT_EQ(parsed_result.size(), 1);
+	EXPECT_EQ(parsed_result[0].first, query_text);
+	EXPECT_TRUE(parsed_result[0].second.empty());
+}
+
+TEST(HtmlFormParser, one_key_and_value) { // "key=value" is expected to split into a single key/value pair
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "key=value";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.starts_with("keyvalue")); // the '=' is expected to be dropped from the buffer; only the prefix is checked
+	EXPECT_EQ(parsed_result.size(), 1);
+	EXPECT_EQ(parsed_result[0].first, "key");
+	EXPECT_EQ(parsed_result[0].second, "value");
+}
+
+TEST(HtmlFormParser, one_key_and_value_trailing) { // a trailing '&' is expected to add a second pair with empty key and value
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "key=value&";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.starts_with("keyvalue")); // separators are expected to be dropped from the buffer; only the prefix is checked
+	EXPECT_EQ(parsed_result.size(), 2);
+	EXPECT_EQ(parsed_result[0].first, "key");
+	EXPECT_EQ(parsed_result[0].second, "value");
+	EXPECT_TRUE(parsed_result[1].first.empty());
+	EXPECT_TRUE(parsed_result[1].second.empty());
+}
+
+TEST(HtmlFormParser, two_key_one_value) { // a second key without '=' is expected to get an empty value
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "key=value&second_key";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.starts_with("keyvaluesecond_key")); // '=' and '&' are expected to be dropped from the buffer
+	EXPECT_EQ(parsed_result.size(), 2);
+	EXPECT_EQ(parsed_result[0].first, "key");
+	EXPECT_EQ(parsed_result[0].second, "value");
+	EXPECT_EQ(parsed_result[1].first, "second_key");
+	EXPECT_TRUE(parsed_result[1].second.empty());
+}
+
+TEST(HtmlFormParser, two_key_two_value) { // only the first '=' of a pair is expected to split it; a later '=' stays in the value
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "key=value&second_key=second=value";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.starts_with("keyvaluesecond_keysecond=value")); // the second '=' of the last pair is kept as data
+	EXPECT_EQ(parsed_result.size(), 2);
+	EXPECT_EQ(parsed_result[0].first, "key");
+	EXPECT_EQ(parsed_result[0].second, "value");
+	EXPECT_EQ(parsed_result[1].first, "second_key");
+	EXPECT_EQ(parsed_result[1].second, "second=value");
+}
+
+TEST(HtmlFormParser, some_sequences) { // '+' and %XX escapes are expected to be decoded inside both keys and values
+	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
+	std::string query_text = "k+y=va+u%20&%73econd%5Fke%79=second_valu%65";
+	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
+	EXPECT_TRUE(query_text.starts_with("k yva u second_keysecond_value")); // decoded text is expected to be packed at the front of the buffer
+	EXPECT_EQ(parsed_result.size(), 2);
+	EXPECT_EQ(parsed_result[0].first, "k y");
+	EXPECT_EQ(parsed_result[0].second, "va u ");
+	EXPECT_EQ(parsed_result[1].first, "second_key");
+	EXPECT_EQ(parsed_result[1].second, "second_value");
+}
diff --git a/src/test/unicode.cpp b/src/test/unicode.cpp
index 859374d..077f729 100644
--- a/src/test/unicode.cpp
+++ b/src/test/unicode.cpp
@@ -25,6 +25,12 @@
 using namespace jessilib;
 using namespace std::literals;
 
+static_assert(codepoint_info<U'\n'>::utf8_length == 1); // compile-time check: U+000A must report a length of 1 in each encoding below
+static_assert(codepoint_info<U'\n'>::utf16_length == 1);
+static_assert(codepoint_info<U'\n'>::utf32_length == 1);
+static_assert(codepoint_info<U'\n'>::wchar_length == 1);
+static_assert(codepoint_info<U'\n'>::encode_length<char8_t> == 1); // same check through the per-character-type accessor
+
 /** encode_codepoint */
 
 TEST(UTF8Test, encode_codepoint) {
diff --git a/src/test/unicode_sequence.cpp b/src/test/unicode_sequence.cpp
index 7ab26c5..64fcedc 100644
--- a/src/test/unicode_sequence.cpp
+++ b/src/test/unicode_sequence.cpp
@@ -21,7 +21,7 @@
 #include "jessilib/unicode.hpp" // string_cast
 #include "test.hpp"
 
-using namespace std;
+using namespace std::literals;
 
 // Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
 #ifdef __cpp_lib_constexpr_string
@@ -30,17 +30,8 @@ constexpr std::string cpp_constexpr(std::string_view in_expression) {
 	jessilib::apply_cpp_escape_sequences(result);
 	return result;
 }
-
-constexpr std::string query_constexpr(std::string_view in_expression) {
-	std::string result{ in_expression };
-	jessilib::deserialize_http_query(result);
-	return result;
-}
 static_assert(cpp_constexpr("test"s) == "test"s);
 static_assert(cpp_constexpr("\\r\\n"s) == "\r\n"s);
-static_assert(query_constexpr("test"s) == "test"s);
-static_assert(query_constexpr("first+second"s) == "first second"s);
-static_assert(query_constexpr("first%20second"s) == "first second"s);
 #endif // __cpp_lib_constexpr_string
 
 using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>;
@@ -57,12 +48,6 @@ public:
 };
 TYPED_TEST_SUITE(UnicodeSequenceTest, char_types);
 
-template<typename T>
-class UnicodeUTF8SequenceTest : public ::testing::Test {
-public:
-};
-TYPED_TEST_SUITE(UnicodeUTF8SequenceTest, utf8_char_types);
-
 constexpr char32_t MAX_LOOP_CODEPOINT = 0x100FF; // use 0x10FFFF for full testing
 
 #define TEST_CPP_SEQUENCE(expr) \
@@ -212,123 +197,3 @@ TYPED_TEST(UnicodeSequenceTest, cpp_u32) {
 		EXPECT_EQ(decode.codepoint, static_cast<char32_t>(codepoint));
 	}
 }
-
-/**
- * Query strings
- */
-
-TYPED_TEST(UnicodeUTF8SequenceTest, single_chars) {
-	// [U+0000, U+100FF)
-	for (char32_t codepoint = 0; codepoint < MAX_LOOP_CODEPOINT; ++codepoint) {
-		std::basic_string<TypeParam> expected;
-		size_t units = jessilib::encode_codepoint(expected, codepoint);
-		EXPECT_NE(units, 0);
-		EXPECT_EQ(units, expected.size());
-
-		// Construct the query string
-		std::basic_string<TypeParam> query_string;
-		for (auto& unit : expected) {
-			char encoded[3] { '%', 0, 0 };
-			char* encoded_end = encoded + sizeof(encoded);
-			auto to_chars_result = std::to_chars(encoded + 1, encoded_end, static_cast<unsigned char>(unit), 16);
-			ASSERT_EQ(to_chars_result.ec, std::errc{}) // assertion will fail when `unit` is signed type
-				<< "For unit " << static_cast<int>(unit) << " in codepoint " << static_cast<int>(codepoint) << std::endl;
-
-			if (to_chars_result.ptr != encoded_end) {
-				// Only wrote one hex; shift it
-				encoded[2] = encoded[1];
-				encoded[1] = '0';
-			}
-
-			EXPECT_EQ(encoded[0], '%');
-			EXPECT_NE(encoded[1], 0);
-			EXPECT_NE(encoded[2], 0);
-			query_string.insert(query_string.end(), encoded, encoded_end);
-		}
-		EXPECT_EQ(query_string.size(), expected.size() * 3);
-
-		// Decode & check the query string
-		jessilib::deserialize_http_query(query_string);
-		EXPECT_EQ(query_string, expected);
-	}
-}
-
-TYPED_TEST(UnicodeUTF8SequenceTest, invalids) {
-	std::basic_string<TypeParam> query_string, long_query_string;
-	for (size_t unit = 0; unit <= 0xFF; ++unit) {
-		TypeParam encoded[2] { '%', static_cast<TypeParam>(unit) };
-		TypeParam* encoded_end = encoded + sizeof(encoded);
-		query_string.insert(query_string.end(), encoded, encoded_end);
-
-		long_query_string += query_string;
-		jessilib::deserialize_http_query(query_string);
-		EXPECT_TRUE(query_string.empty())
-			<< "in unit: " << unit << std::endl;
-	}
-
-	jessilib::deserialize_http_query(long_query_string);
-	EXPECT_TRUE(long_query_string.empty());
-}
-
-TYPED_TEST(UnicodeUTF8SequenceTest, invalids_2len) {
-	std::basic_string<TypeParam> query_string, long_query_string;
-	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
-		TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
-		TypeParam second = static_cast<TypeParam>(unit & 0xFF);
-		if (jessilib::as_base(first, 16) >= 0
-			&& jessilib::as_base(second, 16) >= 0) {
-			continue;
-		}
-		TypeParam encoded[3] { '%', static_cast<TypeParam>(first), static_cast<TypeParam>(second) };
-		TypeParam* encoded_end = encoded + sizeof(encoded);
-		query_string.insert(query_string.end(), encoded, encoded_end);
-
-		long_query_string += query_string;
-		jessilib::deserialize_http_query(query_string);
-		EXPECT_TRUE(query_string.empty())
-						<< "in unit: " << unit << std::endl;
-	}
-
-	jessilib::deserialize_http_query(long_query_string);
-	EXPECT_TRUE(long_query_string.empty());
-}
-
-TYPED_TEST(UnicodeUTF8SequenceTest, invalids_trailing) {
-	std::basic_string<TypeParam> query_string, long_query_string;
-	for (size_t unit = 0; unit <= 0xFF; ++unit) {
-		TypeParam encoded[3] { '%', static_cast<TypeParam>(unit), '%' };
-		TypeParam* encoded_end = encoded + sizeof(encoded);
-		query_string.insert(query_string.end(), encoded, encoded_end);
-
-		long_query_string += query_string;
-		jessilib::deserialize_http_query(query_string);
-		EXPECT_TRUE(query_string.empty())
-						<< "in unit: " << unit << std::endl;
-	}
-
-	jessilib::deserialize_http_query(long_query_string);
-	EXPECT_TRUE(long_query_string.empty());
-}
-
-TYPED_TEST(UnicodeUTF8SequenceTest, invalids_2len_trailing) {
-	std::basic_string<TypeParam> query_string, long_query_string;
-	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
-		TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
-		TypeParam second = static_cast<TypeParam>(unit & 0xFF);
-		if (jessilib::as_base(first, 16) >= 0
-			&& jessilib::as_base(second, 16) >= 0) {
-			continue;
-		}
-		TypeParam encoded[4] { '%', static_cast<TypeParam>(first), static_cast<TypeParam>(second), '%' };
-		TypeParam* encoded_end = encoded + sizeof(encoded);
-		query_string.insert(query_string.end(), encoded, encoded_end);
-
-		long_query_string += query_string;
-		jessilib::deserialize_http_query(query_string);
-		EXPECT_TRUE(query_string.empty())
-						<< "in unit: " << unit << std::endl;
-	}
-
-	jessilib::deserialize_http_query(long_query_string);
-	EXPECT_TRUE(long_query_string.empty());
-}