mirror of https://github.com/JAJames/jessilib.git
Jessica James
3 years ago
12 changed files with 1281 additions and 651 deletions
@ -0,0 +1,190 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
/**
|
||||
|
* @file unicode_sequence.hpp |
||||
|
* @author Jessica James |
||||
|
* |
||||
|
* HTTP query string & HTML form deserialization utilities |
||||
|
*/ |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "unicode_syntax.hpp" |
||||
|
#include "unicode_sequence.hpp" |
||||
|
#include "type_traits.hpp" |
||||
|
|
||||
|
namespace jessilib { |
||||
|
|
||||
|
/**
|
||||
|
* Query string escape sequence parser |
||||
|
*/ |
||||
|
|
||||
|
template<typename CharT, |
||||
|
std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // make_hex_sequence_pair isn't going to play well with other types
|
||||
|
static constexpr shrink_sequence_tree<CharT> http_query_escapes_root_tree{ |
||||
|
make_hex_sequence_pair<CharT, U'%', 2, true, false>(), |
||||
|
make_simple_sequence_pair<CharT, U'+', ' '>() |
||||
|
}; |
||||
|
static_assert(is_sorted<char, http_query_escapes_root_tree<char>, std::size(http_query_escapes_root_tree<char>)>(), "Tree must be pre-sorted"); |
||||
|
static_assert(is_sorted<char8_t, http_query_escapes_root_tree<char8_t>, std::size(http_query_escapes_root_tree<char8_t>)>(), "Tree must be pre-sorted"); |
||||
|
|
||||
|
template<typename CharT, |
||||
|
std::enable_if_t<sizeof(CharT) == 1>* = nullptr> |
||||
|
constexpr bool deserialize_http_query(std::basic_string<CharT>& inout_string) { |
||||
|
return apply_shrink_sequence_tree<CharT, http_query_escapes_root_tree<CharT>, std::size(http_query_escapes_root_tree<CharT>)>(inout_string); |
||||
|
} |
||||
|
|
||||
|
// TODO: decide whether to take this approach, where query strings are assumed to represent UTF-8 text data, OR implement
|
||||
|
// such that calling deserialize_http_query will assume the relevant encoding (i.e: calling with char16_t would read in
|
||||
|
// escaped query values as bytes in codepoint char16_t, rather than utf-8 encoding sequence)
|
||||
|
/*template<typename CharT,
|
||||
|
std::enable_if_t<sizeof(CharT) != 1>* = nullptr> |
||||
|
bool deserialize_http_query(std::basic_string<CharT>& inout_string) { |
||||
|
//TODO: optimize this?
|
||||
|
std::basic_string<char8_t> u8query_string = string_cast<char8_t>(inout_string); |
||||
|
bool result = deserialize_http_query<char8_t>(u8query_string); |
||||
|
inout_string = string_cast<CharT>(u8query_string); |
||||
|
return result; |
||||
|
}*/ |
||||
|
|
||||
|
/**
|
||||
|
* HTML form parser |
||||
|
*/ |
||||
|
|
||||
|
// Mutable state shared by the HTML form syntax-tree actions while one form string is decoded.
template<typename CharT, typename ContainerT>
struct HTMLFormContext {
	using container_type = ContainerT;

	// Destination for every key/value pair that gets completed
	container_type& out_container;

	// Position the next decoded code unit is written to
	CharT* write_head;

	// Start of the key currently being read; initially the same position as write_head
	const CharT* key_start = write_head;

	// Start of the pending value, which doubles as the end of the key; null until a value has started
	const CharT* value_start = nullptr;
};
||||
|
|
||||
|
template<typename CharT, typename ContextT, char32_t InCodepointV> |
||||
|
constexpr syntax_tree_member<CharT, ContextT> make_value_start_pair() { |
||||
|
// '='
|
||||
|
return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>&) constexpr { |
||||
|
if (inout_context.value_start != nullptr) { |
||||
|
// There's already a value pending; this must just be part of the value.
|
||||
|
inout_context.write_head += encode_codepoint(inout_context.write_head, InCodepointV); |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// Start pending_value
|
||||
|
inout_context.value_start = inout_context.write_head; |
||||
|
return true; |
||||
|
} }; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
constexpr bool value_end_action(ContextT& inout_context, std::basic_string_view<CharT>&) { |
||||
|
const CharT* value_end = inout_context.write_head; |
||||
|
const CharT* key_start = inout_context.key_start; |
||||
|
const CharT* value_start = inout_context.value_start; |
||||
|
if (value_start != nullptr) { |
||||
|
// Terminate key & value, push them to table
|
||||
|
std::basic_string_view<CharT> key{ key_start, static_cast<size_t>(value_start - key_start) }; |
||||
|
std::basic_string_view<CharT> value{ value_start, static_cast<size_t>(value_end - value_start) }; |
||||
|
jessilib::container::push(inout_context.out_container, key, value); |
||||
|
|
||||
|
// Start reading next key
|
||||
|
inout_context.key_start = value_end; |
||||
|
inout_context.value_start = nullptr; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// This is a valueless key; terminate the key and push it
|
||||
|
std::basic_string_view<CharT> key{ key_start, static_cast<size_t>(value_end - key_start) }; |
||||
|
jessilib::container::push(inout_context.out_container, key, std::basic_string_view<CharT>{}); |
||||
|
|
||||
|
// Start reading next key
|
||||
|
inout_context.key_start = value_end; |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT, char32_t InCodepointV> |
||||
|
constexpr syntax_tree_member<CharT, ContextT> make_value_end_pair() { |
||||
|
// '&'
|
||||
|
return { InCodepointV, value_end_action<CharT, ContextT> }; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT, char32_t InCodepointV, size_t MaxDigitsV, bool ExactDigitsV, bool IsUnicode> |
||||
|
constexpr syntax_tree_member<CharT, ContextT> make_hex_syntax_shrink_pair() { |
||||
|
return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) constexpr { |
||||
|
return hex_shrink_sequence_action<CharT, MaxDigitsV, ExactDigitsV, IsUnicode>(inout_context.write_head, inout_read_view); |
||||
|
} }; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT, char32_t InCodepointV, char8_t OutCodepointV> |
||||
|
constexpr syntax_tree_member<CharT, ContextT> make_simple_shrink_pair() { |
||||
|
return { |
||||
|
InCodepointV, |
||||
|
[](ContextT& inout_context, std::basic_string_view<CharT>&) constexpr { |
||||
|
*inout_context.write_head = static_cast<CharT>(OutCodepointV); |
||||
|
++inout_context.write_head; |
||||
|
return true; |
||||
|
} |
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
bool html_form_default_action(get_endpoint_result decode, ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) { |
||||
|
// A regular character; copy it and advance the read/write heads
|
||||
|
CharT*& write_head = inout_context.write_head; |
||||
|
CharT* write_end = write_head + decode.units; |
||||
|
while (write_head != write_end) { |
||||
|
*write_head = inout_read_view.front(); |
||||
|
++write_head; |
||||
|
inout_read_view.remove_prefix(1); |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContainerT, |
||||
|
std::enable_if_t<sizeof(CharT) == 1>* = nullptr> // make_hex_sequence_pair isn't going to play well with other types
|
||||
|
static constexpr syntax_tree<CharT, HTMLFormContext<CharT, ContainerT>> html_form_root_tree{ |
||||
|
make_hex_syntax_shrink_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'%', 2, true, false>(), |
||||
|
make_value_end_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'&'>(), |
||||
|
make_simple_shrink_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'+', ' '>(), |
||||
|
make_value_start_pair<CharT, HTMLFormContext<CharT, ContainerT>, U'='>() |
||||
|
}; |
||||
|
|
||||
|
template<typename CharT, typename ContainerT, |
||||
|
std::enable_if_t<sizeof(CharT) == 1>* = nullptr> |
||||
|
constexpr bool deserialize_html_form(ContainerT& out_container, std::basic_string<CharT>& inout_string) { |
||||
|
if (inout_string.empty()) { |
||||
|
return true; // even though apply_syntax_tree checks for this, check it here anyways so we don't call value_end_action
|
||||
|
} |
||||
|
|
||||
|
HTMLFormContext<CharT, ContainerT> context{ out_container, inout_string.data() }; |
||||
|
constexpr auto& html_form_tree = html_form_root_tree<CharT, ContainerT>; |
||||
|
static_assert(is_sorted<char, decltype(context), html_form_tree, std::size(html_form_tree)>(), "Tree must be pre-sorted"); |
||||
|
|
||||
|
std::basic_string_view<CharT> read_view{ inout_string }; |
||||
|
if (apply_syntax_tree<CharT, decltype(context), html_form_tree, std::size(html_form_tree), html_form_default_action> |
||||
|
(context, read_view)) { |
||||
|
value_end_action<CharT, decltype(context)>(context, read_view); |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
} // namespace jessilib
|
@ -0,0 +1,430 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
/**
|
||||
|
* @file unicode_hash.hpp |
||||
|
* @author Jessica James |
||||
|
* |
||||
|
* Unicode-aware encoding-agnostic string comparison & hashing utilities |
||||
|
*/ |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "unicode_base.hpp" |
||||
|
|
||||
|
namespace jessilib { |
||||
|
|
||||
|
// Should just make these methods container-type agnostic rather than this mess...
|
||||
|
// Generates the three std::basic_string overloads of `method` (string/view, view/string, string/string).
// Each converts its string arguments to std::basic_string_view and forwards to `method` again.
#define ADAPT_BASIC_STRING(method) \
	template<typename LhsCharT, typename RhsCharT> \
	auto method(const std::basic_string<LhsCharT>& lhs, std::basic_string_view<RhsCharT> rhs) { \
		return method(std::basic_string_view<LhsCharT>(lhs), rhs); \
	} \
	template<typename LhsCharT, typename RhsCharT> \
	auto method(std::basic_string_view<LhsCharT> lhs, const std::basic_string<RhsCharT>& rhs) { \
		return method(lhs, std::basic_string_view<RhsCharT>(rhs)); \
	} \
	template<typename LhsCharT, typename RhsCharT> \
	auto method(const std::basic_string<LhsCharT>& lhs, const std::basic_string<RhsCharT>& rhs) { \
		return method(std::basic_string_view<LhsCharT>(lhs), std::basic_string_view<RhsCharT>(rhs)); \
	}
||||
|
|
||||
|
/**
|
||||
|
* Checks if two strings are equal |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for left-hand parameter |
||||
|
* @tparam RhsCharT Character type for right-hand parameter |
||||
|
* @param lhs First string to compare |
||||
|
* @param rhs Second string to compare against |
||||
|
* @return True if the strings are equal, false otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT>
bool equals(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
	if constexpr (std::is_same_v<LhsCharT, RhsCharT>) {
		// Both sides share a character type: compare the code units directly, no decoding involved
		return lhs == rhs;
	}
	else {
		// Different character types: walk both strings one codepoint at a time
		while (!(lhs.empty() || rhs.empty())) {
			const auto lhs_next = decode_codepoint(lhs);
			const auto rhs_next = decode_codepoint(rhs);

			// Zero units means the front codepoint failed to decode (bad unicode sequence)
			const bool both_decoded = lhs_next.units != 0 && rhs_next.units != 0;
			if (!both_decoded || lhs_next.codepoint != rhs_next.codepoint) {
				return false;
			}

			// Same codepoint on both sides; drop it from each and keep going
			lhs.remove_prefix(lhs_next.units);
			rhs.remove_prefix(rhs_next.units);
		}

		// Equal only if both strings ran out at the same time
		return lhs.empty() && rhs.empty();
	}
}
||||
|
|
||||
|
ADAPT_BASIC_STRING(equals) |
||||
|
|
||||
|
/**
|
||||
|
* Checks if two strings are equal (case insensitive) |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for left-hand parameter |
||||
|
* @tparam RhsCharT Character type for right-hand parameter |
||||
|
* @param lhs First string to compare |
||||
|
* @param rhs Second string to compare against |
||||
|
* @return True if the strings are equal, false otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT>
bool equalsi(std::basic_string_view<LhsCharT> lhs, std::basic_string_view<RhsCharT> rhs) {
	// A differing size only proves inequality when every codepoint occupies exactly one code unit, i.e. for
	// char32_t. In variable-width encodings, codepoints that fold together may differ in encoded length
	// (Unicode folds U+212A KELVIN SIGN, 3 UTF-8 bytes, to 'k', 1 byte), so the size check would wrongly
	// reject strings that are equal once folded.
	if constexpr (std::is_same_v<LhsCharT, char32_t> && std::is_same_v<RhsCharT, char32_t>) {
		if (lhs.size() != rhs.size()) {
			return false;
		}
	}

	while (!lhs.empty() && !rhs.empty()) {
		auto lhs_front = decode_codepoint(lhs);
		auto rhs_front = decode_codepoint(rhs);

		if (lhs_front.units == 0
			|| rhs_front.units == 0) {
			// Failed to decode front codepoint; bad unicode sequence
			return false;
		}

		if (!equalsi(lhs_front.codepoint, rhs_front.codepoint)) {
			// Codepoints don't fold to same value
			return false;
		}

		// Codepoints are equal; trim off the fronts and continue
		lhs.remove_prefix(lhs_front.units);
		rhs.remove_prefix(rhs_front.units);
	}

	// Equal only if both strings were consumed completely
	return lhs.empty() && rhs.empty();
}
||||
|
|
||||
|
ADAPT_BASIC_STRING(equalsi) |
||||
|
|
||||
|
/**
|
||||
|
* Checks if a string starts with a substring |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for underlying string |
||||
|
* @tparam RhsCharT Character type for prefix string |
||||
|
* @param in_string String to check for prefix |
||||
|
* @param in_prefix Substring prefix to check for |
||||
|
* @return Data length of in_prefix in terms of LhsCharT if in_string starts with in_prefix, 0 otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
size_t starts_with_length(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) { |
||||
|
// If in_string and in_prefix are the same type, compare their sizes and quickly return if in_string is too small
|
||||
|
if constexpr (std::is_same_v<LhsCharT, RhsCharT>) { |
||||
|
if (in_string.size() < in_prefix.size()) { |
||||
|
return 0; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
size_t codepoints_removed{}; |
||||
|
while (!in_string.empty() && !in_prefix.empty()) { |
||||
|
get_endpoint_result string_front = decode_codepoint(in_string); |
||||
|
get_endpoint_result prefix_front = decode_codepoint(in_prefix); |
||||
|
|
||||
|
if (string_front.units == 0 |
||||
|
|| prefix_front.units == 0) { |
||||
|
// Failed to decode front codepoint; bad unicode sequence
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
if (string_front.codepoint != prefix_front.codepoint) { |
||||
|
// Codepoints aren't the same
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// Codepoints are equal; trim off the fronts and continue
|
||||
|
in_string.remove_prefix(string_front.units); |
||||
|
in_prefix.remove_prefix(prefix_front.units); |
||||
|
codepoints_removed += string_front.units; |
||||
|
} |
||||
|
|
||||
|
if (!in_prefix.empty()) { |
||||
|
// We reached end of in_string before end of prefix
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
return codepoints_removed; |
||||
|
} |
||||
|
|
||||
|
ADAPT_BASIC_STRING(starts_with_length) |
||||
|
|
||||
|
/**
|
||||
|
* Checks if a string starts with a substring (case insensitive) |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for underlying string |
||||
|
* @tparam RhsCharT Character type for prefix string |
||||
|
* @param in_string String to check for prefix |
||||
|
* @param in_prefix Substring prefix to check for |
||||
|
* @return Data length of in_prefix in terms of LhsCharT if in_string starts with in_prefix, 0 otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
size_t starts_with_lengthi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) { |
||||
|
// If in_string and in_prefix are the same type, skip decoding each point
|
||||
|
if constexpr (std::is_same_v<LhsCharT, RhsCharT>) { |
||||
|
if (in_string.size() < in_prefix.size()) { |
||||
|
return 0; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
size_t codepoints_removed{}; |
||||
|
while (!in_string.empty() && !in_prefix.empty()) { |
||||
|
get_endpoint_result string_front = decode_codepoint(in_string); |
||||
|
get_endpoint_result prefix_front = decode_codepoint(in_prefix); |
||||
|
|
||||
|
if (string_front.units == 0 |
||||
|
|| prefix_front.units == 0) { |
||||
|
// Failed to decode front codepoint; bad unicode sequence
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
if (!equalsi(string_front.codepoint, prefix_front.codepoint)) { |
||||
|
// Codepoints don't fold to same value
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// Codepoints are equal; trim off the fronts and continue
|
||||
|
in_string.remove_prefix(string_front.units); |
||||
|
in_prefix.remove_prefix(prefix_front.units); |
||||
|
codepoints_removed += string_front.units; |
||||
|
} |
||||
|
|
||||
|
if (!in_prefix.empty()) { |
||||
|
// We reached end of in_string before end of prefix
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
return codepoints_removed; |
||||
|
} |
||||
|
|
||||
|
ADAPT_BASIC_STRING(starts_with_lengthi) |
||||
|
|
||||
|
/**
|
||||
|
* Checks if a string starts with a substring |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for underlying string |
||||
|
* @tparam RhsCharT Character type for prefix string |
||||
|
* @param in_string String to check for prefix |
||||
|
* @param in_prefix Prefix to check for |
||||
|
* @return True if both strings are valid and in_string starts with in_prefix, false otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool starts_with(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) { |
||||
|
return starts_with_length<LhsCharT, RhsCharT>(in_string, in_prefix) != 0; |
||||
|
} |
||||
|
|
||||
|
ADAPT_BASIC_STRING(starts_with) |
||||
|
|
||||
|
/**
|
||||
|
* Checks if a string starts with a substring (case insensitive) |
||||
|
* |
||||
|
* @tparam LhsCharT Character type for underlying string |
||||
|
* @tparam RhsCharT Character type for prefix string |
||||
|
* @param in_string String to check for prefix |
||||
|
* @param in_prefix Prefix to check for |
||||
|
* @return True if both strings are valid and in_string starts with in_prefix, false otherwise |
||||
|
*/ |
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool starts_withi(std::basic_string_view<LhsCharT> in_string, std::basic_string_view<RhsCharT> in_prefix) { |
||||
|
return starts_with_lengthi<LhsCharT, RhsCharT>(in_string, in_prefix) != 0; |
||||
|
} |
||||
|
|
||||
|
ADAPT_BASIC_STRING(starts_withi) |
||||
|
|
||||
|
/**
|
||||
|
* Calculates the hash of a string based on its codepoints, such that a unicode string will always produce the same hash |
||||
|
* regardless of underlying encoding |
||||
|
* |
||||
|
* This is not intended for generating hashes of arbitrary data; it's specifically intended for strings of text |
||||
|
*/ |
||||
|
struct text_hash { |
||||
|
using is_transparent = std::true_type; |
||||
|
|
||||
|
template<typename CharT> |
||||
|
static uint64_t hash(const CharT* data, const CharT* end) { |
||||
|
uint64_t hash = 14695981039346656037ULL; |
||||
|
|
||||
|
get_endpoint_result decode; |
||||
|
while (data != end) { |
||||
|
decode = decode_codepoint(data, end); |
||||
|
if (decode.units == 0) { |
||||
|
return hash; |
||||
|
} |
||||
|
|
||||
|
hash = hash ^ decode.codepoint; |
||||
|
hash = hash * 1099511628211ULL; |
||||
|
data += decode.units; |
||||
|
} |
||||
|
|
||||
|
return hash; |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char8_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char16_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char32_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
struct text_equal { |
||||
|
using is_transparent = std::true_type; |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { |
||||
|
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { |
||||
|
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { |
||||
|
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { |
||||
|
return equals<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
/**
|
||||
|
* Calculates the hash of a string based on its folded codepoints, such that a unicode string will always produce the |
||||
|
* same hash regardless of underlying encoding or the casing of its values. |
||||
|
* |
||||
|
* This is not intended for generating hashes of arbitrary data; it's specifically intended for strings of text |
||||
|
*/ |
||||
|
struct text_hashi { |
||||
|
using is_transparent = std::true_type; |
||||
|
|
||||
|
template<typename CharT> |
||||
|
static uint64_t hash(const CharT* data, const CharT* end) { |
||||
|
uint64_t hash = 14695981039346656037ULL; |
||||
|
|
||||
|
get_endpoint_result decode; |
||||
|
while (data != end) { |
||||
|
decode = decode_codepoint(data, end - data); |
||||
|
if (decode.units == 0) { |
||||
|
return hash; |
||||
|
} |
||||
|
|
||||
|
hash = hash ^ fold(decode.codepoint); |
||||
|
hash = hash * 1099511628211ULL; |
||||
|
data += decode.units; |
||||
|
} |
||||
|
|
||||
|
return hash; |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char8_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char8_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char16_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char16_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(const std::basic_string<char32_t>& in_key) const noexcept { // ASSUMES UTF-8
|
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
|
||||
|
auto operator()(std::basic_string_view<char32_t> in_key) const noexcept { |
||||
|
return hash(in_key.data(), in_key.data() + in_key.size()); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
struct text_equali { |
||||
|
using is_transparent = std::true_type; |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(std::basic_string_view<LhsCharT> in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { |
||||
|
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(std::basic_string_view<LhsCharT> in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { |
||||
|
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(const std::basic_string<LhsCharT>& in_lhs, std::basic_string_view<RhsCharT> in_rhs) const noexcept { |
||||
|
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
|
||||
|
template<typename LhsCharT, typename RhsCharT> |
||||
|
bool operator()(const std::basic_string<LhsCharT>& in_lhs, const std::basic_string<RhsCharT>& in_rhs) const noexcept { |
||||
|
return equalsi<LhsCharT, RhsCharT>(in_lhs, in_rhs); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
} // namespace jessilib
|
@ -0,0 +1,139 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
/**
|
||||
|
* @file unicode_sequence.hpp |
||||
|
* @author Jessica James |
||||
|
* |
||||
|
* Unicode-aware syntax tree parsing utilities |
||||
|
*/ |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "unicode_base.hpp" |
||||
|
|
||||
|
namespace jessilib { |
||||
|
|
||||
|
/**
|
||||
|
* Syntax tree; move this to another file later |
||||
|
*/ |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
using syntax_tree_action = bool(*)(ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view); |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
using default_syntax_tree_action = bool(*)(get_endpoint_result in_codepoint, ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view); |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
using syntax_tree = const std::pair<char32_t, syntax_tree_action<CharT, ContextT>>[]; |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
using syntax_tree_member = const std::pair<char32_t, syntax_tree_action<CharT, ContextT>>; |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
constexpr bool syntax_tree_member_compare(const syntax_tree_member<CharT, ContextT>& in_lhs, const char32_t in_rhs) { |
||||
|
return in_lhs.first < in_rhs; |
||||
|
} |
||||
|
|
||||
|
// Lessers on left
|
||||
|
template<typename CharT, typename ContextT, const syntax_tree<CharT, ContextT> TreeBegin, size_t TreeSize> |
||||
|
constexpr bool is_sorted() { |
||||
|
auto head = TreeBegin; |
||||
|
constexpr auto end = TreeBegin + TreeSize; |
||||
|
|
||||
|
if (head == end) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
while (head + 1 != end) { |
||||
|
const auto next = head + 1; |
||||
|
if (head->first > next->first) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
++head; |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
bool fail_action(get_endpoint_result, ContextT&, std::basic_string_view<CharT>&) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT> |
||||
|
bool noop_action(get_endpoint_result decode, ContextT&, std::basic_string_view<CharT>& inout_read_view) { |
||||
|
inout_read_view.remove_prefix(decode.units); |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT, char32_t InCodepointV, const syntax_tree<CharT, ContextT> SubTreeBegin, size_t SubTreeSize, default_syntax_tree_action<CharT, ContextT> DefaultActionF = fail_action<CharT, ContextT>> |
||||
|
constexpr syntax_tree_member<CharT, ContextT> make_tree_pair() { |
||||
|
return { InCodepointV, [](ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) constexpr { |
||||
|
auto decode = decode_codepoint(inout_read_view); |
||||
|
if (decode.units == 0) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
constexpr syntax_tree_member<CharT, ContextT>* SubTreeEnd = SubTreeBegin + SubTreeSize; |
||||
|
auto parser = std::lower_bound(SubTreeBegin, SubTreeEnd, decode.codepoint, &syntax_tree_member_compare<CharT>); |
||||
|
if (parser == SubTreeEnd || parser->first != decode.codepoint) { |
||||
|
return DefaultActionF(decode, inout_context, inout_read_view); |
||||
|
} |
||||
|
|
||||
|
// This is a parsed sequence; pass it to the parser
|
||||
|
inout_read_view.remove_prefix(decode.units); |
||||
|
return (parser->second)(inout_context, inout_read_view); |
||||
|
} }; |
||||
|
} |
||||
|
|
||||
|
template<typename CharT, typename ContextT, const syntax_tree<CharT, ContextT> SequenceTreeBegin, size_t SequenceTreeSize, |
||||
|
default_syntax_tree_action<CharT, ContextT> DefaultActionF = noop_action<CharT, ContextT>> |
||||
|
constexpr bool apply_syntax_tree(ContextT& inout_context, std::basic_string_view<CharT>& inout_read_view) { |
||||
|
if (inout_read_view.empty()) { |
||||
|
// Nothing to parse
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
get_endpoint_result decode; |
||||
|
constexpr auto SubTreeEnd = SequenceTreeBegin + SequenceTreeSize; |
||||
|
while ((decode = decode_codepoint(inout_read_view)).units != 0) { |
||||
|
auto parser = std::lower_bound(SequenceTreeBegin, SubTreeEnd, decode.codepoint, &syntax_tree_member_compare<CharT, ContextT>); |
||||
|
if (parser == SubTreeEnd || parser->first != decode.codepoint) { |
||||
|
// Just a normal character; pass it to the default handler
|
||||
|
if (!DefaultActionF(decode, inout_context, inout_read_view)) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
// This is a parsed sequence; pass it to the parser instead
|
||||
|
inout_read_view.remove_prefix(decode.units); |
||||
|
if (!(parser->second)(inout_context, inout_read_view)) { |
||||
|
// Bad input received; give up
|
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// We've finished parsing successfully
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
} // namespace jessilib
|
@ -0,0 +1,238 @@ |
|||||
|
/**
|
||||
|
* Copyright (C) 2021 Jessica James. |
||||
|
* |
||||
|
* Permission to use, copy, modify, and/or distribute this software for any |
||||
|
* purpose with or without fee is hereby granted, provided that the above |
||||
|
* copyright notice and this permission notice appear in all copies. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
* |
||||
|
* Written by Jessica James <jessica.aj@outlook.com> |
||||
|
*/ |
||||
|
|
||||
|
#include "jessilib/http_query.hpp" |
||||
|
#include <charconv> |
||||
|
#include "test.hpp" |
||||
|
|
||||
|
using namespace std::literals; |
||||
|
|
||||
|
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
|
||||
|
// The macro is also defined, with smaller values, by libraries that only provide constexpr char_traits;
// 201907L is the value which advertises a fully constexpr std::string
#if defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L
// Runs deserialize_http_query over a copy of in_expression and returns the modified copy
constexpr std::string query_constexpr(std::string_view in_expression) {
	std::string result{ in_expression };
	jessilib::deserialize_http_query(result);
	return result;
}
static_assert(query_constexpr("test"s) == "test"s);
static_assert(query_constexpr("first+second"s) == "first second"s);
static_assert(query_constexpr("first%20second"s) == "first second"s);
#endif // __cpp_lib_constexpr_string >= 201907L
|
||||
|
|
||||
|
using char_types = ::testing::Types<char, char8_t, char16_t, char32_t>; |
||||
|
using utf8_char_types = ::testing::Types<char, char8_t>; |
||||
|
|
||||
|
template<typename T> |
||||
|
class QuerySequenceTest : public ::testing::Test { |
||||
|
public: |
||||
|
}; |
||||
|
TYPED_TEST_SUITE(QuerySequenceTest, utf8_char_types); |
||||
|
|
||||
|
// Exclusive upper bound for the per-codepoint test loop; raise to 0x10FFFF for full testing
constexpr char32_t MAX_LOOP_CODEPOINT{ 0x100FF };
|
||||
|
|
||||
|
// Percent-encodes every code unit of each codepoint in [U+0000, MAX_LOOP_CODEPOINT) and checks that
// deserialize_http_query turns the result back into the original encoding
TYPED_TEST(QuerySequenceTest, single_chars) {
	for (char32_t codepoint = 0; codepoint < MAX_LOOP_CODEPOINT; ++codepoint) {
		std::basic_string<TypeParam> expected;
		const size_t units = jessilib::encode_codepoint(expected, codepoint);
		EXPECT_NE(units, 0U); // unsigned literal keeps the comparison free of signed/unsigned mixing
		EXPECT_EQ(units, expected.size());

		// Construct the query string
		std::basic_string<TypeParam> query_string;
		for (const TypeParam unit : expected) {
			char encoded[3] { '%', 0, 0 };
			char* const encoded_end = std::end(encoded);

			// Format through unsigned char: a negative value (possible when the unit type is signed) would need
			// a '-' sign in addition to its digits, which cannot fit in the two characters available
			const auto to_chars_result = std::to_chars(encoded + 1, encoded_end, static_cast<unsigned char>(unit), 16);
			ASSERT_EQ(to_chars_result.ec, std::errc{})
				<< "For unit " << static_cast<int>(unit) << " in codepoint " << static_cast<int>(codepoint) << std::endl;

			if (to_chars_result.ptr != encoded_end) {
				// Only wrote one hex; shift it
				encoded[2] = encoded[1];
				encoded[1] = '0';
			}

			EXPECT_EQ(encoded[0], '%');
			EXPECT_NE(encoded[1], 0);
			EXPECT_NE(encoded[2], 0);
			query_string.insert(query_string.end(), encoded, encoded_end);
		}
		EXPECT_EQ(query_string.size(), expected.size() * 3);

		// Decode & check the query string
		jessilib::deserialize_http_query(query_string);
		EXPECT_EQ(query_string, expected);
	}
}
||||
|
|
||||
|
// A '%' followed by only one unit: the test expects deserialize_http_query to leave the string empty, both for
// each sequence on its own and for all of them concatenated
TYPED_TEST(QuerySequenceTest, invalids) {
	std::basic_string<TypeParam> query_string, long_query_string;
	for (size_t unit = 0; unit <= 0xFF; ++unit) {
		const TypeParam encoded[2] { '%', static_cast<TypeParam>(unit) };
		// std::begin/std::end count elements; `encoded + sizeof(encoded)` counted bytes, which only lands on the
		// array end while TypeParam is one byte wide
		query_string.insert(query_string.end(), std::begin(encoded), std::end(encoded));

		long_query_string += query_string;
		jessilib::deserialize_http_query(query_string);
		EXPECT_TRUE(query_string.empty())
			<< "in unit: " << unit << std::endl;
	}

	jessilib::deserialize_http_query(long_query_string);
	EXPECT_TRUE(long_query_string.empty());
}
||||
|
|
||||
|
// A '%' followed by two units of which at least one is not a hexadecimal digit: the test expects
// deserialize_http_query to leave the string empty, both per sequence and for all of them concatenated
TYPED_TEST(QuerySequenceTest, invalids_2len) {
	std::basic_string<TypeParam> query_string, long_query_string;
	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
		const TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
		const TypeParam second = static_cast<TypeParam>(unit & 0xFF);
		if (jessilib::as_base(first, 16) >= 0
			&& jessilib::as_base(second, 16) >= 0) {
			// Both units are hex digits, so this would be a valid escape; skip it
			continue;
		}

		const TypeParam encoded[3] { '%', first, second };
		// std::begin/std::end count elements; `encoded + sizeof(encoded)` counted bytes, which only lands on the
		// array end while TypeParam is one byte wide
		query_string.insert(query_string.end(), std::begin(encoded), std::end(encoded));

		long_query_string += query_string;
		jessilib::deserialize_http_query(query_string);
		EXPECT_TRUE(query_string.empty())
			<< "in unit: " << unit << std::endl;
	}

	jessilib::deserialize_http_query(long_query_string);
	EXPECT_TRUE(long_query_string.empty());
}
||||
|
|
||||
|
// A '%' followed by one unit and then a second '%': the test expects deserialize_http_query to leave the string
// empty, both per sequence and for all of them concatenated
TYPED_TEST(QuerySequenceTest, invalids_trailing) {
	std::basic_string<TypeParam> query_string, long_query_string;
	for (size_t unit = 0; unit <= 0xFF; ++unit) {
		const TypeParam encoded[3] { '%', static_cast<TypeParam>(unit), '%' };
		// std::begin/std::end count elements; `encoded + sizeof(encoded)` counted bytes, which only lands on the
		// array end while TypeParam is one byte wide
		query_string.insert(query_string.end(), std::begin(encoded), std::end(encoded));

		long_query_string += query_string;
		jessilib::deserialize_http_query(query_string);
		EXPECT_TRUE(query_string.empty())
			<< "in unit: " << unit << std::endl;
	}

	jessilib::deserialize_http_query(long_query_string);
	EXPECT_TRUE(long_query_string.empty());
}
||||
|
|
||||
|
// A '%' followed by two units (at least one not a hexadecimal digit) and then a second '%': the test expects
// deserialize_http_query to leave the string empty, both per sequence and for all of them concatenated
TYPED_TEST(QuerySequenceTest, invalids_2len_trailing) {
	std::basic_string<TypeParam> query_string, long_query_string;
	for (size_t unit = 0; unit <= 0xFFFF; ++unit) {
		const TypeParam first = static_cast<TypeParam>(unit >> 8); // order of these two doesn't matter
		const TypeParam second = static_cast<TypeParam>(unit & 0xFF);
		if (jessilib::as_base(first, 16) >= 0
			&& jessilib::as_base(second, 16) >= 0) {
			// Both units are hex digits, so this would be a valid escape; skip it
			continue;
		}

		const TypeParam encoded[4] { '%', first, second, '%' };
		// std::begin/std::end count elements; `encoded + sizeof(encoded)` counted bytes, which only lands on the
		// array end while TypeParam is one byte wide
		query_string.insert(query_string.end(), std::begin(encoded), std::end(encoded));

		long_query_string += query_string;
		jessilib::deserialize_http_query(query_string);
		EXPECT_TRUE(query_string.empty())
			<< "in unit: " << unit << std::endl;
	}

	jessilib::deserialize_http_query(long_query_string);
	EXPECT_TRUE(long_query_string.empty());
}
||||
|
|
||||
|
// Empty form text: parsing succeeds, the text stays empty and no pairs are produced
TEST(HtmlFormParser, empty) {
	using view_pair = std::pair<std::string_view, std::string_view>;

	std::string form_text;
	std::vector<view_pair> fields;
	const bool parsed = jessilib::deserialize_html_form(fields, form_text);

	EXPECT_TRUE(parsed);
	EXPECT_TRUE(form_text.empty());
	EXPECT_TRUE(fields.empty());
}
||||
|
|
||||
|
// A lone key without '=': expects one pair holding the key and an empty value
TEST(HtmlFormParser, one_key) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "key";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_EQ(query_text, "key");
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 1U);
	EXPECT_EQ(parsed_result[0].first, query_text);
	EXPECT_TRUE(parsed_result[0].second.empty());
}
||||
|
|
||||
|
// A single "key=value" pair: expects one pair, with the text rewritten to start with key and value back to back
TEST(HtmlFormParser, one_key_and_value) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "key=value";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_TRUE(query_text.starts_with("keyvalue"));
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 1U);
	EXPECT_EQ(parsed_result[0].first, "key");
	EXPECT_EQ(parsed_result[0].second, "value");
}
||||
|
|
||||
|
// "key=value" followed by a trailing '&': expects a second pair whose key and value are both empty
TEST(HtmlFormParser, one_key_and_value_trailing) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "key=value&";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_TRUE(query_text.starts_with("keyvalue"));
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 2U);
	EXPECT_EQ(parsed_result[0].first, "key");
	EXPECT_EQ(parsed_result[0].second, "value");
	EXPECT_TRUE(parsed_result[1].first.empty());
	EXPECT_TRUE(parsed_result[1].second.empty());
}
||||
|
|
||||
|
// Two keys where only the first has a value: expects the second pair to carry an empty value
TEST(HtmlFormParser, two_key_one_value) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "key=value&second_key";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_TRUE(query_text.starts_with("keyvaluesecond_key"));
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 2U);
	EXPECT_EQ(parsed_result[0].first, "key");
	EXPECT_EQ(parsed_result[0].second, "value");
	EXPECT_EQ(parsed_result[1].first, "second_key");
	EXPECT_TRUE(parsed_result[1].second.empty());
}
||||
|
|
||||
|
// Two key/value pairs where the second value itself contains '=': expects that '=' to stay part of the value
TEST(HtmlFormParser, two_key_two_value) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "key=value&second_key=second=value";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_TRUE(query_text.starts_with("keyvaluesecond_keysecond=value"));
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 2U);
	EXPECT_EQ(parsed_result[0].first, "key");
	EXPECT_EQ(parsed_result[0].second, "value");
	EXPECT_EQ(parsed_result[1].first, "second_key");
	EXPECT_EQ(parsed_result[1].second, "second=value");
}
||||
|
|
||||
|
// Keys and values containing '+' and %XX escapes: expects both to be decoded in the resulting pairs
TEST(HtmlFormParser, some_sequences) {
	std::vector<std::pair<std::string_view, std::string_view>> parsed_result;
	std::string query_text = "k+y=va+u%20&%73econd%5Fke%79=second_valu%65";
	EXPECT_TRUE(jessilib::deserialize_html_form(parsed_result, query_text));
	EXPECT_TRUE(query_text.starts_with("k yva u second_keysecond_value"));
	// Fatal check: the element accesses below would be out of bounds if the size were wrong
	ASSERT_EQ(parsed_result.size(), 2U);
	EXPECT_EQ(parsed_result[0].first, "k y");
	EXPECT_EQ(parsed_result[0].second, "va u ");
	EXPECT_EQ(parsed_result[1].first, "second_key");
	EXPECT_EQ(parsed_result[1].second, "second_value");
}
Loading…
Reference in new issue