Compare commits

...

6 Commits

  1. 121
      src/common/app_parameters.cpp
  2. 151
      src/common/parsers/json.cpp
  3. 96
      src/include/jessilib/app_parameters.hpp
  4. 60
      src/include/jessilib/unicode.hpp
  5. 2
      src/include/jessilib/unicode_base.hpp
  6. 54
      src/test/app_parameters.cpp
  7. 2
      src/test/http_query.cpp
  8. 2
      src/test/unicode_sequence.cpp

121
src/common/app_parameters.cpp

@ -1,5 +1,5 @@
/**
* Copyright (C) 2019 Jessica James.
* Copyright (C) 2019-2021 Jessica James.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@ -17,28 +17,60 @@
*/
#include "app_parameters.hpp"
#include "jessilib/unicode.hpp"
#include "jessilib/split.hpp"
namespace jessilib {
app_parameters::app_parameters(int in_argc, char** in_argv)
: app_parameters{ in_argc, const_cast<const char**>(in_argv) } {
// Builds the parameters from mutable narrow-character argv/envp arrays.
// The argument count is unnamed and unused: both arrays are converted by vectorize_ntargs and the
// resulting vectors are forwarded to the vector-based constructor.
app_parameters::app_parameters(int, char** in_argv, char** in_envp)
: app_parameters{ vectorize_ntargs(in_argv), vectorize_ntargs(in_envp) } {
// Empty ctor body
}
app_parameters::app_parameters(int in_argc, const char** in_argv) {
// TODO: discard argc/argv and use GetCommandLineW on Windows
// TODO: not assume argv is utf-8; it often will not be
// Builds the parameters from const narrow-character argv/envp arrays.
// The argument count is unnamed and unused: both arrays are converted by vectorize_ntargs and the
// resulting vectors are forwarded to the vector-based constructor.
app_parameters::app_parameters(int, const char** in_argv, const char** in_envp)
: app_parameters{ vectorize_ntargs(in_argv), vectorize_ntargs(in_envp) } {
// Empty ctor body
}
// Builds the parameters from mutable wide-character argv/envp arrays.
// The argument count is unnamed and unused: both arrays are converted by vectorize_ntargs and the
// resulting vectors are forwarded to the vector-based constructor.
app_parameters::app_parameters(int, wchar_t** in_argv, wchar_t** in_envp)
: app_parameters{ vectorize_ntargs(in_argv), vectorize_ntargs(in_envp) } {
// Empty ctor body
}
// Builds the parameters from const wide-character argv/envp arrays.
// The argument count is unnamed and unused: both arrays are converted by vectorize_ntargs and the
// resulting vectors are forwarded to the vector-based constructor.
app_parameters::app_parameters(int, const wchar_t** in_argv, const wchar_t** in_envp)
: app_parameters{ vectorize_ntargs(in_argv), vectorize_ntargs(in_envp) } {
// Empty ctor body
}
#ifdef __cpp_lib_generic_unordered_lookup
#define WRAP_MAP_KEY(in_key) in_key
#else // We can't use std::string_view for InKeyType until GCC 11 & clang 12, and I still want to support GCC 9
#define WRAP_MAP_KEY(in_key) static_cast<string_type>(in_key)
#endif // __cpp_lib_generic_unordered_lookup
// Reports whether in_switch is present in m_switches_set.
// The key goes through WRAP_MAP_KEY so the lookup compiles whether or not the standard library
// offers generic (heterogeneous) unordered lookup.
bool app_parameters::has_switch(string_view_type in_switch) const {
	const auto match = m_switches_set.find(WRAP_MAP_KEY(in_switch));
	const bool is_present = (match != m_switches_set.end());
	return is_present;
}
app_parameters::app_parameters(std::vector<string_type> in_args, std::vector<string_type> in_env) {
// Parse in environment variables first to ensure they're parsed before the early-out
for (const auto& env : in_env) {
auto split = jessilib::split_once(env, u8'=');
m_values[split.first] = split.second;
m_env_values[split.first] = std::move(split.second);
}
// Sanity safety check; should never happen
if (in_argc <= 0 || in_argv == nullptr) {
if (in_args.empty()) {
return;
}
// Populate path
m_path = reinterpret_cast<const char8_t*>(in_argv[0]);
m_path = in_args[0];
// Process args
std::u8string_view key;
std::u8string value;
string_view_type key;
string_type value;
auto flush_value = [&key, &value, this]() {
// This is the start of a key; flush what we were previously processing
if (!key.empty()) {
@ -46,29 +78,32 @@ app_parameters::app_parameters(int in_argc, const char** in_argv) {
m_switches.emplace_back(key);
}
else {
m_values.emplace(key, std::move(value));
string_type key_str{ key };
m_values[key_str] = value;
m_arg_values[key_str] = std::move(value);
value.clear();
}
}
};
for (int index = 1; index < in_argc; ++index) {
const char8_t* arg = reinterpret_cast<const char8_t*>(in_argv[index]);
if (arg != nullptr && *arg != '\0') {
for (size_t index = 1; index < in_args.size(); ++index) {
const string_type& arg = in_args[index];
if (!arg.empty()) {
// Check if this is a key or value
if (*arg == '-') {
if (arg.front() == '-') {
// Flush pending value (if any)
flush_value();
// Strip any leading '-' or '--' and set key
key = arg + 1;
if (key[0] == '-') {
key = arg;
key.remove_prefix(1);
if (key.front() == '-') {
key.remove_prefix(1);
}
// Parse key for any value denominator ('=')
size_t key_end = key.find('=');
if (key_end != std::u8string_view::npos) {
if (key_end != string_view_type::npos) {
// arg contains start of a value
value = key.substr(key_end + 1);
key = key.substr(0, key_end);
@ -93,26 +128,34 @@ app_parameters::app_parameters(int in_argc, const char** in_argv) {
flush_value();
// Populate m_switches_set from m_switches
m_switches_set = std::unordered_set<std::u8string_view>{ m_switches.begin(), m_switches.end() };
m_switches_set = { m_switches.begin(), m_switches.end() };
}
std::u8string_view app_parameters::path() const {
// Returns a view of m_path, which the vector-based constructor sets from the first argument.
// The view refers to storage owned by this object.
app_parameters::string_view_type app_parameters::path() const {
return m_path;
}
const std::vector<std::u8string_view>& app_parameters::arguments() const {
// Returns a reference to the stored argument list (m_args).
const std::vector<app_parameters::string_type>& app_parameters::arguments() const {
return m_args;
}
const std::vector<std::u8string_view>& app_parameters::switches() const {
// Returns a reference to the stored switch list (m_switches), in the order the constructor recorded them.
const std::vector<app_parameters::string_type>& app_parameters::switches() const {
return m_switches;
}
const std::unordered_set<std::u8string_view>& app_parameters::switches_set() const {
// Returns a reference to m_switches_set, the set the constructor builds from m_switches.
const app_parameters::set_type& app_parameters::switches_set() const {
return m_switches_set;
}
const std::unordered_map<std::u8string_view, std::u8string>& app_parameters::values() const {
// Returns a reference to m_arg_values: the key/value pairs that came from command-line arguments only.
const app_parameters::map_type& app_parameters::arg_values() const {
return m_arg_values;
}
// Returns a reference to m_env_values: the key/value pairs that came from environment entries only.
const app_parameters::map_type& app_parameters::env_values() const {
return m_env_values;
}
// Returns a reference to m_values, the combined map the constructor fills from both environment
// entries and command-line key/value pairs.
const app_parameters::map_type& app_parameters::values() const {
return m_values;
}
@ -126,20 +169,40 @@ object app_parameters::as_object() const {
return object{};
}
return std::map<std::u8string, object>{
return std::map<string_type, object>{
{ u8"Path"s, m_path },
{ u8"Args"s, m_args },
{ u8"Switches"s, m_switches },
{ u8"ArgValues"s, m_arg_values },
{ u8"EnvValues"s, m_env_values },
{ u8"Values"s, m_values }
};
}
bool app_parameters::has_switch(std::u8string_view in_switch) const {
return m_switches_set.find(in_switch) != m_switches_set.end();
// Looks up in_key among the command-line key/value pairs only.
// Returns a view of the stored value when the key exists, otherwise in_default.
app_parameters::string_view_type app_parameters::get_arg_value(string_view_type in_key, string_view_type in_default) const {
	const auto entry = m_arg_values.find(WRAP_MAP_KEY(in_key));
	if (entry != m_arg_values.end()) {
		// Key present; hand back a view into the stored string
		return entry->second;
	}

	// Key absent; fall back to the caller-supplied default
	return in_default;
}
// Looks up in_key among the environment key/value pairs only.
// Returns a view of the stored value when the key exists, otherwise in_default.
app_parameters::string_view_type app_parameters::get_env_value(string_view_type in_key, string_view_type in_default) const {
	const auto entry = m_env_values.find(WRAP_MAP_KEY(in_key));
	if (entry != m_env_values.end()) {
		// Key present; hand back a view into the stored string
		return entry->second;
	}

	// Key absent; fall back to the caller-supplied default
	return in_default;
}
std::u8string_view app_parameters::get_value(std::u8string_view in_key, std::u8string_view in_default) const {
auto result = m_values.find(in_key);
app_parameters::string_view_type app_parameters::get_value(string_view_type in_key, string_view_type in_default) const {
auto result = m_values.find(WRAP_MAP_KEY(in_key));
// Safety check
if (result == m_values.end()) {

151
src/common/parsers/json.cpp

@ -19,6 +19,7 @@
#include "parsers/json.hpp"
#include <charconv>
#include "unicode.hpp"
#include "unicode_sequence.hpp"
#include "util.hpp"
using namespace std::literals;
@ -101,135 +102,41 @@ std::u8string read_json_string(std::string_view& in_data) {
// Remove leading quotation
in_data.remove_prefix(1);
// Iterate over view until we reach the ending quotation, or the end of the view
while (!in_data.empty()) {
switch (in_data.front()) {
// Escape sequence
case '\\':
// strip '\'
in_data.remove_prefix(1);
if (in_data.empty()) {
throw std::invalid_argument{ "Invalid JSON data; unexpected end of data when parsing escape sequence" };
}
// Parse escape type
switch (in_data.front()) {
// Quote
case '\"':
in_data.remove_prefix(1);
result += u8'\"';
break;
// Backslash
case '\\':
in_data.remove_prefix(1);
result += u8'\\';
break;
// Forward slash
case '/':
in_data.remove_prefix(1);
result += u8'/';
break;
// Backspace
case 'b':
in_data.remove_prefix(1);
result += u8'\b';
break;
// Formfeed
case 'f':
in_data.remove_prefix(1);
result += u8'\f';
break;
// Newline
case 'n':
in_data.remove_prefix(1);
result += u8'\n';
break;
// Carriage return
case 'r':
in_data.remove_prefix(1);
result += u8'\r';
break;
if (in_data.empty()) {
throw std::invalid_argument{ "Invalid JSON data; missing ending quote (\") when parsing string" };
}
// Horizontal tab
case 't':
in_data.remove_prefix(1);
result += u8'\t';
break;
// Unicode codepoint
case 'u': {
in_data.remove_prefix(1); // strip 'u'
if (in_data.size() < 4) {
throw std::invalid_argument{
"Invalid JSON data; unexpected end of data when parsing unicode escape sequence" };
}
char16_t codepoint = get_codepoint_from_hex(in_data);
in_data.remove_prefix(4); // strip codepoint hex
if (is_high_surrogate(codepoint) // If we have a high surrogate...
&& in_data.size() >= 6) { // And we have enough room for "\uXXXX"...
// Special case: we just parsed a high surrogate. Handle this with the low surrogate, if there is one
if (in_data.substr(0, 2) == "\\u"sv) {
// Another codepoint follows; read it in
in_data.remove_prefix(2); // strip "\u"
char16_t second_codepoint = get_codepoint_from_hex(in_data);
in_data.remove_prefix(4); // strip codepoint hex
if (is_low_surrogate(second_codepoint)) {
// We've got a valid surrogate pair; serialize the represented codepoint; decode it
codepoint = static_cast<char16_t>(decode_surrogate_pair(codepoint, second_codepoint).codepoint);
encode_codepoint(result, codepoint); // serialize the real codepoint
}
else {
// This is not a valid surrogate pair; serialize the codepoints directly
encode_codepoint(result, codepoint);
encode_codepoint(result, second_codepoint);
}
continue;
}
}
encode_codepoint(result, codepoint);
continue;
}
if (in_data.front() == '\"') {
in_data.remove_prefix(1);
advance_whitespace(in_data); // strip trailing spaces
return result;
}
default:
throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing escape sequence" };
}
size_t search_start = 1;
size_t end_pos;
while ((end_pos = in_data.find('\"', search_start)) != std::string_view::npos) {
// Quote found; check if it's escaped
if (in_data[end_pos - 1] != '\\') {
// Unescaped quote; must be end of string
break;
}
break;
search_start = end_pos + 1;
}
// End of string
case '\"':
in_data.remove_prefix(1); // strip trailing quotation
advance_whitespace(in_data); // strip trailing spaces
return result;
// Unicode sequence
default: {
auto codepoint = decode_codepoint(in_data);
if (codepoint.units == 0) {
// Invalid unicode sequence
throw std::invalid_argument{ "Invalid JSON data; unexpected token: '"s + in_data.front() + "' when parsing string" };
}
if (end_pos == std::string_view::npos) {
throw std::invalid_argument{ "Invalid JSON data; missing ending quote (\") when parsing string" };
}
// Valid unicode sequence
result.append(reinterpret_cast<const char8_t*>(in_data.data()), codepoint.units);
in_data.remove_prefix(codepoint.units);
break;
}
}
std::u8string_view string_data = jessilib::string_view_cast<char8_t>(in_data.substr(0, end_pos));
in_data.remove_prefix(string_data.size() + 1);
advance_whitespace(in_data); // strip trailing spaces
result = string_data;
if (!jessilib::apply_cpp_escape_sequences(result)) {
throw std::invalid_argument{ jessilib::join<std::string>("Invalid JSON data; invalid token or end of string: "sv, string_data) };
}
// We reached the end of the string_view before encountering an ending quote
throw std::invalid_argument{ "Invalid JSON data; missing ending quote (\") when parsing string" };
return result;
}
object read_json_number(std::string_view& in_data) {

96
src/include/jessilib/app_parameters.hpp

@ -1,5 +1,5 @@
/**
* Copyright (C) 2019 Jessica James.
* Copyright (C) 2019-2021 Jessica James.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@ -17,32 +17,94 @@
*/
#include "object.hpp"
#include "unicode.hpp"
namespace jessilib {
class app_parameters {
public:
app_parameters(int in_argc, char** in_argv);
app_parameters(int in_argc, const char** in_argv);
using string_type = std::u8string;
using string_view_type = std::u8string_view;
using set_type = std::unordered_set<string_type, jessilib::text_hashi, jessilib::text_equali>;
using map_type = std::unordered_map<string_type, string_type, jessilib::text_hashi, jessilib::text_equali>;
std::u8string_view path() const;
const std::vector<std::u8string_view>& arguments() const;
const std::vector<std::u8string_view>& switches() const;
const std::unordered_set<std::u8string_view>& switches_set() const;
const std::unordered_map<std::u8string_view, std::u8string>& values() const;
jessilib::object as_object() const;
app_parameters(int in_argc, char** in_argv, char** in_envp = nullptr);
app_parameters(int in_argc, const char** in_argv, const char** in_envp = nullptr);
app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp = nullptr);
app_parameters(int in_argc, const wchar_t** in_argv, const wchar_t** in_envp = nullptr);
app_parameters(std::vector<string_type> in_args, std::vector<string_type> in_env = {});
bool has_switch(std::u8string_view in_switch) const;
std::u8string_view get_value(std::u8string_view in_key, std::u8string_view in_default = {}) const;
[[nodiscard]] string_view_type path() const;
[[nodiscard]] const std::vector<string_type>& arguments() const;
operator jessilib::object() const { return as_object(); }
[[nodiscard]] const std::vector<string_type>& switches() const;
[[nodiscard]] const set_type& switches_set() const;
[[nodiscard]] const map_type& arg_values() const;
[[nodiscard]] const map_type& env_values() const;
[[nodiscard]] const map_type& values() const;
[[nodiscard]] jessilib::object as_object() const;
[[nodiscard]] bool has_switch(string_view_type in_switch) const;
[[nodiscard]] string_view_type get_arg_value(string_view_type in_key, string_view_type in_default = {}) const;
[[nodiscard]] string_view_type get_env_value(string_view_type in_key, string_view_type in_default = {}) const;
[[nodiscard]] string_view_type get_value(string_view_type in_key, string_view_type in_default = {}) const;
[[nodiscard]] inline operator jessilib::object() const { return as_object(); }
private:
std::u8string_view m_path;
std::vector<std::u8string_view> m_args;
std::vector<std::u8string_view> m_switches;
std::unordered_set<std::u8string_view> m_switches_set;
std::unordered_map<std::u8string_view, std::u8string> m_values;
string_type m_path;
std::vector<string_type> m_args;
std::vector<string_type> m_switches;
set_type m_switches_set;
map_type m_arg_values;
map_type m_env_values;
map_type m_values;
};
/**
 * Recodes a null-terminated array of null-terminated multi-byte strings (such as argv or envp) into
 * a vector of unicode strings.
 *
 * Each entry is recoded with mbstring_to_ustring; its success flag is discarded, so an entry which
 * fails to recode contributes whatever text was produced before the failure.
 *
 * @tparam OutCharT Unicode character data type of the resulting strings
 * @tparam InCharT Input character type; this overload participates only for (possibly cv-qualified) char
 * @param in_ntarg_array Array to recode, terminated by a null pointer; may itself be null
 * @return One recoded string per input entry; empty when in_ntarg_array is null
 */
template<typename OutCharT = char8_t, typename InCharT,
	std::enable_if_t<std::is_same_v<std::remove_cvref_t<InCharT>, char>>* = nullptr>
std::vector<std::basic_string<OutCharT>> vectorize_ntargs(InCharT** in_ntarg_array) {
	std::vector<std::basic_string<OutCharT>> recoded_args;

	if (in_ntarg_array != nullptr) {
		// Walk the array until the terminating null pointer
		InCharT** cursor = in_ntarg_array;
		while (*cursor != nullptr) {
			recoded_args.emplace_back(mbstring_to_ustring<OutCharT>(*cursor).second);
			++cursor;
		}
	}

	return recoded_args;
}
/**
 * Recodes a null-terminated array of null-terminated wide-character strings (such as a wide argv or
 * envp) into a vector of unicode strings.
 *
 * Each entry is viewed as a std::wstring_view and recoded with jessilib::string_cast.
 *
 * @tparam OutCharT Unicode character data type of the resulting strings
 * @tparam InCharT Input character type; this overload participates only for (possibly cv-qualified) wchar_t
 * @param in_ntarg_array Array to recode, terminated by a null pointer; may itself be null
 * @return One recoded string per input entry; empty when in_ntarg_array is null
 */
template<typename OutCharT = char8_t, typename InCharT,
	std::enable_if_t<std::is_same_v<std::remove_cvref_t<InCharT>, wchar_t>>* = nullptr>
std::vector<std::basic_string<OutCharT>> vectorize_ntargs(InCharT** in_ntarg_array) {
	std::vector<std::basic_string<OutCharT>> recoded_args;

	if (in_ntarg_array != nullptr) {
		// Walk the array until the terminating null pointer
		InCharT** cursor = in_ntarg_array;
		while (*cursor != nullptr) {
			recoded_args.emplace_back(jessilib::string_cast<OutCharT>(std::wstring_view{ *cursor }));
			++cursor;
		}
	}

	return recoded_args;
}
} // namespace jessilib

60
src/include/jessilib/unicode.hpp

@ -18,6 +18,7 @@
#pragma once
#include <cuchar>
#include "unicode_compare.hpp"
namespace jessilib {
@ -176,6 +177,65 @@ std::basic_string<OutCharT> string_cast(const InT& in_string) {
}
}
/**
 * Recodes a multi-byte string into a unicode-encoded string
 *
 * The input is decoded one character at a time with std::mbrtoc32 and each decoded codepoint is
 * appended to the output with encode_codepoint.
 *
 * @tparam CharT Character type for resulting unicode string
 * @param in_mbstring Multibyte string to recode
 * @return A pair containing a boolean which is true on success, and a unicode string; on failure the
 * string holds whatever was recoded before the error
 */
template<typename CharT>
std::pair<bool, std::basic_string<CharT>> mbstring_to_ustring(std::string_view in_mbstring) {
	std::pair<bool, std::basic_string<CharT>> outcome{};
	std::mbstate_t conversion_state{};

	for (;;) {
		if (in_mbstring.empty()) {
			// Everything was consumed without error
			outcome.first = true;
			break;
		}

		char32_t decoded{};
		const size_t consumed = std::mbrtoc32(&decoded, in_mbstring.data(), in_mbstring.size(), &conversion_state);
		if (consumed > in_mbstring.size()) {
			// mbrtoc32 signals errors with values larger than any valid length; give up
			outcome.first = false;
			break;
		}

		// mbrtoc32 yields 0 for a null character; still step over that one byte
		in_mbstring.remove_prefix(consumed != 0 ? consumed : size_t{1});
		encode_codepoint(outcome.second, decoded);
	}

	return outcome;
}
/**
 * Recodes a unicode string into a multi-byte string
 *
 * The input is decoded one codepoint at a time with decode_codepoint, and each codepoint is written
 * in the current multi-byte encoding with std::c32rtomb.
 *
 * @tparam CharT Character type for input unicode string
 * @param in_string Unicode string to recode
 * @return A pair containing a boolean which is true on success, and a multi-byte string; on failure
 * (undecodable input sequence, or a codepoint c32rtomb cannot represent) the boolean is false and the
 * string holds whatever was recoded before the error
 */
template<typename CharT>
std::pair<bool, std::string> ustring_to_mbstring(std::basic_string_view<CharT> in_string) {
	std::pair<bool, std::string> result;
	std::mbstate_t mbstate{};

	// MB_CUR_MAX is a runtime expression rather than a constant, so it cannot size a plain array;
	// allocate the scratch buffer once, outside of the loop
	std::string buffer(MB_CUR_MAX, '\0');

	while (!in_string.empty()) {
		decode_result decode = decode_codepoint(in_string);
		if (decode.units == 0) {
			// Invalid or truncated unicode sequence; return
			result.first = false;
			return result;
		}

		// Consume the decoded units so the loop makes progress
		in_string.remove_prefix(decode.units);

		size_t bytes_written = std::c32rtomb(buffer.data(), decode.codepoint, &mbstate);
		if (bytes_written > buffer.size()) {
			// c32rtomb returns static_cast<size_t>(-1) for an unrepresentable codepoint; return
			result.first = false;
			return result;
		}

		result.second.append(buffer.data(), bytes_written);
	}

	result.first = true;
	return result;
}
/**
* Searches a string for a specified substring
*

2
src/include/jessilib/unicode_base.hpp

@ -454,7 +454,7 @@ constexpr decode_result decode_codepoint_utf32(std::basic_string_view<CharT> in_
return { 0, 0 };
}
return { in_string.front(), 1 };
return { static_cast<char32_t>(in_string.front()), 1 };
}
template<typename CharT>

54
src/test/app_parameters.cpp

@ -22,24 +22,26 @@
using namespace jessilib;
using namespace std::literals;
template<typename CharT = char>
class ArgWrapper {
public:
template<typename... Args>
ArgWrapper(Args... in_args)
: ArgWrapper{ std::vector<std::string>{ in_args... } } {
: ArgWrapper{ std::vector<std::basic_string<CharT>>{ in_args... } } {
// Empty ctor body
}
ArgWrapper(std::vector<std::string> in_args)
ArgWrapper(std::vector<std::basic_string<CharT>> in_args)
: m_args{ in_args },
m_argv{ new const char*[in_args.size()] } {
m_argv{ new const CharT*[in_args.size() + 1] } {
// Populate m_argv
for (size_t index = 0; index != m_args.size(); ++index) {
m_argv[index] = m_args[index].c_str();
}
m_argv[in_args.size()] = nullptr; // last arg is always nullptr
}
const char** argv() const {
const CharT** argv() const {
return m_argv.get();
}
@ -48,8 +50,8 @@ public:
}
private:
std::vector<std::string> m_args;
std::unique_ptr<const char*[]> m_argv;
std::vector<std::basic_string<CharT>> m_args;
std::unique_ptr<const CharT*[]> m_argv;
};
TEST(AppParametersTest, null) {
@ -86,6 +88,21 @@ TEST(AppParametersTest, path_only) {
EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe");
}
// Wide-character variant of path_only: an argv holding only the executable path must produce that
// path as UTF-8, with no arguments, switches or values.
TEST(AppParametersTest, path_only_w) {
ArgWrapper<wchar_t> args{ L"/path/to/exe" };
app_parameters parameters{ args.argc(), args.argv() };
// Accessor-level expectations
EXPECT_EQ(parameters.path(), u8"/path/to/exe");
EXPECT_TRUE(parameters.arguments().empty());
EXPECT_TRUE(parameters.switches().empty());
EXPECT_TRUE(parameters.switches_set().empty());
EXPECT_TRUE(parameters.values().empty());
// The object representation must be non-null and expose the same path
auto obj = parameters.as_object();
EXPECT_FALSE(obj.null());
EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe");
}
TEST(AppParametersTest, single_switch) {
ArgWrapper args{ "/path/to/exe", "-switch" };
app_parameters parameters{ args.argc(), args.argv() };
@ -279,3 +296,28 @@ TEST(AppParametersTest, switch_and_value) {
EXPECT_FALSE(parameters.has_switch(u8"switch2"));
EXPECT_EQ(parameters.get_value(u8"key"), u8"value");
}
// Wide-character variant of switch_and_value: one "--switch" followed by a "-key value" pair must
// yield three arguments, one switch and one key/value entry, all recoded to UTF-8.
TEST(AppParametersTest, switch_and_value_w) {
ArgWrapper<wchar_t> args{ L"/path/to/exe", L"--switch", L"-key", L"value" };
app_parameters parameters{ args.argc(), args.argv() };
// Accessor-level expectations: counts only
EXPECT_FALSE(parameters.path().empty());
EXPECT_EQ(parameters.arguments().size(), 3U);
EXPECT_EQ(parameters.switches().size(), 1U);
EXPECT_EQ(parameters.switches_set().size(), 1U);
EXPECT_EQ(parameters.values().size(), 1U);
// Object representation: compare against the expected contents
auto obj = parameters.as_object();
std::vector<std::u8string> expected_args{ u8"--switch", u8"-key", u8"value" };
std::vector<std::u8string> expected_switches{ u8"switch" };
std::map<std::u8string, object> expected_values{ { u8"key", u8"value" } };
EXPECT_FALSE(obj.null());
EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe");
EXPECT_EQ(obj[u8"Args"], expected_args);
EXPECT_EQ(obj[u8"Switches"], expected_switches);
EXPECT_EQ(obj[u8"Values"], expected_values);
// Lookup helpers: leading dashes are stripped from the switch and key names
EXPECT_TRUE(parameters.has_switch(u8"switch"));
EXPECT_FALSE(parameters.has_switch(u8"switch2"));
EXPECT_EQ(parameters.get_value(u8"key"), u8"value");
}

2
src/test/http_query.cpp

@ -25,7 +25,7 @@ using namespace std::literals;
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
#ifdef __cpp_lib_constexpr_string
constexpr std::string query_constexpr(std::string_view in_expression) {
std::string result{ in_expression };
std::string result{ static_cast<std::string>(in_expression) };
jessilib::deserialize_http_query(result);
return result;
}

2
src/test/unicode_sequence.cpp

@ -26,7 +26,7 @@ using namespace std::literals;
// Compile-time tests for constexpr on compilers which support C++20 constexpr std::string
#ifdef __cpp_lib_constexpr_string
constexpr std::string cpp_constexpr(std::string_view in_expression) {
std::string result{ in_expression };
std::string result{ static_cast<std::string>(in_expression) };
jessilib::apply_cpp_escape_sequences(result);
return result;
}

Loading…
Cancel
Save