From 1ad9b02fffcc6264585d5f73e0ce8b32b390f7bf Mon Sep 17 00:00:00 2001 From: Jessica James Date: Mon, 6 Dec 2021 22:22:50 -0600 Subject: [PATCH] Add wchar_t and envp support, as well as utf8 conversion, to app_parameters --- src/common/app_parameters.cpp | 144 ++++++++++++++++++++---- src/include/jessilib/app_parameters.hpp | 29 +++-- src/include/jessilib/unicode_base.hpp | 2 +- src/test/app_parameters.cpp | 54 ++++++++- 4 files changed, 186 insertions(+), 43 deletions(-) diff --git a/src/common/app_parameters.cpp b/src/common/app_parameters.cpp index e00532f..63b3851 100644 --- a/src/common/app_parameters.cpp +++ b/src/common/app_parameters.cpp @@ -17,24 +17,89 @@ */ #include "app_parameters.hpp" +#include +#include "jessilib/unicode.hpp" +#include "jessilib/split.hpp" namespace jessilib { -app_parameters::app_parameters(int in_argc, char** in_argv) - : app_parameters{ in_argc, const_cast(in_argv) } { +// Convert null-terminated multi-byte string to UTF-8 +std::u8string ntmbs_to_u8string(const char* in_ntmbs) { + std::u8string result; + + std::string_view ntmbs_view = in_ntmbs; + std::mbstate_t mbstate{}; + char32_t codepoint{}; + while (!ntmbs_view.empty()) { + size_t bytes_read = std::mbrtoc32(&codepoint, ntmbs_view.data(), ntmbs_view.size(), &mbstate); + if (bytes_read > ntmbs_view.size()) { + // Some sort of error; just break + break; + } + // bytes_read will never be 0 except for null characters, which are excluded from our view; here for future reuse + bytes_read = std::max(size_t{1}, bytes_read); + ntmbs_view.remove_prefix(bytes_read); + encode_codepoint(result, codepoint); + } + + return result; +} + +std::vector vectorize(const char** in_argv) { + std::vector result; + if (in_argv == nullptr) { + return result; + } + + for (auto argv = in_argv; *argv != nullptr; ++argv) { + result.emplace_back(ntmbs_to_u8string(*argv)); + } + return result; +} + +std::vector vectorize(const wchar_t** in_argv) { + std::vector result; + if (in_argv == nullptr) { + return result; + } + + for (auto argv = in_argv; *argv != nullptr; ++argv) { + result.emplace_back(jessilib::string_cast(std::wstring_view{ *argv })); + } + return result; +} + +app_parameters::app_parameters(int in_argc, char** in_argv, char** in_envp) + : app_parameters{ in_argc, const_cast(in_argv), const_cast(in_envp) } { // Empty ctor body } -app_parameters::app_parameters(int in_argc, const char** in_argv) { - // TODO: discard argc/argv and use GetCommandLineW on Windows - // TODO: not assume argv is utf-8; it often will not be +app_parameters::app_parameters(int, const char** in_argv, const char** in_envp) + : app_parameters{ vectorize(in_argv), vectorize(in_envp) } { +} + +app_parameters::app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp) + : app_parameters{ in_argc, const_cast(in_argv), const_cast(in_envp) } { +} + +app_parameters::app_parameters(int, const wchar_t** in_argv, const wchar_t** in_envp) + : app_parameters{ vectorize(in_argv), vectorize(in_envp) } { +} + +app_parameters::app_parameters(std::vector in_args, std::vector in_env) { + // Parse in environment variables first to ensure they're parsed before the early-out + for (const auto& env : in_env) { + auto split = jessilib::split_once(env, u8'='); + m_env_values[split.first] = split.second; + } + // Sanity safety check; should never happen - if (in_argc <= 0 || in_argv == nullptr) { + if (in_args.empty()) { return; } // Populate path - m_path = reinterpret_cast(in_argv[0]); + m_path = in_args[0]; // Process args std::u8string_view key; @@ -46,23 +111,24 @@ app_parameters::app_parameters(int in_argc, const char** in_argv) { m_switches.emplace_back(key); } else { - m_values.emplace(key, std::move(value)); + m_arg_values.emplace(key, std::move(value)); value.clear(); } } }; - for (int index = 1; index < in_argc; ++index) { - const char8_t* arg = reinterpret_cast(in_argv[index]); - if (arg != nullptr && *arg != '\0') { + for (size_t index = 1; index < in_args.size(); ++index) { + const std::u8string& arg = in_args[index]; + if (!arg.empty()) { // Check if this is a key or value - if (*arg == '-') { + if (arg.front() == '-') { // Flush pending value (if any) flush_value(); // Strip any leading '-' or '--' and set key - key = arg + 1; - if (key[0] == '-') { + key = arg; + key.remove_prefix(1); + if (key.front() == '-') { key.remove_prefix(1); } @@ -93,27 +159,27 @@ app_parameters::app_parameters(int in_argc, const char** in_argv) { flush_value(); // Populate m_switches_set from m_switches - m_switches_set = std::unordered_set{ m_switches.begin(), m_switches.end() }; + m_switches_set = std::unordered_set{ m_switches.begin(), m_switches.end() }; } std::u8string_view app_parameters::path() const { return m_path; } -const std::vector& app_parameters::arguments() const { +const std::vector& app_parameters::arguments() const { return m_args; } -const std::vector& app_parameters::switches() const { +const std::vector& app_parameters::switches() const { return m_switches; } -const std::unordered_set& app_parameters::switches_set() const { +const std::unordered_set& app_parameters::switches_set() const { return m_switches_set; } -const std::unordered_map& app_parameters::values() const { - return m_values; +const std::unordered_map& app_parameters::values() const { + return m_arg_values; } object app_parameters::as_object() const { @@ -130,23 +196,51 @@ object app_parameters::as_object() const { { u8"Path"s, m_path }, { u8"Args"s, m_args }, { u8"Switches"s, m_switches }, - { u8"Values"s, m_values } + { u8"Values"s, m_arg_values } }; } +#ifdef __cpp_lib_generic_unordered_lookup +#define WRAP_MAP_KEY(in_key) in_key +#else // We can't use std::string_view for InKeyType until GCC 11 & clang 12, and I still want to support GCC 9 +#define WRAP_MAP_KEY(in_key) static_cast(in_key) +#endif // __cpp_lib_generic_unordered_lookup + bool app_parameters::has_switch(std::u8string_view in_switch) const { - return m_switches_set.find(in_switch) != m_switches_set.end(); + return m_switches_set.find(WRAP_MAP_KEY(in_switch)) != m_switches_set.end(); } -std::u8string_view app_parameters::get_value(std::u8string_view in_key, std::u8string_view in_default) const { - auto result = m_values.find(in_key); +std::u8string_view app_parameters::get_arg_value(std::u8string_view in_key, std::u8string_view in_default) const { + auto result = m_arg_values.find(WRAP_MAP_KEY(in_key)); // Safety check - if (result == m_values.end()) { + if (result == m_arg_values.end()) { return in_default; } return result->second; } +std::u8string_view app_parameters::get_env_value(std::u8string_view in_key, std::u8string_view in_default) const { + auto result = m_env_values.find(WRAP_MAP_KEY(in_key)); + + // Safety check + if (result == m_env_values.end()) { + return in_default; + } + + return result->second; +} + +std::u8string_view app_parameters::get_value(std::u8string_view in_key, std::u8string_view in_default) const { + // Explicit args take priority + auto result = m_arg_values.find(WRAP_MAP_KEY(in_key)); + if (result != m_arg_values.end()) { + return result->second; + } + + // Fallback to env + return get_env_value(in_key, in_default); +} + } // namespace jessilib diff --git a/src/include/jessilib/app_parameters.hpp b/src/include/jessilib/app_parameters.hpp index d1a98ca..de2fd95 100644 --- a/src/include/jessilib/app_parameters.hpp +++ b/src/include/jessilib/app_parameters.hpp @@ -22,27 +22,34 @@ namespace jessilib { class app_parameters { public: - app_parameters(int in_argc, char** in_argv); - app_parameters(int in_argc, const char** in_argv); + app_parameters(int in_argc, char** in_argv, char** in_envp = nullptr); + app_parameters(int in_argc, const char** in_argv, const char** in_envp = nullptr); + app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp = nullptr); + app_parameters(int in_argc, const wchar_t** in_argv, const wchar_t** in_envp = nullptr); + app_parameters(std::vector in_args, std::vector in_env = {}); std::u8string_view path() const; - const std::vector& arguments() const; - const std::vector& switches() const; - const std::unordered_set& switches_set() const; - const std::unordered_map& values() const; + const std::vector& arguments() const; + + const std::vector& switches() const; + const std::unordered_set& switches_set() const; + const std::unordered_map& values() const; jessilib::object as_object() const; bool has_switch(std::u8string_view in_switch) const; + std::u8string_view get_arg_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; + std::u8string_view get_env_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; std::u8string_view get_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; operator jessilib::object() const { return as_object(); } private: - std::u8string_view m_path; - std::vector m_args; - std::vector m_switches; - std::unordered_set m_switches_set; - std::unordered_map m_values; + std::u8string m_path; + std::vector m_args; + std::vector m_switches; + std::unordered_set m_switches_set; + std::unordered_map m_arg_values; + std::unordered_map m_env_values; }; } // namespace jessilib diff --git a/src/include/jessilib/unicode_base.hpp b/src/include/jessilib/unicode_base.hpp index 83cda30..d3c08ab 100644 --- a/src/include/jessilib/unicode_base.hpp +++ b/src/include/jessilib/unicode_base.hpp @@ -454,7 +454,7 @@ constexpr decode_result decode_codepoint_utf32(std::basic_string_view in_ return { 0, 0 }; } - return { in_string.front(), 1 }; + return { static_cast(in_string.front()), 1 }; } template diff --git a/src/test/app_parameters.cpp b/src/test/app_parameters.cpp index d877ee7..557ea8f 100644 --- a/src/test/app_parameters.cpp +++ b/src/test/app_parameters.cpp @@ -22,24 +22,26 @@ using namespace jessilib; using namespace std::literals; +template class ArgWrapper { public: template ArgWrapper(Args... in_args) - : ArgWrapper{ std::vector{ in_args... } } { + : ArgWrapper{ std::vector>{ in_args... } } { // Empty ctor body } - ArgWrapper(std::vector in_args) + ArgWrapper(std::vector> in_args) : m_args{ in_args }, - m_argv{ new const char*[in_args.size()] } { + m_argv{ new const CharT*[in_args.size() + 1] } { // Populate m_argv for (size_t index = 0; index != m_args.size(); ++index) { m_argv[index] = m_args[index].c_str(); } + m_argv[in_args.size()] = nullptr; // last arg is always nullptr } - const char** argv() const { + const CharT** argv() const { return m_argv.get(); } @@ -48,8 +50,8 @@ public: } private: - std::vector m_args; - std::unique_ptr m_argv; + std::vector> m_args; + std::unique_ptr m_argv; }; TEST(AppParametersTest, null) { @@ -86,6 +88,21 @@ TEST(AppParametersTest, path_only) { EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe"); } +TEST(AppParametersTest, path_only_w) { + ArgWrapper args{ L"/path/to/exe" }; + app_parameters parameters{ args.argc(), args.argv() }; + + EXPECT_EQ(parameters.path(), u8"/path/to/exe"); + EXPECT_TRUE(parameters.arguments().empty()); + EXPECT_TRUE(parameters.switches().empty()); + EXPECT_TRUE(parameters.switches_set().empty()); + EXPECT_TRUE(parameters.values().empty()); + + auto obj = parameters.as_object(); + EXPECT_FALSE(obj.null()); + EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe"); +} + TEST(AppParametersTest, single_switch) { ArgWrapper args{ "/path/to/exe", "-switch" }; app_parameters parameters{ args.argc(), args.argv() }; @@ -279,3 +296,28 @@ TEST(AppParametersTest, switch_and_value) { EXPECT_FALSE(parameters.has_switch(u8"switch2")); EXPECT_EQ(parameters.get_value(u8"key"), u8"value"); } + +TEST(AppParametersTest, switch_and_value_w) { + ArgWrapper args{ L"/path/to/exe", L"--switch", L"-key", L"value" }; + app_parameters parameters{ args.argc(), args.argv() }; + + EXPECT_FALSE(parameters.path().empty()); + EXPECT_EQ(parameters.arguments().size(), 3U); + EXPECT_EQ(parameters.switches().size(), 1U); + EXPECT_EQ(parameters.switches_set().size(), 1U); + EXPECT_EQ(parameters.values().size(), 1U); + + auto obj = parameters.as_object(); + std::vector expected_args{ u8"--switch", u8"-key", u8"value" }; + std::vector expected_switches{ u8"switch" }; + std::map expected_values{ { u8"key", u8"value" } }; + EXPECT_FALSE(obj.null()); + EXPECT_EQ(obj[u8"Path"], u8"/path/to/exe"); + EXPECT_EQ(obj[u8"Args"], expected_args); + EXPECT_EQ(obj[u8"Switches"], expected_switches); + EXPECT_EQ(obj[u8"Values"], expected_values); + + EXPECT_TRUE(parameters.has_switch(u8"switch")); + EXPECT_FALSE(parameters.has_switch(u8"switch2")); + EXPECT_EQ(parameters.get_value(u8"key"), u8"value"); +}