Browse Source

Replace ntmbs_to_u8string with mbstring_to_ustring; add ustring_to_mbstring for completeness

master
Jessica James 3 years ago
parent
commit
f57fde422b
  1. 134
      src/common/app_parameters.cpp
  2. 39
      src/include/jessilib/app_parameters.hpp
  3. 60
      src/include/jessilib/unicode.hpp

134
src/common/app_parameters.cpp

@ -17,80 +17,77 @@
*/ */
#include "app_parameters.hpp" #include "app_parameters.hpp"
#include <cuchar>
#include "jessilib/unicode.hpp" #include "jessilib/unicode.hpp"
#include "jessilib/split.hpp" #include "jessilib/split.hpp"
namespace jessilib { namespace jessilib {
// Convert null-terminated multi-byte string to UTF-8 template<typename CharT,
std::u8string ntmbs_to_u8string(const char* in_ntmbs) { std::enable_if_t<std::is_same_v<std::remove_cvref_t<CharT>, char>>* = nullptr>
std::u8string result; std::vector<app_parameters::string_type> vectorize(CharT** in_ntarg_array) {
std::vector<app_parameters::string_type> result;
std::string_view ntmbs_view = in_ntmbs; if (in_ntarg_array == nullptr) {
std::mbstate_t mbstate{};
char32_t codepoint{};
while (!ntmbs_view.empty()) {
size_t bytes_read = std::mbrtoc32(&codepoint, ntmbs_view.data(), ntmbs_view.size(), &mbstate);
if (bytes_read > ntmbs_view.size()) {
// Some sort of error; just break
break;
}
// bytes_read will never be 0 except for null characters, which are excluded from our view; here for future reuse
bytes_read = std::max(size_t{1}, bytes_read);
ntmbs_view.remove_prefix(bytes_read);
encode_codepoint(result, codepoint);
}
return result; return result;
} }
std::vector<std::u8string> vectorize(const char** in_argv) { for (auto argv = in_ntarg_array; *argv != nullptr; ++argv) {
std::vector<std::u8string> result; result.emplace_back(mbstring_to_ustring<char8_t>(*argv).second);
if (in_argv == nullptr) {
return result;
} }
for (auto argv = in_argv; *argv != nullptr; ++argv) {
result.emplace_back(ntmbs_to_u8string(*argv));
}
return result; return result;
} }
std::vector<std::u8string> vectorize(const wchar_t** in_argv) { template<typename CharT,
std::vector<std::u8string> result; std::enable_if_t<std::is_same_v<std::remove_cvref_t<CharT>, wchar_t>>* = nullptr>
if (in_argv == nullptr) { std::vector<app_parameters::string_type> vectorize(CharT** in_ntarg_array) {
std::vector<app_parameters::string_type> result;
if (in_ntarg_array == nullptr) {
return result; return result;
} }
for (auto argv = in_argv; *argv != nullptr; ++argv) { for (auto argv = in_ntarg_array; *argv != nullptr; ++argv) {
result.emplace_back(jessilib::string_cast<char8_t>(std::wstring_view{ *argv })); result.emplace_back(jessilib::string_cast<char8_t>(std::wstring_view{ *argv }));
} }
return result; return result;
} }
app_parameters::app_parameters(int in_argc, char** in_argv, char** in_envp) app_parameters::app_parameters(int, char** in_argv, char** in_envp)
: app_parameters{ in_argc, const_cast<const char**>(in_argv), const_cast<const char**>(in_envp) } { : app_parameters{ vectorize(in_argv), vectorize(in_envp) } {
// Empty ctor body // Empty ctor body
} }
app_parameters::app_parameters(int, const char** in_argv, const char** in_envp) app_parameters::app_parameters(int, const char** in_argv, const char** in_envp)
: app_parameters{ vectorize(in_argv), vectorize(in_envp) } { : app_parameters{ vectorize(in_argv), vectorize(in_envp) } {
// Empty ctor body
} }
app_parameters::app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp) app_parameters::app_parameters(int, wchar_t** in_argv, wchar_t** in_envp)
: app_parameters{ in_argc, const_cast<const wchar_t**>(in_argv), const_cast<const wchar_t**>(in_envp) } { : app_parameters{ vectorize(in_argv), vectorize(in_envp) } {
// Empty ctor body
} }
app_parameters::app_parameters(int, const wchar_t** in_argv, const wchar_t** in_envp) app_parameters::app_parameters(int, const wchar_t** in_argv, const wchar_t** in_envp)
: app_parameters{ vectorize(in_argv), vectorize(in_envp) } { : app_parameters{ vectorize(in_argv), vectorize(in_envp) } {
// Empty ctor body
}
#ifdef __cpp_lib_generic_unordered_lookup
#define WRAP_MAP_KEY(in_key) in_key
#else // We can't use std::string_view for InKeyType until GCC 11 & clang 12, and I still want to support GCC 9
#define WRAP_MAP_KEY(in_key) static_cast<string_type>(in_key)
#endif // __cpp_lib_generic_unordered_lookup
bool app_parameters::has_switch(string_view_type in_switch) const {
return m_switches_set.find(WRAP_MAP_KEY(in_switch)) != m_switches_set.end();
} }
app_parameters::app_parameters(std::vector<std::u8string> in_args, std::vector<std::u8string> in_env) { app_parameters::app_parameters(std::vector<string_type> in_args, std::vector<string_type> in_env) {
// Parse in environment variables first to ensure they're parsed before the early-out // Parse in environment variables first to ensure they're parsed before the early-out
for (const auto& env : in_env) { for (const auto& env : in_env) {
auto split = jessilib::split_once(env, u8'='); auto split = jessilib::split_once(env, u8'=');
m_env_values[split.first] = split.second; m_values[split.first] = split.second;
m_env_values[split.first] = std::move(split.second);
} }
// Sanity safety check; should never happen // Sanity safety check; should never happen
@ -102,8 +99,8 @@ app_parameters::app_parameters(std::vector<std::u8string> in_args, std::vector<s
m_path = in_args[0]; m_path = in_args[0];
// Process args // Process args
std::u8string_view key; string_view_type key;
std::u8string value; string_type value;
auto flush_value = [&key, &value, this]() { auto flush_value = [&key, &value, this]() {
// This is the start of a key; flush what we were previously processing // This is the start of a key; flush what we were previously processing
if (!key.empty()) { if (!key.empty()) {
@ -111,14 +108,16 @@ app_parameters::app_parameters(std::vector<std::u8string> in_args, std::vector<s
m_switches.emplace_back(key); m_switches.emplace_back(key);
} }
else { else {
m_arg_values.emplace(key, std::move(value)); string_type key_str{ key };
m_values[key_str] = value;
m_arg_values[key_str] = std::move(value);
value.clear(); value.clear();
} }
} }
}; };
for (size_t index = 1; index < in_args.size(); ++index) { for (size_t index = 1; index < in_args.size(); ++index) {
const std::u8string& arg = in_args[index]; const string_type& arg = in_args[index];
if (!arg.empty()) { if (!arg.empty()) {
// Check if this is a key or value // Check if this is a key or value
if (arg.front() == '-') { if (arg.front() == '-') {
@ -134,7 +133,7 @@ app_parameters::app_parameters(std::vector<std::u8string> in_args, std::vector<s
// Parse key for any value denominator ('=') // Parse key for any value denominator ('=')
size_t key_end = key.find('='); size_t key_end = key.find('=');
if (key_end != std::u8string_view::npos) { if (key_end != string_view_type::npos) {
// arg contains start of a value // arg contains start of a value
value = key.substr(key_end + 1); value = key.substr(key_end + 1);
key = key.substr(0, key_end); key = key.substr(0, key_end);
@ -162,15 +161,15 @@ app_parameters::app_parameters(std::vector<std::u8string> in_args, std::vector<s
m_switches_set = { m_switches.begin(), m_switches.end() }; m_switches_set = { m_switches.begin(), m_switches.end() };
} }
std::u8string_view app_parameters::path() const { app_parameters::string_view_type app_parameters::path() const {
return m_path; return m_path;
} }
const std::vector<std::u8string>& app_parameters::arguments() const { const std::vector<app_parameters::string_type>& app_parameters::arguments() const {
return m_args; return m_args;
} }
const std::vector<std::u8string>& app_parameters::switches() const { const std::vector<app_parameters::string_type>& app_parameters::switches() const {
return m_switches; return m_switches;
} }
@ -178,10 +177,18 @@ const app_parameters::set_type& app_parameters::switches_set() const {
return m_switches_set; return m_switches_set;
} }
const app_parameters::map_type& app_parameters::values() const { const app_parameters::map_type& app_parameters::arg_values() const {
return m_arg_values; return m_arg_values;
} }
// Read-only access to m_env_values, the map the constructor fills from the
// "key=value" environment entries it is given.
const app_parameters::map_type& app_parameters::env_values() const {
return m_env_values;
}
// Read-only access to m_values, the combined map the constructor writes both
// environment entries and parsed argument values into.
// NOTE(review): arguments appear to be stored after environment entries, so an
// argument would replace an environment entry with an equal key -- confirm.
const app_parameters::map_type& app_parameters::values() const {
return m_values;
}
object app_parameters::as_object() const { object app_parameters::as_object() const {
using namespace std::literals; using namespace std::literals;
@ -192,26 +199,17 @@ object app_parameters::as_object() const {
return object{}; return object{};
} }
return std::map<std::u8string, object>{ return std::map<string_type, object>{
{ u8"Path"s, m_path }, { u8"Path"s, m_path },
{ u8"Args"s, m_args }, { u8"Args"s, m_args },
{ u8"Switches"s, m_switches }, { u8"Switches"s, m_switches },
{ u8"Values"s, m_arg_values }, { u8"ArgValues"s, m_arg_values },
{ u8"Env"s, m_env_values } { u8"EnvValues"s, m_env_values },
{ u8"Values"s, m_values }
}; };
} }
#ifdef __cpp_lib_generic_unordered_lookup app_parameters::string_view_type app_parameters::get_arg_value(string_view_type in_key, string_view_type in_default) const {
#define WRAP_MAP_KEY(in_key) in_key
#else // We can't use std::string_view for InKeyType until GCC 11 & clang 12, and I still want to support GCC 9
#define WRAP_MAP_KEY(in_key) static_cast<std::u8string>(in_key)
#endif // __cpp_lib_generic_unordered_lookup
bool app_parameters::has_switch(std::u8string_view in_switch) const {
return m_switches_set.find(WRAP_MAP_KEY(in_switch)) != m_switches_set.end();
}
std::u8string_view app_parameters::get_arg_value(std::u8string_view in_key, std::u8string_view in_default) const {
auto result = m_arg_values.find(WRAP_MAP_KEY(in_key)); auto result = m_arg_values.find(WRAP_MAP_KEY(in_key));
// Safety check // Safety check
@ -222,7 +220,7 @@ std::u8string_view app_parameters::get_arg_value(std::u8string_view in_key, std:
return result->second; return result->second;
} }
std::u8string_view app_parameters::get_env_value(std::u8string_view in_key, std::u8string_view in_default) const { app_parameters::string_view_type app_parameters::get_env_value(string_view_type in_key, string_view_type in_default) const {
auto result = m_env_values.find(WRAP_MAP_KEY(in_key)); auto result = m_env_values.find(WRAP_MAP_KEY(in_key));
// Safety check // Safety check
@ -233,15 +231,15 @@ std::u8string_view app_parameters::get_env_value(std::u8string_view in_key, std:
return result->second; return result->second;
} }
std::u8string_view app_parameters::get_value(std::u8string_view in_key, std::u8string_view in_default) const { app_parameters::string_view_type app_parameters::get_value(string_view_type in_key, string_view_type in_default) const {
// Explicit args take priority auto result = m_values.find(WRAP_MAP_KEY(in_key));
auto result = m_arg_values.find(WRAP_MAP_KEY(in_key));
if (result != m_arg_values.end()) { // Safety check
return result->second; if (result == m_values.end()) {
return in_default;
} }
// Fallback to env return result->second;
return get_env_value(in_key, in_default);
} }
} // namespace jessilib } // namespace jessilib

39
src/include/jessilib/app_parameters.hpp

@ -23,37 +23,42 @@ namespace jessilib {
class app_parameters { class app_parameters {
public: public:
using set_type = std::unordered_set<std::u8string, jessilib::text_hashi, jessilib::text_equali>; using string_type = std::u8string;
using map_type = std::unordered_map<std::u8string, std::u8string, jessilib::text_hashi, jessilib::text_equali>; using string_view_type = std::u8string_view;
using set_type = std::unordered_set<string_type, jessilib::text_hashi, jessilib::text_equali>;
using map_type = std::unordered_map<string_type, string_type, jessilib::text_hashi, jessilib::text_equali>;
app_parameters(int in_argc, char** in_argv, char** in_envp = nullptr); app_parameters(int in_argc, char** in_argv, char** in_envp = nullptr);
app_parameters(int in_argc, const char** in_argv, const char** in_envp = nullptr); app_parameters(int in_argc, const char** in_argv, const char** in_envp = nullptr);
app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp = nullptr); app_parameters(int in_argc, wchar_t** in_argv, wchar_t** in_envp = nullptr);
app_parameters(int in_argc, const wchar_t** in_argv, const wchar_t** in_envp = nullptr); app_parameters(int in_argc, const wchar_t** in_argv, const wchar_t** in_envp = nullptr);
app_parameters(std::vector<std::u8string> in_args, std::vector<std::u8string> in_env = {}); app_parameters(std::vector<string_type> in_args, std::vector<string_type> in_env = {});
std::u8string_view path() const; [[nodiscard]] string_view_type path() const;
const std::vector<std::u8string>& arguments() const; [[nodiscard]] const std::vector<string_type>& arguments() const;
const std::vector<std::u8string>& switches() const; [[nodiscard]] const std::vector<string_type>& switches() const;
const set_type& switches_set() const; [[nodiscard]] const set_type& switches_set() const;
const map_type& values() const; [[nodiscard]] const map_type& arg_values() const;
jessilib::object as_object() const; [[nodiscard]] const map_type& env_values() const;
[[nodiscard]] const map_type& values() const;
[[nodiscard]] jessilib::object as_object() const;
bool has_switch(std::u8string_view in_switch) const; [[nodiscard]] bool has_switch(string_view_type in_switch) const;
std::u8string_view get_arg_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; [[nodiscard]] string_view_type get_arg_value(string_view_type in_key, string_view_type in_default = {}) const;
std::u8string_view get_env_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; [[nodiscard]] string_view_type get_env_value(string_view_type in_key, string_view_type in_default = {}) const;
std::u8string_view get_value(std::u8string_view in_key, std::u8string_view in_default = {}) const; [[nodiscard]] string_view_type get_value(string_view_type in_key, string_view_type in_default = {}) const;
operator jessilib::object() const { return as_object(); } [[nodiscard]] inline operator jessilib::object() const { return as_object(); }
private: private:
std::u8string m_path; string_type m_path;
std::vector<std::u8string> m_args; std::vector<string_type> m_args;
std::vector<std::u8string> m_switches; std::vector<string_type> m_switches;
set_type m_switches_set; set_type m_switches_set;
map_type m_arg_values; map_type m_arg_values;
map_type m_env_values; map_type m_env_values;
map_type m_values;
}; };
} // namespace jessilib } // namespace jessilib

60
src/include/jessilib/unicode.hpp

@ -18,6 +18,7 @@
#pragma once #pragma once
#include <climits>
#include <cuchar>
#include "unicode_compare.hpp" #include "unicode_compare.hpp"
namespace jessilib { namespace jessilib {
@ -176,6 +177,65 @@ std::basic_string<OutCharT> string_cast(const InT& in_string) {
} }
} }
/**
 * Converts a multi-byte string into a unicode string of the requested character type
 *
 * Each character is read with std::mbrtoc32 and re-encoded through encode_codepoint.
 *
 * @tparam CharT Character type of the unicode string to produce
 * @param in_mbstring Multi-byte string to convert
 * @return A pair whose boolean is true when the whole input was converted, and whose string holds the
 * converted text (on failure, only the portion converted before the error)
 */
template<typename CharT>
std::pair<bool, std::basic_string<CharT>> mbstring_to_ustring(std::string_view in_mbstring) {
	std::basic_string<CharT> recoded;
	std::mbstate_t conversion_state{};

	for (std::string_view remaining = in_mbstring; !remaining.empty();) {
		char32_t decoded{};
		const size_t consumed = std::mbrtoc32(&decoded, remaining.data(), remaining.size(), &conversion_state);

		// Error codes are huge size_t values, so they always exceed the remaining length
		if (consumed > remaining.size()) {
			return { false, std::move(recoded) };
		}

		// A result of 0 means a null character was read; step over one byte so the loop still advances
		remaining.remove_prefix(consumed == 0 ? size_t{1} : consumed);
		encode_codepoint(recoded, decoded);
	}

	return { true, std::move(recoded) };
}
/**
 * Recodes a unicode string into a multi-byte string
 *
 * Each code point is decoded with decode_codepoint and written out with std::c32rtomb.
 *
 * @tparam CharT Character type for input unicode string
 * @param in_string Unicode string to recode
 * @return A pair containing a boolean which is true when the entire input was recoded, and a multi-byte
 * string (on failure, only the portion recoded before the error)
 */
template<typename CharT>
std::pair<bool, std::string> ustring_to_mbstring(std::basic_string_view<CharT> in_string) {
	std::pair<bool, std::string> result;

	std::mbstate_t mbstate{};
	decode_result decode;
	// Parenthesize the assignment so `decode` receives the decode result itself, not the comparison's bool
	while ((decode = decode_codepoint(in_string)).units != 0) {
		// Drop the units just decoded; without this the same code point would be decoded forever
		in_string.remove_prefix(decode.units);

		// MB_LEN_MAX is a compile-time constant upper bound; MB_CUR_MAX is a runtime value and would make this a VLA
		char buffer[MB_LEN_MAX];
		size_t bytes_written = std::c32rtomb(buffer, decode.codepoint, &mbstate);
		if (bytes_written > sizeof(buffer)) {
			// c32rtomb reports failure as static_cast<size_t>(-1); the code point has no multi-byte representation
			result.first = false;
			return result;
		}

		result.second.append(buffer, bytes_written);
	}

	// The loop stops once no more units can be decoded; any input still remaining was not recoded
	result.first = in_string.empty();
	return result;
}
/** /**
* Searches a string for a specified substring * Searches a string for a specified substring
* *

Loading…
Cancel
Save