feat: allow arbitrary char in opt name (#491)

* feat: Allow arbitrary characters
This commit is contained in:
Nitin Kumar 2026-02-23 12:44:43 +05:30 committed by GitHub
parent 068f6c582c
commit 370de72bfe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 164 additions and 23 deletions

View File

@ -672,6 +672,16 @@ inline bool IsFalseText(const std::string &text)
return false;
}
static inline bool valid_option_later_char(char c)
{
return c!='=' && c!=',' && !std::isspace(c, std::locale::classic()) && !std::iscntrl(c, std::locale::classic());
}
static inline bool valid_option_first_char(char c)
{
return c != '-' && valid_option_later_char(c);
}
inline OptionNames split_option_names(const std::string &text)
{
OptionNames split_names;
@ -692,25 +702,27 @@ inline OptionNames split_option_names(const std::string &text)
}
token_start_pos = next_non_space_pos;
auto next_delimiter_pos = text.find(',', token_start_pos);
if (next_delimiter_pos == token_start_pos) {
throw_or_mimic<exceptions::invalid_option_format>(text);
}
if (next_delimiter_pos == npos) {
next_delimiter_pos = length;
}
else if (next_delimiter_pos == token_start_pos) {
throw_or_mimic<exceptions::invalid_option_format>(text);
}
else if(next_delimiter_pos == length-1) {
// delimter at the end. ex : "a,ab,"
throw_or_mimic<exceptions::invalid_option_format>(text);
}
auto token_length = next_delimiter_pos - token_start_pos;
// validate the token itself matches the regex /([:alnum:][-_[:alnum:]]*/
{
const char* option_name_valid_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789"
"_-.?";
if (!std::isalnum(text[token_start_pos], std::locale::classic()) ||
text.find_first_not_of(option_name_valid_chars, token_start_pos) < next_delimiter_pos) {
if(!valid_option_first_char(text[token_start_pos])){
throw_or_mimic<exceptions::invalid_option_format>(text);
}
for(size_t i=token_start_pos+1; i<next_delimiter_pos; ++i){
if(!valid_option_later_char(text[i])) {
throw_or_mimic<exceptions::invalid_option_format>(text);
}
}
}
split_names.emplace_back(text.substr(token_start_pos, token_length));
token_start_pos = next_delimiter_pos + 1;
@ -726,11 +738,11 @@ inline ArguDesc ParseArgument(const char *arg, bool &matched)
if (strncmp(pdata, "--", 2) == 0)
{
pdata += 2;
if (isalnum(*pdata, std::locale::classic()))
if (valid_option_first_char(*pdata))
{
argu_desc.arg_name.push_back(*pdata);
pdata += 1;
while (isalnum(*pdata, std::locale::classic()) || *pdata == '-' || *pdata == '_' || *pdata == '.')
while (valid_option_later_char(*pdata))
{
argu_desc.arg_name.push_back(*pdata);
pdata += 1;
@ -757,7 +769,7 @@ inline ArguDesc ParseArgument(const char *arg, bool &matched)
else if (strncmp(pdata, "-", 1) == 0)
{
pdata += 1;
if(isalnum(*pdata, std::locale::classic())) {
if(valid_option_first_char(*pdata)) {
// If we have '=' right after first alnum, its a match.
if(*(pdata+1) == '=') {
argu_desc.arg_name.push_back(*pdata);
@ -777,6 +789,13 @@ inline ArguDesc ParseArgument(const char *arg, bool &matched)
#else // CXXOPTS_NO_REGEX
namespace {
#define CXXOPTS_RE_NAME_START "[^-=,[:space:][:cntrl:]]"
#define CXXOPTS_RE_NAME_CHAR "[^=,[:space:][:cntrl:]]"
#define CXXOPTS_RE_NAME CXXOPTS_RE_NAME_START CXXOPTS_RE_NAME_CHAR "*"
#define CXXOPTS_RE_LONG_NAME CXXOPTS_RE_NAME_START CXXOPTS_RE_NAME_CHAR "+"
#define CXXOPTS_RE_SHORT_NAME CXXOPTS_RE_NAME_START
CXXOPTS_LINKONCE
const char* const integer_pattern =
"(-)?(0x)?([0-9a-zA-Z]+)|((0x)?0)";
@ -788,12 +807,12 @@ const char* const falsy_pattern =
"(f|F)(alse)?|0";
CXXOPTS_LINKONCE
const char* const option_pattern =
"--([[:alnum:]][-_[:alnum:]\\.]+)(=(.*))?|-([[:alnum:]])((=(.*))|(.*))";
// <-------Long Option--------------------> <-----Short Option------->
"--(" CXXOPTS_RE_LONG_NAME ")(=(.*))?|-(" CXXOPTS_RE_SHORT_NAME ")((=(.*))|(.*))";
// <-------Long Option---------------> <-------------Short Option--------------->
// Groups :
// <---------1------------------><--2--> <--4--------><-----5------>
// <-3> <--6--> <-8>
// <-7>
// <---------1--------------><--2--> <-----------4-------------><-----5------>
// <-3> <--6--> <-8>
// <-7>
const int LONG_NAME_IDX=1;
const int LONG_MATCH_IDX=2;
const int LONG_MATCH_VALUE_IDX=3;
@ -804,10 +823,16 @@ const int SHORT_GROUPING_IDX=8;
CXXOPTS_LINKONCE
const char* const option_specifier_pattern =
"([[:alnum:]][-_[:alnum:]\\.]*)(,[ ]*[[:alnum:]][-_[:alnum:]]*)*";
"(" CXXOPTS_RE_NAME ")(,[ ]*" CXXOPTS_RE_NAME ")*";
CXXOPTS_LINKONCE
const char* const option_specifier_separator_pattern = ", *";
#undef CXXOPTS_RE_NAME_START
#undef CXXOPTS_RE_NAME_CHAR
#undef CXXOPTS_RE_NAME
#undef CXXOPTS_RE_LONG_NAME
#undef CXXOPTS_RE_SHORT_NAME
} // namespace
inline IntegerDesc SplitInteger(const std::string &text)

View File

@ -277,6 +277,122 @@ TEST_CASE("Providing options values via equal sign", "[options]")
}
}
TEST_CASE("Valid/Invalid Option names", "[options]")
{
const char prog_name[] = "test_invalid_opt_names";
std::vector<std::pair<std::string, std::string>> invalid_opts = {
{"Equals short name", "="},
{"Dash short name", "-"},
{"Long name starts with =", "a,=abcd"},
{"Long name starts with dash", "a,-abcd"},
{"Whitespace as name", " "},
{"Name is a space", "a, "},
{"Only comma delimiter given", ","},
{"Leading comma", ",a"},
{"Trailing comma after names", "a,ab,"},
{"Double comma", "a,,ab"},
{"Empty option name string", ""},
{"Control character in name", "\x03"},
{"Tab inside option name", "a,ab,\tac"},
{"Equals inside option name", "a,ab,ac,ab=cd"},
{"Two simple option names", "a,b"},
};
std::vector<std::pair<std::string, std::string>> valid_opts = {
{"Basic", "a,ab"},
{"Spaces", "a, ab, xyz"},
{"Arbitrary characters", "#,%%%,/><>?"},
};
for(const auto& t: invalid_opts) {
SECTION(t.first){
cxxopts::Options options(prog_name, " - Option names as invalid characters");
auto option_adder = options.add_options();
CHECK_THROWS(option_adder(t.second, "description", cxxopts::value<std::string>()));
}
}
for(const auto& t: valid_opts) {
SECTION(t.first){
cxxopts::Options options(prog_name, " - Option names as invalid characters");
auto option_adder = options.add_options();
CHECK_NOTHROW(option_adder(t.second, "description", cxxopts::value<std::string>()));
}
}
}
TEST_CASE("Option names as arbitrary characters and values parsing", "[options]")
{
const char prog_name[] = "test_name_chars";
const char implicit_value[] = "implicit";
struct testcases
{
std::string name;
std::vector<std::string> opt_names;
Argv argv;
std::vector<std::pair<std::string, std::string>> expValues;
bool parseException;
bool setImplicit;
} tests[] = {
{
"Arbitrary chars",
{"#,^^","$$$"},
Argv{prog_name, "-#", "abc", "--$$$=@#$$"},
{{"#", "abc"}, {"$$$", "@#$$"}},
false,
false,
},
{
"Arbitrary chars and spaces",
{"*, ab-cd","ab_cd"},
Argv{prog_name, "--ab-cd", "xyz","--ab_cd={}()"},
{{"ab-cd", "xyz"}, {"ab_cd", "{}()"}},
false,
false,
},
{
"Arbitrary character as single dot",
{"."},
Argv{prog_name, "-.=.."},
{{".", ".."}},
false,
false,
},
{
"Multiple Arbitrary characters as short options",
{"@",":","?","[","}","*","~","!"},
Argv{prog_name, "-@:?[}*~!"},
{{"@", implicit_value}, {"[", implicit_value}, {"!", implicit_value}},
false,
true,
}
};
for(const auto& tc : tests) {
SECTION(tc.name){
cxxopts::Options options(prog_name, " - Option names as arbitrary characters");
auto option_adder = options.add_options();
for(const auto& opt_name: tc.opt_names) {
if(tc.setImplicit){
option_adder(opt_name, "description", cxxopts::value<std::string>()->implicit_value(implicit_value));
}
else{
option_adder(opt_name, "description", cxxopts::value<std::string>());
}
}
if(tc.parseException) {
CHECK_THROWS(options.parse(tc.argv.argc(), tc.argv.argv()));
continue;
}
auto result = options.parse(tc.argv.argc(), tc.argv.argv());
for (const auto& p : tc.expValues) {
CHECK(result[p.first].as<std::string>() == p.second);
}
}
}
}
TEST_CASE("No positional", "[positional]")
{
cxxopts::Options options("test_no_positional",
@ -951,7 +1067,7 @@ TEST_CASE("Allow bad short syntax", "[options]") {
Argv av({
"--ab?",
"-?b?#@"
"-=?b?#@"
});
auto** argv = av.argv();
@ -965,7 +1081,7 @@ TEST_CASE("Allow bad short syntax", "[options]") {
options.allow_unrecognised_options();
CHECK_NOTHROW(options.parse(argc, argv));
REQUIRE(argc == 2);
CHECK_THAT(argv[1], Catch::Equals("-?b?#@"));
CHECK_THAT(argv[1], Catch::Equals("-=?b?#@"));
}
}