I previously spoke about command-line parsing with getopt and mentioned an alternative called argp.
Using argp is convenient
because it automatically generates --help
and --usage,
so your help text won't get out of sync with your options;
and argp also combines the long option specification with the short options,
so your short options won't get out of sync with your long options.
Using argp does require you to restructure your argument parsing though, and while the argpbook is a good guide to learn how to write new programs, if you're already familiar with command-line parsing in general, a lot of what it has to say is redundant.
So this article is about how to translate a program written to use getopt into a program that uses argp.
Converting programs that parse with getopt to use argp
This is a relatively simple program
that reports the positional arguments
and the value passed to the --foo
option.
/* test0.c */
#include <stdio.h> /* fprintf */
#include <getopt.h> /* getopt_long, struct option */
/*
 * Entry point of the getopt_long-based example.
 *
 * Parses the --foo/-f, --also-foo and --no-foo options, then prints the
 * final value of foo (or a notice that there is none) followed by one line
 * per remaining positional argument.
 *
 * Returns 1 as soon as getopt_long reports '?' (OPT_UNEXPECTED), 0 otherwise.
 */
int main(int argc, char *argv[]){
/* Names for the getopt_long return values that the switch below handles. */
enum opt {
/* Treated as "no more options": leaves the parsing loop. */
OPT_END = -1,
/* Doubles as the short option character listed in the "f:" optstring. */
OPT_FOO = 'f',
/* Not listed in the optstring, so only reachable via the long name --no-foo. */
OPT_NOFOO = 0x100,
/* Treated as a parse error: main returns 1. */
OPT_UNEXPECTED = '?',
};
/* Long option table; --foo and --also-foo share OPT_FOO, so they are handled identically. */
static const struct option longopts[] = {
{.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
{.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
{.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
/* Empty entry marking the end of the table. */
{},
};
char **positionals;
/* NULL means "no foo"; otherwise points at the most recent option argument. */
char *foo = NULL;
/* Pull one option per iteration until getopt_long signals the end. */
for (;;) {
/* Passed to getopt_long as an out-parameter; not read afterwards in this function. */
int longindex = -1;
enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
switch (opt) {
case OPT_END:
/* A plain break would only leave the switch, so jump out of the loop. */
goto end_optparse;
case OPT_FOO:
foo = optarg;
break;
case OPT_NOFOO:
/* Discards any value stored by an earlier --foo. */
foo = NULL;
break;
case OPT_UNEXPECTED:
return 1;
}
}
end_optparse:
/* optind is the index of the first argv element not consumed as an option. */
positionals = &argv[optind];
if (foo == NULL) {
fprintf(stdout, "Got no Foo\n");
} else {
fprintf(stdout, "Foo is %s\n", foo);
}
/* Walk the remaining arguments until the terminating NULL pointer of argv. */
for (; *positionals; positionals++)
fprintf(stdout, "Positional: %s\n", *positionals);
return 0;
}
Fixing control flow
The control flow for getopt_long is different from that of argp.
getopt_long is called in a loop until it has finished, effectively acting as a form of iterator, while argp_parse is called once and is passed a callback function.
Returning parsed arguments in a struct
A side-effect of this change is that we need to change how we store our results: since we only get to pass one pointer to the parse function, we need to have a state structure.
--- test0.c 2016-05-30 11:58:27.799321266 +0100
+++ test1.c 2016-05-30 12:01:06.533529250 +0100
@@ -1,7 +1,12 @@
-/* test0.c */
+/* test1.c */
#include <stdio.h> /* fprintf */
#include <getopt.h> /* getopt_long, struct option */
+struct arguments {
+ char *foo;
+ char **positionals;
+};
+
int main(int argc, char *argv[]){
enum opt {
OPT_END = -1,
@@ -15,8 +20,9 @@
{.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
{},
};
- char **positionals;
- char *foo = NULL;
+ struct arguments args = {
+ .foo = NULL,
+ };
for (;;) {
int longindex = -1;
enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
@@ -24,24 +30,24 @@
case OPT_END:
goto end_optparse;
case OPT_FOO:
- foo = optarg;
+ args.foo = optarg;
break;
case OPT_NOFOO:
- foo = NULL;
+ args.foo = NULL;
break;
case OPT_UNEXPECTED:
return 1;
}
}
end_optparse:
- positionals = &argv[optind];
+ args.positionals = &argv[optind];
- if (foo == NULL) {
+ if (args.foo == NULL) {
fprintf(stdout, "Got no Foo\n");
} else {
- fprintf(stdout, "Foo is %s\n", foo);
+ fprintf(stdout, "Foo is %s\n", args.foo);
}
- for (; *positionals; positionals++)
+ for (char **positionals = args.positionals; *positionals; positionals++)
fprintf(stdout, "Positional: %s\n", *positionals);
return 0;
}
Adding a handler function
To make the switch-over to calling argp_parse
easier,
we're going to split out the argument parsing into a function,
which calls getopt_long in a loop,
and calls a second function to actually handle the argument.
--- test1.c 2016-05-30 12:01:06.533529250 +0100
+++ test2.c 2016-05-30 13:35:19.414248340 +0100
@@ -1,4 +1,4 @@
-/* test1.c */
+/* test2.c */
#include <stdio.h> /* fprintf */
#include <getopt.h> /* getopt_long, struct option */
@@ -7,40 +7,59 @@
char **positionals;
};
+enum opt {
+ OPT_END = -1,
+ OPT_FOO = 'f',
+ OPT_NOFOO = 0x100,
+ OPT_UNEXPECTED = '?',
+};
+static const struct option longopts[] = {
+ {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
+ {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
+ {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+ {},
+};
+const char optstring[] = "f:";
+
+int parse_arg(int opt, char *arg, struct arguments *args){
+ switch (opt) {
+ case OPT_FOO:
+ args->foo = arg;
+ return 0;
+ case OPT_NOFOO:
+ args->foo = NULL;
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+int parse_args(int argc, char *argv[], struct arguments *args){
+ for (;;) {
+ int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+ if (opt == OPT_END) {
+ args->positionals = &argv[optind];
+ return 0;
+ }
+
+ int ret = parse_arg(opt, optarg, args);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+}
+
int main(int argc, char *argv[]){
- enum opt {
- OPT_END = -1,
- OPT_FOO = 'f',
- OPT_NOFOO = 0x100,
- OPT_UNEXPECTED = '?',
- };
- static const struct option longopts[] = {
- {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
- {},
- };
+ int ret = 0;
struct arguments args = {
.foo = NULL,
};
- for (;;) {
- int longindex = -1;
- enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
- switch (opt) {
- case OPT_END:
- goto end_optparse;
- case OPT_FOO:
- args.foo = optarg;
- break;
- case OPT_NOFOO:
- args.foo = NULL;
- break;
- case OPT_UNEXPECTED:
- return 1;
- }
+
+ ret = parse_args(argc, argv, &args);
+ if (ret != 0) {
+ return ret;
}
-end_optparse:
- args.positionals = &argv[optind];
if (args.foo == NULL) {
fprintf(stdout, "Got no Foo\n");
You may have noticed the inconsistency from some parameters being passed in to the handler function and some being globals.
This is a side-effect of emulating the API that argp exposes, with minimal changes to the flow of data.
Handling positional parameters as options
argp parser functions typically handle parsing the positional arguments, rather than the caller.
Unfortunately we don't currently pass the argv in to the handler function,
so we'll need to change the API a little to meet that,
by adding a struct parse_state
that includes the argv.
--- test2.c 2016-05-30 13:42:20.638208254 +0100
+++ test3.c 2016-05-30 13:45:52.232178791 +0100
@@ -1,4 +1,4 @@
-/* test2.c */
+/* test3.c */
#include <stdio.h> /* fprintf */
#include <getopt.h> /* getopt_long, struct option */
@@ -21,7 +21,13 @@
};
const char optstring[] = "f:";
-int parse_arg(int opt, char *arg, struct arguments *args){
+struct parse_state {
+ char **argv;
+ struct arguments *input;
+};
+
+int parse_arg(int opt, char *arg, struct parse_state *state){
+ struct arguments *args = state->input;
switch (opt) {
case OPT_FOO:
args->foo = arg;
@@ -29,24 +35,29 @@
case OPT_NOFOO:
args->foo = NULL;
return 0;
+ case OPT_END:
+ args->positionals = &state->argv[optind];
+ return 0;
default:
return 1;
}
}
int parse_args(int argc, char *argv[], struct arguments *args){
+ struct parse_state state = {
+ .argv = argv,
+ .input = args,
+ };
for (;;) {
int opt = getopt_long(argc, argv, optstring, longopts, NULL);
-
- if (opt == OPT_END) {
- args->positionals = &argv[optind];
- return 0;
- }
-
- int ret = parse_arg(opt, optarg, args);
+ int ret = parse_arg(opt, optarg, &state);
if (ret != 0) {
return ret;
}
+
+ if (opt == OPT_END) {
+ return 0;
+ }
}
}
Switching over to argp_parse
Now that we've changed the logic flow,
we can effectively replace parse_args
with argp_parse
.
The result is now mostly deleting code we added to change the logic flow.
Replacing parse_args
-
-int parse_args(int argc, char *argv[], struct arguments *args){
- struct parse_state state = {
- .argv = argv,
- .input = args,
- };
- for (;;) {
- int opt = getopt_long(argc, argv, optstring, longopts, NULL);
- int ret = parse_arg(opt, optarg, &state);
- if (ret != 0) {
- return ret;
- }
-
- if (opt == -1) {
- return 0;
- }
- }
-}
int main(int argc, char *argv[]){
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ };
int ret = 0;
struct arguments args = {
.foo = NULL,
};
- ret = parse_args(argc, argv, &args);
+ ret = argp_parse(&argp, argc, argv, 0, NULL, &args);
if (ret != 0) {
return ret;
}
This effectively replaces the code we had for parsing how we wanted
with a call to argp_parse
with appropriate configuration.
The static const struct argp argp
is in main
just to keep its definition local to its only user.
Strictly the static const struct argp_option opts[]
could also be moved here,
but it's easier to compare how options are specified
if it's changed in its current location
rather than moved.
Changing the options vector
@@ -8,25 +10,17 @@
};
enum opt {
- OPT_END = -1,
OPT_FOO = 'f',
OPT_NOFOO = 0x100,
- OPT_UNEXPECTED = '?',
};
-static const struct option longopts[] = {
- {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+static const struct argp_option opts[] = {
+ {.name = "foo", .key = OPT_FOO, .arg = "value"},
+ {.name = "also-foo", .key = OPT_FOO, .flags = OPTION_ALIAS},
+ {.name = "no-foo", .key = OPT_NOFOO},
{},
};
-const char optstring[] = "f:";
Because argp_parse
handles termination and unexpected options internally
we don't need OPT_END
or OPT_UNEXPECTED
any more.
struct argp_option
has similar behaviour to struct option
,
but it does not have a .flag
parameter,
so the .val
equivalent is called .key
and is used to determine which value to pass through to parse_arg
.
Rather than having .has_arg
defining whether it takes values,
the .arg
field defines whether it expects a value,
and labels it in the help output.
If an option's value is optional, then add OPTION_ARG_OPTIONAL
to .flags
.
Because argp_parse
treats any key which is printable
as a short option,
we don't need the separate option string.
Changes to parse_arg
-int parse_arg(int opt, char *arg, struct parse_state *state){
+error_t parse_arg(int opt, char *arg, struct argp_state *state){
struct arguments *args = state->input;
switch (opt) {
case OPT_FOO:
@@ -35,39 +29,26 @@
case OPT_NOFOO:
args->foo = NULL;
return 0;
- case OPT_END:
- args->positionals = &state->argv[optind];
+ case ARGP_KEY_ARGS:
+ case ARGP_KEY_NO_ARGS:
+ args->positionals = &state->argv[state->next];
return 0;
default:
- return 1;
+ return ARGP_ERR_UNKNOWN;
}
}
This is mostly the same.
The function signature has changed slightly since we pass argp's state instead,
and rather than using optind
, we use state->next
.
argp parser functions can handle arguments individually with ARGP_KEY_ARG
or all of them together with ARGP_KEY_ARGS
,
and can handle being given no arguments with ARGP_KEY_NO_ARGS
.
Since we want to treat all subsequent arguments as the positionals,
we wouldn't do this by handling ARGP_KEY_ARG
,
since then we'd need to pick the arguments individually.
We need to handle ARGP_KEY_NO_ARGS
since we haven't initialised args->positionals
to anything,
and to be a valid argument vector it needs to point to something
even if it is just a pointer to a NULL
(signifying an empty vector).
Since &state->argv[state->next]
points to the end of the array
if there were no positional parameters,
or to the next parameter if there was one,
the code is actually the same.
argp parser functions may be chained together,
so a parser function that doesn't recognise a particular option
should return ARGP_ERR_UNKNOWN
so that argp_parse
can either try a different parser function
or it can report it being unhandled as an error.
The full diff
--- test3.c 2016-05-30 13:47:01.431515079 +0100
+++ test4.c 2016-05-30 14:14:25.315137258 +0100
@@ -1,6 +1,8 @@
-/* test3.c */
+/* test4.c */
#include <stdio.h> /* fprintf */
-#include <getopt.h> /* getopt_long, struct option */
+#include <argp.h> /* argp_parse, error_t, struct argp, struct argp_option,
+ struct argp_state, OPTION_ALIAS,
+ ARGP_KEY_ARGS, ARGP_KEY_NO_ARGS, ARGP_ERR_UNKNOWN */
struct arguments {
char *foo;
@@ -8,25 +10,17 @@
};
enum opt {
- OPT_END = -1,
OPT_FOO = 'f',
OPT_NOFOO = 0x100,
- OPT_UNEXPECTED = '?',
};
-static const struct option longopts[] = {
- {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
- {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+static const struct argp_option opts[] = {
+ {.name = "foo", .key = OPT_FOO, .arg = "value"},
+ {.name = "also-foo", .key = OPT_FOO, .flags = OPTION_ALIAS},
+ {.name = "no-foo", .key = OPT_NOFOO},
{},
};
-const char optstring[] = "f:";
-struct parse_state {
- char **argv;
- struct arguments *input;
-};
-
-int parse_arg(int opt, char *arg, struct parse_state *state){
+error_t parse_arg(int opt, char *arg, struct argp_state *state){
struct arguments *args = state->input;
switch (opt) {
case OPT_FOO:
@@ -35,39 +29,26 @@
case OPT_NOFOO:
args->foo = NULL;
return 0;
- case OPT_END:
- args->positionals = &state->argv[optind];
+ case ARGP_KEY_ARGS:
+ case ARGP_KEY_NO_ARGS:
+ args->positionals = &state->argv[state->next];
return 0;
default:
- return 1;
+ return ARGP_ERR_UNKNOWN;
}
}
-
-int parse_args(int argc, char *argv[], struct arguments *args){
- struct parse_state state = {
- .argv = argv,
- .input = args,
- };
- for (;;) {
- int opt = getopt_long(argc, argv, optstring, longopts, NULL);
- int ret = parse_arg(opt, optarg, &state);
- if (ret != 0) {
- return ret;
- }
-
- if (opt == -1) {
- return 0;
- }
- }
-}
int main(int argc, char *argv[]){
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ };
int ret = 0;
struct arguments args = {
.foo = NULL,
};
- ret = parse_args(argc, argv, &args);
+ ret = argp_parse(&argp, argc, argv, 0, NULL, &args);
if (ret != 0) {
return ret;
}
Now we can see the fruits of our labour:
$ make test4
cc test4.c -o test4
$ ./test4 --help
Usage: test4 [OPTION...]
-f, --foo=value, --also-foo=value
--no-foo
-?, --help Give this help list
--usage Give a short usage message
Mandatory or optional arguments to long options are also mandatory or optional
for any corresponding short options.