I previously spoke about command-line parsing with getopt and mentioned an alternative called argp.

Using argp is convenient because it automatically generates --help and --usage, so your help text won't get out of sync with your options; argp also combines the long option specification with the short options, so your short options won't get out of sync with your long options.

Using argp does require you to restructure your argument parsing, though, and while the argpbook is a good guide for learning how to write new programs, if you're already familiar with command-line parsing in general, a lot of what it has to say is redundant.

So this article is about how to translate a program written to use getopt into a program that uses argp.

Converting programs that parse with getopt to use argp

This is a relatively simple program that reports the positional arguments and the value passed to the --foo option.

/* test0.c */
#include <stdio.h> /* fprintf */
#include <getopt.h> /* getopt_long, struct option */

/*
 * Parse the command line with getopt_long, then print the value given to
 * --foo (if any) followed by every positional argument.
 *
 * Returns 0 on success, or 1 if getopt_long reports an unexpected option.
 */
int main(int argc, char *argv[]){
    /* Values returned by getopt_long that the switch below dispatches on. */
    enum opt {
        OPT_END = -1,         /* getopt_long has no more options to return */
        OPT_FOO = 'f',        /* also the short option character in "f:" */
        OPT_NOFOO = 0x100,    /* long-only: outside the single-character range */
        OPT_UNEXPECTED = '?', /* getopt_long's error return */
    };
    static const struct option longopts[] = {
        /* --foo and --also-foo share a .val, so they are handled identically */
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
        {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
        {}, /* all-zero entry terminates the table */
    };
    char **positionals;
    char *foo = NULL; /* NULL means "no foo"; the last --foo/--no-foo wins */
    for (;;) {
        int longindex = -1; /* filled in by getopt_long; not read afterwards */
        enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
        switch (opt) {
        case OPT_END:
            /* break would only leave the switch, so jump out of the loop */
            goto end_optparse;
        case OPT_FOO:
            foo = optarg;
            break;
        case OPT_NOFOO:
            foo = NULL;
            break;
        case OPT_UNEXPECTED:
            return 1;
        }
    }
end_optparse:
    /* optind indexes the first argv element that was not parsed as an option */
    positionals = &argv[optind];

    if (foo == NULL) {
        fprintf(stdout, "Got no Foo\n");
    } else {
        fprintf(stdout, "Foo is %s\n", foo);
    }
    /* argv is NULL-terminated, so walk until the terminating NULL */
    for (; *positionals; positionals++)
        fprintf(stdout, "Positional: %s\n", *positionals);
    return 0;
}

Fixing control flow

The control flow for getopt_long is different to argp.

The getopt_long function is called in a loop until it has finished, effectively acting as a form of iterator, while argp_parse is called once and is passed a callback function.

Returning parsed arguments in a struct

A side-effect of this change is that we need to change how we store our results: since we only get to pass one pointer to the parse function, we need to have a state structure.

--- test0.c    2016-05-30 11:58:27.799321266 +0100
+++ test1.c    2016-05-30 12:01:06.533529250 +0100
@@ -1,7 +1,12 @@
-/* test0.c */
+/* test1.c */
 #include <stdio.h> /* fprintf */
 #include <getopt.h> /* getopt_long, struct option */
 
+struct arguments {
+    char *foo;
+    char **positionals;
+};
+
 int main(int argc, char *argv[]){
     enum opt {
         OPT_END = -1,
@@ -15,8 +20,9 @@
         {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
         {},
     };
-    char **positionals;
-    char *foo = NULL;
+    struct arguments args = {
+        .foo = NULL,
+    };
     for (;;) {
         int longindex = -1;
         enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
@@ -24,24 +30,24 @@
         case OPT_END:
             goto end_optparse;
         case OPT_FOO:
-            foo = optarg;
+            args.foo = optarg;
             break;
         case OPT_NOFOO:
-            foo = NULL;
+            args.foo = NULL;
             break;
         case OPT_UNEXPECTED:
             return 1;
         }
     }
 end_optparse:
-    positionals = &argv[optind];
+    args.positionals = &argv[optind];
 
-    if (foo == NULL) {
+    if (args.foo == NULL) {
         fprintf(stdout, "Got no Foo\n");
     } else {
-        fprintf(stdout, "Foo is %s\n", foo);
+        fprintf(stdout, "Foo is %s\n", args.foo);
     }
-    for (; *positionals; positionals++)
+    for (char **positionals = args.positionals; *positionals; positionals++)
         fprintf(stdout, "Positional: %s\n", *positionals);
     return 0;
 }

Adding a handler function

To make the switch-over to calling argp_parse easier, we're going to split out the argument parsing into a function, which calls getopt_long in a loop, and calls a second function to actually handle the argument.

--- test1.c    2016-05-30 12:01:06.533529250 +0100
+++ test2.c    2016-05-30 13:35:19.414248340 +0100
@@ -1,4 +1,4 @@
-/* test1.c */
+/* test2.c */
 #include <stdio.h> /* fprintf */
 #include <getopt.h> /* getopt_long, struct option */
 
@@ -7,40 +7,59 @@
     char **positionals;
 };
 
+enum opt {
+    OPT_END = -1,
+    OPT_FOO = 'f',
+    OPT_NOFOO = 0x100,
+    OPT_UNEXPECTED = '?',
+};
+static const struct option longopts[] = {
+    {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
+    {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
+    {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+    {},
+};
+const char optstring[] = "f:";
+
+int parse_arg(int opt, char *arg, struct arguments *args){
+    switch (opt) {
+    case OPT_FOO:
+        args->foo = arg;
+        return 0;
+    case OPT_NOFOO:
+        args->foo = NULL;
+        return 0;
+    default:
+        return 1;
+    }
+}
+
+int parse_args(int argc, char *argv[], struct arguments *args){
+    for (;;) {
+        int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+        if (opt == OPT_END) {
+            args->positionals = &argv[optind];
+            return 0;
+        }
+
+        int ret = parse_arg(opt, optarg, args);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+}
+
 int main(int argc, char *argv[]){
-    enum opt {
-        OPT_END = -1,
-        OPT_FOO = 'f',
-        OPT_NOFOO = 0x100,
-        OPT_UNEXPECTED = '?',
-    };
-    static const struct option longopts[] = {
-        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
-        {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
-        {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
-        {},
-    };
+    int ret = 0;
     struct arguments args = {
         .foo = NULL,
     };
-    for (;;) {
-        int longindex = -1;
-        enum opt opt = getopt_long(argc, argv, "f:", longopts, &longindex);
-        switch (opt) {
-        case OPT_END:
-            goto end_optparse;
-        case OPT_FOO:
-            args.foo = optarg;
-            break;
-        case OPT_NOFOO:
-            args.foo = NULL;
-            break;
-        case OPT_UNEXPECTED:
-            return 1;
-        }
+
+    ret = parse_args(argc, argv, &args);
+    if (ret != 0) {
+        return ret;
     }
-end_optparse:
-    args.positionals = &argv[optind];
 
     if (args.foo == NULL) {
         fprintf(stdout, "Got no Foo\n");

You may have noticed the inconsistency from some parameters being passed in to the handler function and some being globals.

This is a side-effect of emulating the API that argp exposes, with minimal changes to the flow of data.

Handling positional parameters as options

argp parser functions typically handle parsing the positional arguments, rather than the caller.

Unfortunately we don't currently pass the argv in to the handler function, so we'll need to change the API a little to meet that, by adding a struct parse_state that includes the argv.

--- test2.c    2016-05-30 13:42:20.638208254 +0100
+++ test3.c    2016-05-30 13:45:52.232178791 +0100
@@ -1,4 +1,4 @@
-/* test2.c */
+/* test3.c */
 #include <stdio.h> /* fprintf */
 #include <getopt.h> /* getopt_long, struct option */
 
@@ -21,7 +21,13 @@
 };
 const char optstring[] = "f:";
 
-int parse_arg(int opt, char *arg, struct arguments *args){
+struct parse_state {
+    char **argv;
+    struct arguments *input;
+};
+
+int parse_arg(int opt, char *arg, struct parse_state *state){
+    struct arguments *args = state->input;   
     switch (opt) {
     case OPT_FOO:
         args->foo = arg;
@@ -29,24 +35,29 @@
     case OPT_NOFOO:
         args->foo = NULL;
         return 0;
+    case OPT_END:
+        args->positionals = &state->argv[optind];
+        return 0;
     default:
         return 1;
     }
 }
 
 int parse_args(int argc, char *argv[], struct arguments *args){
+    struct parse_state state = {
+        .argv = argv,
+        .input = args,
+    };
     for (;;) {
         int opt = getopt_long(argc, argv, optstring, longopts, NULL);
-
-        if (opt == OPT_END) {
-            args->positionals = &argv[optind];
-            return 0;
-        }
-
-        int ret = parse_arg(opt, optarg, args);
+        int ret = parse_arg(opt, optarg, &state);
         if (ret != 0) {
             return ret;
         }
+
+        if (opt == OPT_END) {
+            return 0;
+        }
     }
 }

Switching over to argp_parse

Now that we've changed the logic flow, we can effectively substitute parse_args for argp_parse.

The result is now mostly deleting code we added to change the logic flow.

Replacing parse_args

-
-int parse_args(int argc, char *argv[], struct arguments *args){
-    struct parse_state state = {
-        .argv = argv,
-        .input = args,
-    };
-    for (;;) {
-        int opt = getopt_long(argc, argv, optstring, longopts, NULL);
-        int ret = parse_arg(opt, optarg, &state);
-        if (ret != 0) {
-            return ret;
-        }
-
-        if (opt == -1) {
-            return 0;
-        }
-    }
-}
 
 int main(int argc, char *argv[]){
+    static const struct argp argp = {
+        .options = opts,
+        .parser = parse_arg,
+    };
     int ret = 0;
     struct arguments args = {
         .foo = NULL,
     };
 
-    ret = parse_args(argc, argv, &args);
+    ret = argp_parse(&argp, argc, argv, 0, NULL, &args);
     if (ret != 0) {
         return ret;
     }

This effectively replaces the code we had written to parse the arguments ourselves with a single call to argp_parse, given the appropriate configuration.

The static const struct argp argp is in main just to keep its definition local to its only user.

Strictly the static const struct argp_option opts[] could also be moved here, but it's easier to compare how options are specified if it's changed in its current location rather than moved.

Changing the options vector

@@ -8,25 +10,17 @@
 };
 
 enum opt {
-    OPT_END = -1,
     OPT_FOO = 'f',
     OPT_NOFOO = 0x100,
-    OPT_UNEXPECTED = '?',
 };
-static const struct option longopts[] = {
-    {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
-    {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
-    {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+static const struct argp_option opts[] = {
+    {.name = "foo", .key = OPT_FOO, .arg = "value"},
+    {.name = "also-foo", .key = OPT_FOO, .flags = OPTION_ALIAS},
+    {.name = "no-foo", .key = OPT_NOFOO},
     {},
 };
-const char optstring[] = "f:";

Because argp_parse handles termination and unexpected options internally we don't need OPT_END or OPT_UNEXPECTED any more.

struct argp_option has similar behaviour to struct option, but it does not have a .flag parameter, so the .val equivalent is called .key and is used to determine which value to pass through to parse_arg.

Rather than having .has_arg defining whether it takes values, the .arg field defines whether it expects a value, and labels it in the help output.

If an option's value is optional, then add OPTION_ARG_OPTIONAL to .flags.

Because argp_parse treats any key which is printable as a short option, we don't need the separate option string.

Changes to parse_arg

-int parse_arg(int opt, char *arg, struct parse_state *state){
+error_t parse_arg(int opt, char *arg, struct argp_state *state){
     struct arguments *args = state->input;   
     switch (opt) {
     case OPT_FOO:
@@ -35,39 +29,26 @@
     case OPT_NOFOO:
         args->foo = NULL;
         return 0;
-    case OPT_END:
-        args->positionals = &state->argv[optind];
+    case ARGP_KEY_ARGS:
+    case ARGP_KEY_NO_ARGS:
+        args->positionals = &state->argv[state->next];
         return 0;
     default:
-        return 1;
+        return ARGP_ERR_UNKNOWN;
     }
 }

This is mostly the same.

The function signature has changed slightly since we pass argp's state instead, and rather than using optind, we use state->next.

argp parser functions can handle arguments individually with ARGP_KEY_ARG or all of them together with ARGP_KEY_ARGS, and can handle being given no arguments with ARGP_KEY_NO_ARGS.

Since we want to treat all subsequent arguments as the positionals, we wouldn't do this by handling ARGP_KEY_ARG, since then we'd need to pick the arguments individually.

We need to handle ARGP_KEY_NO_ARGS since we haven't initialised args->positionals to anything, and to be a valid argument vector we need to point to something even if it is just a pointer to a NULL (signifying an empty vector).

Since &state->argv[state->next] points to the end of the array if there were no positional parameters, or to the next parameter if there was one, the code is actually the same.

argp parser functions may be chained together, so a parser function that doesn't recognise a particular option should return ARGP_ERR_UNKNOWN so that argp_parse can either try a different parser function or it can report it being unhandled as an error.

The full diff

--- test3.c    2016-05-30 13:47:01.431515079 +0100
+++ test4.c    2016-05-30 14:14:25.315137258 +0100
@@ -1,6 +1,8 @@
-/* test3.c */
+/* test4.c */
 #include <stdio.h> /* fprintf */
-#include <getopt.h> /* getopt_long, struct option */
+#include <argp.h> /* argp_parse, error_t, struct argp, struct argp_option,
+                     struct argp_state, OPTION_ALIAS,
+                     ARGP_KEY_ARGS, ARGP_KEY_NO_ARGS, ARGP_ERR_UNKNOWN */
 
 struct arguments {
     char *foo;
@@ -8,25 +10,17 @@
 };
 
 enum opt {
-    OPT_END = -1,
     OPT_FOO = 'f',
     OPT_NOFOO = 0x100,
-    OPT_UNEXPECTED = '?',
 };
-static const struct option longopts[] = {
-    {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
-    {.name = "also-foo", .has_arg = required_argument, .val = OPT_FOO},
-    {.name = "no-foo", .has_arg = no_argument, .val = OPT_NOFOO},
+static const struct argp_option opts[] = {
+    {.name = "foo", .key = OPT_FOO, .arg = "value"},
+    {.name = "also-foo", .key = OPT_FOO, .flags = OPTION_ALIAS},
+    {.name = "no-foo", .key = OPT_NOFOO},
     {},
 };
-const char optstring[] = "f:";
 
-struct parse_state {
-    char **argv;
-    struct arguments *input;
-};
-
-int parse_arg(int opt, char *arg, struct parse_state *state){
+error_t parse_arg(int opt, char *arg, struct argp_state *state){
     struct arguments *args = state->input;   
     switch (opt) {
     case OPT_FOO:
@@ -35,39 +29,26 @@
     case OPT_NOFOO:
         args->foo = NULL;
         return 0;
-    case OPT_END:
-        args->positionals = &state->argv[optind];
+    case ARGP_KEY_ARGS:
+    case ARGP_KEY_NO_ARGS:
+        args->positionals = &state->argv[state->next];
         return 0;
     default:
-        return 1;
+        return ARGP_ERR_UNKNOWN;
     }
 }
-
-int parse_args(int argc, char *argv[], struct arguments *args){
-    struct parse_state state = {
-        .argv = argv,
-        .input = args,
-    };
-    for (;;) {
-        int opt = getopt_long(argc, argv, optstring, longopts, NULL);
-        int ret = parse_arg(opt, optarg, &state);
-        if (ret != 0) {
-            return ret;
-        }
-
-        if (opt == -1) {
-            return 0;
-        }
-    }
-}
 
 int main(int argc, char *argv[]){
+    static const struct argp argp = {
+        .options = opts,
+        .parser = parse_arg,
+    };
     int ret = 0;
     struct arguments args = {
         .foo = NULL,
     };
 
-    ret = parse_args(argc, argv, &args);
+    ret = argp_parse(&argp, argc, argv, 0, NULL, &args);
     if (ret != 0) {
         return ret;
     }

Now we can see the fruits of our labour:

$ make test4
cc     test4.c   -o test4
$ ./test4 --help
Usage: test4 [OPTION...]

  -f, --foo=value, --also-foo=value
      --no-foo
  -?, --help                 Give this help list
      --usage                Give a short usage message

Mandatory or optional arguments to long options are also mandatory or optional
for any corresponding short options.