We previously discussed common command-line formats.

If your program only operates on string values then this should be sufficient, but programs often need to operate with other data types.

Parsing numbers

Firstly, you might want a value which is a number rather than a string.

Historically the function for this was atoi(3) (ASCII to integer), which has the disadvantage of giving you no way to distinguish between the string being "0" and the string not being a number at all.

A more useful, but less simple option is strtol(3), which lets you determine where the number ended by the endptr argument.

If *endptr == nptr then there was no number, and if **endptr != '\0' then there were extra characters after the number.

/* test.c */
#include <stdbool.h> /* bool */
#include <stdlib.h> /* strtol */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */

/*
 * Parse the command line, which must contain exactly one -f/--foo option
 * whose argument is an integer accepted by strtol(3) with base 0.
 *
 * argc, argv: the argument vector, as passed to main().
 * foo_out:    receives the parsed value; written only when --foo parses.
 *
 * Returns 0 on success, or 1 on a usage error (a diagnostic is written to
 * stderr for every error this function detects itself).
 */
int parse_options(int argc, char *argv[], long *foo_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {0},
    };

    bool parsed_foo = false;

    for (;;) {
        enum opt opt = getopt_long(argc, argv, "f:", options, NULL);
        /* Dispatch on the option just returned, not on the status code. */
        switch (opt) {
        case OPT_FOO:
            {
                char *endptr;
                long foo;
                if (parsed_foo) {
                    fprintf(stderr, "%s: Only one --foo is permitted\n", argv[0]);
                    return 1;
                }
                foo = strtol(optarg, &endptr, 0);
                /* No digits consumed, or junk after the number. */
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --foo requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *foo_out = foo;
                parsed_foo = true;
            }
            break;
        case OPT_END:
            goto parsing_end;
        case OPT_NOVALUE:
        case OPT_UNKNOWN:
            return 1;
        }
    }
parsing_end:
    if (!parsed_foo) {
        fprintf(stderr, "%s: --foo is required\n", argv[0]);
        return 1;
    }
    return 0;
}

/*
 * Entry point: parse --foo and print it, or exit with the status reported
 * by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]) {
    long value;
    const int status = parse_options(argc, argv, &value);

    if (status != 0) {
        return status;
    }

    fprintf(stdout, "Foo is %ld\n", value);
    return 0;
}
$ make test
cc    test.c   -o test
$ ./test 
./test: --foo is required
$ ./test --foo
./test: option '--foo' requires an argument
$ ./test --foo=asdf
./test: --foo requires a number, got asdf
$ ./test --foo=12
Foo is 12

Similarly there's strtoul(3), strtoll(3) and strtoull(3) for unsigned long, long long and unsigned long long integer types.

/* test.c */
#include <stdbool.h> /* bool */
#include <stdlib.h> /* strtol, strtoul, strtoll, strtoull */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */

/*
 * Parse the command line, which must contain exactly one each of
 * -f/--foo, -b/--bar, -B/--baz and -q/--qux, every one taking an integer
 * argument (base auto-detected by the strto*(3) family).
 *
 * argc, argv: the argument vector, as passed to main().
 * foo_out:    receives --foo, parsed with strtol(3).
 * bar_out:    receives --bar, parsed with strtoul(3).
 * baz_out:    receives --baz, parsed with strtoll(3).
 * qux_out:    receives --qux, parsed with strtoull(3).
 *
 * Each output is written only when its option parses successfully.
 *
 * Returns 0 on success, or 1 on a usage error.  Missing options are all
 * reported before returning, one diagnostic per option.
 */
int parse_options(int argc, char *argv[], long *foo_out,
                  unsigned long *bar_out, long long *baz_out,
                  unsigned long long *qux_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_BAR = 'b',
        OPT_BAZ = 'B',
        OPT_QUX = 'q',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {.name = "bar", .has_arg = required_argument, .val = OPT_BAR},
        {.name = "baz", .has_arg = required_argument, .val = OPT_BAZ},
        {.name = "qux", .has_arg = required_argument, .val = OPT_QUX},
        {0},
    };

    int ret = 0;
    bool parsed_foo = false;
    bool parsed_bar = false;
    bool parsed_baz = false;
    bool parsed_qux = false;

    for (;;) {
        enum opt opt = getopt_long(argc, argv, "f:b:B:q:", options, NULL);
        /* Dispatch on the option just returned, not on the status code. */
        switch (opt) {
        case OPT_FOO:
            {
                char *endptr;
                long foo;
                if (parsed_foo) {
                    fprintf(stderr, "%s: Only one --foo is permitted\n", argv[0]);
                    return 1;
                }
                foo = strtol(optarg, &endptr, 0);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --foo requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *foo_out = foo;
                parsed_foo = true;
            }
            break;
        case OPT_BAR:
            {
                char *endptr;
                unsigned long bar;
                if (parsed_bar) {
                    fprintf(stderr, "%s: Only one --bar is permitted\n", argv[0]);
                    return 1;
                }
                bar = strtoul(optarg, &endptr, 0);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --bar requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *bar_out = bar;
                parsed_bar = true;
            }
            break;
        case OPT_BAZ:
            {
                char *endptr;
                long long baz;
                if (parsed_baz) {
                    fprintf(stderr, "%s: Only one --baz is permitted\n", argv[0]);
                    return 1;
                }
                baz = strtoll(optarg, &endptr, 0);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --baz requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *baz_out = baz;
                parsed_baz = true;
            }
            break;
        case OPT_QUX:
            {
                char *endptr;
                unsigned long long qux;
                if (parsed_qux) {
                    fprintf(stderr, "%s: Only one --qux is permitted\n", argv[0]);
                    return 1;
                }
                qux = strtoull(optarg, &endptr, 0);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --qux requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *qux_out = qux;
                parsed_qux = true;
            }
            break;
        case OPT_END:
            goto parsing_end;
        case OPT_NOVALUE:
        case OPT_UNKNOWN:
            return 1;
        }
    }
parsing_end:
    /* Report every missing option rather than stopping at the first. */
    if (!parsed_foo) {
        fprintf(stderr, "%s: --foo is required\n", argv[0]);
        ret = 1;
    }
    if (!parsed_bar) {
        fprintf(stderr, "%s: --bar is required\n", argv[0]);
        ret = 1;
    }
    if (!parsed_baz) {
        fprintf(stderr, "%s: --baz is required\n", argv[0]);
        ret = 1;
    }
    if (!parsed_qux) {
        fprintf(stderr, "%s: --qux is required\n", argv[0]);
        ret = 1;
    }
    return ret;
}

/*
 * Entry point: parse the four integer options and print them, or exit with
 * the status reported by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]){
    long foo;
    unsigned long bar;
    long long baz;
    unsigned long long qux;
    int ret = parse_options(argc, argv, &foo, &bar, &baz, &qux);
    if (ret == 0)
        /* "ll" is the length modifier for long long; "L" is only defined
         * for long double conversions. */
        fprintf(stdout,
                "Foo is %ld\nBar is %lu\nBaz is %lld\nQux is %llu\n",
                foo, bar, baz, qux);
    return ret;
}
$ make test
cc    test.c   -o test
$ ./test 
./test: --foo is required
./test: --bar is required
./test: --baz is required
./test: --qux is required
$ ./test --foo=12 --bar=23 --baz=34 --qux=45
Foo is 12
Bar is 23
Baz is 34
Qux is 45

Finally, there's strtof(3), strtod(3) and strtold(3) for parsing float, double and long double.

/* test.c */
#include <stdbool.h> /* bool */
#include <stdlib.h> /* strtof, strtod, strtold */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */

/*
 * Parse the command line, which must contain exactly one each of
 * -f/--foo, -b/--bar and -B/--baz, every one taking a floating-point
 * argument.
 *
 * argc, argv: the argument vector, as passed to main().
 * foo_out:    receives --foo, parsed with strtof(3).
 * bar_out:    receives --bar, parsed with strtod(3).
 * baz_out:    receives --baz, parsed with strtold(3).
 *
 * Each output is written only when its option parses successfully.
 *
 * Returns 0 on success, or 1 on a usage error.  Missing options are all
 * reported before returning, one diagnostic per option.
 */
int parse_options(int argc, char *argv[], float *foo_out,
                  double *bar_out, long double *baz_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_BAR = 'b',
        OPT_BAZ = 'B',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {.name = "bar", .has_arg = required_argument, .val = OPT_BAR},
        {.name = "baz", .has_arg = required_argument, .val = OPT_BAZ},
        {0},
    };

    int ret = 0;
    bool parsed_foo = false;
    bool parsed_bar = false;
    bool parsed_baz = false;

    for (;;) {
        enum opt opt = getopt_long(argc, argv, "f:b:B:", options, NULL);
        /* Dispatch on the option just returned, not on the status code. */
        switch (opt) {
        case OPT_FOO:
            {
                char *endptr;
                float foo;
                if (parsed_foo) {
                    fprintf(stderr, "%s: Only one --foo is permitted\n", argv[0]);
                    return 1;
                }
                foo = strtof(optarg, &endptr);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --foo requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *foo_out = foo;
                parsed_foo = true;
            }
            break;
        case OPT_BAR:
            {
                char *endptr;
                double bar;
                if (parsed_bar) {
                    fprintf(stderr, "%s: Only one --bar is permitted\n", argv[0]);
                    return 1;
                }
                bar = strtod(optarg, &endptr);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --bar requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *bar_out = bar;
                parsed_bar = true;
            }
            break;
        case OPT_BAZ:
            {
                char *endptr;
                long double baz;
                if (parsed_baz) {
                    fprintf(stderr, "%s: Only one --baz is permitted\n", argv[0]);
                    return 1;
                }
                baz = strtold(optarg, &endptr);
                if (endptr == optarg || *endptr != '\0') {
                    fprintf(stderr, "%s: --baz requires a number, got %s\n",
                            argv[0], optarg);
                    return 1;
                }
                *baz_out = baz;
                parsed_baz = true;
            }
            break;
        case OPT_END:
            goto parsing_end;
        case OPT_NOVALUE:
        case OPT_UNKNOWN:
            return 1;
        }
    }
parsing_end:
    /* Report every missing option rather than stopping at the first. */
    if (!parsed_foo) {
        fprintf(stderr, "%s: --foo is required\n", argv[0]);
        ret = 1;
    }
    if (!parsed_bar) {
        fprintf(stderr, "%s: --bar is required\n", argv[0]);
        ret = 1;
    }
    if (!parsed_baz) {
        fprintf(stderr, "%s: --baz is required\n", argv[0]);
        ret = 1;
    }
    return ret;
}

/*
 * Entry point: parse the three floating-point options and print them, or
 * exit with the status reported by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]) {
    float f_val;
    double d_val;
    long double ld_val;
    const int status = parse_options(argc, argv, &f_val, &d_val, &ld_val);

    if (status != 0) {
        return status;
    }

    fprintf(stdout,
            "Foo is %f\nBar is %lf\nBaz is %Lf\n",
            f_val, d_val, ld_val);
    return 0;
}
$ make test
cc    test.c   -o test
$ ./test 
./test: --foo is required
./test: --bar is required
./test: --baz is required
$ ./test --foo=1.2 --bar=2.3 --baz=3.4
Foo is 1.200000
Bar is 2.300000
Baz is 3.400000

Parsing arrays

We previously parsed the positional parameters array by making use of GNU getopt(3)'s permuting behaviour letting us use a slice of the argv array.

This is handy for a lot of programs, but some programs need to handle more than one array, or this array might not be of strings.

Parsing arrays of multiple options

/* test.c */
#include <stdlib.h> /* size_t, strtol */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */

/*
 * Append one value to a heap-allocated array of longs, growing it by one
 * element with realloc(3).
 *
 * foos:       in/out pointer to the array; *foos may be NULL when empty.
 * foos_count: in/out number of elements currently in the array.
 * foo:        value to append.
 *
 * Returns 0 on success.  Returns 1 if the new size is not representable or
 * the allocation fails; *foos and *foos_count are then left untouched.
 */
int extend_foo_array(long **foos, size_t *foos_count, long foo) {
    size_t newsize;
    long *newfoos;

    /* Refuse to grow if (count + 1) * sizeof(foo) would wrap around. */
    if (*foos_count >= (size_t)-1 / sizeof(foo))
        return 1;

    newsize = (*foos_count + 1) * sizeof(foo);
    newfoos = realloc(*foos, newsize);
    if (newfoos == NULL)
        return 1;

    newfoos[*foos_count] = foo;

    (*foos_count)++;
    *foos = newfoos;
    return 0;
}

/*
 * Collect the argument of every -f/--foo option into a heap-allocated
 * array of longs.
 *
 * argc, argv:     the argument vector, as passed to main().
 * foos_out:       on success, receives the array; the caller must free it.
 * foos_count_out: on success, receives the number of elements.
 *
 * Returns 0 on success, 1 on a usage error (including no --foo at all) and
 * 2 if the array could not be grown.  The outputs are not written on error.
 */
int parse_options(int argc, char *argv[], long **foos_out, size_t *foos_count_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {0},
    };

    int status = 0;
    long *values = NULL;
    size_t nvalues = 0;
    int c;

    while ((c = getopt_long(argc, argv, "f:", options, NULL)) != OPT_END) {
        char *tail;
        long number;

        if (c == OPT_NOVALUE || c == OPT_UNKNOWN) {
            status = 1;
            goto cleanup;
        }
        if (c != OPT_FOO) {
            continue;
        }

        number = strtol(optarg, &tail, 0);
        if (tail == optarg || *tail != '\0') {
            fprintf(stderr, "%s: --foo requires a number, got %s\n",
                    argv[0], optarg);
            status = 1;
            goto cleanup;
        }
        if (extend_foo_array(&values, &nvalues, number) != 0) {
            fprintf(stderr, "%s: Unable to extend foo array\n", argv[0]);
            status = 2;
            goto cleanup;
        }
    }

    if (values != NULL && nvalues != 0) {
        /* Hand ownership to the caller, so the free() below is a no-op. */
        *foos_out = values;
        *foos_count_out = nvalues;
        values = NULL;
        nvalues = 0;
    } else {
        fprintf(stderr, "%s: At least one --foo required\n", argv[0]);
        status = 1;
    }
cleanup:
    free(values);
    return status;
}

/*
 * Entry point: parse every --foo value and print them with their indices,
 * or exit with the status reported by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]) {
    long *foos = NULL;
    size_t foos_count = 0;
    int ret = parse_options(argc, argv, &foos, &foos_count);
    if (ret == 0) {
        fprintf(stdout, "Foos:\n");
        /* Index with size_t so the type matches foos_count. */
        for (size_t i = 0; i < foos_count; i++) {
            fprintf(stdout, "%zu:\t%ld\n", i, foos[i]);
        }
    }
    /* foos is still NULL if parsing failed, and free(NULL) is a no-op. */
    free(foos);
    return ret;
}
$ ./test
./test: At least one --foo required
$ ./test -f
./test: option requires an argument -- 'f'
$ ./test -f1 -f2
Foos:
0:  1
1:  2

Parsing arrays of token separated values

The multiple option form of arrays is convenient when your values may be arbitrary strings, though it is more typing and it is a bit unnatural to create an array this way when there is a convenient token separator.

Comma and colon are the traditional favourites for this, and this is sufficiently common that there are functions in glibc to help.

strtok(3) is the traditional function for this, though it relies on global state, so is unfavourable.

strsep(3) was the BSD approach to fix this, which has a reasonably nice API, but strtok_r(3) is the standardised non-global-state version.

/* test.c */
#include <stdlib.h> /* size_t, strtol */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */
#include <string.h> /* strtok_r */

/*
 * Append one value to a heap-allocated array of longs, growing it by one
 * element with realloc(3).
 *
 * foos:       in/out pointer to the array; *foos may be NULL when empty.
 * foos_count: in/out number of elements currently in the array.
 * foo:        value to append.
 *
 * Returns 0 on success.  Returns 1 if the new size is not representable or
 * the allocation fails; *foos and *foos_count are then left untouched.
 */
int extend_foo_array(long **foos, size_t *foos_count, long foo) {
    size_t newsize;
    long *newfoos;

    /* Refuse to grow if (count + 1) * sizeof(foo) would wrap around. */
    if (*foos_count >= (size_t)-1 / sizeof(foo))
        return 1;

    newsize = (*foos_count + 1) * sizeof(foo);
    newfoos = realloc(*foos, newsize);
    if (newfoos == NULL)
        return 1;

    newfoos[*foos_count] = foo;

    (*foos_count)++;
    *foos = newfoos;
    return 0;
}

/*
 * Parse a single -f/--foo option whose argument is a ':'-separated list of
 * integers, and return the list as a heap-allocated array of longs.
 *
 * argc, argv:     the argument vector, as passed to main().
 * foos_out:       on success, receives the array; the caller must free it.
 * foos_count_out: on success, receives the number of elements.
 *
 * The option argument is tokenised in place with strtok_r(3), so the
 * separators in argv are overwritten.
 *
 * Returns 0 on success, 1 on a usage error (including no values at all) and
 * 2 if the array could not be grown.  The outputs are not written on error.
 */
int parse_options(int argc, char *argv[], long **foos_out, size_t *foos_count_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {0},
    };

    int ret = 0;
    long *foos = NULL;
    size_t foos_count = 0;

    for (;;) {
        enum opt opt = getopt_long(argc, argv, "f:", options, NULL);
        switch(opt) {
        case OPT_FOO:
            {
                char *saveptr = NULL;
                char *token;
                if (foos != NULL || foos_count != 0) {
                    fprintf(stderr, "%s: Only one --foo is permitted\n", argv[0]);
                    ret = 1;
                    goto cleanup;
                }
                /* Pass the string on the first call only and NULL after
                 * that; the save pointer's contents are private to
                 * strtok_r() and must not be fed back in as the string. */
                for (token = strtok_r(optarg, ":", &saveptr);
                     token != NULL;
                     token = strtok_r(NULL, ":", &saveptr)) {
                    char *endptr;
                    long foo = strtol(token, &endptr, 0);
                    if (endptr == token || *endptr != '\0') {
                        fprintf(stderr, "%s: --foo requires a : separated array"
                                        " of numbers, got %s\n",
                                argv[0], token);
                        ret = 1;
                        goto cleanup;
                    }
                    if (extend_foo_array(&foos, &foos_count, foo) != 0) {
                        fprintf(stderr, "%s: Unable to extend foo array\n",
                                argv[0]);
                        ret = 2;
                        goto cleanup;
                    }
                }
            }
            break;
        case OPT_END:
            goto parsing_end;
        case OPT_NOVALUE:
        case OPT_UNKNOWN:
            ret = 1;
            goto cleanup;
        }
    }
parsing_end:
    if (foos == NULL || foos_count == 0) {
        fprintf(stderr, "%s: At least one --foo required\n", argv[0]);
        ret = 1;
    } else {
        /* Hand ownership to the caller, so the free() below is a no-op. */
        *foos_out = foos;
        *foos_count_out = foos_count;
        foos = NULL;
        foos_count = 0;
    }
cleanup:
    free(foos);
    return ret;
}

/*
 * Entry point: parse the --foo list and print each value with its index,
 * or exit with the status reported by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]) {
    long *foos = NULL;
    size_t foos_count = 0;
    int ret = parse_options(argc, argv, &foos, &foos_count);
    if (ret == 0) {
        fprintf(stdout, "Foos:\n");
        /* Index with size_t so the type matches foos_count. */
        for (size_t i = 0; i < foos_count; i++) {
            fprintf(stdout, "%zu:\t%ld\n", i, foos[i]);
        }
    }
    /* foos is still NULL if parsing failed, and free(NULL) is a no-op. */
    free(foos);
    return ret;
}
$ ./test 
./test: At least one --foo required
$ ./test -f1,2,3
./test: --foo requires a : separated array of numbers, got 1,2,3
$ ./test -f1:2:3
Foos:
0:  1
1:  2
2:  3
$ ./test -f1:2:3 -f123
./test: Only one --foo is permitted

Parsing suboptions

Some values may be compound, such as complex numbers that have a real and imaginary part, or any non-trivial C struct.

It is possible to parse the values as an array and fill in the data structure from the indices, but this gets complicated when the values may be of different types or optional.

So it would be convenient to be able to supply these with key-value pairs of field name and value.

If your keys don't have = in them and you don't have ,s in your keys or values, then getsubopt(3), an apparently unholy union between getopt(3) and strtok_r(3) could be just what you're looking for!

/* test.c */
#include <stdbool.h> /* bool */
#include <stdlib.h> /* size_t, strtol, realloc, free, getsubopt */
#include <getopt.h> /* getopt_long, struct option */
#include <stdio.h> /* fprintf */
#include <string.h> /* strtok_r */

struct foo {
    long bar;
    char *baz;
};

/*
 * Append a copy of *foo to a heap-allocated array of struct foo, growing
 * it by one element with realloc(3).
 *
 * foos:       in/out pointer to the array; *foos may be NULL when empty.
 * foos_count: in/out number of elements currently in the array.
 * foo:        element to copy onto the end of the array.
 *
 * Returns 0 on success.  Returns 1 if the new size is not representable or
 * the allocation fails; *foos and *foos_count are then left untouched.
 */
int extend_foo_array(struct foo **foos, size_t *foos_count, struct foo *foo) {
    size_t newsize;
    struct foo *newfoos;

    /* Refuse to grow if (count + 1) * sizeof(*foo) would wrap around. */
    if (*foos_count >= (size_t)-1 / sizeof(*foo))
        return 1;

    newsize = (*foos_count + 1) * sizeof(*foo);
    newfoos = realloc(*foos, newsize);
    if (newfoos == NULL)
        return 1;

    newfoos[*foos_count] = *foo;

    (*foos_count)++;
    *foos = newfoos;
    return 0;
}

/*
 * Parse one "bar=NUMBER,baz=STRING" suboption list into a struct foo.
 *
 * progname: prefix for the diagnostics written to stderr.
 * optarg:   the suboption string; getsubopt(3) splits it in place.
 * foo_out:  written only when both bar and baz were supplied and valid.
 *
 * The baz member of the result points into the optarg buffer rather than at
 * a copy, so it is only valid for as long as that buffer is.
 *
 * Returns 0 on success and 1 on any error: an unrecognised key, a repeated
 * key, a key without "=VALUE", a non-numeric bar, or a missing key.
 */
int parse_foo(char *progname, char *optarg, struct foo *foo_out) {
    enum foo_opt {
        FOO_BAR,
        FOO_BAZ,
    };
    static char *const foo_tokens[] = {
        [FOO_BAR] = "bar",
        [FOO_BAZ] = "baz",
        NULL,
    };

    struct foo foo;
    bool parsed_foo_bar = false;
    bool parsed_foo_baz = false;

    while (*optarg != '\0') {
        char *value = NULL;
        enum foo_opt foo_opt;
        int ret = getsubopt(&optarg, foo_tokens, &value);
        if (ret == -1) {
            /* Key is not one of foo_tokens. */
            return 1;
        }
        foo_opt = ret;
        switch (foo_opt) {
        case FOO_BAR:
            {
                char *endptr;
                long bar;
                if (parsed_foo_bar) {
                    fprintf(stderr, "%s: Only one --foo=bar=VALUE "
                            "is permitted\n", progname);
                    return 1;
                }
                /* getsubopt() leaves value NULL for a bare "bar". */
                if (value == NULL) {
                    fprintf(stderr, "%s: --foo=bar=VALUE requires a value\n",
                            progname);
                    return 1;
                }
                bar = strtol(value, &endptr, 0);
                if (endptr == value || *endptr != '\0') {
                    fprintf(stderr, "%s: --foo=bar=VALUE requires a number, "
                            "got %s\n", progname, value);
                    return 1;
                }
                foo.bar = bar;
                parsed_foo_bar = true;
            }
            break;
        case FOO_BAZ:
            {
                if (parsed_foo_baz) {
                    fprintf(stderr, "%s: Only one --foo=baz=VALUE "
                            "is permitted\n", progname);
                    return 1;
                }
                /* getsubopt() leaves value NULL for a bare "baz". */
                if (value == NULL) {
                    fprintf(stderr, "%s: --foo=baz=VALUE requires a value\n",
                            progname);
                    return 1;
                }
                foo.baz = value;
                parsed_foo_baz = true;
            }
            break;
        }
    }

    if (!parsed_foo_bar)
        fprintf(stderr, "%s: Missing bar=VALUE in --foo\n", progname);
    if (!parsed_foo_baz)
        fprintf(stderr, "%s: Missing baz=VALUE in --foo\n", progname);
    if (parsed_foo_bar && parsed_foo_baz) {
        *foo_out = foo;
        return 0;
    }
    return 1;
}

/*
 * Collect every -f/--foo option into a heap-allocated array of struct foo.
 * Each option argument may hold several ':'-separated entries, and each
 * entry is a suboption list handled by parse_foo().
 *
 * argc, argv:     the argument vector, as passed to main().
 * foos_out:       on success, receives the array; the caller must free it.
 * foos_count_out: on success, receives the number of elements.
 *
 * The option arguments are tokenised in place, and the baz strings in the
 * result point into argv rather than at copies.
 *
 * Returns 0 on success, 1 on a usage error (including no --foo at all) and
 * 2 if the array could not be grown.  The outputs are not written on error.
 */
int parse_options(int argc, char *argv[], struct foo **foos_out, size_t *foos_count_out) {
    enum opt {
        OPT_FOO = 'f',
        OPT_UNKNOWN = '?',
        OPT_NOVALUE = ':',
        OPT_END = -1,
    };
    static const struct option options[] = {
        {.name = "foo", .has_arg = required_argument, .val = OPT_FOO},
        {0},
    };

    int ret = 0;
    struct foo *foos = NULL;
    size_t foos_count = 0;

    for (;;) {
        enum opt opt = getopt_long(argc, argv, "f:", options, NULL);
        switch(opt) {
        case OPT_FOO:
            {
                char *saveptr = NULL;
                char *token;
                /* Pass the string on the first call only and NULL after
                 * that; the save pointer's contents are private to
                 * strtok_r() and must not be fed back in as the string. */
                for (token = strtok_r(optarg, ":", &saveptr);
                     token != NULL;
                     token = strtok_r(NULL, ":", &saveptr)) {
                    struct foo foo;
                    if (parse_foo(argv[0], token, &foo)) {
                        fprintf(stderr, "%s: --foo requires a , separated array"
                                        " of key-value pairs, got %s\n",
                                argv[0], token);
                        ret = 1;
                        goto cleanup;
                    }
                    if (extend_foo_array(&foos, &foos_count, &foo) != 0) {
                        fprintf(stderr, "%s: Unable to extend foo array\n",
                                argv[0]);
                        ret = 2;
                        goto cleanup;
                    }
                }
            }
            break;
        case OPT_END:
            goto parsing_end;
        case OPT_NOVALUE:
        case OPT_UNKNOWN:
            ret = 1;
            goto cleanup;
        }
    }
parsing_end:
    if (foos == NULL || foos_count == 0) {
        fprintf(stderr, "%s: At least one --foo required\n", argv[0]);
        ret = 1;
    } else {
        /* Hand ownership to the caller, so the free() below is a no-op. */
        *foos_out = foos;
        *foos_count_out = foos_count;
        foos = NULL;
        foos_count = 0;
    }
cleanup:
    free(foos);
    return ret;
}

/*
 * Entry point: parse every --foo entry and print its fields with its index,
 * or exit with the status reported by parse_options() when parsing fails.
 */
int main(int argc, char *argv[]) {
    struct foo *foos = NULL;
    size_t foos_count = 0;
    int ret = parse_options(argc, argv, &foos, &foos_count);
    if (ret == 0) {
        fprintf(stdout, "Foos:\n");
        /* Index with size_t so the type matches foos_count. */
        for (size_t i = 0; i < foos_count; i++) {
            fprintf(stdout, "%zu:\tbar=%ld, baz=%s\n", i, foos[i].bar, foos[i].baz);
        }
    }
    /* foos is still NULL if parsing failed, and free(NULL) is a no-op. */
    free(foos);
    return ret;
}
$ ./test
./test: At least one --foo required
$ ./test --foo=bar=1
./test: Missing baz=VALUE in --foo
./test: --foo requires a , separated array of key-value pairs, got bar=1
$ ./test --foo=baz=qux
./test: Missing bar=VALUE in --foo
./test: --foo requires a , separated array of key-value pairs, got baz=qux
$ ./test --foo=bar=1.2,baz=qux
./test: --foo=bar=VALUE requires a number, got 1.2
./test: --foo requires a , separated array of key-value pairs, got bar=1.2
$ ./test --foo=bar=1,baz=qux --foo=bar=2,baz=quux
Foos:
0:  bar=1, baz=qux
1:  bar=2, baz=quux

Conclusion

As you can see, there are plenty of functions built-into the C library designed to make it easier to perform the kind of string parsing that is necessary to parse command-line arguments.

Unfortunately, if you resort to token-separated values parsed with functions such as strtok_r(3) and getsubopt(3), you can't handle strings that contain the separator characters.

Your homework this week is to take a look at extract_first_word so you can understand the context of why it exists.