view lib/args.c @ 1604:f057223498e4 draft

Remove more strncpy() calls. The semantics of strncat() and strncpy() are non-obvious, so let's not use 'em. Both zero all their remaining buffer space, and with strncat() the size is the space left at the _end_ of the string (not the size of the buffer) so it's way too easy to stomp memory you don't own. As long as we have to measure stuff ourselves to get it right, just use memcpy().
author Rob Landley <>
date Sat, 13 Dec 2014 11:56:41 -0600
parents 1d79c0c23f69
children 2c86e2cc1fd7
line wrap: on
line source

/* args.c - Command line argument parsing.
 * Copyright 2006 Rob Landley <>
 */

#include "toys.h"

// Design goals:
//   Don't use getopt() out of libc.
//   Don't permute original arguments (screwing up ps/top output).
//   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"

/* This uses a getopt-like option string, but not getopt() itself. We call
 * it the get_opt string.
 * Each option in the get_opt string corresponds to a bit position in the
 * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
 * and so on. If the option isn't seen in argv[], its bit remains 0.
 * Options which have an argument fill in the corresponding slot in the global
 * union "this" (see generated/globals.h), which it treats as an array of longs
 * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
 * You don't have to free the option strings, which point into the environment
 * space. List objects should be freed by main() when command_main() returns.
 * Example:
 *   Calling get_optflags() when toys.which->options="ab:c:d" and
 *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
 *     Changes to struct toys:
 *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
 *       toys.optargs[0] = "walrus" (leftover argument)
 *       toys.optargs[1] = NULL (end of list)
 *       toys.optc = 1 (there was 1 leftover argument)
 *     Changes to union this:
 *       this[0]=NULL (because -c didn't get an argument this time)
 *       this[1]="fruit" (argument to -b)
 */

// Enabling TOYBOX_DEBUG in .config adds syntax checks to option string parsing
// which aren't needed in the final code (your option string is hardwired and
// should be correct when you ship), but are useful for development.

// What you can put in a get_opt string:
//   Any otherwise unused character (all letters, unprefixed numbers) specify
//   an option that sets a flag. The bit value is the same as the binary digit
//   if you string the option characters together in order.
//   So in "abcdefgh" a = 128, h = 1
//   Suffixes specify that this option takes an argument (stored in GLOBALS):
//       Note that pointer and long are always the same size, even on 64 bit.
//     : plus a string argument, keep most recent if more than one
//     * plus a string argument, appended to a list
//     # plus a signed long argument
//       <LOW     - die if less than LOW
//       >HIGH    - die if greater than HIGH
//       =DEFAULT - value if not specified
//     - plus a signed long argument defaulting to negative (say + for positive)
//     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
//       Chop this option out with USE_TOYBOX_FLOAT() in option string
//       Same <LOW>HIGH=DEFAULT as #
//     @ plus an occurrence counter (which is a long)
//     (longopt)
//     | this is required. If more than one marked, only one required.
//     ; long option's argument is optional (can only be supplied with --opt=)
//     ^ Stop parsing after encountering this argument
//    " " (space char) the "plus an argument" must be separate
//        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
//   At the beginning of the get_opt string (before any options):
//     ^ stop at first nonoption argument
//     <0 die if less than # leftover arguments (default 0)
//     >9 die if > # leftover arguments (default MAX_INT)
//     ? Allow unknown arguments (pass them through to command).
//     & first argument has imaginary dash (ala tar/ps)
//       If given twice, all arguments have imaginary dash
//   At the end: [groups] of previously seen options
//     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
//     + Synonyms (switch on all)          [+abc] means -ab=-abc, -c=-abc
//     ! More than one in group is error   [!abc] means -ab calls error_exit()
//       primarily useful if you can switch things back off again.

// Notes from getopt man page
//   - and -- cannot be arguments.
//     -- force end of arguments
//     - is a synonym for stdin in file arguments
//   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)

// Linked list of all known options (the option string is parsed into this).
// Hangs off getoptflagstate, freed at end of option parsing.
struct opts {
  struct opts *next;
  long *arg;         // Pointer into union "this" to store arguments at.
  int c;             // Argument character to match (-1 = bare longopt)
  int flags;         // |=1, ^=2, " "=4, ;=8
  unsigned dex[3];   // which bits to disable/enable/exclude in toys.optflags
  char type;         // Type of arguments to store union "this"
  union {
    long l;
    FLOAT f;
  } val[3];          // low, high, default - range of allowed values
};

// Linked list of long options. Hangs off getoptflagstate, freed at end of
// option parsing. Details about the flag to set and the global slot to fill
// out are stored in the related short option struct; if opt->c == -1 the long
// option is "bare" (has no corresponding short option).
struct longopts {
  struct longopts *next;
  struct opts *opt;  // Option this long name is an alias for
  char *str;         // Name, pointing into the option string (not terminated)
  int len;           // Length of name in bytes
};

// State during argument parsing.
struct getoptflagstate
{
  // argc: index into toys.argv of the argument being processed
  // minargs/maxargs: allowed number of leftover (non-option) arguments
  // nodash: count of leading '&' in the option string
  int argc, minargs, maxargs, nodash;
  char *arg;                  // Position within the current argument
  struct opts *opts;          // Known options, rightmost in option string first
  struct longopts *longopts;  // Known --long names
  // noerror: leading '?' seen, unknown options are passed through
  // nodash_now: current argument is being parsed without its leading dash
  // stopearly: nonzero once option parsing should stop
  int noerror, nodash_now, stopearly;
  unsigned excludes, requires; // Flag masks: mutually exclusive, required
};

// Use getoptflagstate to parse parse one command line option from argv
static int gotflag(struct getoptflagstate *gof, struct opts *opt)
  int type;

  // Did we recognize this option?
  if (!opt) {
    if (gof->noerror) return 1;
    error_exit("Unknown option %s", gof->arg);

  // Might enabling this switch off something else?
  if (toys.optflags & opt->dex[0]) {
    struct opts *clr;
    unsigned i = 1;

    // Forget saved argument for flag we switch back off
    for (clr=gof->opts, i=1; clr; clr = clr->next, i<<=1)
      if (clr->arg && (i & toys.optflags & opt->dex[0])) *clr->arg = 0;
    toys.optflags &= ~opt->dex[0];

  // Set flags
  toys.optflags |= opt->dex[1];
  gof->excludes |= opt->dex[2];
  if (opt->flags&2) gof->stopearly=2;

  if (toys.optflags & gof->excludes) {
    struct opts *bad;
    unsigned i = 1;

    for (bad=gof->opts, i=1; ;bad = bad->next, i<<=1) {
      if (opt == bad || !(i & toys.optflags)) continue;
      if (toys.optflags & bad->dex[2]) break;
    error_exit("No '%c' with '%c'", opt->c, bad->c);

  // Does this option take an argument?
  if (!gof->arg) {
    if (opt->flags & 8) return 0;
    gof->arg = "";
  } else gof->arg++;
  type = opt->type;

  if (type == '@') ++*(opt->arg);
  else if (type) {
    char *arg = gof->arg;

    // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
    // to make "tar xCjfv blah1 blah2 thingy" work like
    // "tar -x -C blah1 -j -f blah2 -v thingy"

    if (gof->nodash_now || (!arg[0] && !(opt->flags & 8)))
      arg = toys.argv[++gof->argc];
    if (!arg) {
      char *s = "Missing argument to ";
      struct longopts *lo;

      if (opt->c != -1) error_exit("%s-%c", s, opt->c);

      for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
      error_exit("%s--%.*s", s, lo->len, lo->str);

    if (type == ':') *(opt->arg) = (long)arg;
    else if (type == '*') {
      struct arg_list **list;

      list = (struct arg_list **)opt->arg;
      while (*list) list=&((*list)->next);
      *list = xzalloc(sizeof(struct arg_list));
      (*list)->arg = arg;
    } else if (type == '#' || type == '-') {
      long l = atolx(arg);
      if (type == '-' && !ispunct(*arg)) l*=-1;
      if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
      if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);

      *(opt->arg) = l;
    } else if (CFG_TOYBOX_FLOAT && type == '.') {
      FLOAT *f = (FLOAT *)(opt->arg);

      *f = strtod(arg, &arg);
      if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
        error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
      if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
        error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);

    if (!gof->nodash_now) gof->arg = "";

  return 0;

// Parse this command's options string into struct getoptflagstate, which
// includes a struct opts linked list in reverse order (I.E. right-to-left)
void parse_optflaglist(struct getoptflagstate *gof)
  char *options = toys.which->options;
  long *nextarg = (long *)&this;
  struct opts *new = 0;
  int idx;

  // Parse option format string
  memset(gof, 0, sizeof(struct getoptflagstate));
  gof->maxargs = INT_MAX;
  if (!options) return;

  // Parse leading special behavior indicators
  for (;;) {
    if (*options == '^') gof->stopearly++;
    else if (*options == '<') gof->minargs=*(++options)-'0';
    else if (*options == '>') gof->maxargs=*(++options)-'0';
    else if (*options == '?') gof->noerror++;
    else if (*options == '&') gof->nodash++;
    else break;

  // Parse option string into a linked list of options with attributes.

  if (!*options) gof->stopearly++;
  while (*options) {
    char *temp;

    // Option groups come after all options are defined
    if (*options == '[') break;

    // Allocate a new list entry when necessary
    if (!new) {
      new = xzalloc(sizeof(struct opts));
      new->next = gof->opts;
      gof->opts = new;
      new->val[0].l = LONG_MIN;
      new->val[1].l = LONG_MAX;
    // Each option must start with "(" or an option character.  (Bare
    // longopts only come at the start of the string.)
    if (*options == '(' && new->c != -1) {
      char *end;
      struct longopts *lo;

      // Find the end of the longopt
      for (end = ++options; *end && *end != ')'; end++);
      if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");

      // init a new struct longopts
      lo = xmalloc(sizeof(struct longopts));
      lo->next = gof->longopts;
      lo->opt = new;
      lo->str = options;
      lo->len = end-options;
      gof->longopts = lo;
      options = ++end;

      // Mark this struct opt as used, even when no short opt.
      if (!new->c) new->c = -1;


    // If this is the start of a new option that wasn't a longopt,

    } else if (strchr(":*#@.-", *options)) {
      if (CFG_TOYBOX_DEBUG && new->type)
        error_exit("multiple types %c:%c%c", new->c, new->type, *options);
      new->type = *options;
    } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
    // bounds checking
    else if (-1 != (idx = stridx("<>=", *options))) {
      if (new->type == '#') {
        long l = strtol(++options, &temp, 10);
        if (temp != options) new->val[idx].l = l;
      } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
        FLOAT f = strtod(++options, &temp);
        if (temp != options) new->val[idx].f = f;
      } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
      options = --temp;

    // At this point, we've hit the end of the previous option.  The
    // current character is the start of a new option.  If we've already
    // assigned an option to this struct, loop to allocate a new one.
    // (It'll get back here afterwards and fall through to next else.)
    } else if (new->c) {
      new = 0;

    // Claim this option, loop to see what's after it.
    } else new->c = *options;


  // Initialize enable/disable/exclude masks and pointers to store arguments.
  // (This goes right to left so we need the whole list before we can start.)
  idx = 0;
  for (new = gof->opts; new; new = new->next) {
    unsigned u = 1<<idx++;

    new->dex[1] = u;
    if (new->flags & 1) gof->requires |= u;
    if (new->type) {
      new->arg = (void *)nextarg;
      *(nextarg++) = new->val[2].l;

  // Parse trailing group indicators
  while (*options) {
    unsigned bits = 0;

    if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);

    idx = stridx("-+!", *++options);
    if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
    if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
      error_exit("empty []");

    // Don't advance past ] but do process it once in loop.
    while (*options++ != ']') {
      struct opts *opt;
      int i;

      if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
      // Find this option flag (in previously parsed struct opt)
      for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
        if (*options == ']') {
          if (!opt) break;
          if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
        } else {
          if (CFG_TOYBOX_DEBUG && !opt)
            error_exit("[] unknown target %c", *options);
          if (opt->c == *options) {
            bits |= 1<<i;

// Fill out toys.optflags, toys.optargs, and this[] from toys.argv

void get_optflags(void)
  struct getoptflagstate gof;
  struct opts *catch;
  long saveflags;
  char *letters[]={"s",""};

  // Option parsing is a two stage process: parse the option string into
  // a struct opts list, then use that list to process argv[];

  // Allocate memory for optargs
  saveflags = 0;
  while (toys.argv[saveflags++]);
  toys.optargs = xzalloc(sizeof(char *)*saveflags);


  // Iterate through command line arguments, skipping argv[0]
  for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
    gof.arg = toys.argv[gof.argc];
    catch = NULL;

    // Parse this argument
    if (gof.stopearly>1) goto notflag;

    gof.nodash_now = 0;

    // Various things with dashes
    if (*gof.arg == '-') {

      // Handle -
      if (!gof.arg[1]) goto notflag;
      if (*gof.arg=='-') {
        struct longopts *lo;

        // Handle --
        if (!*gof.arg) {
          gof.stopearly += 2;

        // do we match a known --longopt?
        for (lo = gof.longopts; lo; lo = lo->next) {
          if (!strncmp(gof.arg, lo->str, lo->len)) {
            if (!gof.arg[lo->len]) gof.arg = 0;
            else if (gof.arg[lo->len] == '=' && lo->opt->type)
              gof.arg += lo->len;
            else continue;
            // It's a match.
            catch = lo->opt;

        // Should we handle this --longopt as a non-option argument?
        if (!lo && gof.noerror) {
          gof.arg -= 2;
          goto notflag;

        // Long option parsed, handle option.
        gotflag(&gof, catch);

    // Handle things that don't start with a dash.
    } else {
      if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
      else goto notflag;

    // At this point, we have the args part of -args.  Loop through
    // each entry (could be -abc meaning -a -b -c)
    saveflags = toys.optflags;
    while (*gof.arg) {

      // Identify next option char.
      for (catch = gof.opts; catch; catch = catch->next)
        if (*gof.arg == catch->c)
          if (!((catch->flags&4) && gof.arg[1])) break;

      // Handle option char (advancing past what was used)
      if (gotflag(&gof, catch) ) {
        toys.optflags = saveflags;
        gof.arg = toys.argv[gof.argc];
        goto notflag;

    // Not a flag, save value in toys.optargs[]
    if (gof.stopearly) gof.stopearly++;
    toys.optargs[toys.optc++] = toys.argv[gof.argc];

  // Sanity check
  if (toys.optc<gof.minargs)
    error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
      gof.minargs, letters[!(gof.minargs-1)]);
  if (toys.optc>gof.maxargs)
    error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
  if (gof.requires && !(gof.requires & toys.optflags)) {
    struct opts *req;
    char needs[32], *s = needs;

    for (req = gof.opts; req; req = req->next)
      if (req->flags & 1) *(s++) = req->c;
    *s = 0;

    error_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
  toys.exithelp = 0;

    llist_traverse(gof.opts, free);
    llist_traverse(gof.longopts, free);