changeset 1390:c18ad59ae442 draft

Write a new find. Not quite done, but the basics work.
author Rob Landley <rob@landley.net>
date Wed, 16 Jul 2014 20:43:58 -0500
parents e59d4322331b
children 6a06541c090c
files toys/pending/find.c toys/posix/find.c
diffstat 2 files changed, 294 insertions(+), 363 deletions(-) [+]
line wrap: on
line diff
--- a/toys/pending/find.c	Mon Jul 14 05:44:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,363 +0,0 @@
-/* vi: set sw=4 ts=4:
- *
- * find.c - find files matching a criteria
- *
- * Copyright 2012 Tim Bird <tbird20d@gmail.com>
- *
- * See http://opengroup.org/onlinepubs/9699919799/utilities/find.html
-
-USE_FIND(NEWTOY(find, "?", TOYFLAG_USR|TOYFLAG_BIN))
-
-config FIND
-  bool "find"
-  default n
-  help
-    usage: find [DIR] [<options>]
-
-    -name <pattern>    match pattern
-    -type [bcdflps]    match file type
-    !, -a, -o          not, and , or
-    (, )               group expressions  
-    -mtime [-+]n       match file modification time (to within 24 hours)
-    \t\t               +=greater (older) than, -=less (younger) than
-
-    File types:
-    b  block device  d  directory     l  symlink  s  socket
-    c  char device   f  regular file  p  FIFO
-*/
-
-/* To Do:
- * -exec action
- */ 
-
-#define FOR_find
-#include "toys.h"
-
-struct filter_node {
-  struct filter_node *next;
-  int op;
-  union {
-    char *name_regex;
-    struct {
-      int sign;
-      time_t time;
-    } t;
-    mode_t type;
-    struct {
-      int arg_path_index;
-      char **exec_args;
-    } e;
-  } data;
-};
-
-GLOBALS(
-  char *dir;
-  struct filter_node *filter_root;
-  time_t itime;
-)
-
-/* filter operation types */
-#define OP_UNKNOWN	0
-#define OP_OR		2
-#define OP_AND		3
-#define OP_NOT		4
-
-#define LPAREN		1 
-#define RPAREN		5 
-
-#define MAX_OP		5
-
-#define CHECK_NAME	7
-#define CHECK_MTIME	8
-#define CHECK_TYPE	9
-
-#define ACTION_PRINT	20
-#define ACTION_PRINT0	21
-#define ACTION_EXEC	22
-
-#define IS_ACTION(x)    (x >= 20)
-
-/* executes the command for a filter node
-   returns 0 for failure or 1 for success
- */
-static int do_exec(struct filter_node *filter, struct dirtree *node)
-{
-  char *path = NULL;
-  char **arg_array;
-  pid_t pid;
-  int status;
-
-  arg_array = filter->data.e.exec_args;
-  if (filter->data.e.arg_path_index) {
-    int plen;
-    path = dirtree_path(node, &plen);
-    arg_array[filter->data.e.arg_path_index] = path;
-  }
-
-  if (!(pid = xfork())) xexec(arg_array);
-  free(path);
-  waitpid(pid, &status, 0);
-
-  return !WEXITSTATUS(status);
-}
-
-/* prefix evaluator */
-/* returns 0 for failure or 1 for success */
-static int evaluate(struct filter_node *filter, struct dirtree *node,
-  struct filter_node **fnext)
-{
-  int result;
-  int op;
-  static int skip = 0;
-
-  /* if no filters, success */
-  if (!filter) {
-    *fnext = NULL;
-    return 1;
-  }
-  op = filter->op;
-
-  if (op==OP_NOT) return !evaluate(filter->next, node, fnext);
-
-  if (op==OP_OR || op==OP_AND) {
-    result = evaluate(filter->next, node, fnext);
-    if(!skip) {
-      if (op==OP_OR) {
-        skip = result;
-        result = evaluate(*fnext, node, fnext) || result;
-      } else {
-        skip = !result;
-        result = evaluate(*fnext, node, fnext) && result;
-      }
-      skip = 0;
-    }
-    else result = evaluate(*fnext, node, fnext);
-    return result;
-  }
-
-  // we're down to single operations, mark our position
-  *fnext = filter->next;
-  if(skip) return 0;
-
-  // TODO: Do a regex comparison
-  if (op==CHECK_NAME)
-    return !strcmp(filter->data.name_regex, node->name);
-
-  if (op==CHECK_MTIME) {
-    long delta = (long)(((double)TT.itime - node->st.st_mtime) / 86400)
-                 -filter->data.t.time;
-    return filter->data.t.sign == (delta > 0) - (delta < 0);
-  }
-  if (op==CHECK_TYPE) return (node->st.st_mode & S_IFMT) == filter->data.type;
-
-
-  if (op==ACTION_PRINT || op==ACTION_PRINT0) {
-    char *path;
-    int plen = 0;
-    char terminator = (op==ACTION_PRINT)*'\n';
-
-    path = dirtree_path(node, &plen);
-    printf("%s%c", path, terminator);
-    free(path);
-    return 1;
-  }
-
-  if (op==ACTION_EXEC) return !do_exec(filter, node);
-
-  error_msg("Ran out of operations in filter list!");
-  return 1;
-}
-
-static int check_node_callback(struct dirtree *node)
-{
-  struct filter_node *junk;
-
-  /* only recurse on "." at the root */
-  /* so, don't recurse on 1) non-root "." and 2) any ".." */
-
-  if (node->name[0] == '.' && 
-    ((!node->name[1] && node->parent) ||
-    (node->name[1]=='.' && !node->name[2])))
-    return 0;
-
-  evaluate(TT.filter_root, node, &junk);
-  return DIRTREE_RECURSE;
-}
-
-
-static void build_filter_list(void)
-{
-  struct filter_node *node_list, *op_stack, *node, *op_node, *next;
-  char **arg;
-  int j;
-  int prevop = 0;
-  int have_action = 0;
-
-  /* part optargs here and build a filter list in prefix format */
-
-  TT.dir = ".";
-  node_list = NULL;
-  for (arg = toys.optargs; *arg; arg++) {
-    struct {
-      char *arg;
-      int op;
-      int extrarg;
-    } arg_map[] = {{"-o", OP_OR, 0},
-                   {"-a", OP_AND, 0},
-                   {"!", OP_NOT, 0},
-                   {"(", LPAREN, 0},
-                   {")", RPAREN, 0},
-                   {"-name", CHECK_NAME, 1},
-                   {"-mtime", CHECK_MTIME, 1},
-                   {"-type", CHECK_TYPE, 1},
-                   {"-print", ACTION_PRINT, 0},
-                   {"-print0", ACTION_PRINT0, 0},
-                   {"-exec", ACTION_EXEC, 1}
-    };
-    mode_t types[]={S_IFREG,S_IFDIR,S_IFCHR,S_IFBLK,S_IFLNK,S_IFSOCK,S_IFIFO};
-    char **arg_array;
-    node = (struct filter_node *) xmalloc(sizeof(struct filter_node));
-    node->op = OP_UNKNOWN;
-    for (j=0; j < sizeof(arg_map)/sizeof(*arg_map); j++) {
-      if (!strcmp(*arg, arg_map[j].arg)) {
-        node->op = arg_map[j].op;
-        if (arg_map[j].extrarg && !*(++arg))
-	   error_exit("Missing argument to %s", arg_map[j].arg);
-	break;
-      }
-    }
-
-    switch(node->op) {
-      case CHECK_NAME:
-        node->data.name_regex = *arg;
-        break;
-      case CHECK_MTIME:
-        switch(**arg) {
-          case '+':
-            node->data.t.sign=+1;
-            (*arg)++;
-            break;
-          case '-':
-            node->data.t.sign=-1;
-            (*arg)++;
-            break;
-          default:
-            node->data.t.sign=0;
-            break;
-        }
-        node->data.t.time = atoi(*arg);
-        break;
-      case CHECK_TYPE:
-        if (-1 == (j = stridx("fdcblsp", **arg)))
-          error_exit("bad type '%s'", *arg);
-        else node->data.type = types[j];
-        break;
-      case ACTION_EXEC:
-        arg_array = xmalloc(sizeof(char *));
-        for (j = 0; *arg && strcmp(*arg, ";"); j++) {
-          /* new method */
-          arg_array = xrealloc(arg_array, sizeof(char *) * (j+2));
-          arg_array[j] = *arg;
-          if (!strcmp(*arg, "{}")) node->data.e.arg_path_index = j;
-
-          arg++;
-        }
-        if (!*arg) error_exit("need ';' in exec");
-        arg_array[j] = 0;
-        node->data.e.exec_args = arg_array;
-        break;
-    }
-
-    have_action |= IS_ACTION(node->op);
-
-    if (node->op == OP_UNKNOWN) {
-      if (**arg == '-') error_exit("bad option '%s'", *arg);
-      else TT.dir = *arg;
-    }  else {
-      // add OP_AND where necessary
-      if (node_list) {
-        int o1 = node_list->op, o2 = node->op;
-        if ((o1>MAX_OP && o2>MAX_OP) ||
-           (o1==RPAREN && o2>=OP_NOT && o2!=RPAREN) ||
-           (o1>=OP_NOT && o1!=LPAREN && o2==LPAREN)) {
-          struct filter_node *n = (struct filter_node *)
-                                  xmalloc(sizeof(struct filter_node));
-          n->op = OP_AND;
-          n->next = node_list;
-          node_list = n;
-        }
-      }
-      /* push node */
-      node->next = node_list;;
-      node_list = node;
-    }
-
-  }
-
-  /* now convert from infix to prefix */
-  if(have_action) TT.filter_root = NULL;
-  else TT.filter_root = &(struct filter_node){0, ACTION_PRINT};
-  op_stack = NULL;
-  node = node_list;
-  while( node ) {
-    int op = node->op;
-    next = node->next;
-    if (op==LPAREN || op==RPAREN) {
-      free(node);
-      node = 0;
-    }
-    if (op<=MAX_OP) {
-      if (prevop > op) {
-        /* pop opstack to output */
-        op_node = op_stack;
-        while (op_node) {
-          /* remove from op_stack */
-          op_stack = op_node->next;
-          /* push to output */
-          op_node->next = TT.filter_root;
-          TT.filter_root = op_node;
-          /* get next node */
-          op_node = op_stack;
-        }
-      }
-      if (node) {
-        /* push to opstack */
-        node->next = op_stack;
-        op_stack = node;
-      }
-      prevop = op*(op!=RPAREN);
-    }
-    else {
-        /* push to output */
-        node->next = TT.filter_root;
-        TT.filter_root = node;
-    }
-    node = next;
-  }
-  /* end of input - push opstack to output */
-  /* pop opstack to output till empty */
-  op_node = op_stack;
-  while (op_node) {
-    op_stack = op_node->next;
-    op_node->next = TT.filter_root;
-    TT.filter_root = op_node;
-    op_node = op_stack;
-  }
-  if(!have_action) {
-    node = TT.filter_root;
-    TT.filter_root = (struct filter_node*) xmalloc(sizeof(struct filter_node));
-    TT.filter_root->next = node;
-    TT.filter_root->op = OP_AND;
-  }
-}
-
-void find_main(void)
-{
-  TT.itime = time(NULL);
-  /* parse filters, if present */
-  build_filter_list();
-
-  /* FIXTHIS - parse actions, if present */
-
-  dirtree_read(TT.dir, check_node_callback);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toys/posix/find.c	Wed Jul 16 20:43:58 2014 -0500
@@ -0,0 +1,294 @@
+/* find.c - Search directories for matching files.
+ *
+ * Copyright 2014 Rob Landley <rob@landley.net>
+ *
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.c
+ * Our "unspecified" behavior for no paths is to use "."
+ * Parentheses can only stack 4096 deep
+
+USE_FIND(NEWTOY(find, "^HL", TOYFLAG_USR|TOYFLAG_BIN))
+
+config FIND
+  bool "find"
+  default n
+  help
+    usage: find [-HL] [DIR...] [<options>]
+
+    Search directories for matching files.
+    (Default is to search "." match all and display matches.)
+
+    -H  Follow command line symlinks
+    -L  Follow all symlinks
+
+    Match filters:
+    -name <pattern>    filename (with wildcards)
+    -path <pattern>    path name (with wildcards)
+    -nouser            belongs to unknown user
+    -nogroup           belongs to unknown group
+    -xdev              do not cross into new filesystems
+    -prune             do not descend into children
+    -perm  MODE        permissons (prefixed with - means at least these)
+    -iname PATTERN     case insensitive filename
+    -links N           hardlink count
+    -user  UNAME       belongs to user
+    -group GROUP       belongs to group
+    -size  N[c]
+    -atime N
+    -ctime N
+    -type  [bcdflps]   type (block, char, dir, file, symlink, pipe, socket)
+    -mtime N           last modified N (24 hour) days ago
+
+    Numbers N may be prefixed by a - (less than) or + (greater than)
+
+    Combine matches with:
+    !, -a, -o, ( )    not, and, or, group expressions
+
+
+    Actions:
+    -exec
+    -print
+    -print0
+*/
+
+// find . ! \( -name blah -print \)
+// find . -o
+
+#define FOR_find
+#include "toys.h"
+
+GLOBALS(
+  char **filter;
+  struct double_list *argdata;
+  int xdev, depth;
+  time_t now;
+)
+
+// Return numeric value with explicit sign
+static int compare_numsign(long val, long units, char *str)
+{
+  char sign = 0;
+  long myval;
+
+  if (*str == '+' || *str == '-') sign = *(str++);
+  else if (!isdigit(*str)) error_exit("%s not [+-]N", str);
+  myval = atolx(str);
+  if (units && isdigit(str[strlen(str)-1])) myval *= units;
+
+  if (sign == '+') return val > myval;
+  if (sign == '-') return val < myval;
+  return val == myval;
+}
+
+static void do_print(struct dirtree *new, char c)
+{
+  char *s=dirtree_path(new, 0);
+
+  xprintf("%s%c", s, c);
+  free(s);
+}
+
+void todo_store_argument(void)
+{
+  error_exit("NOP");
+}
+
+// pending issues:
+// old false -a ! new false does not yield true.
+// 
+// -user -group -newer evaluate once and save result (where?)
+// add -print if no action (-exec, -ok, -print)
+// find . -print -xdev (should xdev before print)
+
+// Call this with 0 for first pass argument parsing, syntax checking.
+static int do_find(struct dirtree *new)
+{
+  int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
+  char *s, **ss;
+
+  recurse = DIRTREE_RECURSE|((toys.optflags&FLAG_L) ? DIRTREE_SYMFOLLOW : 0);
+
+  // skip . and .. below topdir, handle -xdev and -depth
+  if (active) {
+    if (new->parent) {
+      if (!dirtree_notdotdot(new)) return 0;
+      if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) return 0;
+    }
+    if (TT.depth && S_ISDIR(new->st.st_dev) && new->data != -1)
+      return DIRTREE_COMEAGAIN;
+  }
+
+  // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
+  // test: result of most recent test
+  // active: if 0 don't perform tests
+  // not: a pending ! applies to this test (only set if performing tests)
+  // print: saw one of print/ok/exec, no need for default -print
+
+  if (TT.filter) for (ss = TT.filter; *ss; ss++) {
+    int check = active && test;
+
+    s = *ss;
+
+    // handle ! ( ) using toybuf as a stack
+    if (*s != '-') {
+      if (s[1]) goto error;
+
+      if (*s == '!') {
+        // Don't invert if we're not making a decision
+        if (check) not = !not;
+
+      // Save old "not" and "active" on toybuf stack.
+      // Deactivate this parenthetical if !test
+      // Note: test value should never change while !active
+      } else if (*s == '(') {
+        if (pcount == sizeof(toybuf)) goto error;
+        toybuf[pcount++] = not+(active<<1);
+        if (!check) active = 0;
+
+      // Pop status, apply deferred not to test
+      } else if (*s == ')') {
+        if (--pcount < 0) goto error;
+        // Pop active state, apply deferred not (which was only set if checking)
+        active = (toybuf[pcount]>>1)&1;
+        if (active && (toybuf[pcount]&1)) test = !test;
+        not = 0;
+      } else goto error;
+
+      continue;
+    } else s++;
+
+    if (!strcmp(s, "xdev")) TT.xdev = 1;
+    else if (!strcmp(s, "depth")) TT.depth = 1;
+    else if (!strcmp(s, "o") || !strcmp(s, "or")) {
+      if (not) goto error;
+      if (active) {
+        if (!test) test = 1;
+        else active = 0;     // decision has been made until next ")"
+      }
+
+    // Mostly ignore NOP argument
+    } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
+      if (not) goto error;
+
+    } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
+      print++;
+      if (check) do_print(new, s[6] ? 0 : '\n');
+
+    } else if (!strcmp(s, "nouser")) {
+      if (check) if (getpwuid(new->st.st_uid)) test = 0;
+    } else if (!strcmp(s, "nogroup")) {
+      if (check) if (getgrgid(new->st.st_gid)) test = 0;
+    } else if (!strcmp(s, "prune")) {
+      if (check && S_ISDIR(new->st.st_dev) && !TT.depth) recurse = 0;
+
+    // Remaining filters take an argument
+    } else {
+
+      if (!strcmp(s, "name") || !strcmp(s, "iname")) {
+        if (check) {
+          if (*s == 'i') todo_store_argument();
+//            if (!new) {
+//            } else {
+//              name = xstrdup(name);
+//              while (
+          test = !fnmatch(ss[1], new->name, 0);
+        }
+      } else if (!strcmp(s, "path")) {
+        if (check) {
+          char *path = dirtree_path(new, 0);
+          int len = strlen(ss[1]);
+
+          if (strncmp(path, ss[1], len) || (ss[1][len] && ss[1][len] != '/'))
+            test = 0;
+          free(s);
+        }
+      } else if (!strcmp(s, "perm")) {
+        if (check) {
+          char *m = ss[1];
+          mode_t m1 = string_to_mode(m+(*m == '-'), 0),
+                 m2 = new->st.st_dev & 07777;
+
+          if (*m != '-') m2 &= m1;
+          test = m1 == m2;
+        }
+      } else if (!strcmp(s, "type")) {
+        if (check) {
+          char c = stridx("bcdlpfs", *ss[1]);
+          int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
+                         S_IFREG, S_IFSOCK};
+
+          if ((new->st.st_dev & S_IFMT) != types[c]) test = 0;
+        }
+
+      } else if (!strcmp(s, "atime")) {
+        if (check)
+          test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
+      } else if (!strcmp(s, "ctime")) {
+        if (check)
+          test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
+      } else if (!strcmp(s, "mtime")) {
+        if (check)
+          test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
+      } else if (!strcmp(s, "size")) {
+        if (check)
+          test = compare_numsign(new->st.st_size, 512, ss[1]);
+      } else if (!strcmp(s, "links")) {
+        if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
+      } else if (!strcmp(s, "user")) {
+        todo_store_argument();
+      } else if (!strcmp(s, "group")) {
+        todo_store_argument();
+      } else if (!strcmp(s, "newer")) {
+        todo_store_argument();
+      } else if (!strcmp(s, "exec") || !strcmp("ok", s)) {
+        print++;
+        if (check) error_exit("implement exec/ok");
+      } else goto error;
+
+      // This test can go at the end because we do a syntax checking
+      // pass first. Putting it here gets the error message (-unknown
+      // vs -known noarg) right.
+      if (!*++ss) error_exit("'%s' needs 1 arg", --s);
+    }
+
+    // Apply pending "!" to result
+    if (active && not) test = !test;
+    not = 0;
+  }
+
+  // If there was no action, print
+  if (!print && test && new) do_print(new, '\n');
+
+  return recurse;
+
+error:
+  error_exit("bad arg '%s'", *ss);
+}
+
+void find_main(void)
+{
+  int i, len;
+  char **ss = toys.optargs;
+
+  // Distinguish paths from filters
+  for (len = 0; toys.optargs[len]; len++)
+    if (strchr("-!(", *toys.optargs[len])) break;
+  TT.filter = toys.optargs+len;
+
+  // use "." if no paths
+  if (!*ss) {
+    ss = (char *[]){"."};
+    len = 1;
+  }
+
+  // first pass argument parsing, verify args match up, handle "evaluate once"
+  TT.now = time(0);
+  do_find(0);
+
+  // Loop through paths
+  for (i = 0; i < len; i++) {
+    struct dirtree *new;
+
+    new = dirtree_add_node(0, ss[i], toys.optflags&(FLAG_H|FLAG_L));
+    if (new) dirtree_handle_callback(new, do_find);
+  }
+}