changeset 948:55e587acefa9

add grep
author Strake
date Fri, 12 Jul 2013 18:10:52 -0500
parents 9310246414a2
children 59d4d453296b
files lib/lib.c lib/lib.h toys/pending/grep.c
diffstat 3 files changed, 188 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/lib/lib.c	Thu Jul 11 22:38:29 2013 -0500
+++ b/lib/lib.c	Fri Jul 12 18:10:52 2013 -0500
@@ -1291,3 +1291,25 @@
          return rvalue; //Not reachable; to avoid waring message.
    }
 }
+
+/*
+ * strcat to mallocated buffer
+ * reallocate if need be
+ */
+char *astrcat (char *x, char *y) {
+  char *z;
+  z = x;
+  x = realloc (x, (x ? strlen (x) : 0) + strlen (y) + 1);
+  if (!x) return 0;
+  (z ? strcat : strcpy) (x, y);
+  return x;
+}
+
+/*
+ * astrcat, but die on failure
+ */
+char *xastrcat (char *x, char *y) {
+  x = astrcat (x, y);
+  if (!x) error_exit ("xastrcat");
+  return x;
+}
--- a/lib/lib.h	Thu Jul 11 22:38:29 2013 -0500
+++ b/lib/lib.h	Fri Jul 12 18:10:52 2013 -0500
@@ -192,3 +192,7 @@
 
 // cut helper functions
 unsigned long get_int_value(const char *numstr, unsigned lowrange, unsigned highrange);
+
+// grep helper functions
+char  *astrcat (char *, char *);
+char *xastrcat (char *, char *);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toys/pending/grep.c	Fri Jul 12 18:10:52 2013 -0500
@@ -0,0 +1,162 @@
+/* grep.c - print lines what match given regular expression
+ *
+ * Copyright 2013 CE Strake <strake888 at gmail.com>
+ *
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/
+ * See http://refspecs.linuxfoundation.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/cmdbehav.html
+
+USE_GREP(NEWTOY(grep, "EFhinovclqe*f*m#", TOYFLAG_BIN))
+
+config GREP
+  bool "grep"
+  default n
+  help
+    usage: grep [-clq] [-EFhinov] (-e RE | -f REfile | RE) [file...]
+
+    modes:
+      default: print lines from each file what match regular expression RE.
+      -c:   print the number of matching lines in each file.
+      -l:   print all matching file names.
+      -q:   print nil; quit with code 0 when match found.
+
+    flags:
+      -E:   extended RE syntax
+      -F:   fixed RE syntax, i.e. all characters literal
+      -h:   not print file name
+      -i:   case insensitive
+      -n:   print line numbers
+      -o:   print only matching part
+      -v:   invert match
+*/
+
+#define FOR_grep
+#include "toys.h"
+#include <regex.h>
+#include <err.h>
+
+/* could be in GLOBALS but so need initialization code */
+static int c = 1;
+
+static regex_t re; /* fails in GLOBALS */
+
+GLOBALS(
+  long mArgu;
+  struct arg_list *fArgu, *eArgu;
+  char mode;
+)
+
+static void do_grep (int fd, char *name) {
+  int n = 0, nMatch = 0;
+
+  for (;;) {
+    char *x, *y;
+    regmatch_t match;
+    int atBOL = 1;
+
+    x = get_rawline (fd, 0, '\n');
+    if (!x) break;
+    y = x;
+    n++; /* start at 1 */
+
+    while (regexec (&re, y, 1, &match, atBOL ? 0 : REG_NOTBOL) == 0) {
+      if (atBOL) nMatch++;
+      c = 0; atBOL = 0;
+      switch (TT.mode) {
+      case 'q':
+        exit (0);
+      case 'l':
+        if (!(toys.optflags & FLAG_h)) printf ("%s\n", name);
+        free (x);
+        return;
+      case 'c':
+        break;
+      default:
+        if (!(toys.optflags & FLAG_h)) printf ("%s:", name);
+        if ( (toys.optflags & FLAG_n)) printf ("%d:", n);
+        if (!(toys.optflags & FLAG_o)) fputs (x, stdout);
+        else {
+          y += match.rm_so;
+          printf ("%.*s\n", match.rm_eo - match.rm_so, y++);
+        }
+      }
+      if (!(toys.optflags & FLAG_o)) break;
+    }
+
+    free (x);
+
+    if ((toys.optflags & FLAG_m) && nMatch >= TT.mArgu) break;
+  }
+
+  if (TT.mode == 'c') printf ("%s:%d\n", name, nMatch);
+}
+
+char *regfix (char *re_xs) {
+  char *re_ys;
+  int ii, jj = 0;
+  re_ys = xmalloc (2*strlen (re_xs) + 1);
+  for (ii = 0; re_xs[ii]; ii++) {
+    if (strchr ("^.[]$()|*+?{}\\", re_xs[ii])) re_ys[jj++] = '\\';
+    re_ys[jj++] = re_xs[ii];
+  }
+  re_ys[jj] = 0;
+  return re_ys;
+}
+
+void buildRE (void) {
+  char *re_xs;
+
+  re_xs = 0;
+  for (; TT.eArgu; TT.eArgu = TT.eArgu -> next) {
+    if (toys.optflags & FLAG_F) TT.eArgu -> arg = regfix (TT.eArgu -> arg);
+    if (re_xs) re_xs = xastrcat (re_xs, "|");
+    re_xs = xastrcat (re_xs, TT.eArgu -> arg);
+  }
+  for (; TT.fArgu; TT.fArgu = TT.fArgu -> next) {
+    FILE *f;
+    char *x, *y;
+    size_t l;
+
+    f = xfopen (TT.fArgu -> arg, "r");
+    x = 0;
+    for (;;) {
+      if (getline (&x, &l, f) < 0) {
+        if (feof (f)) break;
+        err (2, "failed to read");
+      }
+      y = x + strlen (x) - 1;
+      if (y[0] == '\n') y[0] = 0;
+
+      y = toys.optflags & FLAG_F ? regfix (x) : x;
+      if (re_xs) re_xs = xastrcat (re_xs, "|");
+      re_xs = xastrcat (re_xs, y);
+      free (y);
+    }
+    free (x);
+    fclose (f);
+  }
+
+  if (!re_xs) {
+    if (toys.optc < 1) errx (2, "no RE");
+    re_xs = toys.optflags & FLAG_F ? regfix (toys.optargs[0]) : toys.optargs[0];
+    toys.optc--; toys.optargs++;
+  }
+
+  if (regcomp (&re, re_xs,
+               (toys.optflags & (FLAG_E | FLAG_F) ? REG_EXTENDED : 0) |
+               (toys.optflags &  FLAG_i           ? REG_ICASE    : 0)) != 0) {
+    errx (2, "bad RE");
+  }
+}
+
+void grep_main (void) {
+  buildRE ();
+
+  if (toys.optflags & FLAG_c) TT.mode = 'c';
+  if (toys.optflags & FLAG_l) TT.mode = 'l';
+  if (toys.optflags & FLAG_q) TT.mode = 'q';
+
+  if (toys.optc > 0) loopfiles (toys.optargs, do_grep);
+  else do_grep (0, "-");
+
+  exit (c);
+}