From 2611693169c017a8c867c4c971a3246e4c985b99 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Wed, 21 Sep 2022 22:57:30 -0500 Subject: [PATCH] Teach grep to autodetect fixed patterns and run fixed and regex tests together. --- tests/grep.test | 4 ++ toys/posix/grep.c | 108 ++++++++++++++++++++++++---------------------- 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/tests/grep.test b/tests/grep.test index dbab4cc0..cbaf7696 100755 --- a/tests/grep.test +++ b/tests/grep.test @@ -25,6 +25,10 @@ testing "-e -e" "grep -e one -e two -e three input" \ "two\ntwo\nthree\none\n" "two\ntwo\nthree\nand\none\n" "" testing "-F" "grep -F is input" "this is test\nthis is test2\n" \ "this is test\nthis is test2\ntest case" "" +testing "-Fo ''" "grep -Fo ''" "" "" "hello\n" +testing "-Fw ''" "grep -Fw ''" "" "" "hello\n" +testing "-Fw '' 2" "grep -Fw ''" "\n" "" "\n" +testing "-F is really fixed" "grep -F '.[x]'" "c.[x]d\n" "" "axb\nc.[x]d\n" echo -e "this is test\nthis is test2\ntest case" > foo echo -e "hello this is test" > foo2 diff --git a/toys/posix/grep.c b/toys/posix/grep.c index 0474f7db..c75e85b1 100644 --- a/toys/posix/grep.c +++ b/toys/posix/grep.c @@ -167,55 +167,53 @@ static void do_grep(int fd, char *name) // Loop to handle multiple matches in same line do { regmatch_t *mm = (void *)toybuf; + struct arg_list *seek, fseek; + int baseline = mm->rm_eo; + char *s = 0; + + mm->rm_so = mm->rm_eo = INT_MAX; + rc = 1; + + // Handle "fixed" (literal) matches (if any) + for (seek = TT.e; seek; seek = seek->next) { + if (FLAG(x)) { + if (!(FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line; + } else if (!*seek->arg) { + if (FLAG(o)) continue; + // No need to set fseek.next because this will match every line. + seek = &fseek; + fseek.arg = s = line; + mm->rm_so = mm->rm_eo = rc = 0; + break; + } else if (FLAG(i)) s = strcasestr(start, seek->arg); + else s = memmem(start, ulen-(start-line), seek->arg, strlen(seek->arg)); + + // TODO: literal matches don't have "best match" logic, just first hit. + if (s) { + if (!rc && ((s-start) > mm->rm_so || ((s-start)==mm->rm_so && (mm->rm_so+strlen(seek->arg) < mm->rm_eo)))) continue; + rc = 0; + mm->rm_so = mm->rm_eo = (s-start); + mm->rm_eo += strlen(seek->arg); + } + } - // Handle "fixed" (literal) matches - if (FLAG(F)) { - struct arg_list *seek, fseek; - char *s = 0; - - for (seek = TT.e; seek; seek = seek->next) { - if (FLAG(x)) { - if (!(FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line; - } else if (!*seek->arg) { - // No need to set fseek.next because this will match every line. - seek = &fseek; - fseek.arg = s = line; - } else if (FLAG(i)) s = strcasestr(start, seek->arg); - else s = strstr(start, seek->arg); - - if (s) break; + // Handle regex matches (if any) + for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) { + // Do we need to re-check this regex? + if (!shoe->rc) { + shoe->m.rm_so -= baseline; + shoe->m.rm_eo -= baseline; + if (!matched || shoe->m.rm_so<0) + shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1, + &shoe->m, start==line ? 0 : REG_NOTBOL); } - if (s) { + // If we got a match, is it a _better_ match? + if (!shoe->rc && (shoe->m.rm_so < mm->rm_so || + (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo))) + { + mm = &shoe->m; rc = 0; - mm->rm_so = (s-start); - mm->rm_eo = (s-start)+strlen(seek->arg); - } else rc = 1; - - // Handle regex matches - } else { - int baseline = mm->rm_eo; - - mm->rm_so = mm->rm_eo = INT_MAX; - rc = 1; - for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) { - - // Do we need to re-check this regex? - if (!shoe->rc) { - shoe->m.rm_so -= baseline; - shoe->m.rm_eo -= baseline; - if (!matched || shoe->m.rm_so<0) - shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1, - &shoe->m, start==line ? 0 : REG_NOTBOL); - } - - // If we got a match, is it a _better_ match? - if (!shoe->rc && (shoe->m.rm_so < mm->rm_so || - (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo))) - { - mm = &shoe->m; - rc = 0; - } } } @@ -370,7 +368,7 @@ static void do_grep(int fd, char *name) static void parse_regex(void) { - struct arg_list *al, *new, *list = NULL; + struct arg_list *al, *new, *list = NULL, **last; char *s, *ss; // Add all -f lines to -e list. (Yes, this is leaking allocation context for @@ -392,6 +390,7 @@ static void parse_regex(void) } if (!s) continue; + // NOTE: even with -z, -f is still \n delimited. Blank line = match all // Split lines at \n, add individual lines to new list. do { ss = FLAG(z) ? 0 : strchr(s, '\n'); @@ -405,18 +404,23 @@ static void parse_regex(void) } TT.e = list; - if (!FLAG(F)) { - // Convert regex list - for (al = TT.e; al; al = al->next) { + // Convert to regex where appropriate + for (last = &TT.e; *last;) { + for (s = (*last)->arg; *s; s++) + if (*s>127 || strchr("^.[$()|*+?{\\", *s)) break; + if (!*s || FLAG(F)) last = &((*last)->next); + else { struct reg *shoe; - if (FLAG(o) && !*al->arg) continue; dlist_add_nomalloc(&TT.reg, (void *)(shoe = xmalloc(sizeof(struct reg)))); - xregcomp(&shoe->r, al->arg, + xregcomp(&shoe->r, (*last)->arg, (REG_EXTENDED*!!FLAG(E))|(REG_ICASE*!!FLAG(i))); + al = *last; + *last = (*last)->next; + free(al); } - dlist_terminate(TT.reg); } + dlist_terminate(TT.reg); } static int do_grep_r(struct dirtree *new) -- 2.39.2