From 1930098552665575539f12744677bcc9acebb570 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Thu, 29 Sep 2022 00:04:43 -0500 Subject: [PATCH] Fix grep fast path logic to handle initial \ and '^$' pattern. --- tests/grep.test | 5 +++++ toys/posix/grep.c | 16 ++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/grep.test b/tests/grep.test index d150987b..67cfecaa 100755 --- a/tests/grep.test +++ b/tests/grep.test @@ -204,3 +204,8 @@ seq 1 100002 | base64 > testfile testing "speed" "timeout 5 grep -f testfile testfile 2>/dev/null | wc -l" \ "10332\n" "" "" rm -f testfile + +# Fast path tests + +testcmd 'initial \' '\\.jar' 'bell.jar\n' '' 'bell.jar\n' +testcmd '^$' '^\$' '\n' '' 'one\n\ntwo\n' diff --git a/toys/posix/grep.c b/toys/posix/grep.c index c7e0064d..efaabe82 100644 --- a/toys/posix/grep.c +++ b/toys/posix/grep.c @@ -177,7 +177,7 @@ static void do_grep(int fd, char *name) if (ss!=start) continue; pp++; } - for (ii = 1; pp[ii] && ss[ii]; ii++) { + for (ii = 0; pp[ii] && ss[ii]; ii++) { if (!FLAG(F)) { if (pp[ii]=='.') continue; if (pp[ii]=='\\' && pp[ii+1]) pp++; @@ -418,13 +418,16 @@ static void parse_regex(void) // Convert to regex where appropriate for (last = &TT.e; *last;) { - if ('.'!=*(s = (*last)->arg) && !FLAG(F)) for (; *s; s++) { + // Can we use the fast path? + s = (*last)->arg; + if ('.'!=*s && !FLAG(F) && strcmp(s, "^$")) for (; *s; s++) { if (*s=='\\') { if (!s[1] || !strchr(special, *++s)) break; if (!FLAG(E) && *s=='(') break; } else if (*s>127 || strchr(special+4, *s)) break; } + // Add entry to fast path (literal-ish match) or slow path (regexec) if (!*s || FLAG(F)) last = &((*last)->next); else { struct reg *shoe; @@ -439,18 +442,19 @@ static void parse_regex(void) } dlist_terminate(TT.reg); - // Sort fixed patterns into buckets by first character + // Sort fast path patterns into buckets by first character for (al = TT.e; al; al = new) { new = al->next; key = '^'==*al->arg; - if ('$'==al->arg[key] && !al->arg[key+1]) key = 0; - else key = al->arg[key]; + if ('\\'==al->arg[key]) key++; + else if ('$'==al->arg[key] && !al->arg[key+1]) key++; + key = al->arg[key]; if (FLAG(i)) key = toupper(key); al->next = TT.fixed[key]; TT.fixed[key] = al; } - // Sort each fixed pattern set by length so first hit is longest match + // Sort each fast path pattern set by length so first hit is longest match if (TT.e) for (key = 0; key<256; key++) { if (!TT.fixed[key]) continue; for (len = 0, al = TT.fixed[key]; al; al = al->next) len++; -- 2.39.2