Mercurial > hg > toybox
comparison toys/pending/grep.c @ 1017:059e1f30b80b
Finish the grep rewrite and flesh out the test suite. Several of the grep tests fail with the Ubuntu version; I _think_ these are upstream bugs? (Second opinions welcome...)
author | Rob Landley <rob@landley.net> |
---|---|
date | Mon, 19 Aug 2013 03:17:51 -0500 |
parents | 8b49ff103af9 |
children |
comparison
equal
deleted
inserted
replaced
1016:9ee321b6edb5 | 1017:059e1f30b80b |
---|---|
2 * | 2 * |
3 * Copyright 2013 CE Strake <strake888 at gmail.com> | 3 * Copyright 2013 CE Strake <strake888 at gmail.com> |
4 * | 4 * |
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html | 5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html |
6 | 6 |
7 USE_GREP(NEWTOY(grep, "EFHabhinosvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN)) | 7 USE_GREP(NEWTOY(grep, "EFHabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN)) |
8 USE_GREP(OLDTOY(egrep, grep, OPTSTR_grep, TOYFLAG_BIN)) | 8 USE_GREP(OLDTOY(egrep, grep, OPTSTR_grep, TOYFLAG_BIN)) |
9 USE_GREP(OLDTOY(fgrep, grep, OPTSTR_grep, TOYFLAG_BIN)) | 9 USE_GREP(OLDTOY(fgrep, grep, OPTSTR_grep, TOYFLAG_BIN)) |
10 | 10 |
11 config GREP | 11 config GREP |
12 bool "grep" | 12 bool "grep" |
21 -e Regex to match. (May be repeated.) | 21 -e Regex to match. (May be repeated.) |
22 -f File containing regular expressions to match. | 22 -f File containing regular expressions to match. |
23 | 23 |
24 match type: | 24 match type: |
25 -E extended regex syntax -F fixed (match literal string) | 25 -E extended regex syntax -F fixed (match literal string) |
26 -i case insensitive -v invert match | 26 -i case insensitive -m stop after this many lines matched |
27 -w whole word (implies -E) -m stop after this many lines matched | 27 -r recursive (on dir) -v invert match |
28 -x whole line | 28 -w whole word (implies -E) -x whole line |
29 | 29 |
30 display modes: (default: matched line) | 30 display modes: (default: matched line) |
31 -c count of matching lines -l show matching filenames | 31 -c count of matching lines -l show matching filenames |
32 -o only matching part -q quiet (errors only) | 32 -o only matching part -q quiet (errors only) |
33 -s silent (no error msg) | 33 -s silent (no error msg) |
44 GLOBALS( | 44 GLOBALS( |
45 long m; | 45 long m; |
46 struct arg_list *f; | 46 struct arg_list *f; |
47 struct arg_list *e; | 47 struct arg_list *e; |
48 | 48 |
49 char *regstr; | 49 struct arg_list *regex; |
50 ) | 50 ) |
51 | 51 |
52 static void do_grep(int fd, char *name) | 52 static void do_grep(int fd, char *name) |
53 { | 53 { |
54 FILE *file = fdopen(fd, "r"); | 54 FILE *file = fdopen(fd, "r"); |
78 for (;;) | 78 for (;;) |
79 { | 79 { |
80 int rc = 0, skip = 0; | 80 int rc = 0, skip = 0; |
81 | 81 |
82 if (toys.optflags & FLAG_F) { | 82 if (toys.optflags & FLAG_F) { |
83 struct arg_list *seek; | 83 struct arg_list *seek, fseek; |
84 char *s = 0; | 84 char *s = 0; |
85 | 85 |
86 for (seek = TT.e; seek; seek = seek->next) { | 86 for (seek = TT.e; seek; seek = seek->next) { |
87 | 87 if (!*seek->arg) { |
88 seek = &fseek; | |
89 fseek.arg = s = line; | |
90 break; | |
91 } | |
88 if (toys.optflags & FLAG_i) { | 92 if (toys.optflags & FLAG_i) { |
89 long ll = strlen(seek->arg);; | 93 long ll = strlen(seek->arg);; |
90 | 94 |
91 // Alas, posix hasn't got strcasestr() | 95 // Alas, posix hasn't got strcasestr() |
92 for (s = line; *s; s++) if (!strncasecmp(s, seek->arg, ll)) break; | 96 for (s = line; *s; s++) if (!strncasecmp(s, seek->arg, ll)) break; |
131 printf("%s\n", name); | 135 printf("%s\n", name); |
132 free(line); | 136 free(line); |
133 fclose(file); | 137 fclose(file); |
134 return; | 138 return; |
135 } | 139 } |
140 if (toys.optflags & FLAG_o) | |
141 if (matches[which].rm_eo == matches[which].rm_so) | |
142 break; | |
143 | |
136 if (!(toys.optflags & FLAG_c)) { | 144 if (!(toys.optflags & FLAG_c)) { |
137 if (!(toys.optflags & FLAG_h)) printf("%s:", name); | 145 if (toys.optflags & FLAG_H) printf("%s:", name); |
138 if (toys.optflags & FLAG_n) printf("%d:", lcount); | 146 if (toys.optflags & FLAG_n) printf("%d:", lcount); |
139 if (toys.optflags & FLAG_b) | 147 if (toys.optflags & FLAG_b) |
140 printf("%ld:", offset + (start-line) + | 148 printf("%ld:", offset + (start-line) + |
141 ((toys.optflags & FLAG_o) ? matches[which].rm_so : 0)); | 149 ((toys.optflags & FLAG_o) ? matches[which].rm_so : 0)); |
142 if (!(toys.optflags & FLAG_o)) xputs(line); | 150 if (!(toys.optflags & FLAG_o)) xputs(line); |
156 if (mmatch) mcount++; | 164 if (mmatch) mcount++; |
157 if ((toys.optflags & FLAG_m) && mcount >= TT.m) break; | 165 if ((toys.optflags & FLAG_m) && mcount >= TT.m) break; |
158 } | 166 } |
159 | 167 |
160 if (toys.optflags & FLAG_c) { | 168 if (toys.optflags & FLAG_c) { |
161 if (!(toys.optflags & FLAG_h)) printf("%s:", name); | 169 if (toys.optflags & FLAG_H) printf("%s:", name); |
162 xprintf("%d\n", mcount); | 170 xprintf("%d\n", mcount); |
163 } | 171 } |
164 | 172 |
165 // loopfiles will also close the fd, but this frees an (opaque) struct. | 173 // loopfiles will also close the fd, but this frees an (opaque) struct. |
166 fclose(file); | 174 fclose(file); |
177 al = TT.f ? TT.f : TT.e; | 185 al = TT.f ? TT.f : TT.e; |
178 while (al) { | 186 while (al) { |
179 if (TT.f) s = ss = xreadfile(al->arg); | 187 if (TT.f) s = ss = xreadfile(al->arg); |
180 else s = ss = al->arg; | 188 else s = ss = al->arg; |
181 | 189 |
182 while (ss && *s) { | 190 do { |
183 ss = strchr(s, '\n'); | 191 ss = strchr(s, '\n'); |
184 if (ss) *(ss++) = 0; | 192 if (ss) *(ss++) = 0; |
185 new = xmalloc(sizeof(struct arg_list)); | 193 new = xmalloc(sizeof(struct arg_list)); |
186 new->next = list; | 194 new->next = list; |
187 new->arg = s; | 195 new->arg = s; |
188 list = new; | 196 list = new; |
189 s = ss; | 197 s = ss; |
190 } | 198 } while (ss && *s); |
191 al = al->next; | 199 al = al->next; |
192 if (!al && TT.f) { | 200 if (!al && TT.f) { |
193 TT.f = 0; | 201 TT.f = 0; |
194 al = TT.e; | 202 al = TT.e; |
195 } | 203 } |
196 } | 204 } |
197 TT.e = list; | 205 TT.e = list; |
198 | 206 |
199 if (!(toys.optflags & FLAG_F)) { | 207 if (!(toys.optflags & FLAG_F)) { |
200 int w = toys.optflags & FLAG_w; | 208 int w = toys.optflags & FLAG_w; |
209 char *regstr; | |
201 | 210 |
202 // Convert strings to one big regex | 211 // Convert strings to one big regex |
203 if (w) len = 36; | 212 if (w) len = 36; |
204 for (al = TT.e; al; al = al->next) len += strlen(al->arg)+1; | 213 for (al = TT.e; al; al = al->next) len += strlen(al->arg)+1; |
205 | 214 |
206 TT.regstr = s = xmalloc(len); | 215 regstr = s = xmalloc(len); |
207 if (w) s = stpcpy(s, "(^|[^_[:alnum:]])("); | 216 if (w) s = stpcpy(s, "(^|[^_[:alnum:]])("); |
208 for (al = TT.e; al; al = al->next) { | 217 for (al = TT.e; al; al = al->next) { |
209 s = stpcpy(s, al->arg); | 218 s = stpcpy(s, al->arg); |
210 if (!(toys.optflags & FLAG_E)) *(s++) = '\\'; | 219 if (!(toys.optflags & FLAG_E)) *(s++) = '\\'; |
211 *(s++) = '|'; | 220 *(s++) = '|'; |
212 } | 221 } |
213 *(s-=(1+!(toys.optflags & FLAG_E))) = 0; | 222 *(s-=(1+!(toys.optflags & FLAG_E))) = 0; |
214 if (w) strcpy(s, ")($|[^_[:alnum:]])"); | 223 if (w) strcpy(s, ")($|[^_[:alnum:]])"); |
215 | 224 |
216 w = regcomp((regex_t *)toybuf, TT.regstr, | 225 w = regcomp((regex_t *)toybuf, regstr, |
217 ((toys.optflags & FLAG_E) ? REG_EXTENDED : 0) | | 226 ((toys.optflags & FLAG_E) ? REG_EXTENDED : 0) | |
218 ((toys.optflags & FLAG_i) ? REG_ICASE : 0)); | 227 ((toys.optflags & FLAG_i) ? REG_ICASE : 0)); |
219 | 228 |
220 if (w) { | 229 if (w) { |
221 regerror(w, (regex_t *)toybuf, toybuf+sizeof(regex_t), | 230 regerror(w, (regex_t *)toybuf, toybuf+sizeof(regex_t), |
223 error_exit("bad REGEX: %s", toybuf); | 232 error_exit("bad REGEX: %s", toybuf); |
224 } | 233 } |
225 } | 234 } |
226 } | 235 } |
227 | 236 |
237 static int do_grep_r(struct dirtree *new) | |
238 { | |
239 char *name; | |
240 | |
241 if (new->parent && !dirtree_notdotdot(new)) return 0; | |
242 if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE; | |
243 | |
244 // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should. | |
245 if (new->parent && !(toys.optflags & FLAG_h)) toys.optflags |= FLAG_H; | |
246 | |
247 name = dirtree_path(new, 0); | |
248 do_grep(openat(dirtree_parentfd(new), new->name, 0), name); | |
249 free(name); | |
250 | |
251 return 0; | |
252 } | |
253 | |
228 void grep_main(void) | 254 void grep_main(void) |
229 { | 255 { |
256 char **ss; | |
257 | |
230 // Handle egrep and fgrep | 258 // Handle egrep and fgrep |
231 if (*toys.which->name == 'e' || (toys.optflags & FLAG_w)) | 259 if (*toys.which->name == 'e' || (toys.optflags & FLAG_w)) |
232 toys.optflags |= FLAG_E; | 260 toys.optflags |= FLAG_E; |
233 if (*toys.which->name == 'f') toys.optflags |= FLAG_F; | 261 if (*toys.which->name == 'f') toys.optflags |= FLAG_F; |
234 | 262 |
239 toys.optc--; | 267 toys.optc--; |
240 } | 268 } |
241 | 269 |
242 parse_regex(); | 270 parse_regex(); |
243 | 271 |
244 if (!(toys.optflags & FLAG_H) && (toys.optc < 2)) toys.optflags |= FLAG_h; | 272 if (!(toys.optflags & FLAG_h) && toys.optc>1) toys.optflags |= FLAG_H; |
245 | 273 |
246 toys.exitval = 1; | 274 toys.exitval = 1; |
247 if (toys.optflags & FLAG_s) { | 275 if (toys.optflags & FLAG_s) { |
248 close(2); | 276 close(2); |
249 xopen("/dev/null", O_RDWR); | 277 xopen("/dev/null", O_RDWR); |
250 } | 278 } |
251 loopfiles_rw(toys.optargs, O_RDONLY, 0, 1, do_grep); | 279 |
252 xexit(); | 280 if (toys.optflags & FLAG_r) { |
253 } | 281 for (ss=toys.optargs; *ss; ss++) { |
282 if (!strcmp(*ss, "-")) do_grep(0, *ss); | |
283 else dirtree_read(*ss, do_grep_r); | |
284 } | |
285 } else loopfiles_rw(toys.optargs, O_RDONLY, 0, 1, do_grep); | |
286 } |