Mercurial > hg > toybox
comparison toys/pending/sed.c @ 1532:bf2c5216d726 draft
Basic sed range support, enough for "sed -n 9,11p README" to work.
author | Rob Landley <rob@landley.net> |
---|---|
date | Fri, 24 Oct 2014 18:16:32 -0500 |
parents | 3eafa445c1a6 |
children | a6ef79b31829 |
comparison
equal
deleted
inserted
replaced
1531:3ff823086c99 | 1532:bf2c5216d726 |
---|---|
80 struct arg_list *f; | 80 struct arg_list *f; |
81 struct arg_list *e; | 81 struct arg_list *e; |
82 | 82 |
83 // processed pattern list | 83 // processed pattern list |
84 struct double_list *pattern; | 84 struct double_list *pattern; |
85 | |
86 char *nextline; | |
87 long nextlen, count; | |
88 int fdout, noeol; | |
85 ) | 89 ) |
86 | 90 |
87 struct step { | 91 struct step { |
88 struct step *next, *prev; | 92 struct step *next, *prev; |
89 | 93 |
91 long lmatch[2]; | 95 long lmatch[2]; |
92 regex_t *rmatch[2]; | 96 regex_t *rmatch[2]; |
93 | 97 |
94 // Action | 98 // Action |
95 char c; | 99 char c; |
100 | |
101 int hit; | |
96 }; | 102 }; |
97 | 103 |
104 // Write out line with potential embedded NUL, handling eol/noeol | |
105 static int emit(char *line, long len, int eol) | |
106 { | |
107 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1; | |
108 if (eol) line[len++] = '\n'; | |
109 TT.noeol = !eol; | |
110 if (len != writeall(TT.fdout, line, len)) { | |
111 perror_msg("short write"); | |
112 | |
113 return 1; | |
114 } | |
115 | |
116 return 0; | |
117 } | |
118 | |
119 // Do regex matching handling embedded NUL bytes in string. | |
120 static int ghostwheel(regex_t *preg, char *string, int nmatch, | |
121 regmatch_t pmatch[], int eflags) | |
122 { | |
123 // todo: this | |
124 return regexec(preg, string, nmatch, pmatch, eflags); | |
125 } | |
126 | |
98 // Apply pattern to line from input file | 127 // Apply pattern to line from input file |
99 static void do_line(char **pline, long len) | 128 static void sed_line(char **pline, long plen) |
100 { | 129 { |
101 printf("len=%ld line=%s\n", len, *pline); | 130 char *line = TT.nextline; |
131 long len = TT.nextlen; | |
132 struct step *logrus; | |
133 int eol = 0; | |
134 | |
135 // Grab next line for deferred processing (EOF detection, we get a NULL | |
136 // pline at EOF to flush last line). Note that only end of _last_ input | |
137 // file matches $ (unless we're doing -i). | |
138 if (pline) { | |
139 TT.nextline = *pline; | |
140 TT.nextlen = plen; | |
141 *pline = 0; | |
142 } | |
143 | |
144 if (!line || !len) return; | |
145 | |
146 if (line[len-1] == '\n') line[--len] = eol++; | |
147 TT.count++; | |
148 | |
149 for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next) { | |
150 char c = logrus->c; | |
151 | |
152 // Have we got a matching range for this rule? | |
153 if (logrus->lmatch || *logrus->rmatch) { | |
154 int miss = 0; | |
155 long lm; | |
156 regex_t *rm; | |
157 | |
158 // In a match that might end? | |
159 if (logrus->hit) { | |
160 if (!(lm = logrus->lmatch[1])) { | |
161 if (!(rm = logrus->rmatch[1])) logrus->hit = 0; | |
162 else { | |
163 // regex match end includes matching line, so defer deactivation | |
164 if (!ghostwheel(rm, line, 0, 0, 0)) miss = 1; | |
165 } | |
166 } else if (lm > 0 && lm < TT.count) logrus->hit = 0; | |
167 | |
168 // Start a new match? | |
169 } else { | |
170 if (!(lm = *logrus->lmatch)) { | |
171 if (!ghostwheel(*logrus->rmatch, line, 0, 0, 0)) logrus->hit++; | |
172 } else if (lm == TT.count) logrus->hit++; | |
173 } | |
174 | |
175 if (!logrus->hit) continue; | |
176 if (miss) logrus->hit = 0; | |
177 } | |
178 | |
179 // Process like the wind, bullseye! | |
180 | |
181 // todo: embedded NUL, eol | |
182 if (c == 'p') { | |
183 if (emit(line, len, eol)) break; | |
184 } else error_exit("what?"); | |
185 } | |
186 | |
187 if (!(toys.optflags & FLAG_n)) emit(line, len, eol); | |
188 | |
189 free(line); | |
102 } | 190 } |
103 | 191 |
104 // Genericish function, can probably get moved to lib.c | 192 // Genericish function, can probably get moved to lib.c |
105 | 193 |
106 // Iterate over lines in file, calling function. Function can write NULL to | 194 // Iterate over lines in file, calling function. Function can write NULL to |
122 fclose(fp); | 210 fclose(fp); |
123 } | 211 } |
124 | 212 |
125 // Iterate over newline delimited data blob (potentially with embedded NUL), | 213 // Iterate over newline delimited data blob (potentially with embedded NUL), |
126 // call function on each line. | 214 // call function on each line. |
127 static void chop_lines(char *data, long len, | 215 static void chop_lines(char *data, long len, void (*call)(char **p, long l)) |
128 void (*call)(char **pline, long len)) | |
129 { | 216 { |
130 long ll; | 217 long ll; |
131 | 218 |
132 for (ll = 0; ll < len; ll++) { | 219 for (ll = 0; ll < len; ll++) { |
133 if (data[ll] == '\n') { | 220 if (data[ll] == '\n') { |
144 if (len) call(&data, len); | 231 if (len) call(&data, len); |
145 } | 232 } |
146 | 233 |
147 static void do_sed(int fd, char *name) | 234 static void do_sed(int fd, char *name) |
148 { | 235 { |
149 do_lines(fd, name, do_line); | 236 int i = toys.optflags & FLAG_i; |
237 | |
238 if (i) { | |
239 // todo: rename dance | |
240 } | |
241 do_lines(fd, name, sed_line); | |
242 if (i) { | |
243 sed_line(0, 0); | |
244 | |
245 // todo: rename dance | |
246 } | |
150 } | 247 } |
151 | 248 |
152 // Translate primal pattern into walkable form. | 249 // Translate primal pattern into walkable form. |
153 static void jewel_of_judgement(char **pline, long len) | 250 static void jewel_of_judgement(char **pline, long len) |
154 { | 251 { |
155 struct step *corwin; | 252 struct step *corwin; |
156 char *line = *pline, *reg; | 253 char *line = *pline, *reg; |
157 int i; | 254 int i; |
158 | 255 |
159 while (isspace(*line)) line++; | 256 for (line = *pline;;line++) { |
160 if (*line == '#') return; | 257 while (isspace(*line)) line++; |
161 | 258 if (*line == '#') return; |
162 memset(toybuf, 0, sizeof(struct step)); | 259 |
163 corwin = (void *)toybuf; | 260 memset(toybuf, 0, sizeof(struct step)); |
164 reg = toybuf + sizeof(struct step); | 261 corwin = (void *)toybuf; |
165 | 262 reg = toybuf + sizeof(struct step); |
166 // Parse address range (if any) | 263 |
167 for (i = 0; i < 2; i++) { | 264 // Parse address range (if any) |
168 if (*line == ',') line++; | 265 for (i = 0; i < 2; i++) { |
169 else if (i) break; | 266 if (*line == ',') line++; |
170 | 267 else if (i) break; |
171 if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0); | 268 |
172 else if (*line == '$') { | 269 if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0); |
173 corwin->lmatch[i] = -1; | 270 else if (*line == '$') { |
174 line++; | 271 corwin->lmatch[i] = -1; |
175 } else if (*line == '/' || *line == '\\') { | 272 line++; |
176 char delim = *(line++), slash = 0, *to, *from; | 273 } else if (*line == '/' || *line == '\\') { |
177 | 274 char delim = *(line++), slash = 0, *to, *from; |
178 if (delim == '\\') { | 275 |
179 if (!*line) goto brand; | 276 if (delim == '\\') { |
180 slash = delim = *(line++); | 277 if (!*line) goto brand; |
181 } | 278 slash = delim = *(line++); |
182 | 279 } |
183 // Removing backslash escapes edits the source string, which could | 280 |
184 // be from the environment space via -e, which could screw up what | 281 // Removing backslash escapes edits the source string, which could |
185 // "ps" sees, and I'm ok with that. | 282 // be from the environment space via -e, which could screw up what |
186 for (to = from = line; *from != delim; *(to++) = *(from++)) { | 283 // "ps" sees, and I'm ok with that. |
187 if (!*from) goto brand; | 284 for (to = from = line; *from != delim; *(to++) = *(from++)) { |
188 if (*from == '\\') { | 285 if (!*from) goto brand; |
189 if (!from[1]) goto brand; | 286 if (*from == '\\') { |
190 | 287 if (!from[1]) goto brand; |
191 // Check escaped end delimiter before printf style escapes. | 288 |
192 if (from[1] == slash) from++; | 289 // Check escaped end delimiter before printf style escapes. |
193 else { | 290 if (from[1] == slash) from++; |
194 char c = unescape(from[1]); | 291 else { |
195 | 292 char c = unescape(from[1]); |
196 if (c) { | 293 |
197 *to = c; | 294 if (c) { |
198 from++; | 295 *to = c; |
296 from++; | |
297 } | |
199 } | 298 } |
200 } | 299 } |
201 } | 300 } |
202 } | 301 slash = *to; |
203 slash = *to; | 302 *to = 0; |
204 *to = 0; | 303 xregcomp(corwin->rmatch[i] = (void *)reg, line, |
205 xregcomp(corwin->rmatch[i] = (void *)reg, line, | 304 ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB); |
206 ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB); | 305 *to = slash; |
207 *to = slash; | 306 reg += sizeof(regex_t); |
208 reg += sizeof(regex_t); | 307 line = from + 1; |
209 } else break; | 308 } else break; |
210 } | 309 } |
211 | 310 |
212 while (isspace(*line)) line++; | 311 while (isspace(*line)) line++; |
213 | 312 |
214 if (!*line || !strchr("p", *line)) goto brand; | 313 if (!*line || !strchr("p", *line)) break; |
215 | 314 corwin->c = *(line++); |
216 // Add step to pattern | 315 |
217 corwin = xmalloc(reg-toybuf); | 316 // Add step to pattern |
218 memcpy(corwin, toybuf, reg-toybuf); | 317 corwin = xmalloc(reg-toybuf); |
219 dlist_add_nomalloc(&TT.pattern, (void *)corwin); | 318 memcpy(corwin, toybuf, reg-toybuf); |
220 | 319 dlist_add_nomalloc(&TT.pattern, (void *)corwin); |
221 return; | 320 |
321 while (isspace(*line)) line++; | |
322 if (!*line) return; | |
323 if (*line != ';') break; | |
324 } | |
222 | 325 |
223 brand: | 326 brand: |
224 | |
225 // Reminisce about chestnut trees. | 327 // Reminisce about chestnut trees. |
226 error_exit("bad pattern '%s'@%ld (%c)", *pline, line-*pline, *line); | 328 error_exit("bad pattern '%s'@%ld (%c)", *pline, line-*pline, *line); |
227 } | 329 } |
228 | 330 |
229 void sed_main(void) | 331 void sed_main(void) |
232 char **args = toys.optargs; | 334 char **args = toys.optargs; |
233 | 335 |
234 // Lie to autoconf when it asks stupid questions, so configure regexes | 336 // Lie to autoconf when it asks stupid questions, so configure regexes |
235 // that look for "GNU sed version %f" greater than some old buggy number | 337 // that look for "GNU sed version %f" greater than some old buggy number |
236 // don't fail us for not matching their narrow expectations. | 338 // don't fail us for not matching their narrow expectations. |
237 if (FLAG_version) { | 339 if (toys.optflags & FLAG_version) { |
238 xprintf("This is not GNU sed version 9.0\n"); | 340 xprintf("This is not GNU sed version 9.0\n"); |
239 return; | 341 return; |
240 } | 342 } |
241 | 343 |
242 // Need a pattern. If no unicorns about, fight dragon and take its eye. | 344 // Need a pattern. If no unicorns about, fight serpent and take its eye. |
243 if (!TT.e && !TT.f) { | 345 if (!TT.e && !TT.f) { |
244 if (!*toys.optargs) error_exit("no pattern"); | 346 if (!*toys.optargs) error_exit("no pattern"); |
245 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); | 347 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); |
246 } | 348 } |
247 | 349 for (dworkin = TT.e; dworkin; dworkin = dworkin->next) |
248 for (dworkin = TT.e; dworkin; dworkin = dworkin->next) { | |
249 chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement); | 350 chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement); |
250 } | 351 for (dworkin = TT.f; dworkin; dworkin = dworkin->next) |
251 | 352 do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement); |
252 for (dworkin = TT.f; dworkin; dworkin = dworkin->next) { | 353 dlist_terminate(TT.pattern); |
253 int fd = xopen(dworkin->arg, O_RDONLY); | 354 |
254 | 355 TT.fdout = 1; |
255 do_lines(fd, dworkin->arg, jewel_of_judgement); | |
256 } | |
257 | 356 |
258 // Inflict pattern upon input files | 357 // Inflict pattern upon input files |
259 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed); | 358 loopfiles_rw(args, O_RDONLY, 0, 0, do_sed); |
260 } | 359 |
360 if (!(toys.optflags & FLAG_i)) sed_line(0, 0); | |
361 } |