From 5262c5387426f645d9df9f0398ea77965d437112 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Wed, 28 Sep 2022 01:27:26 -0500 Subject: [PATCH] Add sed --tarxform mode to wrap a simple protocol around transforms, and have tar --xform use it. --- tests/sed.test | 1 + tests/tar.test | 4 +++ toys/posix/sed.c | 80 +++++++++++++++++++++++++++++++++--------------- toys/posix/tar.c | 45 ++++++++++++++++++--------- 4 files changed, 91 insertions(+), 39 deletions(-) diff --git a/tests/sed.test b/tests/sed.test index 269012df..dc7685a5 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -178,6 +178,7 @@ testcmd '-z S' "-z 'N;P'" 'one\0one\0two' '' 'one\0two' testcmd '-z D' "-z 'N;D'" 'two' '' 'one\0two' testcmd '-z G' "-z 'h;G'" 'one\0one' '' 'one' testcmd '-z H' "-z 'H;g'" '\0one' '' 'one' +toyonly testcmd '-z x NOEOL' '-z ax' 'abc\0x\0def\0x\0' '' 'abc\0def' # toybox handling of empty capturing groups broke minjail. Check that we # correctly replace an empty capturing group with the empty string: diff --git a/tests/tar.test b/tests/tar.test index 4c87f210..cb246af6 100755 --- a/tests/tar.test +++ b/tests/tar.test @@ -316,9 +316,13 @@ mkdir -p one/two/three/four/five touch one/two/three/four/five/six testing "--strip" "$TAR one | tar t --strip=2 --show-transformed | grep six" \ "three/four/five/six\n" "" "" + +# toybox tar --xform depends on toybox sed +sed --tarxform '' /dev/null || SKIP=99 testing "--xform" "$TAR one --xform=s@three/four/@zero@ | tar t | grep six" \ "one/two/zerofive/six\n" "" "" rm -rf one +SKIP=0 if false then diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 70fe0b2f..24dd3499 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -18,8 +18,9 @@ * continuations for [abc], \; to end [abc] argument before end of line. 
* Explicit violations of stuff posix says NOT to do: N at EOF does default * print, l escapes \n + * Added --tarxform mode to support tar --xform -USE_SED(NEWTOY(sed, "(help)(version)e*f*i:;nErz(null-data)s[+Er]", TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLAG_NOHELP)) +USE_SED(NEWTOY(sed, "(help)(version)(tarxform)e*f*i:;nErz(null-data)s[+Er]", TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLAG_NOHELP)) config SED bool "sed" @@ -128,11 +129,11 @@ GLOBALS( // processed pattern list struct double_list *pattern; - char *nextline, *remember; + char *nextline, *remember, *tarxform; void *restart, *lastregex; long nextlen, rememberlen, count; int fdout, noeol; - unsigned xx; + unsigned xx, tarxlen; char delim; ) @@ -156,14 +157,22 @@ struct sedcmd { // Write out line with potential embedded NUL, handling eol/noeol static int emit(char *line, long len, int eol) { - int l, old = line[len]; - - if (TT.noeol && !writeall(TT.fdout, &TT.delim, 1)) return 1; + int l = len, old = line[len]; + + if (FLAG(tarxform)) { + TT.tarxform = xrealloc(TT.tarxform, TT.tarxlen+len+TT.noeol+eol); + if (TT.noeol) TT.tarxform[TT.tarxlen++] = TT.delim; + memcpy(TT.tarxform+TT.tarxlen, line, len); + TT.tarxlen += len; + if (eol) TT.tarxform[TT.tarxlen++] = TT.delim; + } else { + if (TT.noeol && !writeall(TT.fdout, &TT.delim, 1)) return 1; + if (eol) line[len++] = TT.delim; + if (!len) return 0; + l = writeall(TT.fdout, line, len); + if (eol) line[len-1] = old; + } TT.noeol = !eol; - if (eol) line[len++] = TT.delim; - if (!len) return 0; - l = writeall(TT.fdout, line, len); - if (eol) line[len-1] = old; if (l != len) { if (TT.fdout != 1) perror_msg("short write"); @@ -208,23 +217,35 @@ static void sed_line(char **pline, long plen) int file; char *str; } *append = 0; - char *line = TT.nextline; - long len = TT.nextlen; + char *line; + long len; struct sedcmd *command; int eol = 0, tea = 0; - // Ignore EOF for all files before last unless -i - if (!pline && !FLAG(i) && !FLAG(s)) return; - - // Grab next line for deferred 
processing (EOF detection: we get a NULL - // pline at EOF to flush last line). Note that only end of _last_ input - // file matches $ (unless we're doing -i). - TT.nextline = 0; - TT.nextlen = 0; - if (pline) { - TT.nextline = *pline; - TT.nextlen = plen; + if (FLAG(tarxform)) { + if (!pline) return; + + line = *pline; + len = plen; *pline = 0; + pline = 0; + } else { + line = TT.nextline; + len = TT.nextlen; + + // Ignore EOF for all files before last unless -i or -s + if (!pline && !FLAG(i) && !FLAG(s)) return; + + // Grab next line for deferred processing (EOF detection: we get a NULL + // pline at EOF to flush last line). Note that only end of _last_ input + // file matches $ (unless we're doing -i). + TT.nextline = 0; + TT.nextlen = 0; + if (pline) { + TT.nextline = *pline; + TT.nextlen = plen; + *pline = 0; + } } if (!line || !len) return; @@ -531,6 +552,8 @@ static void sed_line(char **pline, long plen) char *name; writenow: + if (FLAG(tarxform)) error_exit("tilt"); + // Swap out emit() context fd = TT.fdout; noeol = TT.noeol; @@ -577,6 +600,7 @@ writenow: done: if (line && !FLAG(n)) emit(line, len, eol); + // TODO: should "sed -z ax" use \n instead of NUL? 
if (dlist_terminate(append)) while (append) { struct append *a = append->next; @@ -585,7 +609,7 @@ done: // Force newline if noeol pending if (fd != -1) { - if (TT.noeol) xwrite(TT.fdout, "\n", 1); + if (TT.noeol) xwrite(TT.fdout, &TT.delim, 1); TT.noeol = 0; xsendfile(fd, TT.fdout); close(fd); @@ -596,6 +620,12 @@ done: append = a; } free(line); + + if (TT.tarxlen) { + dprintf(TT.fdout, "%08x", --TT.tarxlen); + writeall(TT.fdout, TT.tarxform, TT.tarxlen); + TT.tarxlen = 0; + } } // Callback called on each input file @@ -847,7 +877,6 @@ resume_s: long l; if (isspace(*line) && *line != '\n') continue; - if (0 <= (l = stridx("igpx", *line))) command->sflags |= 1<<l; else if (*line == 'I') command->sflags |= 1<<0; else if (!(command->sflags>>4) && 0<(l = strtol(line, &line, 10))) { @@ -987,6 +1016,7 @@ void sed_main(void) struct arg_list *al; char **args = toys.optargs; + if (FLAG(tarxform)) toys.optflags |= FLAG_z; if (!FLAG(z)) TT.delim = '\n'; // Lie to autoconf when it asks stupid questions, so configure regexes diff --git a/toys/posix/tar.c b/toys/posix/tar.c index 7893b259..34182387 100644 --- a/toys/posix/tar.c +++ b/toys/posix/tar.c @@ -61,7 +61,7 @@ GLOBALS( struct double_list *incl, *excl, *seen; struct string_list *dirs; char *cwd, **xfsed; - int fd, ouid, ggid, hlc, warn, sparselen, pid; + int fd, ouid, ggid, hlc, warn, sparselen, pid, xfpipe[2]; struct dev_ino archive_di; long long *sparse; time_t mtt; @@ -192,6 +192,23 @@ static void alloread(void *buf, int len) (*b)[len] = 0; } +static char *xform(char **name) +{ + char buf[9], *end; + off_t len; + + if (!TT.xform) return 0; + + buf[8] = 0; + if (dprintf(TT.xfpipe[0], "%s%c", *name, 0) != strlen(*name)+1 + || readall(TT.xfpipe[1], buf, 8) != 8 + || !(len = estrtol(buf, &end, 16)) || errno ||*end) error_exit("bad xform"); + xreadall(TT.xfpipe[1], *name = xmalloc(len+1), len); + (*name)[len] = 0; + + return *name; +} + // callback from dirtree to create archive static int add_to_tar(struct dirtree *node) { @@ -242,10 +259,7 @@ static int 
add_to_tar(struct dirtree *node) TT.warn = 0; } - // Note: linux sed doesn't add newline, so no need to remove it or use -z. - if (TT.xfsed) - if (!(hname = xfname = xrunread(TT.xfsed, hname))) error_exit("bad xform"); - + xfname = xform(&hname); if (TT.owner) st->st_uid = TT.ouid; if (TT.group) st->st_gid = TT.ggid; if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode); @@ -781,8 +795,7 @@ static void unpack_tar(char *first) // We accept --show-transformed but always do, so it's a NOP. name = TT.hdr.name; - if (TT.xfsed) { - if (!(name = xrunread(TT.xfsed, name))) error_exit("bad xform"); + if (xform(&name)) { free(TT.hdr.name); TT.hdr.name = name; } @@ -885,7 +898,7 @@ static char *get_archiver() void tar_main(void) { - char *s, **args = toys.optargs; + char *s, **xfsed, **args = toys.optargs; int len = 0, ii; // Needed when extracting to command @@ -927,13 +940,17 @@ void tar_main(void) struct arg_list *al; for (ii = 0, al = TT.xform; al; al = al->next) ii++; - TT.xfsed = xmalloc((ii+1)*2*sizeof(char *)); - TT.xfsed[0] = "sed"; - for (ii = 1, al = TT.xform; al; al = al->next) { - TT.xfsed[ii++] = "-e"; - TT.xfsed[ii++] = al->arg; + xfsed = xmalloc((ii+2)*2*sizeof(char *)); + xfsed[0] = "sed"; + xfsed[1] = "--tarxform"; + for (ii = 2, al = TT.xform; al; al = al->next) { + xfsed[ii++] = "-e"; + xfsed[ii++] = al->arg; } - TT.xfsed[ii] = 0; + xfsed[ii] = 0; + TT.xfpipe[0] = TT.xfpipe[1] = -1; + xpopen_both(xfsed, TT.xfpipe); + free(xfsed); } // nommu reentry for nonseekable input skips this, parent did it for us -- 2.39.2