From ea52d3048af5ec5e612322bc9055853964153bd0 Mon Sep 17 00:00:00 2001
From: Rob Landley
Date: Sat, 21 May 2022 12:40:28 -0500
Subject: [PATCH] Add tar --xform.

Adds xrunread() to lib and makes mount use it instead of a local function.
---
 lib/lib.c        |   4 +-
 lib/lib.h        |   1 +
 lib/xwrap.c      |  28 +++++++++++++
 tests/tar.test   |  11 ++++++
 toys/lsb/mount.c |  35 +++++-----------
 toys/posix/tar.c | 101 +++++++++++++++++++++++++++++++----------------
 6 files changed, 118 insertions(+), 62 deletions(-)

diff --git a/lib/lib.c b/lib/lib.c
index cf0a9582..2807c402 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -449,9 +449,9 @@ char *strafter(char *haystack, char *needle)
 // Remove trailing \n
 char *chomp(char *s)
 {
-  char *p = s+strlen(s);
+  char *p;
 
-  while (p>=s && (p[-1]=='\r' || p[-1]=='\n')) *--p = 0;
+  if (s) for (p = s+strlen(s); p>s && (p[-1]=='\r' || p[-1]=='\n'); *--p = 0);
 
   return s;
 }
diff --git a/lib/lib.h b/lib/lib.h
index 34f069d4..178000ea 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -134,6 +134,7 @@ int xpclose_both(pid_t pid, int *pipes);
 pid_t xpopen(char **argv, int *pipe, int isstdout);
 pid_t xpclose(pid_t pid, int pipe);
 int xrun(char **argv);
+char *xrunread(char *argv[], char *to_stdin);
 int xpspawn(char **argv, int*pipes);
 void xaccess(char *path, int flags);
 void xunlink(char *path);
diff --git a/lib/xwrap.c b/lib/xwrap.c
index 65e9f4fe..e83351dd 100644
--- a/lib/xwrap.c
+++ b/lib/xwrap.c
@@ -384,6 +384,34 @@ int xrun(char **argv)
   return xpclose_both(xpopen_both(argv, 0), 0);
 }
 
+// Run child fed to_stdin: return malloc()ed stdout, NULL on error or no output.
+char *xrunread(char *argv[], char *to_stdin)
+{
+  char *result = 0;
+  int pipe[] = {-1, -1}, total = 0, len;
+  pid_t pid;
+
+  pid = xpopen_both(argv, pipe);
+  if (to_stdin && *to_stdin) writeall(*pipe, to_stdin, strlen(to_stdin));
+  close(*pipe);
+  for (;;) {
+    if (0>=(len = readall(pipe[1], libbuf, sizeof(libbuf)))) break;
+    memcpy((result = xrealloc(result, 1+total+len))+total, libbuf, len);
+    total += len;
+    if (len != sizeof(libbuf)) break;
+  }
+  if (result) result[total] = 0;
+  close(pipe[1]);
+
+  if (xwaitpid(pid)) {
+    free(result);
+
+    return 0;
+  }
+
+  return result;
+}
+
 void xaccess(char *path, int flags)
 {
   if (access(path, flags)) perror_exit("Can't access '%s'", path);
diff --git a/tests/tar.test b/tests/tar.test
index 034a3bfb..eb75d2cd 100755
--- a/tests/tar.test
+++ b/tests/tar.test
@@ -284,8 +284,19 @@ testing "pass ..dotsdir" "$TAR ..dotsdir | LST" \
   "drwxrwxr-x root/root 0 2009-02-13 23:31 ..dotsdir/\n" "" ""
 rmdir ..dotsdir
 
+mkdir -p one/two/three/four/five
+touch one/two/three/four/five/six
+testing "--strip" "$TAR one | tar t --strip=2 --show-transformed | grep six" \
+  "three/four/five/six\n" "" ""
+testing "--xform" "$TAR one --xform=s@three/four/@zero@ | tar t | grep six" \
+  "one/two/zerofive/six\n" "" ""
+rm -rf one
+
 if false
 then
+# Sequencing issues that leak implementation details out the interface
+testing "what order are --xform, --strip, and --exclude processed in?"
+testing "--xform vs ../ removal and adding / to dirs"
 
 chmod 700 dir
 tar cpf tar.tgz dir/file
diff --git a/toys/lsb/mount.c b/toys/lsb/mount.c
index 10e8e9e4..f5fdf2b4 100644
--- a/toys/lsb/mount.c
+++ b/toys/lsb/mount.c
@@ -145,25 +145,6 @@ static long flag_opts(char *new, long flags, char **more)
   return flags;
 }
 
-// Shell out to a program, returning the output string or NULL on error
-static char *tortoise(int loud, char **cmd)
-{
-  int rc, pipe, len;
-  pid_t pid;
-
-  pid = xpopen(cmd, &pipe, 1);
-  len = readall(pipe, toybuf, sizeof(toybuf)-1);
-  rc = xpclose(pid, pipe);
-  if (!rc && len > 1) {
-    if (toybuf[len-1] == '\n') --len;
-    toybuf[len] = 0;
-    return toybuf;
-  }
-  if (loud) error_msg("%s failed %d", *cmd, rc);
-
-  return 0;
-}
-
 static void mount_filesystem(char *dev, char *dir, char *type,
   unsigned long flags, char *opts)
 {
@@ -183,10 +164,11 @@ static void mount_filesystem(char *dev, char *dir, char *type,
   }
 
   if (strstart(&dev, "UUID=")) {
-    char *s = tortoise(0, (char
*[]){"blkid", "-U", dev, 0});
+    char *s = chomp(xrunread((char *[]){"blkid", "-U", dev, 0}, 0));
 
-    if (!s) return error_msg("No uuid %s", dev);
-    dev = s;
+    if (!s || strlen(s)>=sizeof(toybuf)) return error_msg("No uuid %s", dev);
+    strcpy(dev = toybuf, s);
+    free(s);
   }
 
   // Autodetect bind mount or filesystem type
@@ -266,10 +248,11 @@ static void mount_filesystem(char *dev, char *dir, char *type,
       // device, then do the loopback setup and retry the mount.
 
       if (rc && errno == ENOTBLK) {
-        dev = tortoise(1, (char *[]){"losetup",
-          (flags&MS_RDONLY) ? "-fsr" : "-fs", dev, 0});
-        if (!dev) break;
-        continue;
+        char *losetup[] = {"losetup", (flags&MS_RDONLY)?"-fsr":"-fs", dev, 0};
+
+        if ((dev = chomp(xrunread(losetup, 0)))) continue;
+        error_msg("%s failed", *losetup);
+        break;
       }
 
       free(buf);
diff --git a/toys/posix/tar.c b/toys/posix/tar.c
index 9cf1a5c9..a21e599f 100644
--- a/toys/posix/tar.c
+++ b/toys/posix/tar.c
@@ -17,7 +17,7 @@
  * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
  *
 
-USE_TAR(NEWTOY(tar, "&(strip-components)#(selinux)(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
+USE_TAR(NEWTOY(tar, "&(show-transformed-names)(selinux)(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):~(strip-components)(strip)#~(transform)(xform)*o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*I(use-compress-program):C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
config TAR bool "tar" @@ -41,6 +41,7 @@ config TAR --restrict All under one dir --no-recursion Skip dir contents --numeric-owner Use numeric uid/gid, not user/group names --strip-components NUM Ignore first NUM directory components when extracting + --xform=SED Modify filenames via SED expression (ala s/find/replace/g) -I PROG Filter through PROG to compress or PROG -d to decompress */ @@ -48,15 +49,15 @@ config TAR #include "toys.h" GLOBALS( - char *f, *C; - struct arg_list *T, *X; - char *I, *to_command, *owner, *group, *mtime, *mode; + char *f, *C, *I; + struct arg_list *T, *X, *xform; + long strip; + char *to_command, *owner, *group, *mtime, *mode; struct arg_list *exclude; - long strip_components; struct double_list *incl, *excl, *seen; struct string_list *dirs; - char *cwd; + char *cwd, **xfsed; int fd, ouid, ggid, hlc, warn, adev, aino, sparselen, pid; long long *sparse; time_t mtt; @@ -80,8 +81,9 @@ GLOBALS( } hdr; ) +// The on-disk 512 byte record structure. struct tar_hdr { - char name[100], mode[8], uid[8], gid[8],size[12], mtime[12], chksum[8], + char name[100], mode[8], uid[8], gid[8], size[12], mtime[12], chksum[8], type, link[100], magic[8], uname[32], gname[32], major[8], minor[8], prefix[155], padd[12]; }; @@ -194,8 +196,8 @@ static int add_to_tar(struct dirtree *node) struct tar_hdr hdr; struct passwd *pw = pw; struct group *gr = gr; - int i, fd = -1, norecurse = FLAG(no_recursion); - char *name, *lnk, *hname; + int i, fd = -1, recurse = 0; + char *name, *lnk, *hname, *xfname = 0; if (!dirtree_notdotdot(node)) return 0; if (TT.adev == st->st_dev && TT.aino == st->st_ino) { @@ -208,11 +210,7 @@ static int add_to_tar(struct dirtree *node) // exclusion defaults to --no-anchored and --wildcards-match-slash for (lnk = name; *lnk;) { - if (filter(TT.excl, lnk)) { - norecurse++; - - goto done; - } + if (filter(TT.excl, lnk)) goto done; while (*lnk && *lnk!='/') lnk++; while (*lnk=='/') lnk++; } @@ -241,6 +239,10 @@ static int add_to_tar(struct dirtree 
*node) TT.warn = 0; } + // Note: linux sed doesn't add newline, so no need to remove it or use -z. + if (TT.xfsed) + if (!(hname = xfname = xrunread(TT.xfsed, hname))) error_exit("bad xform"); + if (TT.owner) st->st_uid = TT.ouid; if (TT.group) st->st_gid = TT.ggid; if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode); @@ -348,6 +350,7 @@ static int add_to_tar(struct dirtree *node) // Before we write the header, make sure we can read the file if ((fd = open(name, O_RDONLY)) < 0) { perror_msg("can't open '%s'", name); + free(name); return 0; } @@ -420,10 +423,13 @@ static int add_to_tar(struct dirtree *node) if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512))); close(fd); } + recurse = !FLAG(no_recursion); + done: + free(xfname); free(name); - return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse; + return recurse*(DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0)); } static void wsettime(char *s, long long sec) @@ -518,18 +524,9 @@ error: close(fd); } -static void extract_to_disk(void) +static void extract_to_disk(char *name) { - char *name = TT.hdr.name; - int ala = TT.hdr.mode, strip; - - for (strip = 0; strip < TT.strip_components; strip++) { - char *s = strchr(name, '/'); - - if (s && s[1]) name = s+1; - else if (S_ISDIR(ala)) return; - else break; - } + int ala = TT.hdr.mode; if (dirflush(name, S_ISDIR(ala))) { if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size); @@ -610,7 +607,7 @@ static void unpack_tar(char *first) struct tar_hdr tar; int i, sefd = -1, and = 0; unsigned maj, min; - char *s; + char *s, *name; for (;;) { if (first) { @@ -776,9 +773,31 @@ static void unpack_tar(char *first) } } - // Skip excluded files - if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete)) + // Skip excluded files, filtering on the untransformed name. + if (filter(TT.excl, name = TT.hdr.name) || (TT.incl && !delete)) { skippy(TT.hdr.size); + goto done; + } + + // We accept --show-transformed but always do, so it's a NOP. 
+ name = TT.hdr.name; + if (TT.xfsed) { + if (!(name = xrunread(TT.xfsed, name))) error_exit("bad xform"); + free(TT.hdr.name); + TT.hdr.name = name; + } + + for (i = 0; itm_year, 1+lc->tm_mon, lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : ""); } - printf("%s", TT.hdr.name); + printf("%s", name); if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target); xputc('\n'); skippy(TT.hdr.size); } else { - if (FLAG(v)) printf("%s\n", TT.hdr.name); + if (FLAG(v)) printf("%s\n", name); if (FLAG(O)) sendfile_sparse(1); else if (FLAG(to_command)) { if (S_ISREG(TT.hdr.mode)) { @@ -810,7 +829,7 @@ static void unpack_tar(char *first) xsetenv("TAR_FILETYPE", "f"); xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0); xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0); - xsetenv("TAR_FILENAME", TT.hdr.name); + xsetenv("TAR_FILENAME", name); xsetenv("TAR_UNAME", TT.hdr.uname); xsetenv("TAR_GNAME", TT.hdr.gname); xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0); @@ -823,9 +842,10 @@ static void unpack_tar(char *first) fd = xpclose_both(pid, 0); if (fd) error_msg("%d: Child returned %d", pid, fd); } - } else extract_to_disk(); + } else extract_to_disk(name); } +done: if (sefd != -1) { // zero length write resets fscreate context to default (void)write(sefd, 0, 0); @@ -862,7 +882,7 @@ void tar_main(void) { char *s, **args = toys.optargs, *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? 
"xz":"bzip2")); - int len = 0; + int len = 0, ii; // Needed when extracting to command signal(SIGPIPE, SIG_IGN); @@ -898,6 +918,19 @@ void tar_main(void) TT.fd = 1; } + if (TT.xform) { + struct arg_list *al; + + for (ii = 0, al = TT.xform; al; al = al->next) ii++; + TT.xfsed = xmalloc((ii+1)*2*sizeof(char *)); + TT.xfsed[0] = "sed"; + for (ii = 1, al = TT.xform; al; al = al->next) { + TT.xfsed[ii++] = "-e"; + TT.xfsed[ii++] = al->arg; + } + TT.xfsed[ii] = 0; + } + // nommu reentry for nonseekable input skips this, parent did it for us if (toys.stacktop) { if (TT.f && strcmp(TT.f, "-")) -- 2.39.2