From 2b58072c464eb47f05adc8a69891bed65641cbb2 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 6 Dec 2024 14:53:26 -0500 Subject: [PATCH] file, tar: basic zstd awareness. We're seeing ever more zstd-compressed files in the wild, so even though toybox can't compress/decompress zstd without an external helper, it still seems useful to integrate with any that happens to be on the system. --- toys/posix/file.c | 2 ++ toys/posix/tar.c | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/toys/posix/file.c b/toys/posix/file.c index 566daf1d..0f3af847 100644 --- a/toys/posix/file.c +++ b/toys/posix/file.c @@ -322,6 +322,8 @@ static void do_regular_file(int fd, char *name) xprintf("bzip2 compressed data, block size = %c00k\n", *s); else if (len>31 && peek_be(s, 7) == 0xfd377a585a0000ULL) xputs("xz compressed data"); + else if (len>10 && strstart(&s, "\x28\xb5\x2f\xfd")) + xputs("zstd compressed data"); else if (len>10 && strstart(&s, "\x1f\x8b")) xputs("gzip compressed data"); else if (len>32 && !smemcmp(s+1, "\xfa\xed\xfe", 3)) { int bit = (*s==0xce) ? 32 : 64; diff --git a/toys/posix/tar.c b/toys/posix/tar.c index 998e668d..287c82c6 100644 --- a/toys/posix/tar.c +++ b/toys/posix/tar.c @@ -20,7 +20,7 @@ * No --no-null because the args infrastructure isn't ready. * Until args.c learns about no- toggles, --no-thingy always wins over --thingy -USE_TAR(NEWTOY(tar, "&(one-file-system)(no-ignore-case)(ignore-case)(no-anchored)(anchored)(no-wildcards)(wildcards)(no-wildcards-match-slash)(wildcards-match-slash)(show-transformed-names)(selinux)(restrict)(full-time)(no-recursion)(null)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(sort);:(mode):(mtime):(group):(owner):(to-command):~(strip-components)(strip)#~(transform)(xform)*o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*I(use-compress-program):C(directory):f(file):as[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_UMASK)) +USE_TAR(NEWTOY(tar, "&(zstd)(one-file-system)(no-ignore-case)(ignore-case)(no-anchored)(anchored)(no-wildcards)(wildcards)(no-wildcards-match-slash)(wildcards-match-slash)(show-transformed-names)(selinux)(restrict)(full-time)(no-recursion)(null)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(sort);:(mode):(mtime):(group):(owner):(to-command):~(strip-components)(strip)#~(transform)(xform)*o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*I(use-compress-program):C(directory):f(file):as[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_UMASK)) config TAR bool "tar" @@ -48,6 +48,7 @@ config TAR --strip-components NUM Ignore first NUM directory components when extracting --xform=SED Modify filenames via SED expression (ala s/find/replace/g) -I PROG Filter through PROG to compress or PROG -d to decompress + --zstd zstd compression Filename filter types. Create command line args aren't filtered, extract defaults to --anchored, --exclude defaults to --wildcards-match-slash, @@ -1001,9 +1002,9 @@ static void do_XT(char **pline, long len) if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline); } -static char *get_archiver() +static char *get_archiver() { - return TT.I ? : FLAG(z) ? "gzip" : FLAG(j) ? "bzip2" : "xz"; + return TT.I ? : FLAG(z)?"gzip" : FLAG(j)?"bzip2" : FLAG(zstd)?"zstd" : "xz"; } void tar_main(void) @@ -1094,12 +1095,13 @@ void tar_main(void) char *hdr = 0; // autodetect compression type when not specified - if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) { + if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)||FLAG(zstd))) { len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512); if (len!=512 || !is_tar_header(hdr)) { // detect gzip and bzip signatures if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z; else if (!smemcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j; + else if (!smemcmp(hdr, "\x28\xb5\x2f\xfd", 4)) toys.optflags|=FLAG_zstd; else if (peek_be(hdr, 7) == 0xfd377a585a0000ULL) toys.optflags |= FLAG_J; else error_exit("Not tar"); @@ -1108,10 +1110,10 @@ void tar_main(void) } } - if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) { + if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)||FLAG(zstd)) { int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid; struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"), - FLAG(z) ? "zcat" : FLAG(j) ? "bzcat" : "xzcat"); + FLAG(z)?"zcat" : FLAG(j)?"bzcat" : FLAG(zstd)?"zstdcat" : "xzcat"); // Toybox provides more decompressors than compressors, so try them first TT.pid = xpopen_both(zcat ? (char *[]){zcat->str, 0} : @@ -1174,17 +1176,19 @@ void tar_main(void) struct double_list *dl = TT.incl; // autodetect compression type based on -f name. (Use > to avoid.) - if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) { + if (TT.f && !(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)||FLAG(zstd))) { char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"}; if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz")) toys.optflags |= FLAG_z; - if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz")) + else if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz")) toys.optflags |= FLAG_J; + else if (strend(TT.f, ".tzst") || strend(TT.f, ".tar.zst")) + toys.optflags |= FLAG_zstd; else for (len = 0; len