From 6e6b6eb6fa68d5b764751e9658c4fd15b7555b95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20R=C3=B6hrich?= Date: Thu, 19 Aug 2021 22:47:37 +0200 Subject: [PATCH] new toy: Simple hexdump implementation - Add simple hexdump implementation - Add tests for hexdump --- tests/hexdump.test | 135 ++++++++++++++++++++++++++++++++++++ toys/pending/hexdump.c | 153 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+) create mode 100755 tests/hexdump.test create mode 100644 toys/pending/hexdump.c diff --git a/tests/hexdump.test b/tests/hexdump.test new file mode 100755 index 00000000..e319957c --- /dev/null +++ b/tests/hexdump.test @@ -0,0 +1,135 @@ +#!/bin/bash + +[ -f testing.sh ] && . testing.sh + +testcmd "simple file" "input" "0000000 6973 706d 656c 000a\n0000007\n" "simple\\n" "" +testcmd "simple file -b" "-b input" "0000000 163 151 155 160 154 145 012\n0000007\n" "simple\\n" "" +testcmd "simple file -c" "-c input" "0000000 s i m p l e \\\\n\n0000007\n" "simple\\n" "" +testcmd "simple file -d" "-d input" "0000000 26995 28781 25964 00010\n0000007\n" "simple\\n" "" +testcmd "simple file -o" "-o input" "0000000 064563 070155 062554 000012\n0000007\n" "simple\\n" "" +testcmd "simple file -x" "-x input" "0000000 6973 706d 656c 000a\n0000007\n" "simple\\n" "" + +testcmd \ + "simple file canonical output -C" \ + "-C input" \ + "\ +00000000 73 69 6d 70 6c 65 0a |simple.|\n\ +00000007\n" \ + "simple\n" \ + "" +testcmd \ + "simple file canonical output -C multiline" \ + "-C input" \ + "\ +00000000 73 69 6d 70 6c 65 0a 62 61 72 66 6f 6f 62 61 72 |simple.barfoobar|\n\ +00000010 66 6f 6f 62 61 72 0a |foobar.|\n\ +00000017\n" \ + "\ +simple\n\ +barfoobarfoobar\n" \ + "" + +testcmd \ + "head of file -n 10" \ + "-n 10 input" \ + "\ +0000000 6973 706d 656c 730a 6d69\n\ +000000a\n" \ + "simple\nsimple\n" \ + "" +testcmd \ + "skip head of file -s 10" \ + "-s 10 input" \ + "\ +000000a 6c70 0a65\n\ +000000e\n" \ + "simple\nsimple\n" \ + "" + +testcmd \ + "squeeze 
repeating lines" \ + "input" \ + "\ +0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +*\n\ +0000070 6f66 006f\n\ +0000073\n" \ + "\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +foo" \ + "" +testcmd \ + "squeeze repeating lines" \ + "input" \ + "\ +0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +*\n\ +0000030 6262 6262 6262 6262 6262 6262 6262 0a62\n\ +0000040 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +*\n\ +0000070 6262 6262 6262 6262 6262 6262 6262 0a62\n\ +0000080\n" \ + "\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +bbbbbbbbbbbbbbb\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +bbbbbbbbbbbbbbb\n" \ + "" +testcmd \ + "don't squeeze repeating lines" \ + "-v input" \ + "\ +0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000010 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000020 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000030 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000040 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000050 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000060 6161 6161 6161 6161 6161 6161 6161 0a61\n\ +0000070 6f66 006f\n\ +0000073\n" \ + "\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +aaaaaaaaaaaaaaa\n\ +foo" \ + "" + +for _ in {1..25}; do echo "foobar" >> file1; done +for _ in {1..25}; do echo "buzzbar" >> file2; done + +testcmd \ + "accumulate offset accross files" \ + "file1 file2" \ + "0000000 6f66 626f 7261 660a 6f6f 6162 0a72 6f66\n\ +0000010 626f 7261 660a 6f6f 6162 0a72 6f66 626f\n\ +0000020 7261 660a 6f6f 6162 0a72 6f66 626f 7261\n\ +0000030 660a 6f6f 6162 0a72 6f66 626f 7261 660a\n\ +0000040 6f6f 6162 0a72 6f66 626f 7261 660a 6f6f\n\ +0000050 6162 0a72 6f66 626f 7261 660a 6f6f 6162\n\ +0000060 0a72 6f66 626f 7261 660a 6f6f 6162 0a72\n\ +0000070 6f66 626f 7261 660a 6f6f 6162 0a72 6f66\n\ +0000080 626f 7261 660a 6f6f 
6162 0a72 6f66 626f\n\ +0000090 7261 660a 6f6f 6162 0a72 6f66 626f 7261\n\ +00000a0 660a 6f6f 6162 0a72 6f66 626f 7261 620a\n\ +00000b0 7a75 627a 7261 620a 7a75 627a 7261 620a\n\ +*\n\ +0000170 7a75 627a 7261 000a\n\ +0000177\n" \ + "" \ + "" +rm file1 file2 diff --git a/toys/pending/hexdump.c b/toys/pending/hexdump.c new file mode 100644 index 00000000..53bee903 --- /dev/null +++ b/toys/pending/hexdump.c @@ -0,0 +1,153 @@ +/* hexdump.c - Dump file content in hexadecimal format to stdout + * + * Copyright 2021 Moritz Röhrich + * + * No standard + * + * TODO: + * - Implement format strings (see man (1) hexdump) + +USE_HEXDUMP(NEWTOY(hexdump, "bcCdn#<0os#<0vx[!bcCdox]", TOYFLAG_USR|TOYFLAG_BIN)) +USE_HD(OLDTOY(hd, hexdump, TOYFLAG_USR|TOYFLAG_BIN)) + +config HEXDUMP + bool "hexdump" + default n + help + usage: hexdump [-b|-c|-C|-d|-o|-x] [-v] [-n bytes] [-s bytes] FILES + + Dump file content in hexadecimal format to stdout. + + -b One-byte octal display + -c One-byte character display + -C Canonical (hex + ASCII) display + -d Two-bytes decimal display + -n Dump only bytes of the input + -o Two-bytes octal display + -s Skip bytes from the beginning of the input + -v Do not squeeze identical lines in the output together + -x Two-bytes hexadecimal display (default) + +config HD + bool "hd" + default HEXDUMP + help + See hexdump +*/ + +#define FOR_hexdump +#include "toys.h" + +GLOBALS( + long s, n; + long long len, pos, ppos; + const char *fmt; + unsigned int fn, bc; // file number and byte count + char linebuf[16]; // line buffer - serves double duty for sqeezing repeat + // lines and for accumulating full lines accross file + // boundaries if necessesary. 
+)
+
+// Return the C-style escape used by -c for a byte outside the printable
+// ASCII range ' '..'~' (the only bytes the caller passes in). Bytes without
+// a dedicated escape are shown as "??".
+const char *make_printable(unsigned char byte) {
+  switch (byte) {
+    case '\0': return "\\0";
+    case '\a': return "\\a";
+    case '\b': return "\\b";
+    case '\t': return "\\t";
+    case '\n': return "\\n";
+    case '\v': return "\\v";
+    case '\f': return "\\f";
+    case '\r': return "\\r";
+    default: return "??"; // every other unprintable byte
+  }
+}
+
+// Number of bytes to request from the next read: whatever is still missing
+// to complete the current 16 byte line, capped by what remains of the -n
+// budget (which is counted from the -s offset).
+static long long hexdump_chunk_size(void) {
+  long long room = 16 - TT.bc % 16;
+  long long left = TT.s + TT.n - TT.pos;
+
+  return (TT.n && left < room) ? left : room;
+}
+
+// Print the TT.bc bytes collected in TT.linebuf as one output line, prefixed
+// with the offset TT.ppos, in the layout selected by -C, -c or TT.fmt.
+static void hexdump_print_line(void) {
+  // Look at the line as unsigned bytes so values >= 0x80 are never sign
+  // extended when they are combined into words or handed to printf.
+  unsigned char *line = (unsigned char *)TT.linebuf;
+  unsigned i, step;
+  int block;
+
+  if (FLAG(C)) {
+    printf("%08llx", TT.ppos);
+    for (i = 0; i < 16; i++) {
+      if (!(i % 8)) putchar(' ');
+      if (i < TT.bc) printf(" %02x", line[i]);
+      else printf("%3s", ""); // pad as wide as one " %02x" cell
+    }
+    printf(" |");
+    for (i = 0; i < TT.bc; i++)
+      putchar((line[i] < ' ' || line[i] > '~') ? '.' : line[i]);
+    putchar('|');
+  } else if (FLAG(c)) {
+    printf("%07llx", TT.ppos);
+    for (i = 0; i < TT.bc; i++) {
+      if (line[i] >= ' ' && line[i] <= '~') printf("%4c", line[i]);
+      else printf("%4s", make_printable(line[i]));
+    }
+  } else {
+    printf("%07llx", TT.ppos);
+    step = FLAG(b) ? 1 : 2;
+    for (i = 0; i < TT.bc; i += step) {
+      // Two byte units are assembled low byte first; a trailing odd byte is
+      // printed on its own.
+      block = line[i];
+      if (!FLAG(b) && i + 1 < TT.bc) block |= line[i + 1] << 8;
+      printf(TT.fmt, block);
+    }
+  }
+  putchar('\n');
+}
+
+// Dump one input file; passed to loopfiles() by hexdump_main(). Offsets, the
+// pending partial line and the squeeze reference line live in TT, so they
+// carry over from one file to the next. "name" is not used.
+void do_hexdump(int fd, char *name) {
+  int starred = 0; // a "*" was already printed for the current repeat run
+  long long size, skip;
+
+  TT.fn++; // count the files handed to us so far
+
+  // Skip ahead for -s; a file shorter than the remaining skip is consumed
+  // entirely and the rest of the skip carries over to the next file.
+  if (FLAG(s) && TT.s - TT.pos > 0) {
+    skip = TT.s - TT.pos;
+    size = xlseek(fd, 0L, SEEK_END);
+
+    if (size < skip) {
+      TT.pos += size;
+      TT.ppos += size;
+    } else {
+      xlseek(fd, skip, SEEK_SET);
+      TT.ppos = TT.s;
+      TT.pos = TT.s;
+    }
+  }
+
+  while (0 < (TT.len = readall(fd, toybuf, hexdump_chunk_size()))) {
+    // A full line identical to the previous one is squeezed (unless -v):
+    // print a single "*" for the whole run and only advance the offsets.
+    // NOTE(review): if TT.linebuf starts out zeroed, a leading all-zero line
+    // is squeezed before it was ever printed - confirm and handle if so.
+    if (!FLAG(v) && TT.len >= 16 && !memcmp(toybuf, TT.linebuf, 16)) {
+      if (!starred) {
+        printf("*\n");
+        starred = 1;
+      }
+      TT.ppos += TT.len;
+    } else {
+      // memcpy, not strncpy: the data is binary and may contain NUL bytes.
+      memcpy(TT.linebuf + TT.bc % 16, toybuf, TT.len);
+      TT.bc = TT.bc % 16 + TT.len;
+      starred = 0;
+
+      // Emit the line once it is full, the -n limit has been reached, or
+      // this is the last input so no further data can complete it.
+      // toys.optc is presumably the number of file arguments (0 when
+      // reading stdin), hence >= rather than == - TODO confirm.
+      if (TT.pos + TT.bc == TT.s + TT.n || TT.fn >= toys.optc || TT.bc == 16) {
+        hexdump_print_line();
+        TT.ppos += TT.bc;
+      }
+    }
+    TT.pos += TT.len;
+  }
+
+  if (TT.len < 0) perror_exit("read");
+}
+
+// Entry point: pick the printf format for the selected unit display, dump
+// every input through do_hexdump(), then print the final offset line.
+void hexdump_main(void) {
+  TT.fn = 0;
+  if (FLAG(b)) TT.fmt = " %03o";
+  else if (FLAG(d)) TT.fmt = " %05d";
+  else if (FLAG(o)) TT.fmt = " %06o";
+  else TT.fmt = " %04x";
+
+  loopfiles(toys.optargs, do_hexdump);
+  // The canonical (-C) layout uses 8 digit offsets, all others 7.
+  printf("%0*llx\n", FLAG(C) ? 8 : 7, TT.pos);
+}
-- 
2.39.2