annotate toys/pending/compress.c @ 1200:de10c8142fce draft

Not buying it, eh?
author Rob Landley <rob@landley.net>
date Mon, 10 Feb 2014 08:30:05 -0600
parents ff8d73e78089
children 9adf66c7dd4e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
1 /* compress.c - deflate/inflate code for zip, gzip, zlib, and raw
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
2 *
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
3 * Copyright 2014 Rob Landley <rob@landley.net>
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
4 *
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
5 * The inflate/deflate code lives here, so the various things that use it
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
6 * either live here or call these commands to pipe data through them.
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
7 *
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
8 * Divergence from posix: replace obsolete "compress" with mutiplexer.
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
9 *
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
10 * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
11 * LSB 4.1 has gzip, gunzip, and zcat
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
12 * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
13
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
14 // Accept many different kinds of command line argument:
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
15
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
16 USE_COMPRESS(NEWTOY(compress, "zglrcd9[-cd][!zglr]", TOYFLAG_USR|TOYFLAG_BIN))
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
17
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
18 //zip unzip gzip gunzip zcat
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
19
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
20 config COMPRESS
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
21 bool "compress"
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
22 default n
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
23 help
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
24 usage: compress [-zglrcd9] [FILE]
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
25
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
26 Compress or decompress file (or stdin) using "deflate" algorithm.
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
27
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
28 -c compress
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
29 -d decompress
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
30 -g gzip
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
31 -l zlib
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
32 -r raw (default)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
33 -z zip
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
34 */
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
35
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
36 #define FOR_compress
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
37 #include "toys.h"
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
38
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
39 GLOBALS(
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
40 // base offset and extra bits tables (length and distance)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
41 char lenbits[29], distbits[30];
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
42 unsigned short lenbase[29], distbase[30];
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
43
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
44 unsigned (*crc)(char *data, int len);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
45
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
46 char *outbuf;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
47 unsigned outlen;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
48 int outfd;
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
49 )
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
50
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
51 // little endian bit buffer
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
52 struct bitbuf {
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
53 int fd, bitpos, len, max;
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
54 char buf[];
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
55 };
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
56
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
57 // malloc a struct bitbuf
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
58 struct bitbuf *bitbuf_init(int fd, int size)
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
59 {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
60 struct bitbuf *bb = xmalloc(sizeof(struct bitbuf)+size);
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
61
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
62 memset(bb, 0, sizeof(struct bitbuf));
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
63 bb->max = size;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
64 bb->fd = fd;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
65
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
66 return bb;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
67 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
68
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
69 // Advance bitpos without the overhead of recording bits
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
70 void bitbuf_skip(struct bitbuf *bb, int bits)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
71 {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
72 int pos = bb->bitpos + bits, len = bb->len << 3;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
73
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
74 while (pos >= len) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
75 pos -= len;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
76 len = (bb->len = read(bb->fd, bb->buf, bb->max)) << 3;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
77 if (bb->len < 1) perror_exit("inflate EOF");
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
78 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
79 bb->bitpos = pos;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
80 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
81
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
82 // Optimized single bit inlined version
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
83 static inline int bitbuf_bit(struct bitbuf *bb)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
84 {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
85 int bufpos = bb->bitpos>>3;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
86
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
87 if (bufpos == bb->len) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
88 bitbuf_skip(bb, 0);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
89 bufpos = 0;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
90 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
91
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
92 return (bb->buf[bufpos]>>(bb->bitpos++&7))&1;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
93 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
94
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
95 // Fetch the next X bits from the bitbuf, little endian
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
96 int bitbuf_get(struct bitbuf *bb, int bits)
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
97 {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
98 int result = 0, offset = 0;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
99
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
100 while (bits) {
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
101 int click = bb->bitpos >> 3, blow, blen;
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
102
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
103 // Load more data if buffer empty
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
104 if (click == bb->len) bitbuf_skip(bb, 0);
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
105
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
106 // grab bits from next byte
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
107 blow = bb->bitpos & 7;
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
108 blen = 8-blow;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
109 if (blen > bits) blen = bits;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
110 result |= ((bb->buf[click] >> blow) & ((1<<blen)-1)) << offset;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
111 offset += blen;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
112 bits -= blen;
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
113 bb->bitpos += blen;
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
114 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
115
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
116 return result;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
117 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
118
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
119 static void outbuf_crc(char *buf, int len)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
120 {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
121 if (TT.crc) TT.crc(buf, len);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
122 xwrite(TT.outfd, buf, len);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
123 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
124
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
125 // Huffman coding uses bits to traverse a binary tree to a leaf node,
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
126 // By placing frequently occurring symbols at shorter paths, frequently
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
127 // used symbols may be represented in fewer bits than uncommon symbols.
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
128
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
129 struct huff {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
130 unsigned short length[16];
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
131 unsigned short symbol[288];
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
132 };
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
133
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
134 // Create simple huffman tree from array of bit lengths.
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
135
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
136 // The symbols in deflate's huffman trees are sorted (first by bit length
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
137 // of the code to reach them, then by symbol number). This means that given
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
138 // the bit length of each symbol, we can construct a unique tree.
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
139 static void len2huff(struct huff *huff, char bitlen[], int len)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
140 {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
141 int offset[16];
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
142 int i, sum;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
143
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
144 // Count number of codes at each bit length
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
145 memset(huff, 0, sizeof(struct huff));
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
146 for (i = 0; i<len; i++) huff->length[bitlen[i]]++;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
147
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
148 // Sort symbols by bit length. (They'll remain sorted by symbol within that.)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
149 *huff->length = *offset = 0;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
150 for (i = sum = 0; i<16; i++) offset[i] = offset[i-1] + huff->length[i];
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
151 for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
152 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
153
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
154 // Fetch and decode next huffman coded symbol from bitbuf.
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
155 // This takes advantage of the the sorting to navigate the tree as an array:
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
156 // each time we fetch a bit we have all the codes at that bit level in
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
157 // order with no gaps..
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
158 static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
159 {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
160 unsigned short *length = huff->length;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
161 int start = 0, offset = 0;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
162
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
163 // Traverse through the bit lengths until our code is in this range
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
164 for (;;) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
165 offset = (offset << 1) | bitbuf_bit(bb);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
166 start += *++length;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
167 if ((offset -= *length) < 0) break;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
168 // Note to self: what prevents overflow?
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
169 // If we ensure ranges add up to sizeof(symbol), does that ensure all codes
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
170 // terminate within table? Patholotical is 11111111...
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
171 // if (length - huff_length > 16) error_exit("bad");
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
172 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
173
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
174 return huff->symbol[start + offset];
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
175 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
176
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
177 // Decompress deflated data from bitbuf to filehandle.
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
178 static void inflate(struct bitbuf *bb)
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
179 {
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
180 // repeat until spanked
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
181 for (;;) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
182 int final, type;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
183
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
184 final = bitbuf_get(bb, 1);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
185 type = bitbuf_get(bb, 2);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
186 fprintf(stderr, "final=%d type=%d\n", final, type);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
187
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
188 if (type == 3) error_exit("bad type");
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
189
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
190 // no compression?
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
191 if (!type) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
192 int len, nlen;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
193
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
194 // Align to byte, read length
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
195 bitbuf_skip(bb, bb->bitpos & 7);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
196 len = bitbuf_get(bb, 16);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
197 nlen = bitbuf_get(bb, 16);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
198 if (len != (0xffff & ~nlen)) error_exit("bad len");
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
199
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
200 // Dump output data
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
201 while (len) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
202 int pos = bb->bitpos >> 3, bblen = bb->len - pos;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
203
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
204 if (bblen > len) bblen = len;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
205 if (bblen) outbuf_crc(bb->buf+pos, bblen);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
206 len -= bblen;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
207 bitbuf_skip(bb, bblen << 3);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
208 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
209
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
210 // Compressed block
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
211 } else {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
212 // struct huff huff;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
213
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
214 // Dynamic huffman codes?
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
215 if (type == 2) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
216 struct huff hlo;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
217 int i, literal, distance, hufflen;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
218 char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b"
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
219 "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
220
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
221 // The huffman trees are stored as a series of bit lengths
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
222 literal = bitbuf_get(bb, 5)+257; // max 288
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
223 distance = bitbuf_get(bb, 5)+1; // max 32
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
224 hufflen = bitbuf_get(bb, 4)+4; // max 19
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
225
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
226 // The literal and distance codes are themselves compressed, in
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
227 // a complicated way: an array of bit lengths (hufflen many
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
228 // entries, each 3 bits) is used to fill out an array of 19 entries
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
229 // in a magic order, leaving the rest 0. Then make a tree out of it:
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
230 memset(bits = toybuf+1, 0, 19);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
231 for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
232 len2huff(&hlo, bits, 19);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
233
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
234 // Use that tree to read in the literal and distance bit lengths
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
235 for (i = 0; i < literal + distance; i++) {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
236 int sym = huff_and_puff(bb, &hlo);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
237
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
238 // 0-15 are literals, 16 = repeat previous code 3-6 times,
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
239 // 17 = 3-10 zeroes, 18 = 11-138 zeroes
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
240 if (sym < 16) bits[i] = sym;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
241 else memset(bits+i, bits[i-1] * !(sym&3),
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
242 bitbuf_get(bb, (sym-14)+((sym&2)<<2)));
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
243 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
244 // Static huffman codes
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
245 } else {
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
246 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
247 }
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
248
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
249 if (final) return;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
250 }
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
251 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
252
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
253 static void init_deflate(void)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
254 {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
255 int i, n = 1;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
256
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
257 // Ye olde deflate window
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
258 TT.outbuf = xmalloc(32768);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
259
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
260 // Calculate lenbits, lenbase, distbits, distbase
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
261 *TT.lenbase = 3;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
262 for (i = 0; i<sizeof(TT.lenbits)-1; i++) {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
263 if (i>4) {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
264 if (!(i&3)) {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
265 TT.lenbits[i]++;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
266 n <<= 1;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
267 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
268 if (i == 27) n--;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
269 else TT.lenbits[i+1] = TT.lenbits[i];
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
270 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
271 TT.lenbase[i+1] = n + TT.lenbase[i];
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
272 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
273 n = 0;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
274 for (i = 0; i<sizeof(TT.distbits); i++) {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
275 TT.distbase[i] = 1<<n;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
276 if (i) TT.distbase[i] += TT.distbase[i-1];
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
277 if (i>3 && !(i&1)) n++;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
278 TT.distbits[i] = n;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
279 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
280 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
281
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
282 // Return true/false whether we consumed a gzip header.
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
283 static int is_gzip(struct bitbuf *bb)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
284 {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
285 int flags;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
286
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
287 // Confirm signature
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
288 if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31)
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
289 return 0;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
290 bitbuf_skip(bb, 6*8);
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
291
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
292 // Skip extra, name, comment, header CRC fields
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
293 if (flags & 4) bitbuf_skip(bb, 16);
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
294 if (flags & 8) while (bitbuf_get(bb, 8));
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
295 if (flags & 16) while (bitbuf_get(bb, 8));
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
296 if (flags & 2) bitbuf_skip(bb, 16);
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
297
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
298 return 1;
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
299 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
300
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
301 static void do_gzip(int fd, char *name)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
302 {
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
303 struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf));
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
304
1200
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
305 if (!is_gzip(bb)) error_exit("not gzip");
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
306 TT.outfd = 1;
de10c8142fce Not buying it, eh?
Rob Landley <rob@landley.net>
parents: 1199
diff changeset
307 inflate(bb);
1199
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
308
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
309 // tail: crc32, len32
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
310
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
311 free(bb);
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
312 }
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
313
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
314 // Parse many different kinds of command line argument:
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
315
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
316 void compress_main(void)
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
317 {
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
318 init_deflate();
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
319
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
320 loopfiles(toys.optargs, do_gzip);
ff8d73e78089 Nothing to see here, move along.
Rob Landley <rob@landley.net>
parents:
diff changeset
321 }