1199
|
1 /* compress.c - deflate/inflate code for zip, gzip, zlib, and raw
|
|
2 *
|
|
3 * Copyright 2014 Rob Landley <rob@landley.net>
|
|
4 *
|
|
5 * The inflate/deflate code lives here, so the various things that use it
|
|
6 * either live here or call these commands to pipe data through them.
|
|
7 *
|
|
8 * Divergence from posix: replace obsolete "compress" with mutiplexer.
|
|
9 *
|
|
10 * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip)
|
|
11 * LSB 4.1 has gzip, gunzip, and zcat
|
|
12 * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout
|
|
13
|
|
14 // Accept many different kinds of command line argument:
|
|
15
|
|
16 USE_COMPRESS(NEWTOY(compress, "zglrcd9[-cd][!zglr]", TOYFLAG_USR|TOYFLAG_BIN))
|
|
17
|
|
18 //zip unzip gzip gunzip zcat
|
|
19
|
|
20 config COMPRESS
|
|
21 bool "compress"
|
|
22 default n
|
|
23 help
|
|
24 usage: compress [-zglrcd9] [FILE]
|
|
25
|
|
26 Compress or decompress file (or stdin) using "deflate" algorithm.
|
|
27
|
|
28 -c compress
|
|
29 -d decompress
|
|
30 -g gzip
|
|
31 -l zlib
|
|
32 -r raw (default)
|
|
33 -z zip
|
|
34 */
|
|
35
|
|
36 #define FOR_compress
|
|
37 #include "toys.h"
|
|
38
|
|
39 GLOBALS(
|
|
40 // base offset and extra bits tables (length and distance)
|
|
41 char lenbits[29], distbits[30];
|
|
42 unsigned short lenbase[29], distbase[30];
|
1200
|
43
|
|
44 unsigned (*crc)(char *data, int len);
|
|
45
|
|
46 char *outbuf;
|
|
47 unsigned outlen;
|
|
48 int outfd;
|
1199
|
49 )
|
|
50
|
|
51 // little endian bit buffer
|
|
52 struct bitbuf {
|
1200
|
53 int fd, bitpos, len, max;
|
1199
|
54 char buf[];
|
|
55 };
|
|
56
|
1200
|
57 // malloc a struct bitbuf
|
|
58 struct bitbuf *bitbuf_init(int fd, int size)
|
1199
|
59 {
|
|
60 struct bitbuf *bb = xmalloc(sizeof(struct bitbuf)+size);
|
|
61
|
|
62 memset(bb, 0, sizeof(struct bitbuf));
|
|
63 bb->max = size;
|
|
64 bb->fd = fd;
|
|
65
|
|
66 return bb;
|
|
67 }
|
|
68
|
1200
|
69 // Advance bitpos without the overhead of recording bits
|
|
70 void bitbuf_skip(struct bitbuf *bb, int bits)
|
|
71 {
|
|
72 int pos = bb->bitpos + bits, len = bb->len << 3;
|
|
73
|
|
74 while (pos >= len) {
|
|
75 pos -= len;
|
|
76 len = (bb->len = read(bb->fd, bb->buf, bb->max)) << 3;
|
|
77 if (bb->len < 1) perror_exit("inflate EOF");
|
|
78 }
|
|
79 bb->bitpos = pos;
|
|
80 }
|
|
81
|
|
82 // Optimized single bit inlined version
|
|
83 static inline int bitbuf_bit(struct bitbuf *bb)
|
|
84 {
|
|
85 int bufpos = bb->bitpos>>3;
|
|
86
|
|
87 if (bufpos == bb->len) {
|
|
88 bitbuf_skip(bb, 0);
|
|
89 bufpos = 0;
|
|
90 }
|
|
91
|
|
92 return (bb->buf[bufpos]>>(bb->bitpos++&7))&1;
|
|
93 }
|
|
94
|
|
95 // Fetch the next X bits from the bitbuf, little endian
|
|
96 int bitbuf_get(struct bitbuf *bb, int bits)
|
1199
|
97 {
|
|
98 int result = 0, offset = 0;
|
|
99
|
|
100 while (bits) {
|
1200
|
101 int click = bb->bitpos >> 3, blow, blen;
|
1199
|
102
|
|
103 // Load more data if buffer empty
|
1200
|
104 if (click == bb->len) bitbuf_skip(bb, 0);
|
1199
|
105
|
|
106 // grab bits from next byte
|
1200
|
107 blow = bb->bitpos & 7;
|
1199
|
108 blen = 8-blow;
|
|
109 if (blen > bits) blen = bits;
|
|
110 result |= ((bb->buf[click] >> blow) & ((1<<blen)-1)) << offset;
|
|
111 offset += blen;
|
|
112 bits -= blen;
|
1200
|
113 bb->bitpos += blen;
|
1199
|
114 }
|
|
115
|
|
116 return result;
|
|
117 }
|
|
118
|
1200
|
119 static void outbuf_crc(char *buf, int len)
|
|
120 {
|
|
121 if (TT.crc) TT.crc(buf, len);
|
|
122 xwrite(TT.outfd, buf, len);
|
|
123 }
|
|
124
|
|
125 // Huffman coding uses bits to traverse a binary tree to a leaf node,
|
|
126 // By placing frequently occurring symbols at shorter paths, frequently
|
|
127 // used symbols may be represented in fewer bits than uncommon symbols.
|
|
128
|
|
129 struct huff {
|
|
130 unsigned short length[16];
|
|
131 unsigned short symbol[288];
|
|
132 };
|
|
133
|
|
134 // Create simple huffman tree from array of bit lengths.
|
|
135
|
|
136 // The symbols in deflate's huffman trees are sorted (first by bit length
|
|
137 // of the code to reach them, then by symbol number). This means that given
|
|
138 // the bit length of each symbol, we can construct a unique tree.
|
|
139 static void len2huff(struct huff *huff, char bitlen[], int len)
|
|
140 {
|
|
141 int offset[16];
|
|
142 int i, sum;
|
|
143
|
|
144 // Count number of codes at each bit length
|
|
145 memset(huff, 0, sizeof(struct huff));
|
|
146 for (i = 0; i<len; i++) huff->length[bitlen[i]]++;
|
|
147
|
|
148 // Sort symbols by bit length. (They'll remain sorted by symbol within that.)
|
|
149 *huff->length = *offset = 0;
|
|
150 for (i = sum = 0; i<16; i++) offset[i] = offset[i-1] + huff->length[i];
|
|
151 for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
|
|
152 }
|
|
153
|
|
154 // Fetch and decode next huffman coded symbol from bitbuf.
|
|
155 // This takes advantage of the the sorting to navigate the tree as an array:
|
|
156 // each time we fetch a bit we have all the codes at that bit level in
|
|
157 // order with no gaps..
|
|
158 static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff)
|
|
159 {
|
|
160 unsigned short *length = huff->length;
|
|
161 int start = 0, offset = 0;
|
|
162
|
|
163 // Traverse through the bit lengths until our code is in this range
|
|
164 for (;;) {
|
|
165 offset = (offset << 1) | bitbuf_bit(bb);
|
|
166 start += *++length;
|
|
167 if ((offset -= *length) < 0) break;
|
|
168 // Note to self: what prevents overflow?
|
|
169 // If we ensure ranges add up to sizeof(symbol), does that ensure all codes
|
|
170 // terminate within table? Patholotical is 11111111...
|
|
171 // if (length - huff_length > 16) error_exit("bad");
|
|
172 }
|
|
173
|
|
174 return huff->symbol[start + offset];
|
|
175 }
|
|
176
|
|
177 // Decompress deflated data from bitbuf to filehandle.
|
|
178 static void inflate(struct bitbuf *bb)
|
1199
|
179 {
|
1200
|
180 // repeat until spanked
|
|
181 for (;;) {
|
|
182 int final, type;
|
|
183
|
|
184 final = bitbuf_get(bb, 1);
|
|
185 type = bitbuf_get(bb, 2);
|
|
186 fprintf(stderr, "final=%d type=%d\n", final, type);
|
|
187
|
|
188 if (type == 3) error_exit("bad type");
|
|
189
|
|
190 // no compression?
|
|
191 if (!type) {
|
|
192 int len, nlen;
|
|
193
|
|
194 // Align to byte, read length
|
|
195 bitbuf_skip(bb, bb->bitpos & 7);
|
|
196 len = bitbuf_get(bb, 16);
|
|
197 nlen = bitbuf_get(bb, 16);
|
|
198 if (len != (0xffff & ~nlen)) error_exit("bad len");
|
|
199
|
|
200 // Dump output data
|
|
201 while (len) {
|
|
202 int pos = bb->bitpos >> 3, bblen = bb->len - pos;
|
|
203
|
|
204 if (bblen > len) bblen = len;
|
|
205 if (bblen) outbuf_crc(bb->buf+pos, bblen);
|
|
206 len -= bblen;
|
|
207 bitbuf_skip(bb, bblen << 3);
|
|
208 }
|
|
209
|
|
210 // Compressed block
|
|
211 } else {
|
|
212 // struct huff huff;
|
|
213
|
|
214 // Dynamic huffman codes?
|
|
215 if (type == 2) {
|
|
216 struct huff hlo;
|
|
217 int i, literal, distance, hufflen;
|
|
218 char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b"
|
|
219 "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits;
|
|
220
|
|
221 // The huffman trees are stored as a series of bit lengths
|
|
222 literal = bitbuf_get(bb, 5)+257; // max 288
|
|
223 distance = bitbuf_get(bb, 5)+1; // max 32
|
|
224 hufflen = bitbuf_get(bb, 4)+4; // max 19
|
|
225
|
|
226 // The literal and distance codes are themselves compressed, in
|
|
227 // a complicated way: an array of bit lengths (hufflen many
|
|
228 // entries, each 3 bits) is used to fill out an array of 19 entries
|
|
229 // in a magic order, leaving the rest 0. Then make a tree out of it:
|
|
230 memset(bits = toybuf+1, 0, 19);
|
|
231 for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3);
|
|
232 len2huff(&hlo, bits, 19);
|
|
233
|
|
234 // Use that tree to read in the literal and distance bit lengths
|
|
235 for (i = 0; i < literal + distance; i++) {
|
|
236 int sym = huff_and_puff(bb, &hlo);
|
|
237
|
|
238 // 0-15 are literals, 16 = repeat previous code 3-6 times,
|
|
239 // 17 = 3-10 zeroes, 18 = 11-138 zeroes
|
|
240 if (sym < 16) bits[i] = sym;
|
|
241 else memset(bits+i, bits[i-1] * !(sym&3),
|
|
242 bitbuf_get(bb, (sym-14)+((sym&2)<<2)));
|
|
243 }
|
|
244 // Static huffman codes
|
|
245 } else {
|
|
246 }
|
|
247 }
|
|
248
|
|
249 if (final) return;
|
|
250 }
|
1199
|
251 }
|
|
252
|
|
253 static void init_deflate(void)
|
|
254 {
|
|
255 int i, n = 1;
|
|
256
|
1200
|
257 // Ye olde deflate window
|
|
258 TT.outbuf = xmalloc(32768);
|
|
259
|
1199
|
260 // Calculate lenbits, lenbase, distbits, distbase
|
|
261 *TT.lenbase = 3;
|
|
262 for (i = 0; i<sizeof(TT.lenbits)-1; i++) {
|
|
263 if (i>4) {
|
|
264 if (!(i&3)) {
|
|
265 TT.lenbits[i]++;
|
|
266 n <<= 1;
|
|
267 }
|
|
268 if (i == 27) n--;
|
|
269 else TT.lenbits[i+1] = TT.lenbits[i];
|
|
270 }
|
|
271 TT.lenbase[i+1] = n + TT.lenbase[i];
|
|
272 }
|
|
273 n = 0;
|
|
274 for (i = 0; i<sizeof(TT.distbits); i++) {
|
|
275 TT.distbase[i] = 1<<n;
|
|
276 if (i) TT.distbase[i] += TT.distbase[i-1];
|
|
277 if (i>3 && !(i&1)) n++;
|
|
278 TT.distbits[i] = n;
|
|
279 }
|
|
280 }
|
|
281
|
|
282 // Return true/false whether we consumed a gzip header.
|
|
283 static int is_gzip(struct bitbuf *bb)
|
|
284 {
|
|
285 int flags;
|
|
286
|
|
287 // Confirm signature
|
1200
|
288 if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31)
|
|
289 return 0;
|
|
290 bitbuf_skip(bb, 6*8);
|
1199
|
291
|
|
292 // Skip extra, name, comment, header CRC fields
|
1200
|
293 if (flags & 4) bitbuf_skip(bb, 16);
|
|
294 if (flags & 8) while (bitbuf_get(bb, 8));
|
|
295 if (flags & 16) while (bitbuf_get(bb, 8));
|
|
296 if (flags & 2) bitbuf_skip(bb, 16);
|
1199
|
297
|
|
298 return 1;
|
|
299 }
|
|
300
|
|
301 static void do_gzip(int fd, char *name)
|
|
302 {
|
1200
|
303 struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf));
|
1199
|
304
|
1200
|
305 if (!is_gzip(bb)) error_exit("not gzip");
|
|
306 TT.outfd = 1;
|
|
307 inflate(bb);
|
1199
|
308
|
|
309 // tail: crc32, len32
|
|
310
|
|
311 free(bb);
|
|
312 }
|
|
313
|
|
314 // Parse many different kinds of command line argument:
|
|
315
|
|
316 void compress_main(void)
|
|
317 {
|
|
318 init_deflate();
|
|
319
|
|
320 loopfiles(toys.optargs, do_gzip);
|
|
321 }
|