changeset 1201:9adf66c7dd4e draft

Ok, _maybe_ I'm rewriting deflate from scratch rather than cleaning up the existing one, but you can't prove it. I plead the fifth, third, twelvefth, twentieth, twenty-first, twenth-fith, and twenty-seventh.
author Rob Landley <rob@landley.net>
date Thu, 13 Feb 2014 06:45:35 -0600
parents de10c8142fce
children 4f080cdb2f6e
files toys/pending/compress.c
diffstat 1 files changed, 66 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/toys/pending/compress.c	Mon Feb 10 08:30:05 2014 -0600
+++ b/toys/pending/compress.c	Thu Feb 13 06:45:35 2014 -0600
@@ -41,7 +41,8 @@
   char lenbits[29], distbits[30];
   unsigned short lenbase[29], distbase[30];
 
-  unsigned (*crc)(char *data, int len);
+  unsigned (*crcfunc)(char *data, int len);
+  unsigned crc;
 
   char *outbuf;
   unsigned outlen;
@@ -116,10 +117,14 @@
   return result;
 }
 
-static void outbuf_crc(char *buf, int len)
+static void outbuf_crc(char sym)
 {
-  if (TT.crc) TT.crc(buf, len);
-  xwrite(TT.outfd, buf, len);
+  TT.outbuf[TT.outlen++ & 32767] = sym;
+
+  if (!(TT.outlen & 32767)) {
+    xwrite(TT.outfd, TT.outbuf, 32768);
+    if (TT.crcfunc) TT.crcfunc(0, 32768);
+  }
 }
 
 // Huffman coding uses bits to traverse a binary tree to a leaf node,
@@ -139,7 +144,7 @@
 static void len2huff(struct huff *huff, char bitlen[], int len)
 {
   int offset[16];
-  int i, sum;
+  int i;
 
   // Count number of codes at each bit length
   memset(huff, 0, sizeof(struct huff));
@@ -147,7 +152,8 @@
 
   // Sort symbols by bit length. (They'll remain sorted by symbol within that.)
   *huff->length = *offset = 0;
-  for (i = sum = 0; i<16; i++) offset[i] = offset[i-1] + huff->length[i];
+  for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1];
+
   for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
 }
 
@@ -165,10 +171,7 @@
     offset = (offset << 1) | bitbuf_bit(bb);
     start += *++length;
     if ((offset -= *length) < 0) break;
-// Note to self: what prevents overflow?
-// If we ensure ranges add up to sizeof(symbol), does that ensure all codes
-// terminate within table? Patholotical is 11111111...
-//    if (length - huff_length > 16) error_exit("bad");
+    if ((length - huff->length) & 16) error_exit("bad symbol");
   }
 
   return huff->symbol[start + offset];
@@ -177,13 +180,18 @@
 // Decompress deflated data from bitbuf to filehandle.
 static void inflate(struct bitbuf *bb)
 {
+  struct huff *disthuff, *lithuff, *fixdisthuff = (struct huff *)(toybuf+2048),
+               *fixlithuff = (struct huff *)(toybuf+2560);
+
+//  len2huff(
+
+  TT.crc = ~0;
   // repeat until spanked
   for (;;) {
     int final, type;
 
     final = bitbuf_get(bb, 1);
     type = bitbuf_get(bb, 2);
-fprintf(stderr, "final=%d type=%d\n", final, type);
 
     if (type == 3) error_exit("bad type");
 
@@ -200,27 +208,29 @@
       // Dump output data
       while (len) {
         int pos = bb->bitpos >> 3, bblen = bb->len - pos;
+        char *p = bb->buf+pos;
 
+        // dump bytes until done or end of current bitbuf contents
         if (bblen > len) bblen = len;
-        if (bblen) outbuf_crc(bb->buf+pos, bblen);
+        pos = bblen;
+        while (pos--) outbuf_crc(*(p++));
+        bitbuf_skip(bb, bblen << 3);
         len -= bblen;
-        bitbuf_skip(bb, bblen << 3);
       }
 
     // Compressed block
     } else {
-//      struct huff huff;
 
       // Dynamic huffman codes?
       if (type == 2) {
-        struct huff hlo;
-        int i, literal, distance, hufflen;
+        struct huff *h2 = (struct huff *)(toybuf+512);
+        int i, litlen, distlen, hufflen;
         char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b"
                               "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits;
 
         // The huffman trees are stored as a series of bit lengths
-        literal = bitbuf_get(bb, 5)+257; // max 288
-        distance = bitbuf_get(bb, 5)+1;  // max 32
+        litlen = bitbuf_get(bb, 5)+257;  // max 288
+        distlen = bitbuf_get(bb, 5)+1;   // max 32
         hufflen = bitbuf_get(bb, 4)+4;   // max 19
 
         // The literal and distance codes are themselves compressed, in
@@ -229,25 +239,55 @@
         // in a magic order, leaving the rest 0. Then make a tree out of it:
         memset(bits = toybuf+1, 0, 19);
         for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3);
-        len2huff(&hlo, bits, 19);
+        len2huff(h2, bits, 19);
 
         // Use that tree to read in the literal and distance bit lengths
-        for (i = 0; i < literal + distance; i++) {
-          int sym = huff_and_puff(bb, &hlo);
+        for (i = 0; i < litlen + distlen;) {
+          int sym = huff_and_puff(bb, h2);
 
           // 0-15 are literals, 16 = repeat previous code 3-6 times,
-          // 17 = 3-10 zeroes, 18 = 11-138 zeroes
-          if (sym < 16) bits[i] = sym;
-          else memset(bits+i, bits[i-1] * !(sym&3),
-                      bitbuf_get(bb, (sym-14)+((sym&2)<<2)));
+          // 17 = 3-10 zeroes (3 bit), 18 = 11-138 zeroes (7 bit)
+          if (sym < 16) bits[i++] = sym;
+          else {
+            int len = sym & 2;
+
+            len = bitbuf_get(bb, sym-14+len+(len>>1)) + 3 + (len<<2);
+            memset(bits+i, bits[i-1] * !(sym&3), len);
+            i += len;
+          }
         }
+        if (i > litlen+distlen) error_exit("bad tree");
+
+        len2huff(lithuff = (struct huff *)(toybuf+1024), bits, litlen);
+        len2huff(disthuff = (struct huff *)(toybuf+1536), bits+litlen, distlen);
+
       // Static huffman codes
       } else {
+        lithuff = fixlithuff;
+        disthuff = fixdisthuff;
+error_exit("todo static huffman init");
+      }
+      for (;;) {
+        int sym = huff_and_puff(bb, lithuff);
+
+        if (sym < 256) outbuf_crc(sym);
+        else if (sym > 256) {
+          int len, dist;
+
+          sym -= 257;
+          len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]);
+          sym = huff_and_puff(bb, disthuff);
+          dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]);
+          sym = TT.outlen & 32767;
+
+          while (len--) outbuf_crc(TT.outbuf[(TT.outlen-dist) & 32767]);
+        } else break;
       }
     }
 
-    if (final) return;
+    if (final) break;
   }
+  if (TT.outlen & 32767) xwrite(TT.outfd, TT.outbuf, TT.outlen & 32767);
 }
 
 static void init_deflate(void)