From 94a0d2b7d7f8ead365b165f4abd4543b06e76782 Mon Sep 17 00:00:00 2001
From: Rob Landley <rob@landley.net>
Date: Fri, 27 Jun 2025 13:21:01 -0500
Subject: [PATCH] xzcat cleanup: reorder to remove some function prototypes and
 predeclarations, yank unused xz_ret states (using xmalloc() to eliminate one
 and just ignore the other), and memeq() is just !memcmp()

---
 toys/pending/xzcat.c | 755 ++++++++++++++++++++-----------------------
 1 file changed, 346 insertions(+), 409 deletions(-)

diff --git a/toys/pending/xzcat.c b/toys/pending/xzcat.c
index e1dbfc6e..4feb7d02 100644
--- a/toys/pending/xzcat.c
+++ b/toys/pending/xzcat.c
@@ -1,5 +1,4 @@
-/* xzcat.c - Simple XZ decoder command line tool
- *
+/*
  * Author: Lasse Collin <xz@tukaani.org>
  *
  * This file has been put into the public domain.
@@ -25,20 +24,10 @@ config XZCAT
 // BEGIN xz.h
 
 enum xz_ret {
-  // Doing fine, More input or output space needed
-  XZ_OK,
-  // EOF, Everything went fine
-  XZ_STREAM_END,
-  // Integrity check type is not supported. Decoding is still possible in
-  // multi-call mode by simply calling xz_dec_run() again.  Note that this
-  // return value is used only if XZ_DEC_ANY_CHECK was defined at build
-  // time, which is not used in the kernel. Unsupported check types return
-  // XZ_OPTIONS_ERROR if XZ_DEC_ANY_CHECK was not defined at build time.
-  XZ_UNSUPPORTED_CHECK,
-  // Cant allocate memory
-  XZ_MEM_ERROR,
+  XZ_OK,			// Need more input
+  XZ_STREAM_END,		// Successful finish
   // OOM
-  XZ_MEMLIMIT_ERROR,
+  XZ_MEMLIMIT_ERROR,            // Dictionary too big
   // Not a xz file
   XZ_FORMAT_ERROR,
   // Compression option not available
@@ -55,6 +44,141 @@ enum xz_ret {
   // truncated or otherwise corrupt.
 };
 
+/*
+ * This enum is used to track which LZMA symbols have occurred most recently
+ * and in which order. This information is used to predict the next symbol.
+ *
+ * Symbols:
+ *  - Literal: One 8-bit byte
+ *  - Match: Repeat a chunk of data at some distance
+ *  - Long repeat: Multi-byte match at a recently seen distance
+ *  - Short repeat: One-byte repeat at a recently seen distance
+ *
+ * The symbol names are in the form STATE_oldest_older_previous. REP means
+ * either short or long repeated match, and NONLIT means any non-literal.
+ */
+enum lzma_state {
+  STATE_LIT_LIT,
+  STATE_MATCH_LIT_LIT,
+  STATE_REP_LIT_LIT,
+  STATE_SHORTREP_LIT_LIT,
+  STATE_MATCH_LIT,
+  STATE_REP_LIT,
+  STATE_SHORTREP_LIT,
+  STATE_LIT_MATCH,
+  STATE_LIT_LONGREP,
+  STATE_LIT_SHORTREP,
+  STATE_NONLIT_MATCH,
+  STATE_NONLIT_REP
+};
+
+/* Total number of states */
+#define STATES 12
+
+/* The lowest 7 states indicate that the previous state was a literal. */
+#define LIT_STATES 7
+
+/* Each literal coder is divided in three sections:
+ *   - 0x001-0x0FF: Without match byte
+ *   - 0x101-0x1FF: With match byte; match bit is 0
+ *   - 0x201-0x2FF: With match byte; match bit is 1
+ *
+ * Match byte is used when the previous LZMA symbol was something else than
+ * a literal (that is, it was some kind of match).
+ */
+#define LITERAL_CODER_SIZE 0x300
+
+/* Maximum number of literal coders */
+#define LITERAL_CODERS_MAX (1 << 4)
+
+/* Minimum length of a match is two bytes. */
+#define MATCH_LEN_MIN 2
+
+/*
+ * Maximum number of position states. A position state is the lowest pb
+ * number of bits of the current uncompressed offset. In some places there
+ * are different sets of probabilities for different position states.
+ */
+#define POS_STATES_MAX (1 << 4)
+
+/* Match distances up to 127 are fully encoded using probabilities. Since
+ * the highest two bits (distance slot) are always encoded using six bits,
+ * the distances 0-3 don't need any additional bits to encode, since the
+ * distance slot itself is the same as the actual distance. DIST_MODEL_START
+ * indicates the first distance slot where at least one additional bit is
+ * needed.
+ */
+#define DIST_MODEL_START 4
+
+/*
+ * Match distances greater than 127 are encoded in three pieces:
+ *   - distance slot: the highest two bits
+ *   - direct bits: 2-26 bits below the highest two bits
+ *   - alignment bits: four lowest bits
+ *
+ * Direct bits don't use any probabilities.
+ *
+ * The distance slot value of 14 is for distances 128-191.
+ */
+#define DIST_MODEL_END 14
+
+/*
+ * Different sets of probabilities are used for match distances that have
+ * very short match length: Lengths of 2, 3, and 4 bytes have a separate
+ * set of probabilities for each length. The matches with longer length
+ * use a shared set of probabilities.
+ */
+#define DIST_STATES 4
+
+/*
+ * The highest two bits of a 32-bit match distance are encoded using six bits.
+ * This six-bit value is called a distance slot. This way encoding a 32-bit
+ * value takes 6-36 bits, larger values taking more bits.
+ */
+#define DIST_SLOTS		(1 << 6)
+
+/* Distance slots that indicate a distance <= 127. */
+#define FULL_DISTANCES (1 << (DIST_MODEL_END/2))
+
+/*
+ * For match distances greater than 127, only the highest two bits and the
+ * lowest four bits (alignment) are encoded using probabilities.
+ */
+#define ALIGN_BITS 4
+#define ALIGN_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_SIZE - 1)
+
+/* Total number of all probability variables */
+#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
+
+/*
+ * LZMA remembers the four most recent match distances. Reusing these
+ * distances tends to take less space than re-encoding the actual
+ * distance value.
+ */
+#define REPS 4
+
+/* Match length is encoded with 4, 5, or 10 bits.
+ *
+ * Length   Bits
+ *  2-9      4 = Choice=0 + 3 bits
+ * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
+ * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
+ */
+#define LEN_LOW_SYMBOLS (1 << 3)
+#define LEN_MID_SYMBOLS (1 << 3)
+#define LEN_HIGH_SYMBOLS (1 << 8)
+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
+
+/*
+ * Minimum number of input bytes needed to safely decode one LZMA symbol.
+ * The worst case is that we decode 22 bits using probabilities and 26
+ * direct bits. This may consume at most 20 bytes of input. However,
+ * lzma_main() does an extra normalization before returning, thus we
+ * need to put 21 here.
+ */
+#define LZMA_IN_REQUIRED 21
+
 // Passing input and output buffers to XZ code
 // Only the contents of the output buffer from out[out_pos] onward, and
 // the variables in_pos and out_pos are modified by the XZ code.
@@ -71,6 +195,195 @@ struct xz_buf {
   size_t out_size;
 };
 
+struct xz_dec_lzma2 {
+  /*
+   * The order below is important on x86 to reduce code size and
+   * it shouldn't hurt on other platforms. Everything up to and
+   * including lzma.pos_mask are in the first 128 bytes on x86-32,
+   * which allows using smaller instructions to access those
+   * variables. On x86-64, fewer variables fit into the first 128
+   * bytes, but this is still the best order without sacrificing
+   * the readability by splitting the structures.
+   */
+  // range decoder
+  struct rc_dec {
+    unsigned range, code;
+
+    // Number of initializing bytes remaining to be read by rc_read_init().
+    unsigned init_bytes_left;
+
+    // Input buffer: either temp.buf or the caller-provided input buffer.
+    const char *in;
+    size_t in_pos, in_limit;
+  } rc;
+
+  /*
+   * Dictionary (history buffer)
+   *
+   * These are always true:
+   *    start <= pos <= full <= end
+   *    pos <= limit <= end
+   *    end == size
+   *    size <= size_max
+   *    allocated <= size
+   *
+   * Most of these variables are size_t as a relic of single-call mode,
+   * in which the dictionary variables address the actual output
+   * buffer directly.
+   */
+  struct dictionary {
+    // Beginning of the history buffer
+    char *buf;
+    // Old position in buf (before decoding more data)
+    size_t start;
+    // Position in buf
+    size_t pos;
+    // How full dictionary is. This is used to detect corrupt input that
+    // would read beyond the beginning of the uncompressed stream.
+    size_t full;
+    /* Write limit; we don't write to buf[limit] or later bytes. */
+    size_t limit;
+    // End of the dictionary buffer. This is the same as the dictionary size.
+    size_t end;
+    // Size of the dictionary as specified in Block Header. This is used
+    // together with "full" to detect corrupt input that would make us
+    // read beyond the beginning of the uncompressed stream.
+    unsigned size;
+    // Maximum allowed dictionary size.
+    unsigned size_max;
+    // Amount of memory currently allocated for the dictionary.
+    unsigned allocated;
+  } dict;
+
+  struct lzma2_dec {
+    /* Position in xz_dec_lzma2_run(). */
+    enum lzma2_seq {
+      SEQ_CONTROL,
+      SEQ_UNCOMPRESSED_1,
+      SEQ_UNCOMPRESSED_2,
+      SEQ_COMPRESSED_0,
+      SEQ_COMPRESSED_1,
+      SEQ_PROPERTIES,
+      SEQ_LZMA_PREPARE,
+      SEQ_LZMA_RUN,
+      SEQ_COPY
+    } sequence;
+
+    /* Next position after decoding the compressed size of the chunk. */
+    enum lzma2_seq next_sequence;
+
+    /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
+    unsigned uncompressed;
+
+    /*
+     * Compressed size of LZMA chunk or compressed/uncompressed
+     * size of uncompressed chunk (64 KiB at maximum)
+     */
+    unsigned compressed;
+
+    /*
+     * True if dictionary reset is needed. This is false before
+     * the first chunk (LZMA or uncompressed).
+     */
+    int need_dict_reset;
+
+    /*
+     * True if new LZMA properties are needed. This is false
+     * before the first LZMA chunk.
+     */
+    int need_props;
+  } lzma2;
+  struct lzma_dec {
+    /* Distances of latest four matches */
+    unsigned rep0, rep1, rep2, rep3;
+
+    /* Types of the most recently seen LZMA symbols */
+    enum lzma_state state;
+
+    // Length of a match, so dict_repeat can finish repeating the whole match.
+    unsigned len;
+
+    /*
+     * LZMA properties or related bit masks (number of literal
+     * context bits, a mask derived from the number of literal
+     * position bits, and a mask derived from the number of
+     * position bits)
+     */
+    unsigned lc, literal_pos_mask, pos_mask;
+
+    // If 1, it's a match. Otherwise it's a single 8-bit literal.
+    uint16_t is_match[STATES][POS_STATES_MAX];
+
+    // If 1, it's a repeated match. The distance is one of rep0 .. rep3.
+    uint16_t is_rep[STATES];
+
+    // If 0, distance of a repeated match is rep0, otherwise check is_rep1.
+    uint16_t is_rep0[STATES];
+
+    // If 0, distance of a repeated match is rep1, otherwise check is_rep2.
+    uint16_t is_rep1[STATES];
+
+    // If 0, distance of a repeated match is rep2. Otherwise it is rep3.
+    uint16_t is_rep2[STATES];
+
+    /*
+     * If 1, the repeated match has length of one byte. Otherwise
+     * the length is decoded from rep_len_dec.
+     */
+    uint16_t is_rep0_long[STATES][POS_STATES_MAX];
+
+    /*
+     * Probability tree for the highest two bits of the match
+     * distance. There is a separate probability tree for match
+     * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
+     */
+    uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
+
+    /*
+     * Probability trees for additional bits for match distance
+     * when the distance is in the range [4, 127].
+     */
+    uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
+
+    /*
+     * Probability tree for the lowest four bits of a match
+     * distance that is equal to or greater than 128.
+     */
+    uint16_t dist_align[ALIGN_SIZE];
+
+    /* Probabilities for a length decoder. */
+    struct lzma_len_dec {
+      /* Probability of match length being at least 10 */
+      uint16_t choice;
+
+      /* Probability of match length being at least 18 */
+      uint16_t choice2;
+
+      /* Probabilities for match lengths 2-9 */
+      uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
+
+      /* Probabilities for match lengths 10-17 */
+      uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
+
+      /* Probabilities for match lengths 18-273 */
+      uint16_t high[LEN_HIGH_SYMBOLS];
+    // Length of a normal or repeated match
+    } match_len_dec, rep_len_dec;
+
+    /* Probabilities of literals */
+    uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
+  } lzma;
+
+  /*
+   * Temporary buffer which holds a small number of input bytes between
+   * decoder calls. See lzma2_lzma() for details.
+   */
+  struct {
+    unsigned size;
+    char buf[3 * LZMA_IN_REQUIRED];
+  } temp;
+};
+
 // Opaque type to hold the XZ decoder state
 struct xz_dec;
 
@@ -97,8 +410,6 @@ static uint64_t xz_crc64_table[256];
 // END xz.h
 // BEGIN xz_private.h
 
-#define memeq(a, b, size) (!memcmp(a, b, size))
-
 /* Inline functions to access unaligned unsigned 32-bit integers */
 static unsigned get_unaligned_le32(const char *buf)
 {
@@ -132,17 +443,6 @@ static void put_unaligned_be32(unsigned val, char *buf)
   buf[3] = (char)val;
 }
 
-// Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
-// before calling xz_dec_lzma2_run().
-struct xz_dec_lzma2 *xz_dec_lzma2_create(unsigned dict_max);
-
-// Decode the LZMA2 properties (one byte) and reset the decoder. Return
-// XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
-// big enough, and XZ_OPTIONS_ERROR if props indicates something that this
-// decoder doesn't support.
-enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
-           char props);
-
 /* Decode raw LZMA2 stream from b->in to b->out. */
 enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
                struct xz_buf *b);
@@ -711,47 +1011,6 @@ enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, char id)
 #define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
 #define RC_MOVE_BITS 5
 
-/*
- * Maximum number of position states. A position state is the lowest pb
- * number of bits of the current uncompressed offset. In some places there
- * are different sets of probabilities for different position states.
- */
-#define POS_STATES_MAX (1 << 4)
-
-/*
- * This enum is used to track which LZMA symbols have occurred most recently
- * and in which order. This information is used to predict the next symbol.
- *
- * Symbols:
- *  - Literal: One 8-bit byte
- *  - Match: Repeat a chunk of data at some distance
- *  - Long repeat: Multi-byte match at a recently seen distance
- *  - Short repeat: One-byte repeat at a recently seen distance
- *
- * The symbol names are in from STATE_oldest_older_previous. REP means
- * either short or long repeated match, and NONLIT means any non-literal.
- */
-enum lzma_state {
-  STATE_LIT_LIT,
-  STATE_MATCH_LIT_LIT,
-  STATE_REP_LIT_LIT,
-  STATE_SHORTREP_LIT_LIT,
-  STATE_MATCH_LIT,
-  STATE_REP_LIT,
-  STATE_SHORTREP_LIT,
-  STATE_LIT_MATCH,
-  STATE_LIT_LONGREP,
-  STATE_LIT_SHORTREP,
-  STATE_NONLIT_MATCH,
-  STATE_NONLIT_REP
-};
-
-/* Total number of states */
-#define STATES 12
-
-/* The lowest 7 states indicate that the previous state was a literal. */
-#define LIT_STATES 7
-
 /* Indicate that the latest symbol was a literal. */
 static void lzma_state_literal(enum lzma_state *state)
 {
@@ -781,50 +1040,11 @@ static void lzma_state_short_rep(enum lzma_state *state)
   *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
 }
 
-/* Each literal coder is divided in three sections:
- *   - 0x001-0x0FF: Without match byte
- *   - 0x101-0x1FF: With match byte; match bit is 0
- *   - 0x201-0x2FF: With match byte; match bit is 1
- *
- * Match byte is used when the previous LZMA symbol was something else than
- * a literal (that is, it was some kind of match).
- */
-#define LITERAL_CODER_SIZE 0x300
-
-/* Maximum number of literal coders */
-#define LITERAL_CODERS_MAX (1 << 4)
-
-/* Minimum length of a match is two bytes. */
-#define MATCH_LEN_MIN 2
-
-/* Match length is encoded with 4, 5, or 10 bits.
- *
- * Length   Bits
- *  2-9      4 = Choice=0 + 3 bits
- * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
- * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
- */
-#define LEN_LOW_BITS 3
-#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
-#define LEN_MID_BITS 3
-#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
-#define LEN_HIGH_BITS 8
-#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
-#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
-
 /*
  * Maximum length of a match is 273 which is a result of the encoding
  * described above.
  */
-#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
-
-/*
- * Different sets of probabilities are used for match distances that have
- * very short match length: Lengths of 2, 3, and 4 bytes have a separate
- * set of probabilities for each length. The matches with longer length
- * use a shared set of probabilities.
- */
-#define DIST_STATES 4
+// #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
 
 /*
  * Get the index of the appropriate probability array for decoding
@@ -836,47 +1056,6 @@ static unsigned lzma_get_dist_state(unsigned len)
       ? len - MATCH_LEN_MIN : DIST_STATES - 1;
 }
 
-/*
- * The highest two bits of a 32-bit match distance are encoded using six bits.
- * This six-bit value is called a distance slot. This way encoding a 32-bit
- * value takes 6-36 bits, larger values taking more bits.
- */
-#define DIST_SLOT_BITS 6
-#define DIST_SLOTS (1 << DIST_SLOT_BITS)
-
-/* Match distances up to 127 are fully encoded using probabilities. Since
- * the highest two bits (distance slot) are always encoded using six bits,
- * the distances 0-3 don't need any additional bits to encode, since the
- * distance slot itself is the same as the actual distance. DIST_MODEL_START
- * indicates the first distance slot where at least one additional bit is
- * needed.
- */
-#define DIST_MODEL_START 4
-
-/*
- * Match distances greater than 127 are encoded in three pieces:
- *   - distance slot: the highest two bits
- *   - direct bits: 2-26 bits below the highest two bits
- *   - alignment bits: four lowest bits
- *
- * Direct bits don't use any probabilities.
- *
- * The distance slot value of 14 is for distances 128-191.
- */
-#define DIST_MODEL_END 14
-
-/* Distance slots that indicate a distance <= 127. */
-#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
-#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
-
-/*
- * For match distances greater than 127, only the highest two bits and the
- * lowest four bits (alignment) is encoded using probabilities.
- */
-#define ALIGN_BITS 4
-#define ALIGN_SIZE (1 << ALIGN_BITS)
-#define ALIGN_MASK (ALIGN_SIZE - 1)
-
 /* Total number of all probability variables */
 #define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
 
@@ -895,237 +1074,6 @@ static unsigned lzma_get_dist_state(unsigned len)
  */
 #define RC_INIT_BYTES 5
 
-/*
- * Minimum number of usable input buffer to safely decode one LZMA symbol.
- * The worst case is that we decode 22 bits using probabilities and 26
- * direct bits. This may decode at maximum of 20 bytes of input. However,
- * lzma_main() does an extra normalization before returning, thus we
- * need to put 21 here.
- */
-#define LZMA_IN_REQUIRED 21
-
-/*
- * Dictionary (history buffer)
- *
- * These are always true:
- *    start <= pos <= full <= end
- *    pos <= limit <= end
- *    end == size
- *    size <= size_max
- *    allocated <= size
- *
- * Most of these variables are size_t as a relic of single-call mode,
- * in which the dictionary variables address the actual output
- * buffer directly.
- */
-struct dictionary {
-  // Beginning of the history buffer
-  char *buf;
-  // Old position in buf (before decoding more data)
-  size_t start;
-  // Position in buf
-  size_t pos;
-  // How full dictionary is. This is used to detect corrupt input that
-  // would read beyond the beginning of the uncompressed stream.
-  size_t full;
-  /* Write limit; we don't write to buf[limit] or later bytes. */
-  size_t limit;
-  // End of the dictionary buffer. This is the same as the dictionary size.
-  size_t end;
-  // Size of the dictionary as specified in Block Header. This is used
-  // together with "full" to detect corrupt input that would make us
-  // read beyond the beginning of the uncompressed stream.
-  unsigned size;
-  // Maximum allowed dictionary size.
-  unsigned size_max;
-  // Amount of memory currently allocated for the dictionary.
-  unsigned allocated;
-};
-
-/* Range decoder */
-struct rc_dec {
-  unsigned range;
-  unsigned code;
-
-  /*
-   * Number of initializing bytes remaining to be read
-   * by rc_read_init().
-   */
-  unsigned init_bytes_left;
-
-  /*
-   * Buffer from which we read our input. It can be either
-   * temp.buf or the caller-provided input buffer.
-   */
-  const char *in;
-  size_t in_pos;
-  size_t in_limit;
-};
-
-/* Probabilities for a length decoder. */
-struct lzma_len_dec {
-  /* Probability of match length being at least 10 */
-  uint16_t choice;
-
-  /* Probability of match length being at least 18 */
-  uint16_t choice2;
-
-  /* Probabilities for match lengths 2-9 */
-  uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
-
-  /* Probabilities for match lengths 10-17 */
-  uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
-
-  /* Probabilities for match lengths 18-273 */
-  uint16_t high[LEN_HIGH_SYMBOLS];
-};
-
-struct lzma_dec {
-  /* Distances of latest four matches */
-  unsigned rep0;
-  unsigned rep1;
-  unsigned rep2;
-  unsigned rep3;
-
-  /* Types of the most recently seen LZMA symbols */
-  enum lzma_state state;
-
-  /*
-   * Length of a match. This is updated so that dict_repeat can
-   * be called again to finish repeating the whole match.
-   */
-  unsigned len;
-
-  /*
-   * LZMA properties or related bit masks (number of literal
-   * context bits, a mask dervied from the number of literal
-   * position bits, and a mask dervied from the number
-   * position bits)
-   */
-  unsigned lc;
-  unsigned literal_pos_mask; /* (1 << lp) - 1 */
-  unsigned pos_mask;         /* (1 << pb) - 1 */
-
-  /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
-  uint16_t is_match[STATES][POS_STATES_MAX];
-
-  /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
-  uint16_t is_rep[STATES];
-
-  /*
-   * If 0, distance of a repeated match is rep0.
-   * Otherwise check is_rep1.
-   */
-  uint16_t is_rep0[STATES];
-
-  /*
-   * If 0, distance of a repeated match is rep1.
-   * Otherwise check is_rep2.
-   */
-  uint16_t is_rep1[STATES];
-
-  /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
-  uint16_t is_rep2[STATES];
-
-  /*
-   * If 1, the repeated match has length of one byte. Otherwise
-   * the length is decoded from rep_len_decoder.
-   */
-  uint16_t is_rep0_long[STATES][POS_STATES_MAX];
-
-  /*
-   * Probability tree for the highest two bits of the match
-   * distance. There is a separate probability tree for match
-   * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
-   */
-  uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
-
-  /*
-   * Probility trees for additional bits for match distance
-   * when the distance is in the range [4, 127].
-   */
-  uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
-
-  /*
-   * Probability tree for the lowest four bits of a match
-   * distance that is equal to or greater than 128.
-   */
-  uint16_t dist_align[ALIGN_SIZE];
-
-  /* Length of a normal match */
-  struct lzma_len_dec match_len_dec;
-
-  /* Length of a repeated match */
-  struct lzma_len_dec rep_len_dec;
-
-  /* Probabilities of literals */
-  uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
-};
-
-struct lzma2_dec {
-  /* Position in xz_dec_lzma2_run(). */
-  enum lzma2_seq {
-    SEQ_CONTROL,
-    SEQ_UNCOMPRESSED_1,
-    SEQ_UNCOMPRESSED_2,
-    SEQ_COMPRESSED_0,
-    SEQ_COMPRESSED_1,
-    SEQ_PROPERTIES,
-    SEQ_LZMA_PREPARE,
-    SEQ_LZMA_RUN,
-    SEQ_COPY
-  } sequence;
-
-  /* Next position after decoding the compressed size of the chunk. */
-  enum lzma2_seq next_sequence;
-
-  /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
-  unsigned uncompressed;
-
-  /*
-   * Compressed size of LZMA chunk or compressed/uncompressed
-   * size of uncompressed chunk (64 KiB at maximum)
-   */
-  unsigned compressed;
-
-  /*
-   * True if dictionary reset is needed. This is false before
-   * the first chunk (LZMA or uncompressed).
-   */
-  int need_dict_reset;
-
-  /*
-   * True if new LZMA properties are needed. This is false
-   * before the first LZMA chunk.
-   */
-  int need_props;
-};
-
-struct xz_dec_lzma2 {
-  /*
-   * The order below is important on x86 to reduce code size and
-   * it shouldn't hurt on other platforms. Everything up to and
-   * including lzma.pos_mask are in the first 128 bytes on x86-32,
-   * which allows using smaller instructions to access those
-   * variables. On x86-64, fewer variables fit into the first 128
-   * bytes, but this is still the best order without sacrificing
-   * the readability by splitting the structures.
-   */
-  struct rc_dec rc;
-  struct dictionary dict;
-  struct lzma2_dec lzma2;
-  struct lzma_dec lzma;
-
-  /*
-   * Temporary buffer which holds small number of input bytes between
-   * decoder calls. See lzma2_lzma() for details.
-   */
-  struct {
-    unsigned size;
-    char buf[3 * LZMA_IN_REQUIRED];
-  } temp;
-};
-
 /**************
  * Dictionary *
  **************/
@@ -1927,6 +1875,8 @@ enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b)
   return XZ_OK;
 }
 
+// Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
+// before calling xz_dec_lzma2_run().
 struct xz_dec_lzma2 *xz_dec_lzma2_create(unsigned dict_max)
 {
   struct xz_dec_lzma2 *s = malloc(sizeof(*s));
@@ -1940,6 +1890,10 @@ struct xz_dec_lzma2 *xz_dec_lzma2_create(unsigned dict_max)
   return s;
 }
 
+// Decode the LZMA2 properties (one byte) and reset the decoder. Return
+// XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
+// big enough, and XZ_OPTIONS_ERROR if props indicates something that this
+// decoder doesn't support.
 enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, char props)
 {
   /* This limits dictionary size to 3 GiB to keep parsing simpler. */
@@ -1957,11 +1911,7 @@ enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, char props)
   if (s->dict.allocated < s->dict.size) {
     s->dict.allocated = s->dict.size;
     free(s->dict.buf);
-    s->dict.buf = malloc(s->dict.size);
-    if (s->dict.buf == NULL) {
-      s->dict.allocated = 0;
-      return XZ_MEM_ERROR;
-    }
+    s->dict.buf = xmalloc(s->dict.size);
   }
 
   s->lzma.len = 0;
@@ -2372,30 +2322,20 @@ static int check_skip(struct xz_dec *s, struct xz_buf *b)
 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
 static enum xz_ret dec_stream_header(struct xz_dec *s)
 {
-  if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
+  if (memcmp(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
     return XZ_FORMAT_ERROR;
 
   if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
       != get_unaligned_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
     return XZ_DATA_ERROR;
 
-  if (s->temp.buf[HEADER_MAGIC_SIZE])
-    return XZ_OPTIONS_ERROR;
+  if (s->temp.buf[HEADER_MAGIC_SIZE]) return XZ_OPTIONS_ERROR;
 
-  /*
-   * Of integrity checks, we support none (Check ID = 0),
-   * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
-   * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
-   * check types too, but then the check won't be verified and
-   * a warning (XZ_UNSUPPORTED_CHECK) will be given.
-   */
+  // Integrity checks none (0), CRC32 (1), and CRC64 (4) are supported;
+  // other check types up to XZ_CHECK_MAX are accepted but silently skipped.
   s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
 
-  if (s->check_type > XZ_CHECK_MAX)
-    return XZ_OPTIONS_ERROR;
-
-  if (s->check_type > XZ_CHECK_CRC32 && s->check_type != XZ_CHECK_CRC64)
-    return XZ_UNSUPPORTED_CHECK;
+  if (s->check_type > XZ_CHECK_MAX) return XZ_OPTIONS_ERROR;
 
   return XZ_OK;
 }
@@ -2403,7 +2343,7 @@ static enum xz_ret dec_stream_header(struct xz_dec *s)
 /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
 static enum xz_ret dec_stream_footer(struct xz_dec *s)
 {
-  if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
+  if (memcmp(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
     return XZ_DATA_ERROR;
 
   if (xz_crc32(s->temp.buf + 4, 6, 0) != get_unaligned_le32(s->temp.buf))
@@ -2660,8 +2600,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
       index_update(s, b);
 
       /* Compare the hashes to validate the Index field. */
-      if (!memeq(&s->block.hash, &s->index.hash,
-          sizeof(s->block.hash)))
+      if (memcmp(&s->block.hash, &s->index.hash, sizeof(s->block.hash)))
         return XZ_DATA_ERROR;
 
       s->sequence = SEQ_INDEX_CRC32;
@@ -2849,8 +2788,7 @@ void do_xzcat(int fd, char *name)
   uint64_t r;
 
   char *errors[] = {
-    "Memory allocation failed",
-    "Memory usage limit reached",
+    "Dictionary too big",
     "Not a .xz file",
     "Unsupported options in the .xz headers",
     // 2 things in the enum xz_ret use this
@@ -2905,8 +2843,7 @@ void do_xzcat(int fd, char *name)
       b.out_pos = 0;
     }
 
-    if (ret == XZ_OK || ret == XZ_UNSUPPORTED_CHECK)
-      continue;
+    if (ret == XZ_OK) continue;
 
     if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
       msg = "Write error\n";
@@ -2918,7 +2855,7 @@ void do_xzcat(int fd, char *name)
       return;
     }
 
-    msg = (ret-3 < ARRAY_LEN(errors)) ? errors[ret-3] : "Bug!";
+    msg = (ret-2 < ARRAY_LEN(errors)) ? errors[ret-2] : "Bug!";
     goto error;
   }
 
-- 
2.39.5