changeset 449:758a978b6605

Move i386-specific files into an i386 subdirectory.
author Rob Landley <rob@landley.net>
date Sat, 12 May 2007 00:15:39 -0400
parents 4b36c07af991
children cd7e1ce83b92
files Makefile i386-asm.c i386-asm.h i386-gen.c i386/i386-asm.c i386/i386-asm.h i386/i386-gen.c tcc.c tcctok.h
diffstat 9 files changed, 2697 insertions(+), 2697 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Tue May 08 22:16:31 2007 -0400
+++ b/Makefile	Sat May 12 00:15:39 2007 -0400
@@ -136,11 +136,11 @@
 
 # Host Tiny C Compiler
 ifdef CONFIG_WIN32
-tcc$(EXESUF): tcc.c i386-gen.c tccelf.c tccasm.c i386-asm.c tcctok.h libtcc.h i386-asm.h win32/tccpe.c
+tcc$(EXESUF): tcc.c i386/* tccelf.c tccasm.c tcctok.h libtcc.h win32/tccpe.c
 	$(CC) $(CFLAGS) -DTCC_TARGET_PE -o $@ $< $(LIBS)
 else
 ifeq ($(ARCH),i386)
-tcc$(EXESUF): tcc.c i386-gen.c tccelf.c tccasm.c i386-asm.c tcctok.h libtcc.h i386-asm.h
+tcc$(EXESUF): tcc.c i386/* tccelf.c tccasm.c tcctok.h libtcc.h
 	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
 endif
 ifeq ($(ARCH),arm)
@@ -150,7 +150,7 @@
 endif
 
 # Cross Tiny C Compilers
-i386-tcc$(EXESUF): tcc.c i386-gen.c tccelf.c tccasm.c i386-asm.c tcctok.h libtcc.h i386-asm.h
+i386-tcc$(EXESUF): tcc.c i386/* tccelf.c tccasm.c tcctok.h libtcc.h
 	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
 
 c67-tcc$(EXESUF): tcc.c c67-gen.c tccelf.c tccasm.c tcctok.h libtcc.h tcccoff.c
@@ -159,7 +159,7 @@
 arm-tcc$(EXESUF): tcc.c arm-gen.c tccelf.c tccasm.c tcctok.h libtcc.h
 	$(CC) $(CFLAGS) -DTCC_TARGET_ARM -DTCC_ARM_EABI -o $@ $< $(LIBS)
 
-i386-win32-tcc$(EXESUF): tcc.c i386-gen.c tccelf.c tccasm.c i386-asm.c tcctok.h libtcc.h i386-asm.h win32/tccpe.c
+i386-win32-tcc$(EXESUF): tcc.c i386/* tccelf.c tccasm.c tcctok.h libtcc.h win32/tccpe.c
 	$(CC) $(CFLAGS) -DTCC_TARGET_PE -o $@ $< $(LIBS)
 
 # windows utilities
@@ -234,7 +234,7 @@
 	mkdir -p "$(includedir)"
 	$(INSTALL) -m644 libtcc.h "$(includedir)"
 
-libtcc.o: tcc.c i386-gen.c Makefile
+libtcc.o: tcc.c i386/i386-gen.c Makefile
 	$(CC) $(CFLAGS) -DLIBTCC -c -o $@ $<
 
 libtcc.a: libtcc.o 
--- a/i386-asm.c	Tue May 08 22:16:31 2007 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1209 +0,0 @@
-/*
- *  i386 specific functions for TCC assembler
- * 
- *  Copyright (c) 2001, 2002 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#define MAX_OPERANDS 3
-
-/* One row of the instruction table: a mnemonic token, its base opcode,
-   its encoding flags and the operand pattern it accepts. */
-typedef struct ASMInstr {
-    uint16_t sym;        /* mnemonic token; 0 marks the end of asm_instrs[] */
-    uint16_t opcode;     /* base opcode; a non-zero high byte is emitted first */
-    uint16_t instr_type; /* OPC_xxx flags ORed with (group << OPC_GROUP_SHIFT) */
-#define OPC_JMP       0x01  /* jmp operand */
-#define OPC_B         0x02  /* only used with OPC_WL */
-#define OPC_WL        0x04  /* accepts w, l or no suffix */
-#define OPC_BWL       (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
-#define OPC_REG       0x08 /* register is added to opcode */
-#define OPC_MODRM     0x10 /* modrm encoding */
-#define OPC_FWAIT     0x20 /* add fwait opcode */
-#define OPC_TEST      0x40 /* test opcodes */
-#define OPC_SHIFT     0x80 /* shift opcodes */
-#define OPC_D16      0x0100 /* generate data16 prefix */
-#define OPC_ARITH    0x0200 /* arithmetic opcodes */
-#define OPC_SHORTJMP 0x0400 /* short jmp operand */
-#define OPC_FARITH   0x0800 /* FPU arithmetic opcodes */
-#define OPC_GROUP_SHIFT 13 /* bit position of the 3 bit group field in instr_type */
-
-/* in order to compress the operand type, we use specific operands and
-   we or only with EA  */ 
-#define OPT_REG8  0 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_REG16 1 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_REG32 2 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_MMX   3 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_SSE   4 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_CR    5 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_TR    6 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_DB    7 /* warning: value is hardcoded from TOK_ASM_xxx */
-#define OPT_SEG   8
-#define OPT_ST    9
-#define OPT_IM8   10
-#define OPT_IM8S  11
-#define OPT_IM16  12
-#define OPT_IM32  13
-#define OPT_EAX   14 /* %al, %ax or %eax register */
-#define OPT_ST0   15 /* %st(0) register */
-#define OPT_CL    16 /* %cl register */
-#define OPT_DX    17 /* %dx register */
-#define OPT_ADDR  18 /* OP_EA with only offset */
-#define OPT_INDIR 19 /* *(expr) */
-
-/* composite types: expanded to several OP_xxx bits when an operand
-   pattern is decoded in asm_opcode() */ 
-#define OPT_COMPOSITE_FIRST   20
-#define OPT_IM       20 /* IM8 | IM16 | IM32 */
-#define OPT_REG      21 /* REG8 | REG16 | REG32 */ 
-#define OPT_REGW     22 /* REG16 | REG32 */
-#define OPT_IMW      23 /* IM16 | IM32 */ 
-
-/* can be ored with any OPT_xxx */
-#define OPT_EA    0x80
-
-    uint8_t nb_ops; /* number of operands this form takes */
-    uint8_t op_type[MAX_OPERANDS]; /* one OPT_xxx code per operand, optionally ORed with OPT_EA */
-} ASMInstr;
-
-/* A parsed instruction operand, as filled in by parse_operand(). */
-typedef struct Operand {
-    uint32_t type; /* bit mask of the OP_xxx classes this operand belongs to */
-/* each OP_xxx is the single bit selected by the matching OPT_xxx code */
-#define OP_REG8   (1 << OPT_REG8)
-#define OP_REG16  (1 << OPT_REG16)
-#define OP_REG32  (1 << OPT_REG32)
-#define OP_MMX    (1 << OPT_MMX)
-#define OP_SSE    (1 << OPT_SSE)
-#define OP_CR     (1 << OPT_CR)
-#define OP_TR     (1 << OPT_TR)
-#define OP_DB     (1 << OPT_DB)
-#define OP_SEG    (1 << OPT_SEG)
-#define OP_ST     (1 << OPT_ST)
-#define OP_IM8    (1 << OPT_IM8)
-#define OP_IM8S   (1 << OPT_IM8S)
-#define OP_IM16   (1 << OPT_IM16)
-#define OP_IM32   (1 << OPT_IM32)
-#define OP_EAX    (1 << OPT_EAX)
-#define OP_ST0    (1 << OPT_ST0)
-#define OP_CL     (1 << OPT_CL)
-#define OP_DX     (1 << OPT_DX)
-#define OP_ADDR   (1 << OPT_ADDR)
-#define OP_INDIR  (1 << OPT_INDIR)
-
-#define OP_EA     0x40000000 /* memory reference: address(reg,reg2,shift) */
-#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32) /* any general register */
-#define OP_IM     OP_IM32
-    int8_t  reg; /* register, -1 if none */
-    int8_t  reg2; /* second register, -1 if none */
-    uint8_t shift; /* value returned by get_reg_shift(): 0..3 for scale 1, 2, 4, 8 */
-    ExprValue e; /* immediate or displacement value, with its optional symbol */
-} Operand;
-
-/* Maps the register class bits of an operand (type & OP_REG) to the
-   operand size code used by asm_opcode(): 0 = byte, 1 = word, 2 = long.
-   The index is the OP_REGxx bit value itself (1, 2 or 4), which is why
-   the table needs 5 slots. */
-static const uint8_t reg_to_size[5] = {
-    [OP_REG8] = 0,
-    [OP_REG16] = 1,
-    [OP_REG32] = 2,
-};
-    
-#define WORD_PREFIX_OPCODE 0x66
-
-#define NB_TEST_OPCODES 30
-
-/* Condition code added to the base opcode of an OPC_TEST instruction.
-   Indexed by (opcode token - first token of the family), so the order
-   must match the order of the condition suffix tokens; synonyms share
-   the same value. */
-static const uint8_t test_bits[NB_TEST_OPCODES] = {
- 0x00, /* o */
- 0x01, /* no */
- 0x02, /* b */
- 0x02, /* c */
- 0x02, /* nae */
- 0x03, /* nb */
- 0x03, /* nc */
- 0x03, /* ae */
- 0x04, /* e */
- 0x04, /* z */
- 0x05, /* ne */
- 0x05, /* nz */
- 0x06, /* be */
- 0x06, /* na */
- 0x07, /* nbe */
- 0x07, /* a */
- 0x08, /* s */
- 0x09, /* ns */
- 0x0a, /* p */
- 0x0a, /* pe */
- 0x0b, /* np */
- 0x0b, /* po */
- 0x0c, /* l */
- 0x0c, /* nge */
- 0x0d, /* nl */
- 0x0d, /* ge */
- 0x0e, /* le */
- 0x0e, /* ng */
- 0x0f, /* nle */
- 0x0f, /* g */
-};
-
-/* Segment override prefix bytes, indexed by the segment register number
-   that parse_operand() stores in Operand.reg (token - TOK_ASM_es). */
-static const uint8_t segment_prefixes[] = {
- 0x26, /* es */
- 0x2e, /* cs */
- 0x36, /* ss */
- 0x3e, /* ds */
- 0x64, /* fs */
- 0x65  /* gs */
-};
-
-/* Instruction table scanned linearly by asm_opcode(). It is built by
-   including i386-asm.h with the DEF_ASM_OPx macros defined so that each
-   entry taking operands (or a group/type) expands to one ASMInstr
-   initializer. DEF_ASM_OP0 expands to nothing here: those opcodes are
-   collected in op0_codes[] instead. */
-static const ASMInstr asm_instrs[] = {
-#define ALT(x) x
-#define DEF_ASM_OP0(name, opcode)
-#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
-#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
-#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
-#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
-#include "i386-asm.h"
-
-    /* last operation: sym == 0 stops the search loop in asm_opcode() */
-    { 0, },
-};
-
-/* Opcodes of the instructions declared with DEF_ASM_OP0 in i386-asm.h,
-   in declaration order. asm_opcode() indexes this table with
-   (token - TOK_ASM_pusha) when no asm_instrs[] entry matches; a non-zero
-   high byte is emitted before the low byte. All other DEF_ASM_OPx
-   macros expand to nothing here. */
-static const uint16_t op0_codes[] = {
-#define ALT(x)
-#define DEF_ASM_OP0(x, opcode) opcode,
-#define DEF_ASM_OP0L(name, opcode, group, instr_type)
-#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
-#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
-#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
-#include "i386-asm.h"
-};
-
-/* Parse the scale factor of an indexed address and return it as a
-   shift count: 1, 2, 4, 8 give 0, 1, 2, 3. Any other value is reported
-   through expect() and treated as a scale of 1. */
-static inline int get_reg_shift(TCCState *s1)
-{
-    int scale = asm_int_expr(s1);
-
-    if (scale == 1)
-        return 0;
-    if (scale == 2)
-        return 1;
-    if (scale == 4)
-        return 2;
-    if (scale == 8)
-        return 3;
-
-    expect("1, 2, 4 or 8 constant");
-    return 0;
-}
-
-/* Parse '%' followed by a 32 bit general register token and return the
-   register number (token - TOK_ASM_eax). Anything else is reported
-   through expect() and 0 is returned. */
-static int asm_parse_reg(void)
-{
-    int regno;
-
-    if (tok == '%') {
-        next();
-        if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
-            regno = tok - TOK_ASM_eax;
-            next();
-            return regno;
-        }
-    }
-    expect("32 bit register");
-    return 0;
-}
-
-/* Parse one operand starting at the current token and describe it in *op.
-   Three forms are recognized, each optionally preceded by '*':
-     %reg                       register (general, mmx, sse, control,
-                                test, debug, segment or %st / %st(n))
-     $expr                      immediate value
-     expr(reg,reg2,shift)       memory reference, every part optional
-   op->type receives the OP_xxx class bits; a leading '*' adds OP_INDIR.
-   Unknown registers are reported through error(). */
-static void parse_operand(TCCState *s1, Operand *op)
-{
-    ExprValue e;
-    int reg, indir;
-    const char *p;
-
-    indir = 0;
-    if (tok == '*') {
-        next();
-        indir = OP_INDIR;
-    }
-
-    if (tok == '%') {
-        next();
-        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
-            /* the token range is treated as groups of 8 registers: the
-               group number selects the OP_xxx bit, the low 3 bits give
-               the register number */
-            reg = tok - TOK_ASM_al;
-            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
-            op->reg = reg & 7;
-            /* flag the registers that some instructions name implicitly */
-            if ((op->type & OP_REG) && op->reg == TREG_EAX)
-                op->type |= OP_EAX;
-            else if (op->type == OP_REG8 && op->reg == TREG_ECX)
-                op->type |= OP_CL;
-            else if (op->type == OP_REG16 && op->reg == TREG_EDX)
-                op->type |= OP_DX;
-        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
-            /* %drN is accepted as another spelling of a debug register */
-            op->type = OP_DB;
-            op->reg = tok - TOK_ASM_dr0;
-        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
-            op->type = OP_SEG;
-            op->reg = tok - TOK_ASM_es;
-        } else if (tok == TOK_ASM_st) {
-            /* %st alone means %st(0); otherwise expect a single digit
-               0..7 between parentheses */
-            op->type = OP_ST;
-            op->reg = 0;
-            next();
-            if (tok == '(') {
-                next();
-                if (tok != TOK_PPNUM)
-                    goto reg_error;
-                p = tokc.cstr->data;
-                reg = p[0] - '0';
-                if ((unsigned)reg >= 8 || p[1] != '\0')
-                    goto reg_error;
-                op->reg = reg;
-                next();
-                skip(')');
-            }
-            if (op->reg == 0)
-                op->type |= OP_ST0;
-            /* the tokens of this form have already been consumed */
-            goto no_skip;
-        } else {
-        reg_error:
-            error("unknown register");
-        }
-        next();
-    no_skip: ;
-    } else if (tok == '$') {
-        /* constant value */
-        next();
-        asm_expr(s1, &e);
-        op->type = OP_IM32;
-        op->e.v = e.v;
-        op->e.sym = e.sym;
-        /* a constant without symbol also qualifies for every narrower
-           immediate class that can represent its value */
-        if (!op->e.sym) {
-            if (op->e.v == (uint8_t)op->e.v)
-                op->type |= OP_IM8;
-            if (op->e.v == (int8_t)op->e.v)
-                op->type |= OP_IM8S;
-            if (op->e.v == (uint16_t)op->e.v)
-                op->type |= OP_IM16;
-        }
-    } else {
-        /* address(reg,reg2,shift) with all variants */
-        op->type = OP_EA;
-        op->reg = -1;
-        op->reg2 = -1;
-        op->shift = 0;
-        /* optional displacement expression before the parenthesis */
-        if (tok != '(') {
-            asm_expr(s1, &e);
-            op->e.v = e.v;
-            op->e.sym = e.sym;
-        } else {
-            op->e.v = 0;
-            op->e.sym = NULL;
-        }
-        if (tok == '(') {
-            next();
-            /* each of the three fields may be left empty */
-            if (tok != ',') {
-                op->reg = asm_parse_reg();
-            }
-            if (tok == ',') {
-                next();
-                if (tok != ',') {
-                    op->reg2 = asm_parse_reg();
-                } 
-                if (tok == ',') {
-                    next();
-                    op->shift = get_reg_shift(s1);
-                }
-            }
-            skip(')');
-        }
-        /* neither register given: the operand is a plain address */
-        if (op->reg == -1 && op->reg2 == -1)
-            op->type |= OP_ADDR;
-    }
-    op->type |= indir;
-}
-
-/* Emit the 32 bit value of an expression. When the expression refers to
-   a symbol, an R_386_32 relocation is recorded at the current output
-   position first. */
-/* XXX: unify with C code output ? */
-static void gen_expr32(ExprValue *pe)
-{
-    Sym *target = pe->sym;
-
-    if (target) {
-        greloc(cur_text_section, target, ind, R_386_32);
-    }
-    gen_le32(pe->v);
-}
-
-/* Emit a 32 bit pc-relative displacement for a jump or call target. */
-/* XXX: unify with C code output ? */
-static void gen_disp32(ExprValue *pe)
-{
-    Sym *target = pe->sym;
-
-    if (!target) {
-        /* no symbol: put an empty PC32 relocation */
-        put_elf_reloc(symtab_section, cur_text_section, 
-                      ind, R_386_PC32, 0);
-        gen_le32(pe->v - 4);
-        return;
-    }
-
-    if (target->r == cur_text_section->sh_num) {
-        /* the symbol lives in the current section, so the distance is
-           already known and is written directly. Note that the TCC
-           compiler behaves differently here because it always outputs
-           a relocation to ease (future) code elimination in the
-           linker */
-        gen_le32(pe->v + (long)target->next - ind - 4);
-        return;
-    }
-
-    /* symbol in another section: let the relocation resolve it */
-    greloc(cur_text_section, target, ind, R_386_PC32);
-    gen_le32(pe->v - 4);
-}
-
-
-/* Emit the low 16 bits of v, least significant byte first. */
-static void gen_le16(int v)
-{
-    int low_part = v;
-    int high_part = v >> 8;
-
-    g(low_part);
-    g(high_part);
-}
-
-/* generate the modrm operand: 'reg' is placed in bits 3..5 of the modrm
-   byte, 'op' provides the mod and r/m fields and, for memory operands,
-   the optional sib byte and displacement that follow */
-static inline void asm_modrm(int reg, Operand *op)
-{
-    int mod, reg1, reg2, sib_reg1;
-
-    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
-        /* register operand: mod = 3 */
-        g(0xc0 + (reg << 3) + op->reg);
-    } else if (op->reg == -1 && op->reg2 == -1) {
-        /* displacement only */
-        g(0x05 + (reg << 3));
-        gen_expr32(&op->e);
-    } else {
-        sib_reg1 = op->reg;
-        /* first compute displacement encoding */
-        if (sib_reg1 == -1) {
-            /* index without base: base field 5 with mod 0 selects a
-               32 bit displacement and no base register */
-            sib_reg1 = 5;
-            mod = 0x00;
-        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
-            /* no displacement needed; register 5 is excluded because
-               that encoding with mod 0 is the displacement only form */
-            mod = 0x00;
-        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
-            /* displacement fits in a signed byte */
-            mod = 0x40;
-        } else {
-            /* full 32 bit displacement */
-            mod = 0x80;
-        }
-        /* compute if sib byte needed */
-        reg1 = op->reg;
-        if (op->reg2 != -1)
-            reg1 = 4;
-        g(mod + (reg << 3) + reg1);
-        if (reg1 == 4) {
-            /* add sib byte */
-            reg2 = op->reg2;
-            if (reg2 == -1)
-                reg2 = 4; /* indicate no index */
-            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
-        }
-
-        /* add offset */
-        if (mod == 0x40) {
-            g(op->e.v);
-        } else if (mod == 0x80 || op->reg == -1) {
-            gen_expr32(&op->e);
-        }
-    }
-}
-
-/* Assemble one instruction.
-   'opcode' is the token of the mnemonic; the operands are parsed from
-   the current token stream up to ';' or the end of the line. The
-   matching entry of asm_instrs[] is then located (mnemonic family,
-   operand count and operand classes must all agree) and the prefixes,
-   opcode byte(s), modrm/sib bytes and immediates are emitted with g().
-   Mnemonics without any table entry fall back to op0_codes[].
-   Errors (too many operands, bad segment prefix, unknown opcode,
-   size that cannot be inferred, impossible relocation or displacement)
-   are reported through error(). */
-static void asm_opcode(TCCState *s1, int opcode)
-{
-    const ASMInstr *pa;
-    int i, modrm_index, reg, v, op1, is_short_jmp, has_seg_prefix;
-    int nb_ops, s, ss;
-    Operand ops[MAX_OPERANDS], *pop, seg_prefix;
-    int op_type[3]; /* decoded op type */
-
-    /* get operands */
-    pop = ops;
-    nb_ops = 0;
-    has_seg_prefix = 0;
-    for(;;) {
-        if (tok == ';' || tok == TOK_LINEFEED)
-            break;
-        if (nb_ops >= MAX_OPERANDS) {
-            error("incorrect number of operands");
-        }
-        parse_operand(s1, pop);
-        if (tok == ':') {
-           /* "%seg:" prefix: remember it and parse the real operand */
-           if (pop->type != OP_SEG || has_seg_prefix) {
-               error("incorrect prefix");
-           }
-           seg_prefix = *pop;
-           has_seg_prefix = 1;
-           next();
-           parse_operand(s1, pop);
-           if (!(pop->type & OP_EA)) {
-               error("segment prefix must be followed by memory reference");
-           }
-        }
-        pop++;
-        nb_ops++;
-        if (tok != ',')
-            break;
-        next();
-    }
-
-    is_short_jmp = 0;
-    s = 0; /* avoid warning */
-    
-    /* optimize matching by using a lookup table (no hashing is needed
-       !) */
-    for(pa = asm_instrs; pa->sym != 0; pa++) {
-        /* s is the size code derived from the mnemonic suffix:
-           0 = b, 1 = w, 2 = l, 3 = no suffix (to be inferred) */
-        s = 0;
-        if (pa->instr_type & OPC_FARITH) {
-            v = opcode - pa->sym;
-            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
-                continue;
-        } else if (pa->instr_type & OPC_ARITH) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + 8 * 4))
-                continue;
-            goto compute_size;
-        } else if (pa->instr_type & OPC_SHIFT) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + 7 * 4))
-                continue;
-            goto compute_size;
-        } else if (pa->instr_type & OPC_TEST) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
-                continue;
-        } else if (pa->instr_type & OPC_B) {
-            if (!(opcode >= pa->sym && opcode <= pa->sym + 3))
-                continue;
-        compute_size:
-            s = (opcode - pa->sym) & 3;
-        } else if (pa->instr_type & OPC_WL) {
-            if (!(opcode >= pa->sym && opcode <= pa->sym + 2))
-                continue;
-            s = opcode - pa->sym + 1;
-        } else {
-            if (pa->sym != opcode)
-                continue;
-        }
-        if (pa->nb_ops != nb_ops)
-            continue;
-        /* now decode and check each operand */
-        for(i = 0; i < nb_ops; i++) {
-            int op1, op2;
-            op1 = pa->op_type[i];
-            op2 = op1 & 0x1f;
-            switch(op2) {
-            case OPT_IM:
-                v = OP_IM8 | OP_IM16 | OP_IM32;
-                break;
-            case OPT_REG:
-                v = OP_REG8 | OP_REG16 | OP_REG32;
-                break;
-            case OPT_REGW:
-                v = OP_REG16 | OP_REG32;
-                break;
-            case OPT_IMW:
-                v = OP_IM16 | OP_IM32;
-                break;
-            default:
-                v = 1 << op2;
-                break;
-            }
-            if (op1 & OPT_EA)
-                v |= OP_EA;
-            op_type[i] = v;
-            if ((ops[i].type & v) == 0)
-                goto next;
-        }
-        /* all is matching ! */
-        break;
-    next: ;
-    }
-    if (pa->sym == 0) {
-        /* no table entry: maybe an operand-less opcode from op0_codes[] */
-        if (opcode >= TOK_ASM_pusha && opcode <= TOK_ASM_emms) {
-            int b;
-            b = op0_codes[opcode - TOK_ASM_pusha];
-            if (b & 0xff00) 
-                g(b >> 8);
-            g(b);
-            return;
-        } else {
-            error("unknown opcode '%s'", 
-                  get_tok_str(opcode, NULL));
-        }
-    }
-    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
-    if (s == 3) {
-        for(i = 0; s == 3 && i < nb_ops; i++) {
-            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
-                s = reg_to_size[ops[i].type & OP_REG];
-        }
-        if (s == 3) {
-            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && 
-                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
-                s = 2;
-            else
-                error("cannot infer opcode suffix");
-        }
-    }
-
-    /* generate data16 prefix if needed */
-    ss = s; /* keep the real size for the immediate emission below */
-    if (s == 1 || (pa->instr_type & OPC_D16))
-        g(WORD_PREFIX_OPCODE);
-    else if (s == 2)
-        s = 1;
-    /* now generates the operation */
-    if (pa->instr_type & OPC_FWAIT)
-        g(0x9b);
-    if (has_seg_prefix)
-        g(segment_prefixes[seg_prefix.reg]);
-
-    v = pa->opcode;
-    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
-        /* kludge for imul $im, %reg: both immediate encodings (0x69
-           with a full immediate, 0x6b with a sign extended byte) are
-           three operand instructions, so the two operand spelling is
-           expanded to "imul $im, %reg, %reg". The decoded operand type
-           is duplicated as well because the modrm search below reads
-           op_type[] for every operand. Genuine three operand forms
-           must be left alone or their destination would be lost. */
-        nb_ops = 3;
-        ops[2] = ops[1];
-        op_type[2] = op_type[1];
-    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
-        v--; /* int $3 case */
-        nb_ops = 0;
-    } else if ((v == 0x06 || v == 0x07)) {
-        if (ops[0].reg >= 4) {
-            /* push/pop %fs or %gs */
-            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
-        } else {
-            v += ops[0].reg << 3;
-        }
-        nb_ops = 0;
-    } else if (v <= 0x05) {
-        /* arith case */
-        v += ((opcode - TOK_ASM_addb) >> 2) << 3;
-    } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
-        /* fpu arith case */
-        v += ((opcode - pa->sym) / 6) << 3;
-    }
-    if (pa->instr_type & OPC_REG) {
-        /* the register number is encoded in the opcode itself */
-        for(i = 0; i < nb_ops; i++) {
-            if (op_type[i] & (OP_REG | OP_ST)) {
-                v += ops[i].reg;
-                break;
-            }
-        }
-        /* mov $im, %reg case */
-        if (pa->opcode == 0xb0 && s >= 1)
-            v += 7;
-    }
-    if (pa->instr_type & OPC_B)
-        v += s;
-    if (pa->instr_type & OPC_TEST)
-        v += test_bits[opcode - pa->sym]; 
-    if (pa->instr_type & OPC_SHORTJMP) {
-        Sym *sym;
-        int jmp_disp;
-
-        /* see if we can really generate the jump with a byte offset */
-        sym = ops[0].e.sym;
-        if (!sym)
-            goto no_short_jump;
-        if (sym->r != cur_text_section->sh_num)
-            goto no_short_jump;
-        jmp_disp = ops[0].e.v + (long)sym->next - ind - 2;
-        if (jmp_disp == (int8_t)jmp_disp) {
-            /* OK to generate jump */
-            is_short_jmp = 1;
-            ops[0].e.v = jmp_disp;
-        } else {
-        no_short_jump:
-            if (pa->instr_type & OPC_JMP) {
-                /* long jump will be allowed. need to modify the
-                   opcode slightly */
-                if (v == 0xeb)
-                    v = 0xe9;
-                else 
-                    v += 0x0f10;
-            } else {
-                error("invalid displacement");
-            }
-        }
-    }
-    /* emit the opcode, high byte first when there is one */
-    op1 = v >> 8;
-    if (op1)
-        g(op1);
-    g(v);
-        
-    /* search which operand will used for modrm */
-    modrm_index = 0;
-    /* default content of the modrm reg field: the sub-opcode selected
-       by the mnemonic, or the group stored in the table entry */
-    if (pa->instr_type & OPC_SHIFT) {
-        reg = (opcode - pa->sym) >> 2; 
-        if (reg == 6)
-            reg = 7;
-    } else if (pa->instr_type & OPC_ARITH) {
-        reg = (opcode - pa->sym) >> 2;
-    } else if (pa->instr_type & OPC_FARITH) {
-        reg = (opcode - pa->sym) / 6;
-    } else {
-        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
-    }
-    if (pa->instr_type & OPC_MODRM) {
-        /* first look for an ea operand */
-        for(i = 0;i < nb_ops; i++) {
-            if (op_type[i] & OP_EA)
-                goto modrm_found;
-        }
-        /* then if not found, a register or indirection (shift instructions) */
-        for(i = 0;i < nb_ops; i++) {
-            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
-                goto modrm_found;
-        }
-#ifdef ASM_DEBUG
-        error("bad op table");
-#endif      
-    modrm_found:
-        modrm_index = i;
-        /* if a register is used in another operand then it is
-           used instead of group */
-        for(i = 0;i < nb_ops; i++) {
-            v = op_type[i];
-            if (i != modrm_index && 
-                (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
-                reg = ops[i].reg;
-                break;
-            }
-        }
-
-        asm_modrm(reg, &ops[modrm_index]);
-    }
-
-    /* emit constants */
-    if (pa->opcode == 0x9a || pa->opcode == 0xea) {
-        /* ljmp or lcall kludge */
-        gen_expr32(&ops[1].e);
-        if (ops[0].e.sym)
-            error("cannot relocate");
-        gen_le16(ops[0].e.v);
-    } else {
-        for(i = 0;i < nb_ops; i++) {
-            v = op_type[i];
-            if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM8S | OP_ADDR)) {
-                /* if multiple sizes are given it means we must look
-                   at the op size */
-                if (v == (OP_IM8 | OP_IM16 | OP_IM32) ||
-                    v == (OP_IM16 | OP_IM32)) {
-                    if (ss == 0)
-                        v = OP_IM8;
-                    else if (ss == 1)
-                        v = OP_IM16;
-                    else
-                        v = OP_IM32;
-                }
-                if (v & (OP_IM8 | OP_IM8S)) {
-                    if (ops[i].e.sym)
-                        goto error_relocate;
-                    g(ops[i].e.v);
-                } else if (v & OP_IM16) {
-                    if (ops[i].e.sym) {
-                    error_relocate:
-                        error("cannot relocate");
-                    }
-                    gen_le16(ops[i].e.v);
-                } else {
-                    if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
-                        if (is_short_jmp)
-                            g(ops[i].e.v);
-                        else
-                            gen_disp32(&ops[i].e);
-                    } else {
-                        gen_expr32(&ops[i].e);
-                    }
-                }
-            }
-        }
-    }
-}
-
-#define NB_SAVED_REGS 3
-#define NB_ASM_REGS 8
-
-/* Rank a constraint string for register allocation. Every constraint
-   letter has a rank (0 for 'A', 1 for the single register letters, 2
-   for 'q', 3 for 'r', 4 for immediates and memory) and the largest
-   rank found in the string is returned. Operands with the smallest
-   result are allocated first. An unknown letter is reported through
-   error() and counts as rank 0. */
-static inline int constraint_priority(const char *str)
-{
-    const char *cur;
-    int worst, rank;
-
-    worst = 0;
-    for (cur = str; *cur != '\0'; cur++) {
-        switch (*cur) {
-        case 'A':
-            rank = 0;
-            break;
-        case 'a': case 'b': case 'c': case 'd':
-        case 'S': case 'D':
-            rank = 1;
-            break;
-        case 'q':
-            rank = 2;
-            break;
-        case 'r':
-            rank = 3;
-            break;
-        case 'N': case 'M': case 'I':
-        case 'i': case 'm': case 'g':
-            rank = 4;
-            break;
-        default:
-            error("unknown constraint '%c'", *cur);
-            rank = 0;
-        }
-        if (worst < rank)
-            worst = rank;
-    }
-    return worst;
-}
-
-/* Return a pointer to the first character of p that is not one of the
-   constraint modifiers '=', '&', '+' or '%'. */
-static const char *skip_constraint_modifiers(const char *p)
-{
-    for (;;) {
-        switch (*p) {
-        case '=':
-        case '&':
-        case '+':
-        case '%':
-            p++;
-            continue;
-        }
-        return p;
-    }
-}
-
-#define REG_OUT_MASK 0x01
-#define REG_IN_MASK  0x02
-
-#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
-
-static void asm_compute_constraints(ASMOperand *operands, 
-                                    int nb_operands, int nb_outputs, 
-                                    const uint8_t *clobber_regs,
-                                    int *pout_reg)
-{
-    ASMOperand *op;
-    int sorted_op[MAX_ASM_OPERANDS];
-    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
-    const char *str;
-    uint8_t regs_allocated[NB_ASM_REGS];
-    
-    /* init fields */
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        op->input_index = -1;
-        op->ref_index = -1;
-        op->reg = -1;
-        op->is_memory = 0;
-        op->is_rw = 0;
-    }
-    /* compute constraint priority and evaluate references to output
-       constraints if input constraints */
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        str = op->constraint;
-        str = skip_constraint_modifiers(str);
-        if (isnum(*str) || *str == '[') {
-            /* this is a reference to another constraint */
-            k = find_constraint(operands, nb_operands, str, NULL);
-            if ((unsigned)k >= i || i < nb_outputs)
-                error("invalid reference in constraint %d ('%s')",
-                      i, str);
-            op->ref_index = k;
-            if (operands[k].input_index >= 0)
-                error("cannot reference twice the same operand");
-            operands[k].input_index = i;
-            op->priority = 5;
-        } else {
-            op->priority = constraint_priority(str);
-        }
-    }
-    
-    /* sort operands according to their priority */
-    for(i=0;i<nb_operands;i++)
-        sorted_op[i] = i;
-    for(i=0;i<nb_operands - 1;i++) {
-        for(j=i+1;j<nb_operands;j++) {
-            p1 = operands[sorted_op[i]].priority; 
-            p2 = operands[sorted_op[j]].priority;
-            if (p2 < p1) {
-                tmp = sorted_op[i];
-                sorted_op[i] = sorted_op[j];
-                sorted_op[j] = tmp;
-            }
-        }
-    }
-
-    for(i = 0;i < NB_ASM_REGS; i++) {
-        if (clobber_regs[i])
-            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
-        else
-            regs_allocated[i] = 0;
-    }
-    /* esp cannot be used */
-    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK; 
-    /* ebp cannot be used yet */
-    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK; 
-
-    /* allocate registers and generate corresponding asm moves */
-    for(i=0;i<nb_operands;i++) {
-        j = sorted_op[i];
-        op = &operands[j];
-        str = op->constraint;
-        /* no need to allocate references */
-        if (op->ref_index >= 0)
-            continue;
-        /* select if register is used for output, input or both */
-        if (op->input_index >= 0) {
-            reg_mask = REG_IN_MASK | REG_OUT_MASK;
-        } else if (j < nb_outputs) {
-            reg_mask = REG_OUT_MASK;
-        } else {
-            reg_mask = REG_IN_MASK;
-        }
-    try_next:
-        c = *str++;
-        switch(c) {
-        case '=':
-            goto try_next;
-        case '+':
-            op->is_rw = 1;
-            /* FALL THRU */
-        case '&':
-            if (j >= nb_outputs)
-                error("'%c' modifier can only be applied to outputs", c);
-            reg_mask = REG_IN_MASK | REG_OUT_MASK;
-            goto try_next;
-        case 'A':
-            /* allocate both eax and edx */
-            if (is_reg_allocated(TREG_EAX) || 
-                is_reg_allocated(TREG_EDX))
-                goto try_next;
-            op->is_llong = 1;
-            op->reg = TREG_EAX;
-            regs_allocated[TREG_EAX] |= reg_mask;
-            regs_allocated[TREG_EDX] |= reg_mask;
-            break;
-        case 'a':
-            reg = TREG_EAX;
-            goto alloc_reg;
-        case 'b':
-            reg = 3;
-            goto alloc_reg;
-        case 'c':
-            reg = TREG_ECX;
-            goto alloc_reg;
-        case 'd':
-            reg = TREG_EDX;
-            goto alloc_reg;
-        case 'S':
-            reg = 6;
-            goto alloc_reg;
-        case 'D':
-            reg = 7;
-        alloc_reg:
-            if (is_reg_allocated(reg))
-                goto try_next;
-            goto reg_found;
-        case 'q':
-            /* eax, ebx, ecx or edx */
-            for(reg = 0; reg < 4; reg++) {
-                if (!is_reg_allocated(reg))
-                    goto reg_found;
-            }
-            goto try_next;
-        case 'r':
-            /* any general register */
-            for(reg = 0; reg < 8; reg++) {
-                if (!is_reg_allocated(reg))
-                    goto reg_found;
-            }
-            goto try_next;
-        reg_found:
-            /* now we can reload in the register */
-            op->is_llong = 0;
-            op->reg = reg;
-            regs_allocated[reg] |= reg_mask;
-            break;
-        case 'i':
-            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
-                goto try_next;
-            break;
-        case 'I':
-        case 'N':
-        case 'M':
-            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
-                goto try_next;
-            break;
-        case 'm':
-        case 'g':
-            /* nothing special to do because the operand is already in
-               memory, except if the pointer itself is stored in a
-               memory variable (VT_LLOCAL case) */
-            /* XXX: fix constant case */
-            /* if it is a reference to a memory zone, it must lie
-               in a register, so we reserve the register in the
-               input registers and a load will be generated
-               later */
-            if (j < nb_outputs || c == 'm') {
-                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
-                    /* any general register */
-                    for(reg = 0; reg < 8; reg++) {
-                        if (!(regs_allocated[reg] & REG_IN_MASK))
-                            goto reg_found1;
-                    }
-                    goto try_next;
-                reg_found1:
-                    /* now we can reload in the register */
-                    regs_allocated[reg] |= REG_IN_MASK;
-                    op->reg = reg;
-                    op->is_memory = 1;
-                }
-            }
-            break;
-        default:
-            error("asm constraint %d ('%s') could not be satisfied", 
-                  j, op->constraint);
-            break;
-        }
-        /* if a reference is present for that operand, we assign it too */
-        if (op->input_index >= 0) {
-            operands[op->input_index].reg = op->reg;
-            operands[op->input_index].is_llong = op->is_llong;
-        }
-    }
-    
-    /* compute out_reg. It is used to store output registers to memory
-       locations referenced by pointers (VT_LLOCAL case) */
-    *pout_reg = -1;
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        if (op->reg >= 0 && 
-            (op->vt->r & VT_VALMASK) == VT_LLOCAL  &&
-            !op->is_memory) {
-            for(reg = 0; reg < 8; reg++) {
-                if (!(regs_allocated[reg] & REG_OUT_MASK))
-                    goto reg_found2;
-            }
-            error("could not find free output register for reloading");
-        reg_found2:
-            *pout_reg = reg;
-            break;
-        }
-    }
-    
-    /* print sorted constraints */
-#ifdef ASM_DEBUG
-    for(i=0;i<nb_operands;i++) {
-        j = sorted_op[i];
-        op = &operands[j];
-        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", 
-               j,                
-               op->id ? get_tok_str(op->id, NULL) : "", 
-               op->constraint,
-               op->vt->r,
-               op->reg);
-    }
-    if (*pout_reg >= 0)
-        printf("out_reg=%d\n", *pout_reg);
-#endif
-}
-
-/* Append to add_str the assembler text of one asm operand.
-
-   sv describes the operand: sv->r holds its storage kind and flags,
-   sv->c.i its constant value or frame offset, sv->sym its symbol (when
-   VT_SYM is set) and sv->type.t its C type.  modifier is the operand
-   modifier character; only 'c', 'n', 'b', 'h' and 'w' are acted upon
-   here, any other value selects the default rendering.
-
-   Output forms:
-     constant        '$' (omitted for lvalues and for the 'c' and 'n'
-                     modifiers), then the symbol name if any, then the
-                     numeric value; 'n' prints the value negated
-     stack variable  "<offset>(%ebp)"
-     lvalue in reg   "(%<reg>)"
-     register        "%<reg>", sized from the operand type unless 'b',
-                     'h' or 'w' selects the low byte, high byte or
-                     16 bit register name
-
-   Calls error() when the operand is neither a constant, a stack
-   variable nor a register, or when a byte register is requested for a
-   register number >= 4. */
-static void subst_asm_operand(CString *add_str, 
-                              SValue *sv, int modifier)
-{
-    int r, reg, size, val;
-    char buf[64];
-
-    r = sv->r;
-    if ((r & VT_VALMASK) == VT_CONST) {
-        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
-            cstr_ccat(add_str, '$');
-        if (r & VT_SYM) {
-            cstr_cat(add_str, get_tok_str(sv->sym->v, NULL));
-            /* a symbol with a zero offset is complete as it stands */
-            if (sv->c.i != 0) {
-                cstr_ccat(add_str, '+');
-            } else {
-                return;
-            }
-        }
-        val = sv->c.i;
-        if (modifier == 'n')
-            val = -val;
-        /* print val rather than sv->c.i, otherwise the negation
-           requested by the 'n' modifier is silently dropped */
-        snprintf(buf, sizeof(buf), "%d", val);
-        cstr_cat(add_str, buf);
-    } else if ((r & VT_VALMASK) == VT_LOCAL) {
-        /* stack variable: addressed relative to the frame pointer */
-        snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i);
-        cstr_cat(add_str, buf);
-    } else if (r & VT_LVAL) {
-        /* memory operand whose address is held in a register */
-        reg = r & VT_VALMASK;
-        if (reg >= VT_CONST)
-            error("internal compiler error");
-        snprintf(buf, sizeof(buf), "(%%%s)", 
-                 get_tok_str(TOK_ASM_eax + reg, NULL));
-        cstr_cat(add_str, buf);
-    } else {
-        /* register case */
-        reg = r & VT_VALMASK;
-        if (reg >= VT_CONST)
-            error("internal compiler error");
-
-        /* choose register operand size */
-        if ((sv->type.t & VT_BTYPE) == VT_BYTE)
-            size = 1;
-        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
-            size = 2;
-        else
-            size = 4;
-        /* registers 4..7 have no byte name: use the 32 bit name */
-        if (size == 1 && reg >= 4)
-            size = 4;
-
-        /* an explicit modifier overrides the size derived from the type;
-           size -1 stands for the high byte register (ah, ...) */
-        if (modifier == 'b') {
-            if (reg >= 4)
-                error("cannot use byte register");
-            size = 1;
-        } else if (modifier == 'h') {
-            if (reg >= 4)
-                error("cannot use byte register");
-            size = -1;
-        } else if (modifier == 'w') {
-            size = 2;
-        }
-
-        /* turn the register number into the token of its name */
-        switch(size) {
-        case -1:
-            reg = TOK_ASM_ah + reg;
-            break;
-        case 1:
-            reg = TOK_ASM_al + reg;
-            break;
-        case 2:
-            reg = TOK_ASM_ax + reg;
-            break;
-        default:
-            reg = TOK_ASM_eax + reg;
-            break;
-        }
-        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
-        cstr_cat(add_str, buf);
-    }
-}
-
-/* Generate the code surrounding an asm statement.
-
-   With is_output == 0 the prolog is emitted: the registers listed in
-   reg_saved that the statement uses are pushed, then the operands that
-   need it are loaded into their assigned registers.  With is_output != 0
-   the epilog is emitted: output registers are stored back to their
-   operands, then the pushed registers are popped in reverse order.
-
-   operands / nb_operands  all operands, the nb_outputs outputs first
-   clobber_regs            NB_ASM_REGS flags, non zero = clobbered
-   out_reg                 scratch register that receives the pointer
-                           of a VT_LLOCAL output before the store */
-static void asm_gen_code(ASMOperand *operands, int nb_operands, 
-                         int nb_outputs, int is_output,
-                         uint8_t *clobber_regs,
-                         int out_reg)
-{
-    /* registers 3, 6 and 7 (ebx, esi, edi in x86 register numbering) */
-    static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
-    uint8_t in_use[NB_ASM_REGS];
-    ASMOperand *cur;
-    SValue tmp;
-    int k, r;
-
-    /* a register is in use when it is clobbered or holds an operand */
-    memcpy(in_use, clobber_regs, sizeof(in_use));
-    for (k = 0; k < nb_operands; k++) {
-        cur = operands + k;
-        if (cur->reg >= 0)
-            in_use[cur->reg] = 1;
-    }
-
-    if (is_output) {
-        /* epilog, step 1: write the output registers back */
-        for (k = 0; k < nb_outputs; k++) {
-            cur = operands + k;
-            if (cur->reg < 0)
-                continue;
-            if ((cur->vt->r & VT_VALMASK) != VT_LLOCAL) {
-                store(cur->reg, cur->vt);
-                if (cur->is_llong) {
-                    /* second word: edx, 4 bytes further */
-                    tmp = *cur->vt;
-                    tmp.c.ul += 4;
-                    store(TREG_EDX, &tmp);
-                }
-                continue;
-            }
-            /* VT_LLOCAL operands used as memory references need no store */
-            if (cur->is_memory)
-                continue;
-            /* fetch the pointer from the stack into out_reg ... */
-            tmp = *cur->vt;
-            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
-            load(out_reg, &tmp);
-
-            /* ... and store the result through it */
-            tmp.r = (tmp.r & ~VT_VALMASK) | out_reg;
-            store(cur->reg, &tmp);
-        }
-        /* epilog, step 2: pop the saved registers, last pushed first */
-        for (k = NB_SAVED_REGS; k-- > 0; ) {
-            r = reg_saved[k];
-            if (in_use[r])
-                g(0x58 + r);
-        }
-        return;
-    }
-
-    /* prolog, step 1: push the saved registers that are in use */
-    for (k = 0; k < NB_SAVED_REGS; k++) {
-        r = reg_saved[k];
-        if (in_use[r])
-            g(0x50 + r);
-    }
-
-    /* prolog, step 2: load the operands into their registers */
-    for (k = 0; k < nb_operands; k++) {
-        cur = operands + k;
-        if (cur->reg < 0)
-            continue;
-        if ((cur->vt->r & VT_VALMASK) == VT_LLOCAL && cur->is_memory) {
-            /* memory reference (input or output): load the pointer
-               that is kept on the stack */
-            tmp = *cur->vt;
-            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
-            load(cur->reg, &tmp);
-        } else if (k >= nb_outputs || cur->is_rw) {
-            /* input or read-write operand: load its value */
-            load(cur->reg, cur->vt);
-            if (cur->is_llong) {
-                /* second word: edx, 4 bytes further */
-                tmp = *cur->vt;
-                tmp.c.ul += 4;
-                load(TREG_EDX, &tmp);
-            }
-        }
-    }
-}
-
-/* Record one entry of an asm clobber list in clobber_regs.
-
-   str is the clobber name.  "memory" and "cc" are accepted and leave
-   clobber_regs untouched.  Any other name must be a token in the
-   TOK_ASM_eax..TOK_ASM_edi or TOK_ASM_ax..TOK_ASM_di range; the flag of
-   the matching register index is then set to 1.  Every other name is
-   reported through error(). */
-static void asm_clobber(uint8_t *clobber_regs, const char *str)
-{
-    int idx;
-
-    if (strcmp(str, "memory") == 0)
-        return;
-    if (strcmp(str, "cc") == 0)
-        return;
-
-    /* start from the token number, then rebase it to a register index */
-    idx = tok_alloc(str, strlen(str))->tok;
-    if (idx >= TOK_ASM_ax && idx <= TOK_ASM_di) {
-        idx -= TOK_ASM_ax;
-    } else if (idx >= TOK_ASM_eax && idx <= TOK_ASM_edi) {
-        idx -= TOK_ASM_eax;
-    } else {
-        error("invalid clobber register '%s'", str);
-    }
-    clobber_regs[idx] = 1;
-}
--- a/i386-asm.h	Tue May 08 22:16:31 2007 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,446 +0,0 @@
-     DEF_ASM_OP0(pusha, 0x60) /* must be first OP0 */
-     DEF_ASM_OP0(popa, 0x61)
-     DEF_ASM_OP0(clc, 0xf8)
-     DEF_ASM_OP0(cld, 0xfc)
-     DEF_ASM_OP0(cli, 0xfa)
-     DEF_ASM_OP0(clts, 0x0f06)
-     DEF_ASM_OP0(cmc, 0xf5)
-     DEF_ASM_OP0(lahf, 0x9f)
-     DEF_ASM_OP0(sahf, 0x9e)
-     DEF_ASM_OP0(pushfl, 0x9c)
-     DEF_ASM_OP0(popfl, 0x9d)
-     DEF_ASM_OP0(pushf, 0x9c)
-     DEF_ASM_OP0(popf, 0x9d)
-     DEF_ASM_OP0(stc, 0xf9)
-     DEF_ASM_OP0(std, 0xfd)
-     DEF_ASM_OP0(sti, 0xfb)
-     DEF_ASM_OP0(aaa, 0x37)
-     DEF_ASM_OP0(aas, 0x3f)
-     DEF_ASM_OP0(daa, 0x27)
-     DEF_ASM_OP0(das, 0x2f)
-     DEF_ASM_OP0(aad, 0xd50a)
-     DEF_ASM_OP0(aam, 0xd40a)
-     DEF_ASM_OP0(cbw, 0x6698)
-     DEF_ASM_OP0(cwd, 0x6699)
-     DEF_ASM_OP0(cwde, 0x98)
-     DEF_ASM_OP0(cdq, 0x99)
-     DEF_ASM_OP0(cbtw, 0x6698)
-     DEF_ASM_OP0(cwtl, 0x98)
-     DEF_ASM_OP0(cwtd, 0x6699)
-     DEF_ASM_OP0(cltd, 0x99)
-     DEF_ASM_OP0(int3, 0xcc)
-     DEF_ASM_OP0(into, 0xce)
-     DEF_ASM_OP0(iret, 0xcf)
-     DEF_ASM_OP0(rsm, 0x0faa)
-     DEF_ASM_OP0(hlt, 0xf4)
-     DEF_ASM_OP0(wait, 0x9b)
-     DEF_ASM_OP0(nop, 0x90)
-     DEF_ASM_OP0(xlat, 0xd7)
-
-     /* strings */
-ALT(DEF_ASM_OP0L(cmpsb, 0xa6, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(scmpb, 0xa6, 0, OPC_BWL))
-
-ALT(DEF_ASM_OP0L(insb, 0x6c, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(outsb, 0x6e, 0, OPC_BWL))
-
-ALT(DEF_ASM_OP0L(lodsb, 0xac, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(slodb, 0xac, 0, OPC_BWL))
-
-ALT(DEF_ASM_OP0L(movsb, 0xa4, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(smovb, 0xa4, 0, OPC_BWL))
-
-ALT(DEF_ASM_OP0L(scasb, 0xae, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(sscab, 0xae, 0, OPC_BWL))
-
-ALT(DEF_ASM_OP0L(stosb, 0xaa, 0, OPC_BWL))
-ALT(DEF_ASM_OP0L(sstob, 0xaa, 0, OPC_BWL))
-
-     /* bits */
-     
-ALT(DEF_ASM_OP2(bsfw, 0x0fbc, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
-ALT(DEF_ASM_OP2(bsrw, 0x0fbd, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
-
-ALT(DEF_ASM_OP2(btw, 0x0fa3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP2(btw, 0x0fba, 4, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
-
-ALT(DEF_ASM_OP2(btsw, 0x0fab, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP2(btsw, 0x0fba, 5, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
-
-ALT(DEF_ASM_OP2(btrw, 0x0fb3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
-
-ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
-
-     /* prefixes */
-     DEF_ASM_OP0(aword, 0x67)
-     DEF_ASM_OP0(addr16, 0x67)
-     DEF_ASM_OP0(word, 0x66)
-     DEF_ASM_OP0(data16, 0x66)
-     DEF_ASM_OP0(lock, 0xf0)
-     DEF_ASM_OP0(rep, 0xf3)
-     DEF_ASM_OP0(repe, 0xf3)
-     DEF_ASM_OP0(repz, 0xf3)
-     DEF_ASM_OP0(repne, 0xf2)
-     DEF_ASM_OP0(repnz, 0xf2)
-             
-     DEF_ASM_OP0(invd, 0x0f08)
-     DEF_ASM_OP0(wbinvd, 0x0f09)
-     DEF_ASM_OP0(cpuid, 0x0fa2)
-     DEF_ASM_OP0(wrmsr, 0x0f30)
-     DEF_ASM_OP0(rdtsc, 0x0f31)
-     DEF_ASM_OP0(rdmsr, 0x0f32)
-     DEF_ASM_OP0(rdpmc, 0x0f33)
-     DEF_ASM_OP0(ud2, 0x0f0b)
-
-     /* NOTE: we took the same order as gas opcode definition order */
-ALT(DEF_ASM_OP2(movb, 0xa0, 0, OPC_BWL, OPT_ADDR, OPT_EAX))
-ALT(DEF_ASM_OP2(movb, 0xa2, 0, OPC_BWL, OPT_EAX, OPT_ADDR))
-ALT(DEF_ASM_OP2(movb, 0x88, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
-ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWL, OPT_IM, OPT_REG))
-ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_REG | OPT_EA))
-
-ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WL, OPT_SEG, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(movw, 0x8e, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_SEG))
-
-ALT(DEF_ASM_OP2(movw, 0x0f20, 0, OPC_MODRM | OPC_WL, OPT_CR, OPT_REG32))
-ALT(DEF_ASM_OP2(movw, 0x0f21, 0, OPC_MODRM | OPC_WL, OPT_DB, OPT_REG32))
-ALT(DEF_ASM_OP2(movw, 0x0f24, 0, OPC_MODRM | OPC_WL, OPT_TR, OPT_REG32))
-ALT(DEF_ASM_OP2(movw, 0x0f22, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_CR))
-ALT(DEF_ASM_OP2(movw, 0x0f23, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_DB))
-ALT(DEF_ASM_OP2(movw, 0x0f26, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_TR))
-
-ALT(DEF_ASM_OP2(movsbl, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(movsbw, 0x0fbe, 0, OPC_MODRM | OPC_D16, OPT_REG8 | OPT_EA, OPT_REG16))
-ALT(DEF_ASM_OP2(movswl, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(movzbw, 0x0fb6, 0, OPC_MODRM | OPC_WL, OPT_REG8 | OPT_EA, OPT_REGW))
-ALT(DEF_ASM_OP2(movzwl, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
-
-ALT(DEF_ASM_OP1(pushw, 0x50, 0, OPC_REG | OPC_WL, OPT_REGW))
-ALT(DEF_ASM_OP1(pushw, 0xff, 6, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP1(pushw, 0x6a, 0, OPC_WL, OPT_IM8S))
-ALT(DEF_ASM_OP1(pushw, 0x68, 0, OPC_WL, OPT_IM32))
-ALT(DEF_ASM_OP1(pushw, 0x06, 0, OPC_WL, OPT_SEG))
-
-ALT(DEF_ASM_OP1(popw, 0x58, 0, OPC_REG | OPC_WL, OPT_REGW))
-ALT(DEF_ASM_OP1(popw, 0x8f, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
-ALT(DEF_ASM_OP1(popw, 0x07, 0, OPC_WL, OPT_SEG))
-
-ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_REG, OPT_EAX))
-ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_EAX, OPT_REG))
-ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
-
-ALT(DEF_ASM_OP2(inb, 0xe4, 0, OPC_BWL, OPT_IM8, OPT_EAX))
-ALT(DEF_ASM_OP1(inb, 0xe4, 0, OPC_BWL, OPT_IM8))
-ALT(DEF_ASM_OP2(inb, 0xec, 0, OPC_BWL, OPT_DX, OPT_EAX))
-ALT(DEF_ASM_OP1(inb, 0xec, 0, OPC_BWL, OPT_DX))
-
-ALT(DEF_ASM_OP2(outb, 0xe6, 0, OPC_BWL, OPT_EAX, OPT_IM8))
-ALT(DEF_ASM_OP1(outb, 0xe6, 0, OPC_BWL, OPT_IM8))
-ALT(DEF_ASM_OP2(outb, 0xee, 0, OPC_BWL, OPT_EAX, OPT_DX))
-ALT(DEF_ASM_OP1(outb, 0xee, 0, OPC_BWL, OPT_DX))
-
-ALT(DEF_ASM_OP2(leaw, 0x8d, 0, OPC_MODRM | OPC_WL, OPT_EA, OPT_REG))
-
-ALT(DEF_ASM_OP2(les, 0xc4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(lds, 0xc5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(lss, 0x0fb2, 0, OPC_MODRM, OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(lfs, 0x0fb4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
-ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
-
-     /* arith */
-ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */
-ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
-ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWL, OPT_IM, OPT_EAX))
-ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WL, OPT_IM8S, OPT_EA | OPT_REG))
-
-ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
-ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWL, OPT_IM, OPT_EAX))
-ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
-
-ALT(DEF_ASM_OP1(incw, 0x40, 0, OPC_REG | OPC_WL, OPT_REGW))
-ALT(DEF_ASM_OP1(incb, 0xfe, 0, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-ALT(DEF_ASM_OP1(decw, 0x48, 0, OPC_REG | OPC_WL, OPT_REGW))
-ALT(DEF_ASM_OP1(decb, 0xfe, 1, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-
-ALT(DEF_ASM_OP1(notb, 0xf6, 2, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-ALT(DEF_ASM_OP1(negb, 0xf6, 3, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-
-ALT(DEF_ASM_OP1(mulb, 0xf6, 4, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-ALT(DEF_ASM_OP1(imulb, 0xf6, 5, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-
-ALT(DEF_ASM_OP2(imulw, 0x0faf, 0, OPC_MODRM | OPC_WL, OPT_REG | OPT_EA, OPT_REG))
-ALT(DEF_ASM_OP3(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW | OPT_EA, OPT_REGW))
-ALT(DEF_ASM_OP2(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW))
-ALT(DEF_ASM_OP3(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW | OPT_EA, OPT_REGW))
-ALT(DEF_ASM_OP2(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW))
-
-ALT(DEF_ASM_OP1(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-ALT(DEF_ASM_OP2(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
-ALT(DEF_ASM_OP1(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
-ALT(DEF_ASM_OP2(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
-
-     /* shifts */
-ALT(DEF_ASM_OP2(rolb, 0xc0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_IM8, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP2(rolb, 0xd2, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_CL, OPT_EA | OPT_REG))
-ALT(DEF_ASM_OP1(rolb, 0xd0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_EA | OPT_REG))
-
-ALT(DEF_ASM_OP3(shldw, 0x0fa4, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
-ALT(DEF_ASM_OP3(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
-ALT(DEF_ASM_OP2(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW))
-ALT(DEF_ASM_OP3(shrdw, 0x0fac, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
-ALT(DEF_ASM_OP3(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
-ALT(DEF_ASM_OP2(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW))
-
-ALT(DEF_ASM_OP1(call, 0xff, 2, OPC_MODRM, OPT_INDIR))
-ALT(DEF_ASM_OP1(call, 0xe8, 0, OPC_JMP, OPT_ADDR))
-ALT(DEF_ASM_OP1(jmp, 0xff, 4, OPC_MODRM, OPT_INDIR))
-ALT(DEF_ASM_OP1(jmp, 0xeb, 0, OPC_SHORTJMP | OPC_JMP, OPT_ADDR))
-
-ALT(DEF_ASM_OP2(lcall, 0x9a, 0, 0, OPT_IM16, OPT_IM32))
-ALT(DEF_ASM_OP1(lcall, 0xff, 3, 0, OPT_EA))
-ALT(DEF_ASM_OP2(ljmp, 0xea, 0, 0, OPT_IM16, OPT_IM32))
-ALT(DEF_ASM_OP1(ljmp, 0xff, 5, 0, OPT_EA))
-
-ALT(DEF_ASM_OP1(int, 0xcd, 0, 0, OPT_IM8))
-ALT(DEF_ASM_OP1(seto, 0x0f90, 0, OPC_MODRM | OPC_TEST, OPT_REG8 | OPT_EA))
-    DEF_ASM_OP2(enter, 0xc8, 0, 0, OPT_IM16, OPT_IM8)
-    DEF_ASM_OP0(leave, 0xc9)
-    DEF_ASM_OP0(ret, 0xc3)
-ALT(DEF_ASM_OP1(ret, 0xc2, 0, 0, OPT_IM16))
-    DEF_ASM_OP0(lret, 0xcb)
-ALT(DEF_ASM_OP1(lret, 0xca, 0, 0, OPT_IM16))
-
-ALT(DEF_ASM_OP1(jo, 0x70, 0, OPC_SHORTJMP | OPC_JMP | OPC_TEST, OPT_ADDR))
-    DEF_ASM_OP1(loopne, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR)
-    DEF_ASM_OP1(loopnz, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR)
-    DEF_ASM_OP1(loope, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR)
-    DEF_ASM_OP1(loopz, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR)
-    DEF_ASM_OP1(loop, 0xe2, 0, OPC_SHORTJMP, OPT_ADDR)
-    DEF_ASM_OP1(jecxz, 0xe3, 0, OPC_SHORTJMP, OPT_ADDR)
-     
-     /* float */
-     /* specific fcomp handling */
-ALT(DEF_ASM_OP0L(fcomp, 0xd8d9, 0, 0))
-
-ALT(DEF_ASM_OP1(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST))
-ALT(DEF_ASM_OP2(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
-ALT(DEF_ASM_OP0L(fadd, 0xdec1, 0, OPC_FARITH))
-ALT(DEF_ASM_OP1(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST))
-ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
-ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST0, OPT_ST))
-ALT(DEF_ASM_OP0L(faddp, 0xdec1, 0, OPC_FARITH))
-ALT(DEF_ASM_OP1(fadds, 0xd8, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
-ALT(DEF_ASM_OP1(fiaddl, 0xda, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
-ALT(DEF_ASM_OP1(faddl, 0xdc, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
-ALT(DEF_ASM_OP1(fiadds, 0xde, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
-
-     DEF_ASM_OP0(fucompp, 0xdae9)
-     DEF_ASM_OP0(ftst, 0xd9e4)
-     DEF_ASM_OP0(fxam, 0xd9e5)
-     DEF_ASM_OP0(fld1, 0xd9e8)
-     DEF_ASM_OP0(fldl2t, 0xd9e9)
-     DEF_ASM_OP0(fldl2e, 0xd9ea)
-     DEF_ASM_OP0(fldpi, 0xd9eb)
-     DEF_ASM_OP0(fldlg2, 0xd9ec)
-     DEF_ASM_OP0(fldln2, 0xd9ed)
-     DEF_ASM_OP0(fldz, 0xd9ee)
-
-     DEF_ASM_OP0(f2xm1, 0xd9f0)
-     DEF_ASM_OP0(fyl2x, 0xd9f1)
-     DEF_ASM_OP0(fptan, 0xd9f2)
-     DEF_ASM_OP0(fpatan, 0xd9f3)
-     DEF_ASM_OP0(fxtract, 0xd9f4)
-     DEF_ASM_OP0(fprem1, 0xd9f5)
-     DEF_ASM_OP0(fdecstp, 0xd9f6)
-     DEF_ASM_OP0(fincstp, 0xd9f7)
-     DEF_ASM_OP0(fprem, 0xd9f8)
-     DEF_ASM_OP0(fyl2xp1, 0xd9f9)
-     DEF_ASM_OP0(fsqrt, 0xd9fa)
-     DEF_ASM_OP0(fsincos, 0xd9fb)
-     DEF_ASM_OP0(frndint, 0xd9fc)
-     DEF_ASM_OP0(fscale, 0xd9fd)
-     DEF_ASM_OP0(fsin, 0xd9fe)
-     DEF_ASM_OP0(fcos, 0xd9ff)
-     DEF_ASM_OP0(fchs, 0xd9e0)
-     DEF_ASM_OP0(fabs, 0xd9e1)
-     DEF_ASM_OP0(fninit, 0xdbe3)
-     DEF_ASM_OP0(fnclex, 0xdbe2)
-     DEF_ASM_OP0(fnop, 0xd9d0)
-     DEF_ASM_OP0(fwait, 0x9b)
-
-    /* fp load */
-    DEF_ASM_OP1(fld, 0xd9c0, 0, OPC_REG, OPT_ST)
-    DEF_ASM_OP1(fldl, 0xd9c0, 0, OPC_REG, OPT_ST)
-    DEF_ASM_OP1(flds, 0xd9, 0, OPC_MODRM, OPT_EA)
-ALT(DEF_ASM_OP1(fldl, 0xdd, 0, OPC_MODRM, OPT_EA))
-    DEF_ASM_OP1(fildl, 0xdb, 0, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fildq, 0xdf, 5, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fildll, 0xdf, 5, OPC_MODRM,OPT_EA)
-    DEF_ASM_OP1(fldt, 0xdb, 5, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fbld, 0xdf, 4, OPC_MODRM, OPT_EA)
-    
-    /* fp store */
-    DEF_ASM_OP1(fst, 0xddd0, 0, OPC_REG, OPT_ST)
-    DEF_ASM_OP1(fstl, 0xddd0, 0, OPC_REG, OPT_ST)
-    DEF_ASM_OP1(fsts, 0xd9, 2, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fstps, 0xd9, 3, OPC_MODRM, OPT_EA)
-ALT(DEF_ASM_OP1(fstl, 0xdd, 2, OPC_MODRM, OPT_EA))
-    DEF_ASM_OP1(fstpl, 0xdd, 3, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fist, 0xdf, 2, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fistp, 0xdf, 3, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fistl, 0xdb, 2, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fistpl, 0xdb, 3, OPC_MODRM, OPT_EA)
-
-    DEF_ASM_OP1(fstp, 0xddd8, 0, OPC_REG, OPT_ST)
-    DEF_ASM_OP1(fistpq, 0xdf, 7, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fistpll, 0xdf, 7, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fstpt, 0xdb, 7, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(fbstp, 0xdf, 6, OPC_MODRM, OPT_EA)
-
-    /* exchange */
-    DEF_ASM_OP0(fxch, 0xd9c9)
-ALT(DEF_ASM_OP1(fxch, 0xd9c8, 0, OPC_REG, OPT_ST))
-
-    /* misc FPU */
-    DEF_ASM_OP1(fucom, 0xdde0, 0, OPC_REG, OPT_ST )
-    DEF_ASM_OP1(fucomp, 0xdde8, 0, OPC_REG, OPT_ST )
-
-    DEF_ASM_OP0L(finit, 0xdbe3, 0, OPC_FWAIT)
-    DEF_ASM_OP1(fldcw, 0xd9, 5, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fnstcw, 0xd9, 7, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fstcw, 0xd9, 7, OPC_MODRM | OPC_FWAIT, OPT_EA )
-    DEF_ASM_OP0(fnstsw, 0xdfe0)
-ALT(DEF_ASM_OP1(fnstsw, 0xdfe0, 0, 0, OPT_EAX ))
-ALT(DEF_ASM_OP1(fnstsw, 0xdd, 7, OPC_MODRM, OPT_EA ))
-    DEF_ASM_OP1(fstsw, 0xdfe0, 0, OPC_FWAIT, OPT_EAX )
-ALT(DEF_ASM_OP0L(fstsw, 0xdfe0, 0, OPC_FWAIT))
-ALT(DEF_ASM_OP1(fstsw, 0xdd, 7, OPC_MODRM | OPC_FWAIT, OPT_EA ))
-    DEF_ASM_OP0L(fclex, 0xdbe2, 0, OPC_FWAIT)
-    DEF_ASM_OP1(fnstenv, 0xd9, 6, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fstenv, 0xd9, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
-    DEF_ASM_OP1(fldenv, 0xd9, 4, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fnsave, 0xdd, 6, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fsave, 0xdd, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
-    DEF_ASM_OP1(frstor, 0xdd, 4, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(ffree, 0xddc0, 4, OPC_REG, OPT_ST )
-    DEF_ASM_OP1(ffreep, 0xdfc0, 4, OPC_REG, OPT_ST )
-    DEF_ASM_OP1(fxsave, 0x0fae, 0, OPC_MODRM, OPT_EA )
-    DEF_ASM_OP1(fxrstor, 0x0fae, 1, OPC_MODRM, OPT_EA )
-
-    /* segments */
-    DEF_ASM_OP2(arpl, 0x63, 0, OPC_MODRM, OPT_REG16, OPT_REG16 | OPT_EA)
-    DEF_ASM_OP2(lar, 0x0f02, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG32)
-    DEF_ASM_OP1(lgdt, 0x0f01, 2, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(lidt, 0x0f01, 3, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(lldt, 0x0f00, 2, OPC_MODRM, OPT_EA | OPT_REG)
-    DEF_ASM_OP1(lmsw, 0x0f01, 6, OPC_MODRM, OPT_EA | OPT_REG)
-ALT(DEF_ASM_OP2(lslw, 0x0f03, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_REG))
-    DEF_ASM_OP1(ltr, 0x0f00, 3, OPC_MODRM, OPT_EA | OPT_REG)
-    DEF_ASM_OP1(sgdt, 0x0f01, 0, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(sidt, 0x0f01, 1, OPC_MODRM, OPT_EA)
-    DEF_ASM_OP1(sldt, 0x0f00, 0, OPC_MODRM, OPT_REG | OPT_EA)
-    DEF_ASM_OP1(smsw, 0x0f01, 4, OPC_MODRM, OPT_REG | OPT_EA)
-    DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM, OPT_REG16| OPT_EA)
-    DEF_ASM_OP1(verr, 0x0f00, 4, OPC_MODRM, OPT_REG | OPT_EA)
-    DEF_ASM_OP1(verw, 0x0f00, 5, OPC_MODRM, OPT_REG | OPT_EA)
-
-    /* 486 */
-    DEF_ASM_OP1(bswap, 0x0fc8, 0, OPC_REG, OPT_REG32 )
-ALT(DEF_ASM_OP2(xaddb, 0x0fc0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
-ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
-    DEF_ASM_OP1(invlpg, 0x0f01, 7, OPC_MODRM, OPT_EA )
-
-    DEF_ASM_OP2(boundl, 0x62, 0, OPC_MODRM, OPT_REG32, OPT_EA)
-    DEF_ASM_OP2(boundw, 0x62, 0, OPC_MODRM | OPC_D16, OPT_REG16, OPT_EA)
-
-    /* pentium */
-    DEF_ASM_OP1(cmpxchg8b, 0x0fc7, 1, OPC_MODRM, OPT_EA )
-    
-    /* pentium pro */
-    ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST, OPT_REG32 | OPT_EA, OPT_REG32))
-
-    DEF_ASM_OP2(fcmovb, 0xdac0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmove, 0xdac8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovbe, 0xdad0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovu, 0xdad8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovnb, 0xdbc0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovne, 0xdbc8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovnbe, 0xdbd0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcmovnu, 0xdbd8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-
-    DEF_ASM_OP2(fucomi, 0xdbe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcomi, 0xdbf0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fucomip, 0xdfe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
-    DEF_ASM_OP2(fcomip, 0xdff0, 0, OPC_REG, OPT_ST, OPT_ST0 )
-
-    /* mmx */
-    DEF_ASM_OP0(emms, 0x0f77) /* must be last OP0 */
-    DEF_ASM_OP2(movd, 0x0f6e, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_MMX )
-ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_REG32 ))
-    DEF_ASM_OP2(movq, 0x0f6f, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(movq, 0x0f7f, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_MMX ))
-    DEF_ASM_OP2(packssdw, 0x0f6b, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(packsswb, 0x0f63, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(packuswb, 0x0f67, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddb, 0x0ffc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddw, 0x0ffd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddd, 0x0ffe, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddsb, 0x0fec, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddsw, 0x0fed, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddusb, 0x0fdc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(paddusw, 0x0fdd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pand, 0x0fdb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pandn, 0x0fdf, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpeqb, 0x0f74, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpeqw, 0x0f75, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpeqd, 0x0f76, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpgtb, 0x0f64, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpgtw, 0x0f65, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pcmpgtd, 0x0f66, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pmaddwd, 0x0ff5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pmulhw, 0x0fe5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pmullw, 0x0fd5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(por, 0x0feb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psllw, 0x0ff1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psllw, 0x0f71, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(pslld, 0x0ff2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(pslld, 0x0f72, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psllq, 0x0ff3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psllq, 0x0f73, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psraw, 0x0fe1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psraw, 0x0f71, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psrad, 0x0fe2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psrad, 0x0f72, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psrlw, 0x0fd1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psrlw, 0x0f71, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psrld, 0x0fd2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psrld, 0x0f72, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psrlq, 0x0fd3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
-    DEF_ASM_OP2(psubb, 0x0ff8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubw, 0x0ff9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubd, 0x0ffa, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubsb, 0x0fe8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubsw, 0x0fe9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubusb, 0x0fd8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(psubusw, 0x0fd9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpckhbw, 0x0f68, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpckhwd, 0x0f69, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpckhdq, 0x0f6a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-    DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
-
-#undef ALT
-#undef DEF_ASM_OP0
-#undef DEF_ASM_OP0L
-#undef DEF_ASM_OP1
-#undef DEF_ASM_OP2
-#undef DEF_ASM_OP3
--- a/i386-gen.c	Tue May 08 22:16:31 2007 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1033 +0,0 @@
-/*
- *  X86 code generator for TCC
- * 
- *  Copyright (c) 2001-2004 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* number of available registers (eax, ecx, edx, st0: see reg_classes[]) */
-#define NB_REGS             4
-
-/* a register can belong to several classes. The classes must be
-   sorted from more general to more precise (see the gv2() code, which
-   makes assumptions about this ordering). */
-#define RC_INT     0x0001 /* generic integer register */
-#define RC_FLOAT   0x0002 /* generic float register */
-#define RC_EAX     0x0004
-#define RC_ST0     0x0008 
-#define RC_ECX     0x0010
-#define RC_EDX     0x0020
-#define RC_IRET    RC_EAX /* function return: integer register */
-#define RC_LRET    RC_EDX /* function return: second integer register */
-#define RC_FRET    RC_ST0 /* function return: float register */
-
-/* pretty names for the registers; the values index reg_classes[] */
-enum {
-    TREG_EAX = 0,
-    TREG_ECX,
-    TREG_EDX,
-    TREG_ST0,
-};
-
-int reg_classes[NB_REGS] = {
-    /* eax */ RC_INT | RC_EAX,
-    /* ecx */ RC_INT | RC_ECX,
-    /* edx */ RC_INT | RC_EDX,
-    /* st0 */ RC_FLOAT | RC_ST0,
-};
-
-/* registers used to return a value from a function */
-#define REG_IRET TREG_EAX /* single word int return register */
-#define REG_LRET TREG_EDX /* second word return register (for long long) */
-#define REG_FRET TREG_ST0 /* float return register */
-
-/* defined if function parameters must be evaluated in reverse order */
-#define INVERT_FUNC_PARAMS
-
-/* defined if structures are passed as pointers. Otherwise structures
-   are directly pushed on stack. */
-//#define FUNC_STRUCT_PARAM_AS_PTR
-
-/* pointer size, in bytes */
-#define PTR_SIZE 4
-
-/* long double size and alignment, in bytes */
-#define LDOUBLE_SIZE  12
-#define LDOUBLE_ALIGN 4
-/* maximum alignment (for aligned attribute support) */
-#define MAX_ALIGN     8
-
-/******************************************************/
-/* ELF defines */
-
-#define EM_TCC_TARGET EM_386
-
-/* relocation type for 32 bit data relocation */
-#define R_DATA_32   R_386_32
-#define R_JMP_SLOT  R_386_JMP_SLOT
-#define R_COPY      R_386_COPY
-
-#define ELF_START_ADDR 0x08048000
-#define ELF_PAGE_SIZE  0x1000
-
-/******************************************************/
-
-static unsigned long func_sub_sp_offset; /* 'ind' just past the room reserved for the prolog (set by gfunc_prolog) */
-static unsigned long func_bound_offset; /* lbounds_section->data_offset recorded by gfunc_prolog */
-static int func_ret_sub; /* operand of the final 'ret n'; 0 means a plain 'ret' (see gfunc_epilog) */
-
-/* emit byte 'c' at text offset 'ind', growing the section when needed. XXX: make it faster ? */
-void g(int c)
-{
-    int ind1;
-    ind1 = ind + 1; /* offset just past the byte being written */
-    if (ind1 > cur_text_section->data_allocated)
-        section_realloc(cur_text_section, ind1);
-    cur_text_section->data[ind] = c;
-    ind = ind1;
-}
-
-void o(unsigned int c) /* emit the bytes packed in 'c', least significant byte first */
-{
-    while (c) { /* stops once the remaining value is zero, so o(0) emits nothing */
-        g(c);
-        c = c >> 8;
-    }
-}
-
-void gen_le32(int c) /* emit 'c' as exactly four bytes, least significant byte first */
-{
-    g(c);
-    g(c >> 8);
-    g(c >> 16);
-    g(c >> 24);
-}
-
-/* output a symbol and patch all calls to it: 't' heads a chain of pending 32-bit fields in the text section, 'a' is the target offset */
-void gsym_addr(int t, int a)
-{
-    int n, *ptr;
-    while (t) { /* an offset of 0 terminates the chain */
-        ptr = (int *)(cur_text_section->data + t);
-        n = *ptr; /* next value */
-        *ptr = a - t - 4; /* displacement measured from the end of the 4-byte field */
-        t = n;
-    }
-}
-
-void gsym(int t) /* resolve the chain 't' to the current output offset 'ind' */
-{
-    gsym_addr(t, ind);
-}
-
-/* psym is used to put an instruction with a data field which is a
-   reference to a symbol. It is in fact the same as oad ! */
-#define psym oad
-
-/* instruction + 4 bytes data: emit opcode 'c' via o(), then 's' as a 32-bit field. Return the address of the data */
-static int oad(int c, int s)
-{
-    int ind1;
-
-    o(c);
-    ind1 = ind + 4; /* offset just past the 32-bit field */
-    if (ind1 > cur_text_section->data_allocated)
-        section_realloc(cur_text_section, ind1);
-    *(int *)(cur_text_section->data + ind) = s; /* stored in host byte order */
-    s = ind; /* reuse 's' to hold the offset of the field */
-    ind = ind1;
-    return s;
-}
-
-/* output the 32-bit constant 'c', with an R_386_32 relocation against 'sym' if 'r & VT_SYM' is true */
-static void gen_addr32(int r, Sym *sym, int c)
-{
-    if (r & VT_SYM)
-        greloc(cur_text_section, sym, ind, R_386_32); /* relocation applies to the bytes emitted next */
-    gen_le32(c);
-}
-
-/* generate a modrm reference. 'op_reg' contains the additional 3
-   opcode bits; 'r' selects the addressing form, 'c' is the offset */
-static void gen_modrm(int op_reg, int r, Sym *sym, int c)
-{
-    op_reg = op_reg << 3; /* move into bits 3-5 of the modrm byte */
-    if ((r & VT_VALMASK) == VT_CONST) {
-        /* constant memory reference */
-        o(0x05 | op_reg); /* modrm byte followed by a 32-bit address */
-        gen_addr32(r, sym, c);
-    } else if ((r & VT_VALMASK) == VT_LOCAL) {
-        /* currently, we use only ebp as base */
-        if (c == (char)c) { /* NOTE(review): assumes plain 'char' is signed on the host - confirm */
-            /* short reference */
-            o(0x45 | op_reg); /* 8-bit displacement form */
-            g(c);
-        } else {
-            oad(0x85 | op_reg, c); /* 32-bit displacement form */
-        }
-    } else {
-        g(0x00 | op_reg | (r & VT_VALMASK)); /* otherwise the value bits of 'r' are used directly as the rm field */
-    }
-}
-
-
-/* load 'r' from value 'sv' (emits the i386 instructions that bring 'sv' into register 'r') */
-void load(int r, SValue *sv)
-{
-    int v, t, ft, fc, fr;
-    SValue v1;
-
-    fr = sv->r;
-    ft = sv->type.t;
-    fc = sv->c.ul;
-
-    v = fr & VT_VALMASK;
-    if (fr & VT_LVAL) {
-        if (v == VT_LLOCAL) {
-            v1.type.t = VT_INT; /* first fetch the int stored in the local slot 'fc' into 'r' ... */
-            v1.r = VT_LOCAL | VT_LVAL;
-            v1.c.ul = fc;
-            load(r, &v1);
-            fr = r; /* ... then address the value through that register */
-        }
-        if ((ft & VT_BTYPE) == VT_FLOAT) {
-            o(0xd9); /* flds */
-            r = 0; /* 'r' now carries the modrm opcode-extension bits, not a register */
-        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
-            o(0xdd); /* fldl */
-            r = 0;
-        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
-            o(0xdb); /* fldt */
-            r = 5;
-        } else if ((ft & VT_TYPE) == VT_BYTE) {
-            o(0xbe0f);   /* movsbl */
-        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
-            o(0xb60f);   /* movzbl */
-        } else if ((ft & VT_TYPE) == VT_SHORT) {
-            o(0xbf0f);   /* movswl */
-        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
-            o(0xb70f);   /* movzwl */
-        } else {
-            o(0x8b);     /* movl */
-        }
-        gen_modrm(r, fr, sv->sym, fc);
-    } else {
-        if (v == VT_CONST) {
-            o(0xb8 + r); /* mov $xx, r */
-            gen_addr32(fr, sv->sym, fc);
-        } else if (v == VT_LOCAL) {
-            o(0x8d); /* lea xxx(%ebp), r */
-            gen_modrm(r, VT_LOCAL, sv->sym, fc);
-        } else if (v == VT_CMP) {
-            oad(0xb8 + r, 0); /* mov $0, r */
-            o(0x0f); /* setxx %br */
-            o(fc); /* 'fc' supplies the second opcode byte, i.e. the condition */
-            o(0xc0 + r);
-        } else if (v == VT_JMP || v == VT_JMPI) {
-            t = v & 1; /* low bit tells VT_JMP and VT_JMPI apart */
-            oad(0xb8 + r, t); /* mov $t, r */
-            o(0x05eb); /* jmp after: short jump over the 5-byte mov below */
-            gsym(fc); /* the pending jumps in chain 'fc' land here */
-            oad(0xb8 + r, t ^ 1); /* mov $(t ^ 1), r */
-        } else if (v != r) {
-            o(0x89);
-            o(0xc0 + r + v * 8); /* mov v, r */
-        }
-    }
-}
-
-/* store register 'r' in lvalue 'v' (the opcode is picked from the base type of 'v') */
-void store(int r, SValue *v)
-{
-    int fr, bt, ft, fc;
-
-    ft = v->type.t;
-    fc = v->c.ul;
-    fr = v->r & VT_VALMASK;
-    bt = ft & VT_BTYPE;
-    /* XXX: incorrect if float reg to reg */
-    if (bt == VT_FLOAT) {
-        o(0xd9); /* fsts */
-        r = 2; /* 'r' now carries the modrm opcode-extension bits, not a register */
-    } else if (bt == VT_DOUBLE) {
-        o(0xdd); /* fstl (reg field 2 is the non-popping store; fstpl would be 3) */
-        r = 2;
-    } else if (bt == VT_LDOUBLE) {
-        o(0xc0d9); /* fld %st(0) */
-        o(0xdb); /* fstpt */
-        r = 7;
-    } else {
-        if (bt == VT_SHORT)
-            o(0x66); /* operand-size prefix: 16-bit store */
-        if (bt == VT_BYTE || bt == VT_BOOL)
-            o(0x88); /* movb */
-        else
-            o(0x89); /* movl (movw when prefixed by 0x66) */
-    }
-    if (fr == VT_CONST ||
-        fr == VT_LOCAL ||
-        (v->r & VT_LVAL)) {
-        gen_modrm(r, v->r, v->sym, fc); /* destination is a memory operand */
-    } else if (fr != r) {
-        o(0xc0 + fr + r * 8); /* mov r, fr */
-    }
-}
-
-static void gadd_sp(int val) /* emit 'add $val, %esp', using the short form when 'val' fits in a char */
-{
-    if (val == (char)val) {
-        o(0xc483); /* add $xx, %esp (8-bit immediate) */
-        g(val);
-    } else {
-        oad(0xc481, val); /* add $xxx, %esp */
-    }
-}
-
-/* emit a call (or a jump when 'is_jmp' is '1') to the value on top of the value stack; vtop itself is not popped here */
-static void gcall_or_jmp(int is_jmp)
-{
-    int r;
-    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
-        /* constant case */
-        if (vtop->r & VT_SYM) {
-            /* relocation case */
-            greloc(cur_text_section, vtop->sym, 
-                   ind + 1, R_386_PC32); /* ind + 1: the field follows the one-byte opcode */
-        } else {
-            /* put an empty PC32 relocation */
-            put_elf_reloc(symtab_section, cur_text_section, 
-                          ind + 1, R_386_PC32, 0);
-        }
-        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
-    } else {
-        /* otherwise, indirect call */
-        r = gv(RC_INT);
-        o(0xff); /* call/jmp *r */
-        o(0xd0 + r + (is_jmp << 4)); /* 0xd0 + r for call, 0xe0 + r for jmp */
-    }
-}
-
-static uint8_t fastcall_regs[3] = { TREG_EAX, TREG_EDX, TREG_ECX }; /* argument registers for FUNC_FASTCALL1..3, in order */
-static uint8_t fastcallw_regs[2] = { TREG_ECX, TREG_EDX }; /* argument registers for FUNC_FASTCALLW, in order */
-
-/* Generate function call. The function address is pushed first, then
-   all the parameters in call order. This functions pops all the
-   parameters and the function address. */
-void gfunc_call(int nb_args)
-{
-    int size, align, r, args_size, i, func_call;
-    Sym *func_sym;
-    
-    args_size = 0; /* running total of bytes pushed for the arguments */
-    for(i = 0;i < nb_args; i++) {
-        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
-            size = type_size(&vtop->type, &align);
-            /* align to stack align size */
-            size = (size + 3) & ~3;
-            /* allocate the necessary size on stack */
-            oad(0xec81, size); /* sub $xxx, %esp */
-            /* generate structure store */
-            r = get_reg(RC_INT);
-            o(0x89); /* mov %esp, r */
-            o(0xe0 + r);
-            vset(&vtop->type, r | VT_LVAL, 0);
-            vswap();
-            vstore();
-            args_size += size;
-        } else if (is_float(vtop->type.t)) {
-            gv(RC_FLOAT); /* only one float register */
-            if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
-                size = 4;
-            else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
-                size = 8;
-            else
-                size = 12;
-            oad(0xec81, size); /* sub $xxx, %esp */
-            if (size == 12)
-                o(0x7cdb); /* fstpt 0(%esp) */
-            else
-                o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */
-            g(0x24); /* SIB byte selecting %esp as base */
-            g(0x00); /* 8-bit displacement 0; g() because o(0) would emit nothing */
-            args_size += size;
-        } else {
-            /* simple type (currently always same size) */
-            /* XXX: implicit cast ? */
-            r = gv(RC_INT);
-            if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
-                size = 8;
-                o(0x50 + vtop->r2); /* push r2 (second word goes on the stack first) */
-            } else {
-                size = 4;
-            }
-            o(0x50 + r); /* push r */
-            args_size += size;
-        }
-        vtop--;
-    }
-    save_regs(0); /* save used temporary registers */
-    func_sym = vtop->type.ref;
-    func_call = func_sym->r; /* calling convention of the callee */
-    /* fast call case */
-    if ((func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) ||
-        func_call == FUNC_FASTCALLW) {
-        int fastcall_nb_regs;
-        uint8_t *fastcall_regs_ptr;
-        if (func_call == FUNC_FASTCALLW) {
-            fastcall_regs_ptr = fastcallw_regs;
-            fastcall_nb_regs = 2;
-        } else {
-            fastcall_regs_ptr = fastcall_regs;
-            fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
-        }
-        for(i = 0;i < fastcall_nb_regs; i++) { /* pop the words pushed last back into the argument registers */
-            if (args_size <= 0)
-                break;
-            o(0x58 + fastcall_regs_ptr[i]); /* pop r */
-            /* XXX: incorrect for struct/floats */
-            args_size -= 4;
-        }
-    }
-    gcall_or_jmp(0);
-    if (args_size && func_sym->r != FUNC_STDCALL) /* for FUNC_STDCALL no stack adjustment is emitted here */
-        gadd_sp(args_size);
-    vtop--; /* pop the function address */
-}
-
-#ifdef TCC_TARGET_PE
-#define FUNC_PROLOG_SIZE 10 /* bytes reserved at function entry; filled in by gfunc_epilog() */
-#else
-#define FUNC_PROLOG_SIZE 9
-#endif
-
-/* generate function prolog of type 'func_type': reserve room for the prolog and declare the parameters */
-void gfunc_prolog(CType *func_type)
-{
-    int addr, align, size, func_call, fastcall_nb_regs;
-    int param_index, param_addr;
-    uint8_t *fastcall_regs_ptr;
-    Sym *sym;
-    CType *type;
-
-    sym = func_type->ref;
-    func_call = sym->r; /* calling convention */
-    addr = 8; /* offset from %ebp of the first stack parameter */
-    loc = 0;
-    if (func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) {
-        fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
-        fastcall_regs_ptr = fastcall_regs;
-    } else if (func_call == FUNC_FASTCALLW) {
-        fastcall_nb_regs = 2;
-        fastcall_regs_ptr = fastcallw_regs;
-    } else {
-        fastcall_nb_regs = 0;
-        fastcall_regs_ptr = NULL;
-    }
-    param_index = 0;
-
-    ind += FUNC_PROLOG_SIZE; /* skip the prolog bytes; gfunc_epilog() writes them later */
-    func_sub_sp_offset = ind;
-    /* if the function returns a structure, then add an
-       implicit pointer parameter */
-    func_vt = sym->type;
-    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
-        /* XXX: fastcall case ? */
-        func_vc = addr;
-        addr += 4;
-        param_index++;
-    }
-    /* define parameters */
-    while ((sym = sym->next) != NULL) {
-        type = &sym->type;
-        size = type_size(type, &align);
-        size = (size + 3) & ~3; /* round up to a multiple of 4 */
-#ifdef FUNC_STRUCT_PARAM_AS_PTR
-        /* structs are passed as pointer */
-        if ((type->t & VT_BTYPE) == VT_STRUCT) {
-            size = 4;
-        }
-#endif
-        if (param_index < fastcall_nb_regs) {
-            /* save FASTCALL register */
-            loc -= 4; /* allocate a 4-byte local slot for it */
-            o(0x89);     /* movl */
-            gen_modrm(fastcall_regs_ptr[param_index], VT_LOCAL, NULL, loc);
-            param_addr = loc;
-        } else {
-            param_addr = addr;
-            addr += size;
-        }
-        sym_push(sym->v & ~SYM_FIELD, type,
-                 VT_LOCAL | VT_LVAL, param_addr);
-        param_index++;
-    }
-    func_ret_sub = 0;
-    /* pascal type call ? */
-    if (func_call == FUNC_STDCALL)
-        func_ret_sub = addr - 8; /* total size of the stack parameters, used by 'ret n' */
-
-    /* leave some room for bound checking code */
-    if (do_bounds_check) {
-        oad(0xb8, 0); /* lbound section pointer */
-        oad(0xb8, 0); /* call to function: placeholder, overwritten by gfunc_epilog() */
-        func_bound_offset = lbounds_section->data_offset;
-    }
-}
-
-/* generate function epilog, then go back and write the prolog into the room reserved by gfunc_prolog() */
-void gfunc_epilog(void)
-{
-    int v, saved_ind;
-
-#ifdef CONFIG_TCC_BCHECK
-    if (do_bounds_check && func_bound_offset != lbounds_section->data_offset) {
-        int saved_ind; /* shadows the outer 'saved_ind' inside this block */
-        int *bounds_ptr;
-        Sym *sym, *sym_data;
-        /* add end of table info */
-        bounds_ptr = section_ptr_add(lbounds_section, sizeof(int));
-        *bounds_ptr = 0;
-        /* generate bound local allocation */
-        saved_ind = ind;
-        ind = func_sub_sp_offset; /* write over the placeholders emitted by gfunc_prolog() */
-        sym_data = get_sym_ref(&char_pointer_type, lbounds_section, 
-                               func_bound_offset, lbounds_section->data_offset);
-        greloc(cur_text_section, sym_data,
-               ind + 1, R_386_32);
-        oad(0xb8, 0); /* mov $xxx, %eax */
-        sym = external_global_sym(TOK___bound_local_new, &func_old_type, 0);
-        greloc(cur_text_section, sym, 
-               ind + 1, R_386_PC32);
-        oad(0xe8, -4); /* call __bound_local_new */
-        ind = saved_ind;
-        /* generate bound check local freeing */
-        o(0x5250); /* save returned value, if any */
-        greloc(cur_text_section, sym_data,
-               ind + 1, R_386_32);
-        oad(0xb8, 0); /* mov $xxx, %eax */
-        sym = external_global_sym(TOK___bound_local_delete, &func_old_type, 0);
-        greloc(cur_text_section, sym, 
-               ind + 1, R_386_PC32);
-        oad(0xe8, -4); /* call __bound_local_delete */
-        o(0x585a); /* restore returned value, if any */
-    }
-#endif
-    o(0xc9); /* leave */
-    if (func_ret_sub == 0) {
-        o(0xc3); /* ret */
-    } else {
-        o(0xc2); /* ret n */
-        g(func_ret_sub); /* n as a 16-bit value, low byte first */
-        g(func_ret_sub >> 8);
-    }
-    /* align local size to word & save local variables */
-    
-    v = (-loc + 3) & -4; 
-    saved_ind = ind;
-    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; /* start of the room reserved for the prolog */
-#ifdef TCC_TARGET_PE
-    if (v >= 4096) {
-        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
-        oad(0xb8, v); /* mov stacksize, %eax */
-        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
-        greloc(cur_text_section, sym, ind-4, R_386_PC32); /* ind-4: the 32-bit field of the call just emitted */
-    } else
-#endif
-    {
-        o(0xe58955);  /* push %ebp, mov %esp, %ebp */
-        o(0xec81);  /* sub esp, stacksize */
-        gen_le32(v);
-#if FUNC_PROLOG_SIZE == 10
-        o(0x90);  /* adjust to FUNC_PROLOG_SIZE */
-#endif
-    }
-    ind = saved_ind; /* resume emitting after the epilog */
-}
-
-/* generate a jump to a label: 't' is stored in the jump's 32-bit field and the offset of that field is returned */
-int gjmp(int t)
-{
-    return psym(0xe9, t);
-}
-
-/* generate a jump to a fixed address 'a' (a text offset), using the short form when the distance fits in a char */
-void gjmp_addr(int a)
-{
-    int r;
-    r = a - ind - 2; /* distance measured from the end of a 2-byte jump */
-    if (r == (char)r) {
-        g(0xeb);
-        g(r);
-    } else {
-        oad(0xe9, a - ind - 5); /* distance measured from the end of a 5-byte jump */
-    }
-}
-
-/* generate a test. set 'inv' to invert test. Stack entry is popped. 't' is a jump chain; the updated chain is returned */
-int gtst(int inv, int t)
-{
-    int v, *p;
-
-    v = vtop->r & VT_VALMASK;
-    if (v == VT_CMP) {
-        /* fast case : can jump directly since flags are set */
-        g(0x0f);
-        t = psym((vtop->c.i - 16) ^ inv, t); /* derive the jump opcode byte from the value stored in c.i */
-    } else if (v == VT_JMP || v == VT_JMPI) {
-        /* && or || optimization */
-        if ((v & 1) == inv) {
-            /* insert vtop->c jump list in t */
-            p = &vtop->c.i;
-            while (*p != 0) /* walk to the end of the vtop->c chain */
-                p = (int *)(cur_text_section->data + *p);
-            *p = t;
-            t = vtop->c.i;
-        } else {
-            t = gjmp(t);
-            gsym(vtop->c.i);
-        }
-    } else {
-        if (is_float(vtop->type.t) || is_llong(vtop->type.t)) {
-            /* compare != 0 to get a 32-bit int for testing */
-            vpushi(0);
-            gen_op(TOK_NE);
-        }
-        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
-            /* constant jmp optimization */
-            if ((vtop->c.i != 0) != inv) 
-                t = gjmp(t);
-        } else {
-            v = gv(RC_INT);
-            o(0x85); /* test v, v */
-            o(0xc0 + v * 9); /* v * 9 puts 'v' in both register fields of the modrm byte */
-            g(0x0f);
-            t = psym(0x85 ^ inv, t); /* 0x0f 0x85 is jne; 'inv' flips the low bit to give je */
-        }
-    }
-    vtop--;
-    return t;
-}
-
-/* generate an integer binary operation 'op' on the two top value-stack entries, leaving one result entry */
-void gen_opi(int op)
-{
-    int r, fr, opc, c;
-
-    switch(op) {
-    case '+':
-    case TOK_ADDC1: /* add with carry generation */
-        opc = 0;
-    gen_op8: /* shared tail: 'opc' is the 3-bit operation selector */
-        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
-            /* constant case */
-            vswap();
-            r = gv(RC_INT);
-            vswap();
-            c = vtop->c.i;
-            if (c == (char)c) {
-                /* XXX: generate inc and dec for smaller code ? */
-                o(0x83); /* form with an 8-bit immediate */
-                o(0xc0 | (opc << 3) | r);
-                g(c);
-            } else {
-                o(0x81); /* form with a 32-bit immediate */
-                oad(0xc0 | (opc << 3) | r, c);
-            }
-        } else {
-            gv2(RC_INT, RC_INT);
-            r = vtop[-1].r;
-            fr = vtop[0].r;
-            o((opc << 3) | 0x01); /* register-to-register form of the operation */
-            o(0xc0 + r + fr * 8); 
-        }
-        vtop--;
-        if (op >= TOK_ULT && op <= TOK_GT) {
-            vtop->r = VT_CMP; /* the result of a comparison lives in the flags */
-            vtop->c.i = op;
-        }
-        break;
-    case '-':
-    case TOK_SUBC1: /* sub with carry generation */
-        opc = 5;
-        goto gen_op8;
-    case TOK_ADDC2: /* add with carry use */
-        opc = 2;
-        goto gen_op8;
-    case TOK_SUBC2: /* sub with carry use */
-        opc = 3;
-        goto gen_op8;
-    case '&':
-        opc = 4;
-        goto gen_op8;
-    case '^':
-        opc = 6;
-        goto gen_op8;
-    case '|':
-        opc = 1;
-        goto gen_op8;
-    case '*':
-        gv2(RC_INT, RC_INT);
-        r = vtop[-1].r;
-        fr = vtop[0].r;
-        vtop--;
-        o(0xaf0f); /* imul fr, r */
-        o(0xc0 + fr + r * 8);
-        break;
-    case TOK_SHL:
-        opc = 4;
-        goto gen_shift;
-    case TOK_SHR:
-        opc = 5;
-        goto gen_shift;
-    case TOK_SAR:
-        opc = 7;
-    gen_shift: /* shared tail for the three shifts */
-        opc = 0xc0 | (opc << 3); /* turn the selector into a modrm byte lacking only the register */
-        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
-            /* constant case */
-            vswap();
-            r = gv(RC_INT);
-            vswap();
-            c = vtop->c.i & 0x1f; /* keep only the low 5 bits of the count */
-            o(0xc1); /* shl/shr/sar $xxx, r */
-            o(opc | r);
-            g(c);
-        } else {
-            /* we generate the shift in ecx */
-            gv2(RC_INT, RC_ECX);
-            r = vtop[-1].r;
-            o(0xd3); /* shl/shr/sar %cl, r */
-            o(opc | r);
-        }
-        vtop--;
-        break;
-    case '/':
-    case TOK_UDIV:
-    case TOK_PDIV:
-    case '%':
-    case TOK_UMOD:
-    case TOK_UMULL:
-        /* first operand must be in eax */
-        /* XXX: need better constraint for second operand */
-        gv2(RC_EAX, RC_ECX);
-        r = vtop[-1].r;
-        fr = vtop[0].r;
-        vtop--;
-        save_reg(TREG_EDX); /* edx is written by the instructions emitted below */
-        if (op == TOK_UMULL) {
-            o(0xf7); /* mul fr */
-            o(0xe0 + fr);
-            vtop->r2 = TREG_EDX; /* second word of the result */
-            r = TREG_EAX;
-        } else {
-            if (op == TOK_UDIV || op == TOK_UMOD) {
-                o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
-                o(0xf0 + fr);
-            } else {
-                o(0xf799); /* cltd, idiv fr, %eax */
-                o(0xf8 + fr);
-            }
-            if (op == '%' || op == TOK_UMOD)
-                r = TREG_EDX; /* remainder */
-            else
-                r = TREG_EAX; /* quotient */
-        }
-        vtop->r = r;
-        break;
-    default: /* every other operator, including the comparisons tested for after gen_op8 */
-        opc = 7;
-        goto gen_op8;
-    }
-}
-
-/* generate a floating point operation 'v = t1 op t2' instruction. The
-   two operands are guaranteed to have the same floating point type */
-/* XXX: need to use ST1 too */
-void gen_opf(int op)
-{
-    int a, ft, fc, swapped, r;
-
-    /* convert constants to memory references */
-    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
-        vswap();
-        gv(RC_FLOAT);
-        vswap();
-    }
-    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
-        gv(RC_FLOAT);
-
-    /* must put at least one value in the floating point register */
-    if ((vtop[-1].r & VT_LVAL) &&
-        (vtop[0].r & VT_LVAL)) {
-        vswap();
-        gv(RC_FLOAT);
-        vswap();
-    }
-    swapped = 0; /* set when the operands end up in reverse order */
-    /* swap the stack if needed so that t1 is the register and t2 is
-       the memory reference */
-    if (vtop[-1].r & VT_LVAL) {
-        vswap();
-        swapped = 1;
-    }
-    if (op >= TOK_ULT && op <= TOK_GT) {
-        /* load on stack second operand */
-        load(TREG_ST0, vtop);
-        save_reg(TREG_EAX); /* eax is used by FP comparison code */
-        if (op == TOK_GE || op == TOK_GT)
-            swapped = !swapped;
-        else if (op == TOK_EQ || op == TOK_NE)
-            swapped = 0; /* operand order does not matter for (in)equality */
-        if (swapped)
-            o(0xc9d9); /* fxch %st(1) */
-        o(0xe9da); /* fucompp */
-        o(0xe0df); /* fnstsw %ax */
-        if (op == TOK_EQ) {
-            o(0x45e480); /* and $0x45, %ah */
-            o(0x40fC80); /* cmp $0x40, %ah */
-        } else if (op == TOK_NE) {
-            o(0x45e480); /* and $0x45, %ah */
-            o(0x40f480); /* xor $0x40, %ah */
-            op = TOK_NE; /* no-op: 'op' is already TOK_NE in this branch */
-        } else if (op == TOK_GE || op == TOK_LE) {
-            o(0x05c4f6); /* test $0x05, %ah */
-            op = TOK_EQ;
-        } else {
-            o(0x45c4f6); /* test $0x45, %ah */
-            op = TOK_EQ;
-        }
-        vtop--;
-        vtop->r = VT_CMP; /* the result now lives in the integer flags */
-        vtop->c.i = op;
-    } else {
-        /* no memory reference possible for long double operations */
-        if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
-            load(TREG_ST0, vtop);
-            swapped = !swapped;
-        }
-        
-        switch(op) {
-        default:
-        case '+':
-            a = 0; /* 'a' is the 3-bit operation selector placed in the modrm reg field */
-            break;
-        case '-':
-            a = 4;
-            if (swapped)
-                a++; /* reversed-operand variant */
-            break;
-        case '*':
-            a = 1;
-            break;
-        case '/':
-            a = 6;
-            if (swapped)
-                a++; /* reversed-operand variant */
-            break;
-        }
-        ft = vtop->type.t;
-        fc = vtop->c.ul;
-        if ((ft & VT_BTYPE) == VT_LDOUBLE) {
-            o(0xde); /* fxxxp %st, %st(1) */
-            o(0xc1 + (a << 3));
-        } else {
-            /* if saved lvalue, then we must reload it */
-            r = vtop->r;
-            if ((r & VT_VALMASK) == VT_LLOCAL) {
-                SValue v1;
-                r = get_reg(RC_INT);
-                v1.type.t = VT_INT;
-                v1.r = VT_LOCAL | VT_LVAL;
-                v1.c.ul = fc;
-                load(r, &v1);
-                fc = 0; /* the operand is now addressed through register 'r' */
-            }
-
-            if ((ft & VT_BTYPE) == VT_DOUBLE)
-                o(0xdc); /* opcode byte used for a double memory operand */
-            else
-                o(0xd8); /* opcode byte used for a float memory operand */
-            gen_modrm(a, r, vtop->sym, fc);
-        }
-        vtop--;
-    }
-}
-
-/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
-   and 'long long' cases. The integer is pushed on the stack and loaded from there. */
-void gen_cvt_itof(int t)
-{
-    save_reg(TREG_ST0);
-    gv(RC_INT);
-    if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
-        /* signed long long to float/double/long double (unsigned case
-           is handled generically) */
-        o(0x50 + vtop->r2); /* push r2 */
-        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-        o(0x242cdf); /* fildll (%esp) */
-        o(0x08c483); /* add $8, %esp */
-    } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == 
-               (VT_INT | VT_UNSIGNED)) {
-        /* unsigned int to float/double/long double */
-        o(0x6a); /* push $0 */
-        g(0x00); /* the immediate byte; g() because o(0) would emit nothing */
-        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-        o(0x242cdf); /* fildll (%esp) */
-        o(0x08c483); /* add $8, %esp */
-    } else {
-        /* int to float/double/long double */
-        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-        o(0x2404db); /* fildl (%esp) */
-        o(0x04c483); /* add $4, %esp */
-    }
-    vtop->r = TREG_ST0; /* the result is now in st0 */
-}
-
-/* convert fp to int 't' type; the FPU control word is switched around the store */
-/* XXX: handle long long case */
-void gen_cvt_ftoi(int t)
-{
-    int r, r2, size;
-    Sym *sym;
-    CType ushort_type;
-
-    ushort_type.t = VT_SHORT | VT_UNSIGNED;
-
-    gv(RC_FLOAT);
-    if (t != VT_INT)
-        size = 8; /* anything but a plain int goes through a 64-bit store */
-    else 
-        size = 4;
-    
-    o(0x2dd9); /* fldcw xxx */
-    sym = external_global_sym(TOK___tcc_int_fpu_control, 
-                              &ushort_type, VT_LVAL);
-    greloc(cur_text_section, sym, 
-           ind, R_386_32);
-    gen_le32(0); /* address field, filled in by the relocation */
-    
-    oad(0xec81, size); /* sub $xxx, %esp */
-    if (size == 4)
-        o(0x1cdb); /* fistpl */
-    else
-        o(0x3cdf); /* fistpll */
-    o(0x24); /* SIB byte: the destination is (%esp) */
-    o(0x2dd9); /* fldcw xxx */
-    sym = external_global_sym(TOK___tcc_fpu_control, 
-                              &ushort_type, VT_LVAL);
-    greloc(cur_text_section, sym, 
-           ind, R_386_32);
-    gen_le32(0); /* address field, filled in by the relocation */
-
-    r = get_reg(RC_INT);
-    o(0x58 + r); /* pop r */
-    if (size == 8) {
-        if (t == VT_LLONG) {
-            vtop->r = r; /* mark reg as used */
-            r2 = get_reg(RC_INT);
-            o(0x58 + r2); /* pop r2 */
-            vtop->r2 = r2;
-        } else {
-            o(0x04c483); /* add $4, %esp */
-        }
-    }
-    vtop->r = r;
-}
-
-/* convert from one floating point type to another ('t' is not used here) */
-void gen_cvt_ftof(int t)
-{
-    /* all we have to do on i386 is to put the float in a register */
-    gv(RC_FLOAT);
-}
-
-/* computed goto support: jump to the address on top of the value stack, then pop it */
-void ggoto(void)
-{
-    gcall_or_jmp(1);
-    vtop--;
-}
-
-/* bound check support functions */
-#ifdef CONFIG_TCC_BCHECK
-
-/* generate a bounded pointer addition: call __bound_ptr_add on the two top stack entries, leaving the result in eax */
-void gen_bounded_ptr_add(void)
-{
-    Sym *sym;
-
-    /* prepare fast i386 function call (args in eax and edx) */
-    gv2(RC_EAX, RC_EDX);
-    /* save all temporary registers */
-    vtop -= 2;
-    save_regs(0);
-    /* do a fast function call */
-    sym = external_global_sym(TOK___bound_ptr_add, &func_old_type, 0);
-    greloc(cur_text_section, sym, 
-           ind + 1, R_386_PC32);
-    oad(0xe8, -4); /* call __bound_ptr_add */
-    /* returned pointer is in eax */
-    vtop++;
-    vtop->r = TREG_EAX | VT_BOUNDED;
-    /* address of bounding function call point (offset of the relocation entry just added) */
-    vtop->c.ul = (cur_text_section->reloc->data_offset - sizeof(Elf32_Rel)); 
-}
-
-/* patch pointer addition in vtop so that pointer dereferencing is
-   also tested (retargets the recorded call to a size-specific helper) */
-void gen_bounded_ptr_deref(void)
-{
-    int func;
-    int size, align;
-    Elf32_Rel *rel;
-    Sym *sym;
-
-    size = 0; /* 0 means: not decided yet */
-    /* XXX: put that code in generic part of tcc */
-    if (!is_float(vtop->type.t)) {
-        if (vtop->r & VT_LVAL_BYTE)
-            size = 1;
-        else if (vtop->r & VT_LVAL_SHORT)
-            size = 2;
-    }
-    if (!size)
-        size = type_size(&vtop->type, &align);
-    switch(size) { /* pick the helper that matches the access size */
-    case  1: func = TOK___bound_ptr_indir1; break;
-    case  2: func = TOK___bound_ptr_indir2; break;
-    case  4: func = TOK___bound_ptr_indir4; break;
-    case  8: func = TOK___bound_ptr_indir8; break;
-    case 12: func = TOK___bound_ptr_indir12; break;
-    case 16: func = TOK___bound_ptr_indir16; break;
-    default:
-        error("unhandled size when derefencing bounded pointer");
-        func = 0;
-        break;
-    }
-
-    /* patch relocation */
-    /* XXX: find a better solution ? */
-    rel = (Elf32_Rel *)(cur_text_section->reloc->data + vtop->c.ul); /* c.ul is the offset saved by gen_bounded_ptr_add() */
-    sym = external_global_sym(func, &func_old_type, 0);
-    if (!sym->c)
-        put_extern_sym(sym, NULL, 0, 0);
-    rel->r_info = ELF32_R_INFO(sym->c, ELF32_R_TYPE(rel->r_info)); /* keep the relocation type, swap the target symbol */
-}
-#endif
-
-/* end of X86 code generator */
-/*************************************************************/
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i386/i386-asm.c	Sat May 12 00:15:39 2007 -0400
@@ -0,0 +1,1209 @@
+/*
+ *  i386 specific functions for TCC assembler
+ * 
+ *  Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define MAX_OPERANDS 3
+
+typedef struct ASMInstr {
+    uint16_t sym; /* TOK_ASM_xxx token of the mnemonic, 0 ends the table */
+    uint16_t opcode; /* base opcode; a non zero high byte is output first */
+    uint16_t instr_type; /* OPC_xxx flags | (group << OPC_GROUP_SHIFT) */
+#define OPC_JMP       0x01  /* jmp operand */
+#define OPC_B         0x02  /* only used with OPC_WL */
+#define OPC_WL        0x04  /* accepts w, l or no suffix */
+#define OPC_BWL       (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
+#define OPC_REG       0x08 /* register is added to opcode */
+#define OPC_MODRM     0x10 /* modrm encoding */
+#define OPC_FWAIT     0x20 /* add fwait opcode */
+#define OPC_TEST      0x40 /* test opcodes */
+#define OPC_SHIFT     0x80 /* shift opcodes */
+#define OPC_D16      0x0100 /* generate data16 prefix */
+#define OPC_ARITH    0x0200 /* arithmetic opcodes */
+#define OPC_SHORTJMP 0x0400 /* short jmp operand */
+#define OPC_FARITH   0x0800 /* FPU arithmetic opcodes */
+#define OPC_GROUP_SHIFT 13 /* position of the 3 bit group in instr_type */
+
+/* in order to compress the operand type, we use specific operand
+   codes (OPT_xxx) which can only be ORed with OPT_EA */
+#define OPT_REG8  0 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_REG16 1 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_REG32 2 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_MMX   3 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_SSE   4 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_CR    5 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_TR    6 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_DB    7 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_SEG   8
+#define OPT_ST    9
+#define OPT_IM8   10
+#define OPT_IM8S  11
+#define OPT_IM16  12
+#define OPT_IM32  13
+#define OPT_EAX   14 /* %al, %ax or %eax register */
+#define OPT_ST0   15 /* %st(0) register */
+#define OPT_CL    16 /* %cl register */
+#define OPT_DX    17 /* %dx register */
+#define OPT_ADDR  18 /* OP_EA with only offset */
+#define OPT_INDIR 19 /* *(expr) */
+
+/* composite types: expanded to several OP_xxx bits when matching */
+#define OPT_COMPOSITE_FIRST   20
+#define OPT_IM       20 /* IM8 | IM16 | IM32 */
+#define OPT_REG      21 /* REG8 | REG16 | REG32 */
+#define OPT_REGW     22 /* REG16 | REG32 */
+#define OPT_IMW      23 /* IM16 | IM32 */
+
+/* can be ored with any OPT_xxx */
+#define OPT_EA    0x80
+
+    uint8_t nb_ops; /* number of operands of this instruction form */
+    uint8_t op_type[MAX_OPERANDS]; /* OPT_xxx codes, optionally ORed with OPT_EA */
+} ASMInstr;
+
+typedef struct Operand {
+    uint32_t type; /* bit mask of the OP_xxx flags below */
+#define OP_REG8   (1 << OPT_REG8)
+#define OP_REG16  (1 << OPT_REG16)
+#define OP_REG32  (1 << OPT_REG32)
+#define OP_MMX    (1 << OPT_MMX)
+#define OP_SSE    (1 << OPT_SSE)
+#define OP_CR     (1 << OPT_CR)
+#define OP_TR     (1 << OPT_TR)
+#define OP_DB     (1 << OPT_DB)
+#define OP_SEG    (1 << OPT_SEG)
+#define OP_ST     (1 << OPT_ST)
+#define OP_IM8    (1 << OPT_IM8)
+#define OP_IM8S   (1 << OPT_IM8S)
+#define OP_IM16   (1 << OPT_IM16)
+#define OP_IM32   (1 << OPT_IM32)
+#define OP_EAX    (1 << OPT_EAX)
+#define OP_ST0    (1 << OPT_ST0)
+#define OP_CL     (1 << OPT_CL)
+#define OP_DX     (1 << OPT_DX)
+#define OP_ADDR   (1 << OPT_ADDR)
+#define OP_INDIR  (1 << OPT_INDIR)
+
+#define OP_EA     0x40000000 /* address(reg,reg2,shift) operand */
+#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32)
+#define OP_IM     OP_IM32
+    int8_t  reg; /* register, -1 if none */
+    int8_t  reg2; /* second register, -1 if none */
+    uint8_t shift; /* log2 of the scale applied to reg2 (0 to 3) */
+    ExprValue e; /* immediate value or address offset, with optional symbol */
+} Operand;
+
+static const uint8_t reg_to_size[5] = { /* OP_REG8/16/32 flag -> size code */
+    [OP_REG8] = 0,
+    [OP_REG16] = 1,
+    [OP_REG32] = 2,
+};
+    
+#define WORD_PREFIX_OPCODE 0x66
+
+#define NB_TEST_OPCODES 30
+
+static const uint8_t test_bits[NB_TEST_OPCODES] = { /* added to the opcode of OPC_TEST instructions */
+ 0x00, /* o */
+ 0x01, /* no */
+ 0x02, /* b */
+ 0x02, /* c */
+ 0x02, /* nae */
+ 0x03, /* nb */
+ 0x03, /* nc */
+ 0x03, /* ae */
+ 0x04, /* e */
+ 0x04, /* z */
+ 0x05, /* ne */
+ 0x05, /* nz */
+ 0x06, /* be */
+ 0x06, /* na */
+ 0x07, /* nbe */
+ 0x07, /* a */
+ 0x08, /* s */
+ 0x09, /* ns */
+ 0x0a, /* p */
+ 0x0a, /* pe */
+ 0x0b, /* np */
+ 0x0b, /* po */
+ 0x0c, /* l */
+ 0x0c, /* nge */
+ 0x0d, /* nl */
+ 0x0d, /* ge */
+ 0x0e, /* le */
+ 0x0e, /* ng */
+ 0x0f, /* nle */
+ 0x0f, /* g */
+};
+
+static const uint8_t segment_prefixes[] = { /* indexed by (token - TOK_ASM_es) */
+ 0x26, /* es */
+ 0x2e, /* cs */
+ 0x36, /* ss */
+ 0x3e, /* ds */
+ 0x64, /* fs */
+ 0x65  /* gs */
+};
+
+static const ASMInstr asm_instrs[] = { /* one entry per instruction form of i386-asm.h */
+#define ALT(x) x
+#define DEF_ASM_OP0(name, opcode)
+#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
+#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
+#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
+#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
+#include "i386-asm.h"
+
+    /* last operation: sym == 0 marks the end of the table */
+    { 0, },
+};
+
+static const uint16_t op0_codes[] = { /* DEF_ASM_OP0 opcodes, indexed by (token - TOK_ASM_pusha) */
+#define ALT(x)
+#define DEF_ASM_OP0(x, opcode) opcode,
+#define DEF_ASM_OP0L(name, opcode, group, instr_type)
+#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
+#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
+#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
+#include "i386-asm.h"
+};
+
+/* parse the scale factor of an indexed address and return its log2.
+   The result is what parse_operand() stores in Operand.shift, i.e.
+   the value placed in the 2 bit scale field of the sib byte */
+static inline int get_reg_shift(TCCState *s1)
+{
+    int scale, log2_scale;
+
+    /* only 1, 2, 4 and 8 are valid scale factors */
+    scale = asm_int_expr(s1);
+    if (scale == 1) {
+        log2_scale = 0;
+    } else if (scale == 2) {
+        log2_scale = 1;
+    } else if (scale == 4) {
+        log2_scale = 2;
+    } else if (scale == 8) {
+        log2_scale = 3;
+    } else {
+        /* report the problem; 0 is used if expect() returns */
+        expect("1, 2, 4 or 8 constant");
+        log2_scale = 0;
+    }
+
+    return log2_scale;
+}
+
+/* parse '%' followed by a 32 bit register name; return the register number */
+static int asm_parse_reg(void)
+{
+    int regno;
+
+    if (tok == '%') {
+        next();
+        if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
+            regno = tok - TOK_ASM_eax;
+            next();
+            return regno;
+        }
+    }
+    expect("32 bit register");
+    return 0;
+}
+
+static void parse_operand(TCCState *s1, Operand *op)
+{
+    ExprValue e;
+    int reg, indir;
+    const char *p;
+    /* parse one operand starting at the current token and describe it in *op */
+    indir = 0;
+    if (tok == '*') { /* '*' prefix: the operand gets the OP_INDIR flag */
+        next();
+        indir = OP_INDIR;
+    }
+
+    if (tok == '%') { /* register operand */
+        next();
+        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
+            reg = tok - TOK_ASM_al;
+            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
+            op->reg = reg & 7; /* register tokens come in classes of 8 */
+            if ((op->type & OP_REG) && op->reg == TREG_EAX)
+                op->type |= OP_EAX;
+            else if (op->type == OP_REG8 && op->reg == TREG_ECX)
+                op->type |= OP_CL;
+            else if (op->type == OP_REG16 && op->reg == TREG_EDX)
+                op->type |= OP_DX;
+        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) { /* %drN is stored as OP_DB too */
+            op->type = OP_DB;
+            op->reg = tok - TOK_ASM_dr0;
+        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
+            op->type = OP_SEG;
+            op->reg = tok - TOK_ASM_es;
+        } else if (tok == TOK_ASM_st) {
+            op->type = OP_ST;
+            op->reg = 0; /* %st without index is register 0 */
+            next();
+            if (tok == '(') {
+                next();
+                if (tok != TOK_PPNUM)
+                    goto reg_error;
+                p = tokc.cstr->data;
+                reg = p[0] - '0'; /* a single digit from 0 to 7 is required */
+                if ((unsigned)reg >= 8 || p[1] != '\0')
+                    goto reg_error;
+                op->reg = reg;
+                next();
+                skip(')');
+            }
+            if (op->reg == 0)
+                op->type |= OP_ST0;
+            goto no_skip; /* the tokens of the operand are already consumed */
+        } else {
+        reg_error:
+            error("unknown register");
+        }
+        next(); /* skip the register name */
+    no_skip: ;
+    } else if (tok == '$') {
+        /* constant value */
+        next();
+        asm_expr(s1, &e);
+        op->type = OP_IM32;
+        op->e.v = e.v;
+        op->e.sym = e.sym;
+        if (!op->e.sym) { /* narrower sizes only for values without symbol */
+            if (op->e.v == (uint8_t)op->e.v)
+                op->type |= OP_IM8;
+            if (op->e.v == (int8_t)op->e.v)
+                op->type |= OP_IM8S;
+            if (op->e.v == (uint16_t)op->e.v)
+                op->type |= OP_IM16;
+        }
+    } else {
+        /* address(reg,reg2,shift) with all variants */
+        op->type = OP_EA;
+        op->reg = -1;
+        op->reg2 = -1;
+        op->shift = 0;
+        if (tok != '(') { /* offset expression before the optional '(' */
+            asm_expr(s1, &e);
+            op->e.v = e.v;
+            op->e.sym = e.sym;
+        } else {
+            op->e.v = 0;
+            op->e.sym = NULL;
+        }
+        if (tok == '(') {
+            next();
+            if (tok != ',') { /* first register may be omitted: (,reg2,shift) */
+                op->reg = asm_parse_reg();
+            }
+            if (tok == ',') {
+                next();
+                if (tok != ',') {
+                    op->reg2 = asm_parse_reg();
+                }
+                if (tok == ',') {
+                    next();
+                    op->shift = get_reg_shift(s1);
+                }
+            }
+            skip(')');
+        }
+        if (op->reg == -1 && op->reg2 == -1)
+            op->type |= OP_ADDR; /* offset only, no register */
+    }
+    op->type |= indir;
+}
+
+/* output a 32 bit value. XXX: unify with C code output ? */
+static void gen_expr32(ExprValue *pe)
+{
+    if (pe->sym != NULL) /* symbolic value: add a R_386_32 relocation */
+        greloc(cur_text_section, pe->sym, ind, R_386_32);
+    gen_le32(pe->v);
+}
+
+/* output a 32 bit PC relative value. XXX: unify with C code output ? */
+static void gen_disp32(ExprValue *pe)
+{
+    Sym *target = pe->sym;
+
+    if (target && target->r == cur_text_section->sh_num) {
+        /* same section: we can output an absolute value. Note
+           that the TCC compiler behaves differently here because
+           it always outputs a relocation to ease (future) code
+           elimination in the linker */
+        gen_le32(pe->v + (long)target->next - ind - 4);
+        return;
+    }
+    /* otherwise a PC32 relocation is needed; the -4 is the size of
+       the field that is output here */
+    if (target) {
+        greloc(cur_text_section, target, ind, R_386_PC32);
+    } else {
+        /* put an empty PC32 relocation */
+        put_elf_reloc(symtab_section, cur_text_section, ind, R_386_PC32, 0);
+    }
+    gen_le32(pe->v - 4);
+}
+
+
+static void gen_le16(int v) /* output a 16 bit value */
+{
+    g(v); /* first the value itself, */
+    g(v >> 8); /* then the value shifted right by 8 bits */
+}
+
+/* generate the modrm operand for 'op'; 'reg' is placed in bits 3 to 5 */
+static inline void asm_modrm(int reg, Operand *op)
+{
+    int mod, reg1, reg2, sib_reg1;
+
+    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
+        g(0xc0 + (reg << 3) + op->reg); /* register operand */
+    } else if (op->reg == -1 && op->reg2 == -1) {
+        /* displacement only */
+        g(0x05 + (reg << 3));
+        gen_expr32(&op->e);
+    } else {
+        sib_reg1 = op->reg;
+        /* first compute displacement encoding */
+        if (sib_reg1 == -1) {
+            sib_reg1 = 5; /* no first register: a 32 bit offset follows */
+            mod = 0x00;
+        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
+            mod = 0x00; /* no offset (never used with register 5) */
+        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
+            mod = 0x40; /* offset fits in a signed byte */
+        } else {
+            mod = 0x80; /* 32 bit offset */
+        }
+        /* compute if sib byte needed */
+        reg1 = op->reg;
+        if (op->reg2 != -1)
+            reg1 = 4; /* a second register always needs the sib byte */
+        g(mod + (reg << 3) + reg1);
+        if (reg1 == 4) {
+            /* add sib byte */
+            reg2 = op->reg2;
+            if (reg2 == -1)
+                reg2 = 4; /* indicate no index */
+            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
+        }
+
+        /* add offset */
+        if (mod == 0x40) {
+            g(op->e.v);
+        } else if (mod == 0x80 || op->reg == -1) {
+            gen_expr32(&op->e);
+        }
+    }
+}
+
+/* assemble instruction 'opcode' (TOK_ASM_xxx): parse operands, match asm_instrs, emit code */
+static void asm_opcode(TCCState *s1, int opcode)
+{
+    const ASMInstr *pa;
+    int i, modrm_index, reg, v, op1, is_short_jmp, has_seg_prefix;
+    int nb_ops, s, ss;
+    Operand ops[MAX_OPERANDS], *pop, seg_prefix;
+    int op_type[MAX_OPERANDS]; /* decoded op type */
+
+    /* get operands */
+    pop = ops;
+    nb_ops = 0;
+    has_seg_prefix = 0;
+    for(;;) {
+        if (tok == ';' || tok == TOK_LINEFEED)
+            break;
+        if (nb_ops >= MAX_OPERANDS) {
+            error("incorrect number of operands");
+        }
+        parse_operand(s1, pop);
+        if (tok == ':') { /* what was parsed is a segment prefix */
+           if (pop->type != OP_SEG || has_seg_prefix) {
+               error("incorrect prefix");
+           }
+           seg_prefix = *pop;
+           has_seg_prefix = 1;
+           next();
+           parse_operand(s1, pop);
+           if (!(pop->type & OP_EA)) {
+               error("segment prefix must be followed by memory reference");
+           }
+        }
+        pop++;
+        nb_ops++;
+        if (tok != ',')
+            break;
+        next();
+    }
+
+    is_short_jmp = 0;
+    s = 0; /* avoid warning */
+    /* optimize matching by using a lookup table (no hashing is needed
+       !) */
+    for(pa = asm_instrs; pa->sym != 0; pa++) {
+        s = 0;
+        if (pa->instr_type & OPC_FARITH) {
+            v = opcode - pa->sym;
+            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
+                continue;
+        } else if (pa->instr_type & OPC_ARITH) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + 8 * 4))
+                continue;
+            goto compute_size;
+        } else if (pa->instr_type & OPC_SHIFT) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + 7 * 4))
+                continue;
+            goto compute_size;
+        } else if (pa->instr_type & OPC_TEST) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
+                continue;
+        } else if (pa->instr_type & OPC_B) {
+            if (!(opcode >= pa->sym && opcode <= pa->sym + 3))
+                continue;
+        compute_size:
+            s = (opcode - pa->sym) & 3; /* 0 = b, 1 = w, 2 = l, 3 = no suffix */
+        } else if (pa->instr_type & OPC_WL) {
+            if (!(opcode >= pa->sym && opcode <= pa->sym + 2))
+                continue;
+            s = opcode - pa->sym + 1; /* 1 = w, 2 = l, 3 = no suffix */
+        } else {
+            if (pa->sym != opcode)
+                continue;
+        }
+        if (pa->nb_ops != nb_ops)
+            continue;
+        /* now decode and check each operand */
+        for(i = 0; i < nb_ops; i++) {
+            int op1, op2;
+            op1 = pa->op_type[i];
+            op2 = op1 & 0x1f; /* OPT_xxx code without the OPT_EA flag */
+            switch(op2) {
+            case OPT_IM:
+                v = OP_IM8 | OP_IM16 | OP_IM32;
+                break;
+            case OPT_REG:
+                v = OP_REG8 | OP_REG16 | OP_REG32;
+                break;
+            case OPT_REGW:
+                v = OP_REG16 | OP_REG32;
+                break;
+            case OPT_IMW:
+                v = OP_IM16 | OP_IM32;
+                break;
+            default:
+                v = 1 << op2;
+                break;
+            }
+            if (op1 & OPT_EA)
+                v |= OP_EA;
+            op_type[i] = v;
+            if ((ops[i].type & v) == 0)
+                goto next;
+        }
+        /* all is matching ! */
+        break;
+    next: ;
+    }
+    if (pa->sym == 0) {
+        if (opcode >= TOK_ASM_pusha && opcode <= TOK_ASM_emms) {
+            int b;
+            b = op0_codes[opcode - TOK_ASM_pusha];
+            if (b & 0xff00) 
+                g(b >> 8);
+            g(b);
+            return;
+        } else {
+            error("unknown opcode '%s'", 
+                  get_tok_str(opcode, NULL));
+        }
+    }
+    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
+    if (s == 3) {
+        for(i = 0; s == 3 && i < nb_ops; i++) {
+            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
+                s = reg_to_size[ops[i].type & OP_REG];
+        }
+        if (s == 3) {
+            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && 
+                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
+                s = 2;
+            else
+                error("cannot infer opcode suffix");
+        }
+    }
+
+    /* generate data16 prefix if needed */
+    ss = s; /* keep the real size code to select the immediate size */
+    if (s == 1 || (pa->instr_type & OPC_D16))
+        g(WORD_PREFIX_OPCODE);
+    else if (s == 2)
+        s = 1; /* from here s is what is added to OPC_B opcodes */
+    /* now generates the operation */
+    if (pa->instr_type & OPC_FWAIT)
+        g(0x9b);
+    if (has_seg_prefix)
+        g(segment_prefixes[seg_prefix.reg]);
+
+    v = pa->opcode;
+    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
+        /* kludge for imul $im, %reg: encoded as imul $im, %reg, %reg */
+        nb_ops = 3;
+        ops[2] = ops[1];
+        op_type[2] = op_type[1]; /* read by the modrm and constant loops */
+    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
+        v--; /* int $3 case */
+        nb_ops = 0;
+    } else if ((v == 0x06 || v == 0x07)) {
+        if (ops[0].reg >= 4) {
+            /* push/pop %fs or %gs */
+            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
+        } else {
+            v += ops[0].reg << 3;
+        }
+        nb_ops = 0;
+    } else if (v <= 0x05) {
+        /* arith case */
+        v += ((opcode - TOK_ASM_addb) >> 2) << 3;
+    } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
+        /* fpu arith case */
+        v += ((opcode - pa->sym) / 6) << 3;
+    }
+    if (pa->instr_type & OPC_REG) {
+        for(i = 0; i < nb_ops; i++) {
+            if (op_type[i] & (OP_REG | OP_ST)) {
+                v += ops[i].reg;
+                break;
+            }
+        }
+        /* mov $im, %reg case */
+        if (pa->opcode == 0xb0 && s >= 1)
+            v += 7;
+    }
+    if (pa->instr_type & OPC_B)
+        v += s;
+    if (pa->instr_type & OPC_TEST)
+        v += test_bits[opcode - pa->sym]; 
+    if (pa->instr_type & OPC_SHORTJMP) {
+        Sym *sym;
+        int jmp_disp;
+
+        /* see if we can really generate the jump with a byte offset */
+        sym = ops[0].e.sym;
+        if (!sym)
+            goto no_short_jump;
+        if (sym->r != cur_text_section->sh_num)
+            goto no_short_jump;
+        jmp_disp = ops[0].e.v + (long)sym->next - ind - 2;
+        if (jmp_disp == (int8_t)jmp_disp) {
+            /* OK to generate jump */
+            is_short_jmp = 1;
+            ops[0].e.v = jmp_disp;
+        } else {
+        no_short_jump:
+            if (pa->instr_type & OPC_JMP) {
+                /* long jump will be allowed. need to modify the
+                   opcode slightly */
+                if (v == 0xeb)
+                    v = 0xe9;
+                else 
+                    v += 0x0f10;
+            } else {
+                error("invalid displacement");
+            }
+        }
+    }
+    op1 = v >> 8; /* non zero for two byte opcodes */
+    if (op1)
+        g(op1);
+    g(v);
+    /* search which operand will used for modrm */
+    modrm_index = 0;
+    if (pa->instr_type & OPC_SHIFT) {
+        reg = (opcode - pa->sym) >> 2; 
+        if (reg == 6)
+            reg = 7;
+    } else if (pa->instr_type & OPC_ARITH) {
+        reg = (opcode - pa->sym) >> 2;
+    } else if (pa->instr_type & OPC_FARITH) {
+        reg = (opcode - pa->sym) / 6;
+    } else {
+        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
+    }
+    if (pa->instr_type & OPC_MODRM) {
+        /* first look for an ea operand */
+        for(i = 0;i < nb_ops; i++) {
+            if (op_type[i] & OP_EA)
+                goto modrm_found;
+        }
+        /* then if not found, a register or indirection (shift instructions) */
+        for(i = 0;i < nb_ops; i++) {
+            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
+                goto modrm_found;
+        }
+#ifdef ASM_DEBUG
+        error("bad op table");
+#endif      
+    modrm_found:
+        modrm_index = i;
+        /* if a register is used in another operand then it is
+           used instead of group */
+        for(i = 0;i < nb_ops; i++) {
+            v = op_type[i];
+            if (i != modrm_index && 
+                (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
+                reg = ops[i].reg;
+                break;
+            }
+        }
+
+        asm_modrm(reg, &ops[modrm_index]);
+    }
+
+    /* emit constants */
+    if (pa->opcode == 0x9a || pa->opcode == 0xea) {
+        /* ljmp or lcall kludge */
+        gen_expr32(&ops[1].e);
+        if (ops[0].e.sym)
+            error("cannot relocate");
+        gen_le16(ops[0].e.v);
+    } else {
+        for(i = 0;i < nb_ops; i++) {
+            v = op_type[i];
+            if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM8S | OP_ADDR)) {
+                /* if multiple sizes are given it means we must look
+                   at the op size */
+                if (v == (OP_IM8 | OP_IM16 | OP_IM32) ||
+                    v == (OP_IM16 | OP_IM32)) {
+                    if (ss == 0)
+                        v = OP_IM8;
+                    else if (ss == 1)
+                        v = OP_IM16;
+                    else
+                        v = OP_IM32;
+                }
+                if (v & (OP_IM8 | OP_IM8S)) {
+                    if (ops[i].e.sym)
+                        goto error_relocate;
+                    g(ops[i].e.v);
+                } else if (v & OP_IM16) {
+                    if (ops[i].e.sym) {
+                    error_relocate:
+                        error("cannot relocate");
+                    }
+                    gen_le16(ops[i].e.v);
+                } else {
+                    if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
+                        if (is_short_jmp)
+                            g(ops[i].e.v);
+                        else
+                            gen_disp32(&ops[i].e);
+                    } else {
+                        gen_expr32(&ops[i].e);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#define NB_SAVED_REGS 3
+#define NB_ASM_REGS 8
+
+/* return the allocation priority of a constraint string (operands
+   with the lowest value are allocated first) */
+static inline int constraint_priority(const char *str)
+{
+    int worst, ch, rank;
+
+    /* when several letters are given, the highest value (i.e. the
+       lowest priority) among them is kept */
+    worst = 0;
+
+    for(; *str != '\0'; str++) {
+        ch = *str;
+        switch(ch) {
+        case 'A':
+            /* eax and edx together */
+            rank = 0;
+            break;
+        case 'a': case 'b':
+        case 'c': case 'd':
+        case 'S': case 'D':
+            /* one specific register */
+            rank = 1;
+            break;
+        case 'q':
+            /* eax, ebx, ecx or edx */
+            rank = 2;
+            break;
+        case 'r':
+            /* any general register */
+            rank = 3;
+            break;
+        case 'N': case 'M':
+        case 'I': case 'i':
+        case 'm': case 'g':
+            /* constant, memory or general operand */
+            rank = 4;
+            break;
+        default:
+            error("unknown constraint '%c'", ch);
+            rank = 0;
+            break;
+        }
+        if (worst < rank)
+            worst = rank;
+    }
+
+    return worst;
+}
+
+static const char *skip_constraint_modifiers(const char *p)
+{
+    for(; *p == '=' || *p == '&' || *p == '+' || *p == '%'; p++)
+        ; /* step over the leading '=', '&', '+' and '%' modifiers */
+    return p;
+}
+
+#define REG_OUT_MASK 0x01
+#define REG_IN_MASK  0x02
+
+#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
+
+static void asm_compute_constraints(ASMOperand *operands, 
+                                    int nb_operands, int nb_outputs, 
+                                    const uint8_t *clobber_regs,
+                                    int *pout_reg)
+{
+    ASMOperand *op;
+    int sorted_op[MAX_ASM_OPERANDS]; /* operand indexes by increasing priority */
+    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
+    const char *str;
+    uint8_t regs_allocated[NB_ASM_REGS]; /* REG_IN_MASK / REG_OUT_MASK per register */
+    /* choose a register (op->reg) for each operand whose constraint needs one */
+    /* init fields */
+    for(i=0;i<nb_operands;i++) {
+        op = &operands[i];
+        op->input_index = -1;
+        op->ref_index = -1;
+        op->reg = -1;
+        op->is_memory = 0;
+        op->is_rw = 0;
+    }
+    /* compute constraint priority and evaluate references to output
+       constraints if input constraints */
+    for(i=0;i<nb_operands;i++) {
+        op = &operands[i];
+        str = op->constraint;
+        str = skip_constraint_modifiers(str);
+        if (isnum(*str) || *str == '[') {
+            /* this is a reference to another constraint */
+            k = find_constraint(operands, nb_operands, str, NULL);
+            if ((unsigned)k >= i || i < nb_outputs) /* only inputs may refer, and only to earlier operands */
+                error("invalid reference in constraint %d ('%s')",
+                      i, str);
+            op->ref_index = k;
+            if (operands[k].input_index >= 0)
+                error("cannot reference twice the same operand");
+            operands[k].input_index = i;
+            op->priority = 5; /* sorted last; gets the register of operand k */
+        } else {
+            op->priority = constraint_priority(str);
+        }
+    }
+    /* (the sort below exchanges entries in place, lowest value first) */
+    /* sort operands according to their priority */
+    for(i=0;i<nb_operands;i++)
+        sorted_op[i] = i;
+    for(i=0;i<nb_operands - 1;i++) {
+        for(j=i+1;j<nb_operands;j++) {
+            p1 = operands[sorted_op[i]].priority; 
+            p2 = operands[sorted_op[j]].priority;
+            if (p2 < p1) {
+                tmp = sorted_op[i];
+                sorted_op[i] = sorted_op[j];
+                sorted_op[j] = tmp;
+            }
+        }
+    }
+
+    for(i = 0;i < NB_ASM_REGS; i++) {
+        if (clobber_regs[i]) /* clobbered registers are never available */
+            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
+        else
+            regs_allocated[i] = 0;
+    }
+    /* esp cannot be used */
+    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK; 
+    /* ebp cannot be used yet */
+    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK; 
+
+    /* allocate registers and generate corresponding asm moves */
+    for(i=0;i<nb_operands;i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        str = op->constraint;
+        /* no need to allocate references */
+        if (op->ref_index >= 0)
+            continue;
+        /* select if register is used for output, input or both */
+        if (op->input_index >= 0) {
+            reg_mask = REG_IN_MASK | REG_OUT_MASK;
+        } else if (j < nb_outputs) {
+            reg_mask = REG_OUT_MASK;
+        } else {
+            reg_mask = REG_IN_MASK;
+        }
+    try_next: /* try the next character of the constraint string */
+        c = *str++;
+        switch(c) {
+        case '=':
+            goto try_next;
+        case '+':
+            op->is_rw = 1;
+            /* FALL THRU */
+        case '&':
+            if (j >= nb_outputs)
+                error("'%c' modifier can only be applied to outputs", c);
+            reg_mask = REG_IN_MASK | REG_OUT_MASK;
+            goto try_next;
+        case 'A':
+            /* allocate both eax and edx */
+            if (is_reg_allocated(TREG_EAX) || 
+                is_reg_allocated(TREG_EDX))
+                goto try_next;
+            op->is_llong = 1;
+            op->reg = TREG_EAX;
+            regs_allocated[TREG_EAX] |= reg_mask;
+            regs_allocated[TREG_EDX] |= reg_mask;
+            break;
+        case 'a':
+            reg = TREG_EAX;
+            goto alloc_reg;
+        case 'b':
+            reg = 3; /* ebx */
+            goto alloc_reg;
+        case 'c':
+            reg = TREG_ECX;
+            goto alloc_reg;
+        case 'd':
+            reg = TREG_EDX;
+            goto alloc_reg;
+        case 'S':
+            reg = 6; /* esi */
+            goto alloc_reg;
+        case 'D':
+            reg = 7; /* edi */
+        alloc_reg:
+            if (is_reg_allocated(reg))
+                goto try_next;
+            goto reg_found;
+        case 'q':
+            /* eax, ebx, ecx or edx */
+            for(reg = 0; reg < 4; reg++) {
+                if (!is_reg_allocated(reg))
+                    goto reg_found;
+            }
+            goto try_next;
+        case 'r':
+            /* any general register */
+            for(reg = 0; reg < 8; reg++) {
+                if (!is_reg_allocated(reg))
+                    goto reg_found;
+            }
+            goto try_next;
+        reg_found:
+            /* now we can reload in the register */
+            op->is_llong = 0;
+            op->reg = reg;
+            regs_allocated[reg] |= reg_mask;
+            break;
+        case 'i':
+            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
+                goto try_next;
+            break;
+        case 'I':
+        case 'N':
+        case 'M':
+            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
+                goto try_next;
+            break;
+        case 'm':
+        case 'g':
+            /* nothing special to do because the operand is already in
+               memory, except if the pointer itself is stored in a
+               memory variable (VT_LLOCAL case) */
+            /* XXX: fix constant case */
+            /* if it is a reference to a memory zone, it must lie
+               in a register, so we reserve the register in the
+               input registers and a load will be generated
+               later */
+            if (j < nb_outputs || c == 'm') {
+                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
+                    /* any general register */
+                    for(reg = 0; reg < 8; reg++) {
+                        if (!(regs_allocated[reg] & REG_IN_MASK))
+                            goto reg_found1;
+                    }
+                    goto try_next;
+                reg_found1:
+                    /* now we can reload in the register */
+                    regs_allocated[reg] |= REG_IN_MASK;
+                    op->reg = reg;
+                    op->is_memory = 1;
+                }
+            }
+            break;
+        default: /* unknown character, or end of string with nothing satisfied */
+            error("asm constraint %d ('%s') could not be satisfied", 
+                  j, op->constraint);
+            break;
+        }
+        /* if a reference is present for that operand, we assign it too */
+        if (op->input_index >= 0) {
+            operands[op->input_index].reg = op->reg;
+            operands[op->input_index].is_llong = op->is_llong;
+        }
+    }
+    /* (*pout_reg stays -1 when no operand needs such a register) */
+    /* compute out_reg. It is used to store outputs registers to memory
+       locations references by pointers (VT_LLOCAL case) */
+    *pout_reg = -1;
+    for(i=0;i<nb_operands;i++) {
+        op = &operands[i];
+        if (op->reg >= 0 && 
+            (op->vt->r & VT_VALMASK) == VT_LLOCAL  &&
+            !op->is_memory) {
+            for(reg = 0; reg < 8; reg++) {
+                if (!(regs_allocated[reg] & REG_OUT_MASK))
+                    goto reg_found2;
+            }
+            error("could not find free output register for reloading");
+        reg_found2:
+            *pout_reg = reg;
+            break;
+        }
+    }
+    
+    /* print sorted constraints */
+#ifdef ASM_DEBUG
+    for(i=0;i<nb_operands;i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", 
+               j,                
+               op->id ? get_tok_str(op->id, NULL) : "", 
+               op->constraint,
+               op->vt->r,
+               op->reg);
+    }
+    if (*pout_reg >= 0)
+        printf("out_reg=%d\n", *pout_reg);
+#endif
+}
+
+/* Append the assembler text (AT&T syntax) for operand 'sv' to 'add_str'.
+   'modifier' is the operand modifier character taken from the asm
+   template; only 'b', 'h', 'w', 'c' and 'n' have an effect here. */
+static void subst_asm_operand(CString *add_str, SValue *sv, int modifier)
+{
+    int r, reg, size, val;
+    char buf[64];
+
+    r = sv->r;
+    if ((r & VT_VALMASK) == VT_CONST) {
+        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
+            cstr_ccat(add_str, '$');
+        if (r & VT_SYM) {
+            cstr_cat(add_str, get_tok_str(sv->sym->v, NULL));
+            if (sv->c.i == 0)
+                return; /* bare symbol: no "+offset" part */
+            cstr_ccat(add_str, '+');
+        }
+        val = sv->c.i;
+        if (modifier == 'n') /* 'n' prints the negated constant */
+            val = -val;
+        snprintf(buf, sizeof(buf), "%d", val); /* val, so that 'n' applies */
+        cstr_cat(add_str, buf);
+    } else if ((r & VT_VALMASK) == VT_LOCAL) {
+        snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i); /* ebp-relative */
+        cstr_cat(add_str, buf);
+    } else if (r & VT_LVAL) {
+        reg = r & VT_VALMASK;
+        if (reg >= VT_CONST)
+            error("internal compiler error");
+        snprintf(buf, sizeof(buf), "(%%%s)",
+                 get_tok_str(TOK_ASM_eax + reg, NULL));
+        cstr_cat(add_str, buf);
+    } else {
+        /* register case */
+        reg = r & VT_VALMASK;
+        if (reg >= VT_CONST)
+            error("internal compiler error");
+
+        /* choose register operand size */
+        if ((sv->type.t & VT_BTYPE) == VT_BYTE)
+            size = 1;
+        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
+            size = 2;
+        else
+            size = 4;
+        if (size == 1 && reg >= 4) /* registers 4..7 have no byte name */
+            size = 4;
+
+        if (modifier == 'b') {
+            if (reg >= 4)
+                error("cannot use byte register");
+            size = 1;
+        } else if (modifier == 'h') {
+            if (reg >= 4)
+                error("cannot use byte register");
+            size = -1; /* selects the high byte name (ah, ...) below */
+        } else if (modifier == 'w') {
+            size = 2;
+        }
+
+        switch(size) {
+        case -1:
+            reg = TOK_ASM_ah + reg;
+            break;
+        case 1:
+            reg = TOK_ASM_al + reg;
+            break;
+        case 2:
+            reg = TOK_ASM_ax + reg;
+            break;
+        default:
+            reg = TOK_ASM_eax + reg;
+            break;
+        }
+        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
+        cstr_cat(add_str, buf);
+    }
+}
+
+/* Emit the code surrounding an inline asm statement: the prolog
+   (register saves and operand loads) when is_output is 0, otherwise the
+   epilog (output stores and register restores). */
+static void asm_gen_code(ASMOperand *operands, int nb_operands,
+                         int nb_outputs, int is_output,
+                         uint8_t *clobber_regs,
+                         int out_reg)
+{
+    /* x86 register numbers (ebx, esi, edi) saved around the asm block */
+    static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
+    uint8_t in_use[NB_ASM_REGS];
+    ASMOperand *cur;
+    SValue tmp;
+    int k, r;
+
+    /* in_use = clobbered registers + registers assigned to operands */
+    memcpy(in_use, clobber_regs, sizeof(in_use));
+    for(k = 0; k < nb_operands; k++) {
+        if (operands[k].reg >= 0)
+            in_use[operands[k].reg] = 1;
+    }
+
+    if (is_output) {
+        /* epilog: write the output registers back */
+        for(k = 0; k < nb_outputs; k++) {
+            cur = &operands[k];
+            if (cur->reg < 0)
+                continue;
+            if ((cur->vt->r & VT_VALMASK) != VT_LLOCAL) {
+                store(cur->reg, cur->vt);
+                if (cur->is_llong) {
+                    /* second word of a long long comes from edx */
+                    tmp = *cur->vt;
+                    tmp.c.ul += 4;
+                    store(TREG_EDX, &tmp);
+                }
+            } else if (!cur->is_memory) {
+                /* reload the pointer kept in the local slot into
+                   out_reg, then store the register through it */
+                tmp = *cur->vt;
+                tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
+                load(out_reg, &tmp);
+                tmp.r = (tmp.r & ~VT_VALMASK) | out_reg;
+                store(cur->reg, &tmp);
+            }
+        }
+        /* pop the saved registers in reverse push order */
+        for(k = NB_SAVED_REGS - 1; k >= 0; k--) {
+            r = reg_saved[k];
+            if (in_use[r])
+                g(0x58 + r);
+        }
+        return;
+    }
+
+    /* prolog: push the saved registers that are in use */
+    for(k = 0; k < NB_SAVED_REGS; k++) {
+        r = reg_saved[k];
+        if (in_use[r])
+            g(0x50 + r);
+    }
+
+    /* then load every operand that needs it into its register */
+    for(k = 0; k < nb_operands; k++) {
+        cur = &operands[k];
+        if (cur->reg < 0)
+            continue;
+        if ((cur->vt->r & VT_VALMASK) == VT_LLOCAL && cur->is_memory) {
+            /* memory reference (input or output operand): load the
+               address held in the local slot */
+            tmp = *cur->vt;
+            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
+            load(cur->reg, &tmp);
+        } else if (k >= nb_outputs || cur->is_rw) {
+            /* input or read-write operand: load its value */
+            load(cur->reg, cur->vt);
+            if (cur->is_llong) {
+                tmp = *cur->vt;
+                tmp.c.ul += 4;
+                load(TREG_EDX, &tmp);
+            }
+        }
+    }
+}
+
+/* Mark the register named by clobber string 'str' in clobber_regs[];
+   "memory" and "cc" are accepted and need no bookkeeping. */
+static void asm_clobber(uint8_t *clobber_regs, const char *str)
+{
+    int tok, base;
+
+    if (strcmp(str, "memory") == 0 || strcmp(str, "cc") == 0)
+        return;
+    tok = tok_alloc(str, strlen(str))->tok;
+    base = 0;
+    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi)
+        base = TOK_ASM_eax; /* 32 bit register name */
+    else if (tok >= TOK_ASM_ax && tok <= TOK_ASM_di)
+        base = TOK_ASM_ax; /* 16 bit name of the same register */
+    else
+        error("invalid clobber register '%s'", str);
+    /* both name ranges map onto the same register index */
+    clobber_regs[tok - base] = 1;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i386/i386-asm.h	Sat May 12 00:15:39 2007 -0400
@@ -0,0 +1,446 @@
+     DEF_ASM_OP0(pusha, 0x60) /* must be first OP0 */
+     DEF_ASM_OP0(popa, 0x61)
+     DEF_ASM_OP0(clc, 0xf8)
+     DEF_ASM_OP0(cld, 0xfc)
+     DEF_ASM_OP0(cli, 0xfa)
+     DEF_ASM_OP0(clts, 0x0f06)
+     DEF_ASM_OP0(cmc, 0xf5)
+     DEF_ASM_OP0(lahf, 0x9f)
+     DEF_ASM_OP0(sahf, 0x9e)
+     DEF_ASM_OP0(pushfl, 0x9c)
+     DEF_ASM_OP0(popfl, 0x9d)
+     DEF_ASM_OP0(pushf, 0x9c) /* same encoding as pushfl */
+     DEF_ASM_OP0(popf, 0x9d) /* same encoding as popfl */
+     DEF_ASM_OP0(stc, 0xf9)
+     DEF_ASM_OP0(std, 0xfd)
+     DEF_ASM_OP0(sti, 0xfb)
+     DEF_ASM_OP0(aaa, 0x37)
+     DEF_ASM_OP0(aas, 0x3f)
+     DEF_ASM_OP0(daa, 0x27)
+     DEF_ASM_OP0(das, 0x2f)
+     DEF_ASM_OP0(aad, 0xd50a)
+     DEF_ASM_OP0(aam, 0xd40a)
+     DEF_ASM_OP0(cbw, 0x6698) /* 0x66 prefix + cwde opcode */
+     DEF_ASM_OP0(cwd, 0x6699) /* 0x66 prefix + cdq opcode */
+     DEF_ASM_OP0(cwde, 0x98)
+     DEF_ASM_OP0(cdq, 0x99)
+     DEF_ASM_OP0(cbtw, 0x6698) /* AT&T name of cbw */
+     DEF_ASM_OP0(cwtl, 0x98) /* AT&T name of cwde */
+     DEF_ASM_OP0(cwtd, 0x6699) /* AT&T name of cwd */
+     DEF_ASM_OP0(cltd, 0x99) /* AT&T name of cdq */
+     DEF_ASM_OP0(int3, 0xcc)
+     DEF_ASM_OP0(into, 0xce)
+     DEF_ASM_OP0(iret, 0xcf)
+     DEF_ASM_OP0(rsm, 0x0faa)
+     DEF_ASM_OP0(hlt, 0xf4)
+     DEF_ASM_OP0(wait, 0x9b)
+     DEF_ASM_OP0(nop, 0x90)
+     DEF_ASM_OP0(xlat, 0xd7)
+
+     /* string instructions; scmpb, slodb, smovb, sscab and sstob repeat the opcode of the entry above them */
+ALT(DEF_ASM_OP0L(cmpsb, 0xa6, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(scmpb, 0xa6, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(insb, 0x6c, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(outsb, 0x6e, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(lodsb, 0xac, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(slodb, 0xac, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(movsb, 0xa4, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(smovb, 0xa4, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(scasb, 0xae, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(sscab, 0xae, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(stosb, 0xaa, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(sstob, 0xaa, 0, OPC_BWL))
+
+     /* bit scan (bsf/bsr) and bit test (bt/bts/btr/btc); the 0x0fba entries take an 8 bit immediate */
+     
+ALT(DEF_ASM_OP2(bsfw, 0x0fbc, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(bsrw, 0x0fbd, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
+
+ALT(DEF_ASM_OP2(btw, 0x0fa3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btw, 0x0fba, 4, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btsw, 0x0fab, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btsw, 0x0fba, 5, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btrw, 0x0fb3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+     /* prefixes */
+     DEF_ASM_OP0(aword, 0x67) /* address-size override */
+     DEF_ASM_OP0(addr16, 0x67) /* same prefix as aword */
+     DEF_ASM_OP0(word, 0x66) /* operand-size override */
+     DEF_ASM_OP0(data16, 0x66) /* same prefix as word */
+     DEF_ASM_OP0(lock, 0xf0)
+     DEF_ASM_OP0(rep, 0xf3)
+     DEF_ASM_OP0(repe, 0xf3)
+     DEF_ASM_OP0(repz, 0xf3)
+     DEF_ASM_OP0(repne, 0xf2)
+     DEF_ASM_OP0(repnz, 0xf2)
+             
+     DEF_ASM_OP0(invd, 0x0f08) /* from here to ud2: operand-less instructions, not prefixes */
+     DEF_ASM_OP0(wbinvd, 0x0f09)
+     DEF_ASM_OP0(cpuid, 0x0fa2)
+     DEF_ASM_OP0(wrmsr, 0x0f30)
+     DEF_ASM_OP0(rdtsc, 0x0f31)
+     DEF_ASM_OP0(rdmsr, 0x0f32)
+     DEF_ASM_OP0(rdpmc, 0x0f33)
+     DEF_ASM_OP0(ud2, 0x0f0b)
+
+     /* NOTE: the entries below follow the order of the gas opcode definitions */
+ALT(DEF_ASM_OP2(movb, 0xa0, 0, OPC_BWL, OPT_ADDR, OPT_EAX))
+ALT(DEF_ASM_OP2(movb, 0xa2, 0, OPC_BWL, OPT_EAX, OPT_ADDR))
+ALT(DEF_ASM_OP2(movb, 0x88, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWL, OPT_IM, OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WL, OPT_SEG, OPT_EA | OPT_REG)) /* segment register moves */
+ALT(DEF_ASM_OP2(movw, 0x8e, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_SEG))
+
+ALT(DEF_ASM_OP2(movw, 0x0f20, 0, OPC_MODRM | OPC_WL, OPT_CR, OPT_REG32)) /* moves from/to the OPT_CR, OPT_DB and OPT_TR register sets */
+ALT(DEF_ASM_OP2(movw, 0x0f21, 0, OPC_MODRM | OPC_WL, OPT_DB, OPT_REG32))
+ALT(DEF_ASM_OP2(movw, 0x0f24, 0, OPC_MODRM | OPC_WL, OPT_TR, OPT_REG32))
+ALT(DEF_ASM_OP2(movw, 0x0f22, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_CR))
+ALT(DEF_ASM_OP2(movw, 0x0f23, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_DB))
+ALT(DEF_ASM_OP2(movw, 0x0f26, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_TR))
+
+ALT(DEF_ASM_OP2(movsbl, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REG32)) /* sign (movs*) and zero (movz*) extending moves */
+ALT(DEF_ASM_OP2(movsbw, 0x0fbe, 0, OPC_MODRM | OPC_D16, OPT_REG8 | OPT_EA, OPT_REG16))
+ALT(DEF_ASM_OP2(movswl, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(movzbw, 0x0fb6, 0, OPC_MODRM | OPC_WL, OPT_REG8 | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(movzwl, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
+
+ALT(DEF_ASM_OP1(pushw, 0x50, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(pushw, 0xff, 6, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP1(pushw, 0x6a, 0, OPC_WL, OPT_IM8S))
+ALT(DEF_ASM_OP1(pushw, 0x68, 0, OPC_WL, OPT_IM32))
+ALT(DEF_ASM_OP1(pushw, 0x06, 0, OPC_WL, OPT_SEG))
+
+ALT(DEF_ASM_OP1(popw, 0x58, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(popw, 0x8f, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP1(popw, 0x07, 0, OPC_WL, OPT_SEG))
+
+ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_REG, OPT_EAX))
+ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_EAX, OPT_REG))
+ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+
+ALT(DEF_ASM_OP2(inb, 0xe4, 0, OPC_BWL, OPT_IM8, OPT_EAX))
+ALT(DEF_ASM_OP1(inb, 0xe4, 0, OPC_BWL, OPT_IM8)) /* one-operand form: same opcode, accumulator operand omitted */
+ALT(DEF_ASM_OP2(inb, 0xec, 0, OPC_BWL, OPT_DX, OPT_EAX))
+ALT(DEF_ASM_OP1(inb, 0xec, 0, OPC_BWL, OPT_DX))
+
+ALT(DEF_ASM_OP2(outb, 0xe6, 0, OPC_BWL, OPT_EAX, OPT_IM8))
+ALT(DEF_ASM_OP1(outb, 0xe6, 0, OPC_BWL, OPT_IM8)) /* one-operand form: same opcode, accumulator operand omitted */
+ALT(DEF_ASM_OP2(outb, 0xee, 0, OPC_BWL, OPT_EAX, OPT_DX))
+ALT(DEF_ASM_OP1(outb, 0xee, 0, OPC_BWL, OPT_DX))
+
+ALT(DEF_ASM_OP2(leaw, 0x8d, 0, OPC_MODRM | OPC_WL, OPT_EA, OPT_REG))
+
+ALT(DEF_ASM_OP2(les, 0xc4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lds, 0xc5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lss, 0x0fb2, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lfs, 0x0fb4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+
+     /* arithmetic */
+ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */
+ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWL, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WL, OPT_IM8S, OPT_EA | OPT_REG)) /* short form with a signed 8 bit immediate */
+
+ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWL, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
+
+ALT(DEF_ASM_OP1(incw, 0x40, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(incb, 0xfe, 0, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(decw, 0x48, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(decb, 0xfe, 1, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP1(notb, 0xf6, 2, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(negb, 0xf6, 3, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP1(mulb, 0xf6, 4, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(imulb, 0xf6, 5, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP2(imulw, 0x0faf, 0, OPC_MODRM | OPC_WL, OPT_REG | OPT_EA, OPT_REG))
+ALT(DEF_ASM_OP3(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW)) /* two-operand spelling of the entry above */
+ALT(DEF_ASM_OP3(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW)) /* two-operand spelling of the entry above */
+
+ALT(DEF_ASM_OP1(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP2(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
+ALT(DEF_ASM_OP1(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP2(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
+
+     /* shifts and rotates; the count is an 8 bit immediate, %cl, or omitted */
+ALT(DEF_ASM_OP2(rolb, 0xc0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_IM8, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(rolb, 0xd2, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_CL, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP1(rolb, 0xd0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_EA | OPT_REG))
+
+ALT(DEF_ASM_OP3(shldw, 0x0fa4, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP3(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP2(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW)) /* same opcode as the %cl form, count operand omitted */
+ALT(DEF_ASM_OP3(shrdw, 0x0fac, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP3(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP2(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW)) /* same opcode as the %cl form, count operand omitted */
+
+ALT(DEF_ASM_OP1(call, 0xff, 2, OPC_MODRM, OPT_INDIR)) /* indirect call */
+ALT(DEF_ASM_OP1(call, 0xe8, 0, OPC_JMP, OPT_ADDR))
+ALT(DEF_ASM_OP1(jmp, 0xff, 4, OPC_MODRM, OPT_INDIR)) /* indirect jump */
+ALT(DEF_ASM_OP1(jmp, 0xeb, 0, OPC_SHORTJMP | OPC_JMP, OPT_ADDR))
+
+ALT(DEF_ASM_OP2(lcall, 0x9a, 0, 0, OPT_IM16, OPT_IM32)) /* far call/jump: 16 bit and 32 bit immediates */
+ALT(DEF_ASM_OP1(lcall, 0xff, 3, 0, OPT_EA))
+ALT(DEF_ASM_OP2(ljmp, 0xea, 0, 0, OPT_IM16, OPT_IM32))
+ALT(DEF_ASM_OP1(ljmp, 0xff, 5, 0, OPT_EA))
+
+ALT(DEF_ASM_OP1(int, 0xcd, 0, 0, OPT_IM8))
+ALT(DEF_ASM_OP1(seto, 0x0f90, 0, OPC_MODRM | OPC_TEST, OPT_REG8 | OPT_EA))
+    DEF_ASM_OP2(enter, 0xc8, 0, 0, OPT_IM16, OPT_IM8)
+    DEF_ASM_OP0(leave, 0xc9)
+    DEF_ASM_OP0(ret, 0xc3)
+ALT(DEF_ASM_OP1(ret, 0xc2, 0, 0, OPT_IM16)) /* ret with a 16 bit immediate operand */
+    DEF_ASM_OP0(lret, 0xcb)
+ALT(DEF_ASM_OP1(lret, 0xca, 0, 0, OPT_IM16)) /* lret with a 16 bit immediate operand */
+
+ALT(DEF_ASM_OP1(jo, 0x70, 0, OPC_SHORTJMP | OPC_JMP | OPC_TEST, OPT_ADDR))
+    DEF_ASM_OP1(loopne, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loopnz, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR) /* same opcode as loopne */
+    DEF_ASM_OP1(loope, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loopz, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR) /* same opcode as loope */
+    DEF_ASM_OP1(loop, 0xe2, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(jecxz, 0xe3, 0, OPC_SHORTJMP, OPT_ADDR)
+     
+     /* float (x87 FPU) */
+     /* specific fcomp handling */
+ALT(DEF_ASM_OP0L(fcomp, 0xd8d9, 0, 0))
+
+ALT(DEF_ASM_OP1(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST))
+ALT(DEF_ASM_OP2(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
+ALT(DEF_ASM_OP0L(fadd, 0xdec1, 0, OPC_FARITH))
+ALT(DEF_ASM_OP1(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST))
+ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
+ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST0, OPT_ST))
+ALT(DEF_ASM_OP0L(faddp, 0xdec1, 0, OPC_FARITH))
+ALT(DEF_ASM_OP1(fadds, 0xd8, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) /* 32 bit float memory operand */
+ALT(DEF_ASM_OP1(fiaddl, 0xda, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) /* 32 bit integer memory operand */
+ALT(DEF_ASM_OP1(faddl, 0xdc, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) /* 64 bit float memory operand */
+ALT(DEF_ASM_OP1(fiadds, 0xde, 0, OPC_FARITH | OPC_MODRM, OPT_EA)) /* 16 bit integer memory operand */
+
+     DEF_ASM_OP0(fucompp, 0xdae9)
+     DEF_ASM_OP0(ftst, 0xd9e4)
+     DEF_ASM_OP0(fxam, 0xd9e5)
+     DEF_ASM_OP0(fld1, 0xd9e8)
+     DEF_ASM_OP0(fldl2t, 0xd9e9)
+     DEF_ASM_OP0(fldl2e, 0xd9ea)
+     DEF_ASM_OP0(fldpi, 0xd9eb)
+     DEF_ASM_OP0(fldlg2, 0xd9ec)
+     DEF_ASM_OP0(fldln2, 0xd9ed)
+     DEF_ASM_OP0(fldz, 0xd9ee)
+
+     DEF_ASM_OP0(f2xm1, 0xd9f0)
+     DEF_ASM_OP0(fyl2x, 0xd9f1)
+     DEF_ASM_OP0(fptan, 0xd9f2)
+     DEF_ASM_OP0(fpatan, 0xd9f3)
+     DEF_ASM_OP0(fxtract, 0xd9f4)
+     DEF_ASM_OP0(fprem1, 0xd9f5)
+     DEF_ASM_OP0(fdecstp, 0xd9f6)
+     DEF_ASM_OP0(fincstp, 0xd9f7)
+     DEF_ASM_OP0(fprem, 0xd9f8)
+     DEF_ASM_OP0(fyl2xp1, 0xd9f9)
+     DEF_ASM_OP0(fsqrt, 0xd9fa)
+     DEF_ASM_OP0(fsincos, 0xd9fb)
+     DEF_ASM_OP0(frndint, 0xd9fc)
+     DEF_ASM_OP0(fscale, 0xd9fd)
+     DEF_ASM_OP0(fsin, 0xd9fe)
+     DEF_ASM_OP0(fcos, 0xd9ff)
+     DEF_ASM_OP0(fchs, 0xd9e0)
+     DEF_ASM_OP0(fabs, 0xd9e1)
+     DEF_ASM_OP0(fninit, 0xdbe3)
+     DEF_ASM_OP0(fnclex, 0xdbe2)
+     DEF_ASM_OP0(fnop, 0xd9d0)
+     DEF_ASM_OP0(fwait, 0x9b) /* same opcode as wait */
+
+    /* fp load: register forms first, then memory forms by operand size */
+    DEF_ASM_OP1(fld, 0xd9c0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fldl, 0xd9c0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(flds, 0xd9, 0, OPC_MODRM, OPT_EA)
+ALT(DEF_ASM_OP1(fldl, 0xdd, 0, OPC_MODRM, OPT_EA))
+    DEF_ASM_OP1(fildl, 0xdb, 0, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fildq, 0xdf, 5, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fildll, 0xdf, 5, OPC_MODRM,OPT_EA) /* same encoding as fildq */
+    DEF_ASM_OP1(fldt, 0xdb, 5, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fbld, 0xdf, 4, OPC_MODRM, OPT_EA)
+    
+    /* fp store: the names ending in 'p' (before any size suffix) also pop the FPU stack */
+    DEF_ASM_OP1(fst, 0xddd0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fstl, 0xddd0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fsts, 0xd9, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fstps, 0xd9, 3, OPC_MODRM, OPT_EA)
+ALT(DEF_ASM_OP1(fstl, 0xdd, 2, OPC_MODRM, OPT_EA))
+    DEF_ASM_OP1(fstpl, 0xdd, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fist, 0xdf, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistp, 0xdf, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistl, 0xdb, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistpl, 0xdb, 3, OPC_MODRM, OPT_EA)
+
+    DEF_ASM_OP1(fstp, 0xddd8, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fistpq, 0xdf, 7, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistpll, 0xdf, 7, OPC_MODRM, OPT_EA) /* same encoding as fistpq */
+    DEF_ASM_OP1(fstpt, 0xdb, 7, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fbstp, 0xdf, 6, OPC_MODRM, OPT_EA)
+
+    /* exchange */
+    DEF_ASM_OP0(fxch, 0xd9c9)
+ALT(DEF_ASM_OP1(fxch, 0xd9c8, 0, OPC_REG, OPT_ST))
+
+    /* misc FPU; an entry with OPC_FWAIT shares its opcode with the fn* entry of the same name */
+    DEF_ASM_OP1(fucom, 0xdde0, 0, OPC_REG, OPT_ST )
+    DEF_ASM_OP1(fucomp, 0xdde8, 0, OPC_REG, OPT_ST )
+
+    DEF_ASM_OP0L(finit, 0xdbe3, 0, OPC_FWAIT)
+    DEF_ASM_OP1(fldcw, 0xd9, 5, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fnstcw, 0xd9, 7, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fstcw, 0xd9, 7, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP0(fnstsw, 0xdfe0)
+ALT(DEF_ASM_OP1(fnstsw, 0xdfe0, 0, 0, OPT_EAX ))
+ALT(DEF_ASM_OP1(fnstsw, 0xdd, 7, OPC_MODRM, OPT_EA ))
+    DEF_ASM_OP1(fstsw, 0xdfe0, 0, OPC_FWAIT, OPT_EAX )
+ALT(DEF_ASM_OP0L(fstsw, 0xdfe0, 0, OPC_FWAIT))
+ALT(DEF_ASM_OP1(fstsw, 0xdd, 7, OPC_MODRM | OPC_FWAIT, OPT_EA ))
+    DEF_ASM_OP0L(fclex, 0xdbe2, 0, OPC_FWAIT)
+    DEF_ASM_OP1(fnstenv, 0xd9, 6, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fstenv, 0xd9, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP1(fldenv, 0xd9, 4, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fnsave, 0xdd, 6, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fsave, 0xdd, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP1(frstor, 0xdd, 4, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(ffree, 0xddc0, 4, OPC_REG, OPT_ST )
+    DEF_ASM_OP1(ffreep, 0xdfc0, 4, OPC_REG, OPT_ST )
+    DEF_ASM_OP1(fxsave, 0x0fae, 0, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fxrstor, 0x0fae, 1, OPC_MODRM, OPT_EA )
+
+    /* segments: selector checks and descriptor table / task register load and store */
+    DEF_ASM_OP2(arpl, 0x63, 0, OPC_MODRM, OPT_REG16, OPT_REG16 | OPT_EA)
+    DEF_ASM_OP2(lar, 0x0f02, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG32)
+    DEF_ASM_OP1(lgdt, 0x0f01, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(lidt, 0x0f01, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(lldt, 0x0f00, 2, OPC_MODRM, OPT_EA | OPT_REG)
+    DEF_ASM_OP1(lmsw, 0x0f01, 6, OPC_MODRM, OPT_EA | OPT_REG)
+ALT(DEF_ASM_OP2(lslw, 0x0f03, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_REG))
+    DEF_ASM_OP1(ltr, 0x0f00, 3, OPC_MODRM, OPT_EA | OPT_REG)
+    DEF_ASM_OP1(sgdt, 0x0f01, 0, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(sidt, 0x0f01, 1, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(sldt, 0x0f00, 0, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(smsw, 0x0f01, 4, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM, OPT_REG16| OPT_EA)
+    DEF_ASM_OP1(verr, 0x0f00, 4, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(verw, 0x0f00, 5, OPC_MODRM, OPT_REG | OPT_EA)
+
+    /* instructions added with the 486 */
+    DEF_ASM_OP1(bswap, 0x0fc8, 0, OPC_REG, OPT_REG32 )
+ALT(DEF_ASM_OP2(xaddb, 0x0fc0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
+ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
+    DEF_ASM_OP1(invlpg, 0x0f01, 7, OPC_MODRM, OPT_EA )
+
+    DEF_ASM_OP2(boundl, 0x62, 0, OPC_MODRM, OPT_REG32, OPT_EA)
+    DEF_ASM_OP2(boundw, 0x62, 0, OPC_MODRM | OPC_D16, OPT_REG16, OPT_EA)
+
+    /* instructions added with the pentium */
+    DEF_ASM_OP1(cmpxchg8b, 0x0fc7, 1, OPC_MODRM, OPT_EA )
+    
+    /* instructions added with the pentium pro: conditional moves and flag-setting FPU compares */
+    ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST, OPT_REG32 | OPT_EA, OPT_REG32))
+
+    DEF_ASM_OP2(fcmovb, 0xdac0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmove, 0xdac8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovbe, 0xdad0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovu, 0xdad8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnb, 0xdbc0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovne, 0xdbc8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnbe, 0xdbd0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnu, 0xdbd8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+
+    DEF_ASM_OP2(fucomi, 0xdbe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcomi, 0xdbf0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fucomip, 0xdfe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcomip, 0xdff0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+
+    /* mmx; the ALT entries of the shift instructions are their 8 bit immediate count forms */
+    DEF_ASM_OP0(emms, 0x0f77) /* must be last OP0 */
+    DEF_ASM_OP2(movd, 0x0f6e, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_MMX )
+ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_REG32 ))
+    DEF_ASM_OP2(movq, 0x0f6f, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(movq, 0x0f7f, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_MMX ))
+    DEF_ASM_OP2(packssdw, 0x0f6b, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(packsswb, 0x0f63, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(packuswb, 0x0f67, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddb, 0x0ffc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddw, 0x0ffd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddd, 0x0ffe, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddsb, 0x0fec, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddsw, 0x0fed, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddusb, 0x0fdc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddusw, 0x0fdd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pand, 0x0fdb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pandn, 0x0fdf, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqb, 0x0f74, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqw, 0x0f75, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqd, 0x0f76, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtb, 0x0f64, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtw, 0x0f65, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtd, 0x0f66, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmaddwd, 0x0ff5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmulhw, 0x0fe5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmullw, 0x0fd5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(por, 0x0feb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psllw, 0x0ff1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psllw, 0x0f71, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(pslld, 0x0ff2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(pslld, 0x0f72, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psllq, 0x0ff3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psllq, 0x0f73, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psraw, 0x0fe1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psraw, 0x0f71, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrad, 0x0fe2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrad, 0x0f72, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrlw, 0x0fd1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrlw, 0x0f71, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrld, 0x0fd2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrld, 0x0f72, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrlq, 0x0fd3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psubb, 0x0ff8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubw, 0x0ff9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubd, 0x0ffa, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubsb, 0x0fe8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubsw, 0x0fe9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubusb, 0x0fd8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubusw, 0x0fd9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhbw, 0x0f68, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhwd, 0x0f69, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhdq, 0x0f6a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+
+#undef ALT /* table macros are dropped here; presumably the includer redefines them before each inclusion -- confirm */
+#undef DEF_ASM_OP0
+#undef DEF_ASM_OP0L
+#undef DEF_ASM_OP1
+#undef DEF_ASM_OP2
+#undef DEF_ASM_OP3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i386/i386-gen.c	Sat May 12 00:15:39 2007 -0400
@@ -0,0 +1,1033 @@
+/*
+ *  X86 code generator for TCC
+ * 
+ *  Copyright (c) 2001-2004 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* number of available registers */
+#define NB_REGS             4
+
+/* a register can belong to several classes. The classes must be
+   sorted from more general to more precise (see the gv2() code, which
+   makes assumptions about it). */
+#define RC_INT     0x0001 /* generic integer register */
+#define RC_FLOAT   0x0002 /* generic float register */
+#define RC_EAX     0x0004
+#define RC_ST0     0x0008 
+#define RC_ECX     0x0010
+#define RC_EDX     0x0020
+#define RC_IRET    RC_EAX /* function return: integer register */
+#define RC_LRET    RC_EDX /* function return: second integer register */
+#define RC_FRET    RC_ST0 /* function return: float register */
+
+/* pretty names for the registers */
+enum {
+    TREG_EAX = 0,
+    TREG_ECX,
+    TREG_EDX,
+    TREG_ST0,
+};
+
+int reg_classes[NB_REGS] = {
+    /* eax */ RC_INT | RC_EAX,
+    /* ecx */ RC_INT | RC_ECX,
+    /* edx */ RC_INT | RC_EDX,
+    /* st0 */ RC_FLOAT | RC_ST0,
+};
+
+/* return registers for function */
+#define REG_IRET TREG_EAX /* single word int return register */
+#define REG_LRET TREG_EDX /* second word return register (for long long) */
+#define REG_FRET TREG_ST0 /* float return register */
+
+/* defined if function parameters must be evaluated in reverse order */
+#define INVERT_FUNC_PARAMS
+
+/* defined if structures are passed as pointers. Otherwise structures
+   are directly pushed on stack. */
+//#define FUNC_STRUCT_PARAM_AS_PTR
+
+/* pointer size, in bytes */
+#define PTR_SIZE 4
+
+/* long double size and alignment, in bytes */
+#define LDOUBLE_SIZE  12
+#define LDOUBLE_ALIGN 4
+/* maximum alignment (for aligned attribute support) */
+#define MAX_ALIGN     8
+
+/******************************************************/
+/* ELF defines */
+
+#define EM_TCC_TARGET EM_386
+
+/* relocation type for 32 bit data relocation */
+#define R_DATA_32   R_386_32
+#define R_JMP_SLOT  R_386_JMP_SLOT
+#define R_COPY      R_386_COPY
+
+#define ELF_START_ADDR 0x08048000
+#define ELF_PAGE_SIZE  0x1000
+
+/******************************************************/
+
+static unsigned long func_sub_sp_offset;
+static unsigned long func_bound_offset;
+static int func_ret_sub;
+
+/* store 'c' at offset 'ind' of the current text section (grown if needed) and advance 'ind' by one. XXX: make it faster ? */
+void g(int c)
+{
+    int ind1;
+    ind1 = ind + 1; /* output offset once this byte is written */
+    if (ind1 > cur_text_section->data_allocated)
+        section_realloc(cur_text_section, ind1);
+    cur_text_section->data[ind] = c;
+    ind = ind1;
+}
+
+void o(unsigned int c) /* emit 'c' through g(), low byte first, until the remaining value is 0 */
+{
+    while (c) { /* consequence: o(0) emits nothing and high zero bytes are never output */
+        g(c);
+        c = c >> 8;
+    }
+}
+
+void gen_le32(int c) /* emit 'c' as exactly four g() calls, least significant byte first */
+{
+    g(c);
+    g(c >> 8);
+    g(c >> 16);
+    g(c >> 24);
+}
+
+/* output a symbol and patch all calls to it: walk the chain of 32 bit fields starting at text offset 't' and make each one target 'a' */
+void gsym_addr(int t, int a)
+{
+    int n, *ptr;
+    while (t) { /* a link value of 0 ends the chain */
+        ptr = (int *)(cur_text_section->data + t);
+        n = *ptr; /* next value */
+        *ptr = a - t - 4; /* 'a' relative to the end of this 4 byte field */
+        t = n;
+    }
+}
+
+void gsym(int t) /* patch chain 't' (see gsym_addr) to the current output offset 'ind' */
+{
+    gsym_addr(t, ind);
+}
+
+/* psym is used to put an instruction with a data field which is a
+   reference to a symbol. It is in fact the same as oad ! */
+#define psym oad
+
+/* instruction + 4 bytes data: emit opcode 'c' with o(), then the 32 bit value 's'. Return the address (text offset) of the data */
+static int oad(int c, int s)
+{
+    int ind1;
+
+    o(c); /* opcode byte(s) */
+    ind1 = ind + 4;
+    if (ind1 > cur_text_section->data_allocated)
+        section_realloc(cur_text_section, ind1);
+    *(int *)(cur_text_section->data + ind) = s; /* written as a host 'int' */
+    s = ind; /* 's' now holds the offset of the data field */
+    ind = ind1;
+    return s;
+}
+
+/* output the 32 bit constant 'c', with an R_386_32 relocation against 'sym' at that position if 'r & VT_SYM' is true */
+static void gen_addr32(int r, Sym *sym, int c)
+{
+    if (r & VT_SYM)
+        greloc(cur_text_section, sym, ind, R_386_32); /* 'ind' is where the constant is about to be emitted */
+    gen_le32(c);
+}
+
+/* generate a modrm reference. 'op_reg' contains the additional 3 opcode bits. 'r' selects the form: VT_CONST gives an
+   absolute 32 bit address ('c', relocated against 'sym' if VT_SYM is set), VT_LOCAL gives 'c'(%ebp), otherwise register 'r' is the base */
+static void gen_modrm(int op_reg, int r, Sym *sym, int c)
+{
+    op_reg = op_reg << 3; /* position the bits in the reg/opcode field of the modrm byte */
+    if ((r & VT_VALMASK) == VT_CONST) {
+        /* constant memory reference */
+        o(0x05 | op_reg);
+        gen_addr32(r, sym, c);
+    } else if ((r & VT_VALMASK) == VT_LOCAL) {
+        /* currently, we use only ebp as base */
+        if (c == (signed char)c) { /* explicitly signed: plain 'char' is unsigned on some hosts, which would mis-encode offsets 128..255 */
+            /* short reference */
+            o(0x45 | op_reg);
+            g(c);
+        } else {
+            oad(0x85 | op_reg, c);
+        }
+    } else {
+        g(0x00 | op_reg | (r & VT_VALMASK));
+    }
+}
+
+
+/* load 'r' from value 'sv': emit the code that brings the value described by 'sv' into register 'r' */
+void load(int r, SValue *sv)
+{
+    int v, t, ft, fc, fr;
+    SValue v1;
+
+    fr = sv->r;
+    ft = sv->type.t;
+    fc = sv->c.ul;
+
+    v = fr & VT_VALMASK;
+    if (fr & VT_LVAL) {
+        if (v == VT_LLOCAL) { /* first load the int stored in the local slot into 'r', then use 'r' as base register */
+            v1.type.t = VT_INT;
+            v1.r = VT_LOCAL | VT_LVAL;
+            v1.c.ul = fc;
+            load(r, &v1);
+            fr = r;
+        }
+        if ((ft & VT_BTYPE) == VT_FLOAT) {
+            o(0xd9); /* flds */
+            r = 0; /* from here 'r' is the opcode extension passed to gen_modrm */
+        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
+            o(0xdd); /* fldl */
+            r = 0;
+        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
+            o(0xdb); /* fldt */
+            r = 5;
+        } else if ((ft & VT_TYPE) == VT_BYTE) {
+            o(0xbe0f);   /* movsbl */
+        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
+            o(0xb60f);   /* movzbl */
+        } else if ((ft & VT_TYPE) == VT_SHORT) {
+            o(0xbf0f);   /* movswl */
+        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
+            o(0xb70f);   /* movzwl */
+        } else {
+            o(0x8b);     /* movl */
+        }
+        gen_modrm(r, fr, sv->sym, fc);
+    } else {
+        if (v == VT_CONST) {
+            o(0xb8 + r); /* mov $xx, r */
+            gen_addr32(fr, sv->sym, fc);
+        } else if (v == VT_LOCAL) {
+            o(0x8d); /* lea xxx(%ebp), r */
+            gen_modrm(r, VT_LOCAL, sv->sym, fc);
+        } else if (v == VT_CMP) {
+            oad(0xb8 + r, 0); /* mov $0, r */
+            o(0x0f); /* setxx %br */
+            o(fc); /* 'fc' is used directly as the second opcode byte */
+            o(0xc0 + r);
+        } else if (v == VT_JMP || v == VT_JMPI) {
+            t = v & 1;
+            oad(0xb8 + r, t); /* mov $t, r (reached by falling through) */
+            o(0x05eb); /* jmp after: skip the 5 byte mov below */
+            gsym(fc); /* the pending jumps in chain 'fc' land here */
+            oad(0xb8 + r, t ^ 1); /* mov $(t ^ 1), r */
+        } else if (v != r) {
+            o(0x89);
+            o(0xc0 + r + v * 8); /* mov v, r */
+        }
+    }
+}
+
+/* store register 'r' in lvalue 'v'; the opcode is chosen from the base type of 'v' */
+void store(int r, SValue *v)
+{
+    int fr, bt, ft, fc;
+
+    ft = v->type.t;
+    fc = v->c.ul;
+    fr = v->r & VT_VALMASK;
+    bt = ft & VT_BTYPE;
+    /* XXX: incorrect if float reg to reg */
+    if (bt == VT_FLOAT) {
+        o(0xd9); /* fsts */
+        r = 2; /* for the float cases 'r' becomes the opcode extension given to gen_modrm */
+    } else if (bt == VT_DOUBLE) {
+        o(0xdd); /* fstl (opcode extension 2 stores without popping) */
+        r = 2;
+    } else if (bt == VT_LDOUBLE) {
+        o(0xc0d9); /* fld %st(0) */
+        o(0xdb); /* fstpt */
+        r = 7;
+    } else {
+        if (bt == VT_SHORT)
+            o(0x66); /* operand size prefix: 16 bit store */
+        if (bt == VT_BYTE || bt == VT_BOOL)
+            o(0x88); /* movb */
+        else
+            o(0x89); /* movl (movw with the prefix above) */
+    }
+    if (fr == VT_CONST ||
+        fr == VT_LOCAL ||
+        (v->r & VT_LVAL)) {
+        gen_modrm(r, v->r, v->sym, fc);
+    } else if (fr != r) {
+        o(0xc0 + fr + r * 8); /* mov r, fr */
+    }
+}
+
+static void gadd_sp(int val) /* emit 'add $val, %esp', using the imm8 form when 'val' fits in a signed byte */
+{
+    if (val == (signed char)val) { /* explicitly signed: the CPU sign-extends imm8, and plain 'char' may be unsigned on the host */
+        o(0xc483); /* add $xx, %esp (8 bit immediate) */
+        g(val);
+    } else {
+        oad(0xc481, val); /* add $xxx, %esp */
+    }
+}
+
+/* emit a call (or a jump if 'is_jmp' is '1') to the target described by 'vtop'; 'vtop' is not popped here */
+static void gcall_or_jmp(int is_jmp)
+{
+    int r;
+    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
+        /* constant case */
+        if (vtop->r & VT_SYM) {
+            /* relocation case */
+            greloc(cur_text_section, vtop->sym, 
+                   ind + 1, R_386_PC32);
+        } else {
+            /* put an empty PC32 relocation */
+            put_elf_reloc(symtab_section, cur_text_section, 
+                          ind + 1, R_386_PC32, 0);
+        }
+        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im; the relocation above covers the 4 bytes after the opcode */
+    } else {
+        /* otherwise, indirect call */
+        r = gv(RC_INT);
+        o(0xff); /* call/jmp *r */
+        o(0xd0 + r + (is_jmp << 4));
+    }
+}
+
+static uint8_t fastcall_regs[3] = { TREG_EAX, TREG_EDX, TREG_ECX }; /* register order for FUNC_FASTCALL1..3 */
+static uint8_t fastcallw_regs[2] = { TREG_ECX, TREG_EDX }; /* register order for FUNC_FASTCALLW */
+
+/* Generate function call. The function address is pushed first, then
+   all the parameters in call order (on the value stack 'vtop'). This
+   function pops all the parameters and the function address. */
+void gfunc_call(int nb_args)
+{
+    int size, align, r, args_size, i, func_call;
+    Sym *func_sym;
+    
+    args_size = 0; /* bytes of arguments pushed on the machine stack so far */
+    for(i = 0;i < nb_args; i++) {
+        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
+            size = type_size(&vtop->type, &align);
+            /* align to stack align size */
+            size = (size + 3) & ~3;
+            /* allocate the necessary size on stack */
+            oad(0xec81, size); /* sub $xxx, %esp */
+            /* generate structure store */
+            r = get_reg(RC_INT);
+            o(0x89); /* mov %esp, r */
+            o(0xe0 + r);
+            vset(&vtop->type, r | VT_LVAL, 0);
+            vswap();
+            vstore();
+            args_size += size;
+        } else if (is_float(vtop->type.t)) {
+            gv(RC_FLOAT); /* only one float register */
+            if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
+                size = 4;
+            else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
+                size = 8;
+            else
+                size = 12;
+            oad(0xec81, size); /* sub $xxx, %esp */
+            if (size == 12)
+                o(0x7cdb); /* fstpt 0(%esp) */
+            else
+                o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */
+            g(0x24);
+            g(0x00);
+            args_size += size;
+        } else {
+            /* simple type (currently always same size) */
+            /* XXX: implicit cast ? */
+            r = gv(RC_INT);
+            if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
+                size = 8;
+                o(0x50 + vtop->r2); /* push r2 (second word of the long long) */
+            } else {
+                size = 4;
+            }
+            o(0x50 + r); /* push r */
+            args_size += size;
+        }
+        vtop--;
+    }
+    save_regs(0); /* save used temporary registers */
+    func_sym = vtop->type.ref;
+    func_call = func_sym->r; /* calling convention of the callee */
+    /* fast call case */
+    if ((func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) ||
+        func_call == FUNC_FASTCALLW) {
+        int fastcall_nb_regs;
+        uint8_t *fastcall_regs_ptr;
+        if (func_call == FUNC_FASTCALLW) {
+            fastcall_regs_ptr = fastcallw_regs;
+            fastcall_nb_regs = 2;
+        } else {
+            fastcall_regs_ptr = fastcall_regs;
+            fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
+        }
+        for(i = 0;i < fastcall_nb_regs; i++) { /* move the last pushed words from the stack into registers */
+            if (args_size <= 0)
+                break;
+            o(0x58 + fastcall_regs_ptr[i]); /* pop r */
+            /* XXX: incorrect for struct/floats */
+            args_size -= 4;
+        }
+    }
+    gcall_or_jmp(0);
+    if (args_size && func_sym->r != FUNC_STDCALL) /* no stack adjustment is emitted for FUNC_STDCALL */
+        gadd_sp(args_size);
+    vtop--;
+}
+
+#ifdef TCC_TARGET_PE
+#define FUNC_PROLOG_SIZE 10 /* bytes reserved at function start; filled in later by gfunc_epilog */
+#else
+#define FUNC_PROLOG_SIZE 9
+#endif
+
+/* generate function prolog of type 'func_type': reserve room for the entry code and assign an address to each parameter */
+void gfunc_prolog(CType *func_type)
+{
+    int addr, align, size, func_call, fastcall_nb_regs;
+    int param_index, param_addr;
+    uint8_t *fastcall_regs_ptr;
+    Sym *sym;
+    CType *type;
+
+    sym = func_type->ref;
+    func_call = sym->r;
+    addr = 8; /* %ebp offset given to the first stack parameter */
+    loc = 0;
+    if (func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) {
+        fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
+        fastcall_regs_ptr = fastcall_regs;
+    } else if (func_call == FUNC_FASTCALLW) {
+        fastcall_nb_regs = 2;
+        fastcall_regs_ptr = fastcallw_regs;
+    } else {
+        fastcall_nb_regs = 0;
+        fastcall_regs_ptr = NULL;
+    }
+    param_index = 0;
+
+    ind += FUNC_PROLOG_SIZE; /* skip the prolog bytes; gfunc_epilog writes them once 'loc' is final */
+    func_sub_sp_offset = ind;
+    /* if the function returns a structure, then add an
+       implicit pointer parameter */
+    func_vt = sym->type;
+    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
+        /* XXX: fastcall case ? */
+        func_vc = addr;
+        addr += 4;
+        param_index++;
+    }
+    /* define parameters */
+    while ((sym = sym->next) != NULL) {
+        type = &sym->type;
+        size = type_size(type, &align);
+        size = (size + 3) & ~3; /* round up to a multiple of 4 */
+#ifdef FUNC_STRUCT_PARAM_AS_PTR
+        /* structs are passed as pointer */
+        if ((type->t & VT_BTYPE) == VT_STRUCT) {
+            size = 4;
+        }
+#endif
+        if (param_index < fastcall_nb_regs) {
+            /* save FASTCALL register */
+            loc -= 4; /* new local slot for the register parameter */
+            o(0x89);     /* movl */
+            gen_modrm(fastcall_regs_ptr[param_index], VT_LOCAL, NULL, loc);
+            param_addr = loc;
+        } else {
+            param_addr = addr;
+            addr += size;
+        }
+        sym_push(sym->v & ~SYM_FIELD, type,
+                 VT_LOCAL | VT_LVAL, param_addr);
+        param_index++;
+    }
+    func_ret_sub = 0;
+    /* pascal type call ? */
+    if (func_call == FUNC_STDCALL)
+        func_ret_sub = addr - 8; /* operand of the 'ret n' emitted by gfunc_epilog */
+
+    /* leave some room for bound checking code */
+    if (do_bounds_check) {
+        oad(0xb8, 0); /* lbound section pointer */
+        oad(0xb8, 0); /* call to function */
+        func_bound_offset = lbounds_section->data_offset;
+    }
+}
+
+/* generate function epilog, then go back and write the prolog into the room reserved by gfunc_prolog */
+void gfunc_epilog(void)
+{
+    int v, saved_ind;
+
+#ifdef CONFIG_TCC_BCHECK
+    if (do_bounds_check && func_bound_offset != lbounds_section->data_offset) {
+        int saved_ind; /* shadows the outer 'saved_ind' within this block */
+        int *bounds_ptr;
+        Sym *sym, *sym_data;
+        /* add end of table info */
+        bounds_ptr = section_ptr_add(lbounds_section, sizeof(int));
+        *bounds_ptr = 0;
+        /* generate bound local allocation */
+        saved_ind = ind;
+        ind = func_sub_sp_offset; /* overwrite the two placeholders emitted by gfunc_prolog */
+        sym_data = get_sym_ref(&char_pointer_type, lbounds_section, 
+                               func_bound_offset, lbounds_section->data_offset);
+        greloc(cur_text_section, sym_data,
+               ind + 1, R_386_32);
+        oad(0xb8, 0); /* mov $xxx, %eax */
+        sym = external_global_sym(TOK___bound_local_new, &func_old_type, 0);
+        greloc(cur_text_section, sym, 
+               ind + 1, R_386_PC32);
+        oad(0xe8, -4); /* call, target supplied by the relocation */
+        ind = saved_ind;
+        /* generate bound check local freeing */
+        o(0x5250); /* save returned value, if any */
+        greloc(cur_text_section, sym_data,
+               ind + 1, R_386_32);
+        oad(0xb8, 0); /* mov $xxx, %eax */
+        sym = external_global_sym(TOK___bound_local_delete, &func_old_type, 0);
+        greloc(cur_text_section, sym, 
+               ind + 1, R_386_PC32);
+        oad(0xe8, -4);
+        o(0x585a); /* restore returned value, if any */
+    }
+#endif
+    o(0xc9); /* leave */
+    if (func_ret_sub == 0) {
+        o(0xc3); /* ret */
+    } else {
+        o(0xc2); /* ret n */
+        g(func_ret_sub); /* 'n' as a 16 bit value, low byte first */
+        g(func_ret_sub >> 8);
+    }
+    /* align local size to word & save local variables */
+    
+    v = (-loc + 3) & -4; 
+    saved_ind = ind;
+    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; /* start of the room reserved by gfunc_prolog */
+#ifdef TCC_TARGET_PE
+    if (v >= 4096) {
+        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
+        oad(0xb8, v); /* mov stacksize, %eax */
+        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
+        greloc(cur_text_section, sym, ind-4, R_386_PC32);
+    } else
+#endif
+    {
+        o(0xe58955);  /* push %ebp, mov %esp, %ebp */
+        o(0xec81);  /* sub esp, stacksize */
+        gen_le32(v);
+#if FUNC_PROLOG_SIZE == 10
+        o(0x90);  /* adjust to FUNC_PROLOG_SIZE */
+#endif
+    }
+    ind = saved_ind; /* continue output after the epilog */
+}
+
+/* generate a jump to a label: emit 'jmp' with 't' stored in its 32 bit field and return the offset of that field */
+int gjmp(int t)
+{
+    return psym(0xe9, t);
+}
+
+/* generate a jump to a fixed address 'a' (a text offset), using the 2 byte short form when the displacement fits */
+void gjmp_addr(int a)
+{
+    int r;
+    r = a - ind - 2; /* displacement measured from the end of a 2 byte short jump */
+    if (r == (signed char)r) { /* explicitly signed: rel8 is sign-extended, and plain 'char' may be unsigned on the host */
+        g(0xeb);
+        g(r);
+    } else {
+        oad(0xe9, a - ind - 5); /* near jump: 5 bytes long */
+    }
+}
+
+/* generate a test. set 'inv' to invert test. Stack entry is popped. 't' is a jump chain; the updated chain is returned */
+int gtst(int inv, int t)
+{
+    int v, *p;
+
+    v = vtop->r & VT_VALMASK;
+    if (v == VT_CMP) {
+        /* fast case : can jump directly since flags are set */
+        g(0x0f);
+        t = psym((vtop->c.i - 16) ^ inv, t); /* opcode derived from the comparison code; 'inv' flips its low bit */
+    } else if (v == VT_JMP || v == VT_JMPI) {
+        /* && or || optimization */
+        if ((v & 1) == inv) {
+            /* insert vtop->c jump list in t */
+            p = &vtop->c.i;
+            while (*p != 0) /* find the end of the vtop->c chain */
+                p = (int *)(cur_text_section->data + *p);
+            *p = t;
+            t = vtop->c.i;
+        } else {
+            t = gjmp(t);
+            gsym(vtop->c.i); /* the vtop->c jumps land just after the new jump */
+        }
+    } else {
+        if (is_float(vtop->type.t) || is_llong(vtop->type.t)) {
+            /* compare != 0 to get a 32-bit int for testing */
+            vpushi(0);
+            gen_op(TOK_NE);
+        }
+        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
+            /* constant jmp optimization */
+            if ((vtop->c.i != 0) != inv) 
+                t = gjmp(t);
+        } else {
+            v = gv(RC_INT);
+            o(0x85); /* test v, v */
+            o(0xc0 + v * 9);
+            g(0x0f);
+            t = psym(0x85 ^ inv, t); /* jne, or je when 'inv' is 1 */
+        }
+    }
+    vtop--;
+    return t;
+}
+
+/* generate an integer binary operation 'op' on the two top value stack entries; one entry is popped and the result replaces the other */
+void gen_opi(int op)
+{
+    int r, fr, opc, c;
+
+    switch(op) {
+    case '+':
+    case TOK_ADDC1: /* add with carry generation */
+        opc = 0;
+    gen_op8:
+        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
+            /* constant case */
+            vswap();
+            r = gv(RC_INT);
+            vswap();
+            c = vtop->c.i;
+            if (c == (signed char)c) { /* explicitly signed: imm8 is sign-extended, and plain 'char' may be unsigned on the host */
+                /* XXX: generate inc and dec for smaller code ? */
+                o(0x83);
+                o(0xc0 | (opc << 3) | r);
+                g(c);
+            } else {
+                o(0x81);
+                oad(0xc0 | (opc << 3) | r, c);
+            }
+        } else {
+            gv2(RC_INT, RC_INT);
+            r = vtop[-1].r;
+            fr = vtop[0].r;
+            o((opc << 3) | 0x01);
+            o(0xc0 + r + fr * 8); 
+        }
+        vtop--;
+        if (op >= TOK_ULT && op <= TOK_GT) { /* comparison: the result is left in the flags */
+            vtop->r = VT_CMP;
+            vtop->c.i = op;
+        }
+        break;
+    case '-':
+    case TOK_SUBC1: /* sub with carry generation */
+        opc = 5;
+        goto gen_op8;
+    case TOK_ADDC2: /* add with carry use */
+        opc = 2;
+        goto gen_op8;
+    case TOK_SUBC2: /* sub with carry use */
+        opc = 3;
+        goto gen_op8;
+    case '&':
+        opc = 4;
+        goto gen_op8;
+    case '^':
+        opc = 6;
+        goto gen_op8;
+    case '|':
+        opc = 1;
+        goto gen_op8;
+    case '*':
+        gv2(RC_INT, RC_INT);
+        r = vtop[-1].r;
+        fr = vtop[0].r;
+        vtop--;
+        o(0xaf0f); /* imul fr, r */
+        o(0xc0 + fr + r * 8);
+        break;
+    case TOK_SHL:
+        opc = 4;
+        goto gen_shift;
+    case TOK_SHR:
+        opc = 5;
+        goto gen_shift;
+    case TOK_SAR:
+        opc = 7;
+    gen_shift:
+        opc = 0xc0 | (opc << 3);
+        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
+            /* constant case */
+            vswap();
+            r = gv(RC_INT);
+            vswap();
+            c = vtop->c.i & 0x1f; /* only the low 5 bits of the count are emitted */
+            o(0xc1); /* shl/shr/sar $xxx, r */
+            o(opc | r);
+            g(c);
+        } else {
+            /* we generate the shift in ecx */
+            gv2(RC_INT, RC_ECX);
+            r = vtop[-1].r;
+            o(0xd3); /* shl/shr/sar %cl, r */
+            o(opc | r);
+        }
+        vtop--;
+        break;
+    case '/':
+    case TOK_UDIV:
+    case TOK_PDIV:
+    case '%':
+    case TOK_UMOD:
+    case TOK_UMULL:
+        /* first operand must be in eax */
+        /* XXX: need better constraint for second operand */
+        gv2(RC_EAX, RC_ECX);
+        r = vtop[-1].r;
+        fr = vtop[0].r;
+        vtop--;
+        save_reg(TREG_EDX); /* edx is overwritten by mul/div */
+        if (op == TOK_UMULL) {
+            o(0xf7); /* mul fr */
+            o(0xe0 + fr);
+            vtop->r2 = TREG_EDX; /* second word of the result */
+            r = TREG_EAX;
+        } else {
+            if (op == TOK_UDIV || op == TOK_UMOD) {
+                o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
+                o(0xf0 + fr);
+            } else {
+                o(0xf799); /* cltd, idiv fr, %eax */
+                o(0xf8 + fr);
+            }
+            if (op == '%' || op == TOK_UMOD)
+                r = TREG_EDX; /* remainder */
+            else
+                r = TREG_EAX; /* quotient */
+        }
+        vtop->r = r;
+        break;
+    default:
+        opc = 7; /* cmp: used for every remaining 'op', i.e. the comparisons */
+        goto gen_op8;
+    }
+}
+
+/* generate a floating point operation 'v = t1 op t2' instruction. The
+   two operands are guaranteed to have the same floating point type */
+/* XXX: need to use ST1 too */
+void gen_opf(int op)
+{
+    int a, ft, fc, swapped, r;
+
+    /* convert constants to memory references */
+    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
+        vswap();
+        gv(RC_FLOAT);
+        vswap();
+    }
+    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
+        gv(RC_FLOAT);
+
+    /* must put at least one value in the floating point register */
+    if ((vtop[-1].r & VT_LVAL) &&
+        (vtop[0].r & VT_LVAL)) {
+        vswap();
+        gv(RC_FLOAT);
+        vswap();
+    }
+    swapped = 0; /* set when the operands end up in reverse order */
+    /* swap the stack if needed so that t1 is the register and t2 is
+       the memory reference */
+    if (vtop[-1].r & VT_LVAL) {
+        vswap();
+        swapped = 1;
+    }
+    if (op >= TOK_ULT && op <= TOK_GT) {
+        /* load on stack second operand */
+        load(TREG_ST0, vtop);
+        save_reg(TREG_EAX); /* eax is used by FP comparison code */
+        if (op == TOK_GE || op == TOK_GT)
+            swapped = !swapped;
+        else if (op == TOK_EQ || op == TOK_NE)
+            swapped = 0; /* operand order is irrelevant for == and != */
+        if (swapped)
+            o(0xc9d9); /* fxch %st(1) */
+        o(0xe9da); /* fucompp */
+        o(0xe0df); /* fnstsw %ax */
+        if (op == TOK_EQ) {
+            o(0x45e480); /* and $0x45, %ah */
+            o(0x40fC80); /* cmp $0x40, %ah */
+        } else if (op == TOK_NE) {
+            o(0x45e480); /* and $0x45, %ah */
+            o(0x40f480); /* xor $0x40, %ah */
+            op = TOK_NE; /* no-op: 'op' is already TOK_NE in this branch */
+        } else if (op == TOK_GE || op == TOK_LE) {
+            o(0x05c4f6); /* test $0x05, %ah */
+            op = TOK_EQ;
+        } else {
+            o(0x45c4f6); /* test $0x45, %ah */
+            op = TOK_EQ;
+        }
+        vtop--;
+        vtop->r = VT_CMP; /* result is in the flags; 'op' is the condition to test */
+        vtop->c.i = op;
+    } else {
+        /* no memory reference possible for long double operations */
+        if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
+            load(TREG_ST0, vtop);
+            swapped = !swapped;
+        }
+        
+        switch(op) {
+        default:
+        case '+':
+            a = 0; /* 'a' is the 3 bit operation code placed in the emitted instruction */
+            break;
+        case '-':
+            a = 4;
+            if (swapped)
+                a++; /* next code is used when the operands are swapped */
+            break;
+        case '*':
+            a = 1;
+            break;
+        case '/':
+            a = 6;
+            if (swapped)
+                a++;
+            break;
+        }
+        ft = vtop->type.t;
+        fc = vtop->c.ul;
+        if ((ft & VT_BTYPE) == VT_LDOUBLE) {
+            o(0xde); /* fxxxp %st, %st(1) */
+            o(0xc1 + (a << 3));
+        } else {
+            /* if saved lvalue, then we must reload it */
+            r = vtop->r;
+            if ((r & VT_VALMASK) == VT_LLOCAL) {
+                SValue v1;
+                r = get_reg(RC_INT);
+                v1.type.t = VT_INT;
+                v1.r = VT_LOCAL | VT_LVAL;
+                v1.c.ul = fc;
+                load(r, &v1); /* fetch the int stored in the local slot into 'r' */
+                fc = 0;
+            }
+
+            if ((ft & VT_BTYPE) == VT_DOUBLE)
+                o(0xdc); /* operation with a double memory operand */
+            else
+                o(0xd8); /* operation with a float memory operand */
+            gen_modrm(a, r, vtop->sym, fc);
+        }
+        vtop--;
+    }
+}
+
+/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
+   and 'long long' cases. 't' is not read here: the value goes through the stack and is loaded into st0. */
+void gen_cvt_itof(int t)
+{
+    save_reg(TREG_ST0);
+    gv(RC_INT);
+    if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
+        /* signed long long to float/double/long double (unsigned case
+           is handled generically) */
+        o(0x50 + vtop->r2); /* push r2 */
+        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
+        o(0x242cdf); /* fildll (%esp) */
+        o(0x08c483); /* add $8, %esp */
+    } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == 
+               (VT_INT | VT_UNSIGNED)) {
+        /* unsigned int to float/double/long double */
+        o(0x6a); /* push $0 */
+        g(0x00); /* emitted with g() because o() would drop a zero byte */
+        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
+        o(0x242cdf); /* fildll (%esp) */
+        o(0x08c483); /* add $8, %esp */
+    } else {
+        /* int to float/double/long double */
+        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
+        o(0x2404db); /* fildl (%esp) */
+        o(0x04c483); /* add $4, %esp */
+    }
+    vtop->r = TREG_ST0;
+}
+
+/* convert fp to int 't' type: store st0 to the stack with fistp between two control word loads, then pop it into integer register(s) */
+/* XXX: handle long long case */
+void gen_cvt_ftoi(int t)
+{
+    int r, r2, size;
+    Sym *sym;
+    CType ushort_type;
+
+    ushort_type.t = VT_SHORT | VT_UNSIGNED;
+    ushort_type.ref = NULL; /* initialise the whole CType: it is handed to external_global_sym(), which may read 'ref' */
+    gv(RC_FLOAT);
+    if (t != VT_INT)
+        size = 8; /* anything but VT_INT is stored as a 64 bit integer */
+    else 
+        size = 4;
+    
+    o(0x2dd9); /* ldcw xxx */
+    sym = external_global_sym(TOK___tcc_int_fpu_control, 
+                              &ushort_type, VT_LVAL);
+    greloc(cur_text_section, sym, 
+           ind, R_386_32);
+    gen_le32(0); /* address field, filled through the relocation above */
+    
+    oad(0xec81, size); /* sub $xxx, %esp */
+    if (size == 4)
+        o(0x1cdb); /* fistpl */
+    else
+        o(0x3cdf); /* fistpll */
+    o(0x24); /* addressing byte completing the store: (%esp) */
+    o(0x2dd9); /* ldcw xxx */
+    sym = external_global_sym(TOK___tcc_fpu_control, 
+                              &ushort_type, VT_LVAL);
+    greloc(cur_text_section, sym, 
+           ind, R_386_32);
+    gen_le32(0);
+
+    r = get_reg(RC_INT);
+    o(0x58 + r); /* pop r */
+    if (size == 8) {
+        if (t == VT_LLONG) {
+            vtop->r = r; /* mark reg as used */
+            r2 = get_reg(RC_INT);
+            o(0x58 + r2); /* pop r2 */
+            vtop->r2 = r2;
+        } else {
+            o(0x04c483); /* add $4, %esp */
+        }
+    }
+    vtop->r = r;
+}
+
+/* convert from one floating point type to another; the target type 't' is not read here */
+void gen_cvt_ftof(int t)
+{
+    /* all we have to do on i386 is to put the float in a register */
+    gv(RC_FLOAT);
+}
+
+/* computed goto support: jump to the address described by 'vtop', then pop it */
+void ggoto(void)
+{
+    gcall_or_jmp(1);
+    vtop--;
+}
+
+/* bound check support functions */
+#ifdef CONFIG_TCC_BCHECK
+
+/* generate a bounded pointer addition: call __bound_ptr_add on the two top value stack entries and push the resulting pointer */
+void gen_bounded_ptr_add(void)
+{
+    Sym *sym;
+
+    /* prepare fast i386 function call (args in eax and edx) */
+    gv2(RC_EAX, RC_EDX);
+    /* save all temporary registers */
+    vtop -= 2;
+    save_regs(0);
+    /* do a fast function call */
+    sym = external_global_sym(TOK___bound_ptr_add, &func_old_type, 0);
+    greloc(cur_text_section, sym, 
+           ind + 1, R_386_PC32);
+    oad(0xe8, -4); /* call, target supplied by the relocation above */
+    /* returned pointer is in eax */
+    vtop++;
+    vtop->r = TREG_EAX | VT_BOUNDED;
+    /* address of bounding function call point: offset of the last entry of the relocation section (presumably the one greloc just added) */
+    vtop->c.ul = (cur_text_section->reloc->data_offset - sizeof(Elf32_Rel)); 
+}
+
+/* patch pointer addition in vtop so that pointer dereferencing is
+   also tested: the relocation recorded in vtop->c.ul is redirected to a size specific function */
+void gen_bounded_ptr_deref(void)
+{
+    int func;
+    int size, align;
+    Elf32_Rel *rel;
+    Sym *sym;
+
+    size = 0; /* 0 means "not determined yet" */
+    /* XXX: put that code in generic part of tcc */
+    if (!is_float(vtop->type.t)) {
+        if (vtop->r & VT_LVAL_BYTE)
+            size = 1;
+        else if (vtop->r & VT_LVAL_SHORT)
+            size = 2;
+    }
+    if (!size)
+        size = type_size(&vtop->type, &align);
+    switch(size) { /* select the checking function for this access size */
+    case  1: func = TOK___bound_ptr_indir1; break;
+    case  2: func = TOK___bound_ptr_indir2; break;
+    case  4: func = TOK___bound_ptr_indir4; break;
+    case  8: func = TOK___bound_ptr_indir8; break;
+    case 12: func = TOK___bound_ptr_indir12; break;
+    case 16: func = TOK___bound_ptr_indir16; break;
+    default:
+        error("unhandled size when derefencing bounded pointer");
+        func = 0;
+        break;
+    }
+
+    /* patch relocation */
+    /* XXX: find a better solution ? */
+    rel = (Elf32_Rel *)(cur_text_section->reloc->data + vtop->c.ul);
+    sym = external_global_sym(func, &func_old_type, 0);
+    if (!sym->c)
+        put_extern_sym(sym, NULL, 0, 0);
+    rel->r_info = ELF32_R_INFO(sym->c, ELF32_R_TYPE(rel->r_info)); /* keep the relocation type, change only the symbol */
+}
+#endif
+
+/* end of X86 code generator */
+/*************************************************************/
+
--- a/tcc.c	Tue May 08 22:16:31 2007 -0400
+++ b/tcc.c	Sat May 12 00:15:39 2007 -0400
@@ -38,7 +38,7 @@
 static int do_bounds_check = 0;
 
 #ifdef TCC_TARGET_I386
-#include "i386-gen.c"
+#include "i386/i386-gen.c"
 #endif
 
 #ifdef TCC_TARGET_ARM
@@ -8676,7 +8676,7 @@
 #ifdef CONFIG_TCC_ASM
 
 #ifdef TCC_TARGET_I386
-#include "i386-asm.c"
+#include "i386/i386-asm.c"
 #endif
 #include "tccasm.c"
 
--- a/tcctok.h	Tue May 08 22:16:31 2007 -0400
+++ b/tcctok.h	Sat May 12 00:15:39 2007 -0400
@@ -444,7 +444,7 @@
 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
-#include "i386-asm.h"
+#include "i386/i386-asm.h"
 
 #define ALT(x)
 #define DEF_ASM_OP0(name, opcode)
@@ -452,6 +452,6 @@
 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) DEF_ASM(name)
 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) DEF_ASM(name)
 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) DEF_ASM(name)
-#include "i386-asm.h"
+#include "i386/i386-asm.h"
 
 #endif