changeset 54:65cd203a5b34

The patch that makes qemu 0.8.2 build with gcc 4.x. (Stitched together from patches used by gentoo.)
author Rob Landley <rob@landley.net>
date Wed, 20 Dec 2006 12:08:51 -0500
parents 3fd961e31444
children f510ed16a6a0
files sources/patches/qemu-gcc-4-all.patch
diffstat 1 files changed, 1181 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sources/patches/qemu-gcc-4-all.patch	Wed Dec 20 12:08:51 2006 -0500
@@ -0,0 +1,1181 @@
+Patches needed to get qemu (tested up through 0.8.1) to build under GCC 4.x.
+
+2005-06-02  Gwenole Beauchesne  <gbeauchesne@mandriva.com>
+
+	* dyngen.c (trace_i386_insn): Fix push/imul case with 8-bit
+	immediate.
+
+2005-05-11  Paul Brook  <paul@codesourcery.com>
+
+	* gcc4 host support.
+
+--- qemu-0.7.0/target-ppc/exec.h.gcc4	2005-04-27 22:52:05.000000000 +0200
++++ qemu-0.7.0/target-ppc/exec.h	2005-06-02 21:41:51.000000000 +0200
+@@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3);
+ #define FT1 (env->ft1)
+ #define FT2 (env->ft2)
+ 
+-#if defined (DEBUG_OP)
+-#define RETURN() __asm__ __volatile__("nop");
+-#else
+-#define RETURN() __asm__ __volatile__("");
+-#endif
++#define RETURN() FORCE_RET()
+ 
+ #include "cpu.h"
+ #include "exec-all.h"
+--- qemu-0.7.0/dyngen-exec.h.gcc4	2005-04-27 22:52:05.000000000 +0200
++++ qemu-0.7.0/dyngen-exec.h	2005-06-02 21:41:51.000000000 +0200
+@@ -155,7 +155,12 @@ extern int printf(const char *, ...);
+ #endif
+ 
+ /* force GCC to generate only one epilog at the end of the function */
++#if defined(__i386__) || defined(__x86_64__)
++/* Also add 4 bytes of padding so that we can replace the ret with a jmp.  */
++#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
++#else
+ #define FORCE_RET() asm volatile ("");
++#endif
+ 
+ #ifndef OPPROTO
+ #define OPPROTO
+@@ -205,12 +210,19 @@ extern int __op_jmp0, __op_jmp1, __op_jm
+ #endif
+ 
+ #ifdef __i386__
+-#define EXIT_TB() asm volatile ("ret")
+-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
++/* Dyngen will replace hlt instructions with a ret instruction.  Inserting a
++   ret directly would confuse dyngen.  */
++#define EXIT_TB() asm volatile ("hlt")
+/* Dyngen will replace cli with 0xe9 (jmp).
++   We generate the offset manually.  */
++#define GOTO_LABEL_PARAM(n) \
++  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
+ #endif
+ #ifdef __x86_64__
+-#define EXIT_TB() asm volatile ("ret")
+-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
++/* The same as i386.  */
++#define EXIT_TB() asm volatile ("hlt")
++#define GOTO_LABEL_PARAM(n) \
++  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
+ #endif
+ #ifdef __powerpc__
+ #define EXIT_TB() asm volatile ("blr")
+--- qemu-0.7.0/dyngen.c.gcc4	2005-04-27 22:52:05.000000000 +0200
++++ qemu-0.7.0/dyngen.c	2005-06-02 22:25:06.000000000 +0200
+@@ -32,6 +32,8 @@
+ 
+ #include "config-host.h"
+ 
++//#define DEBUG_OP
++
+ /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
+    compilation */
+ #if defined(CONFIG_WIN32)
+@@ -1343,6 +1345,644 @@ int arm_emit_ldr_info(const char *name, 
+ #endif
+ 
+ 
++#if defined(HOST_I386) || defined(HOST_X86_64)
++
++/* This byte is the first byte of an instruction.  */
++#define FLAG_INSN     (1 << 0)
++/* This byte has been processed as part of an instruction.  */
++#define FLAG_SCANNED  (1 << 1)
+/* This instruction is a return instruction.  Gcc sometimes generates prefix
++   bytes, so may be more than one byte long.  */
++#define FLAG_RET      (1 << 2)
+/* This is either the target of a jump, or the preceding instruction uses
++   a pc-relative offset.  */
++#define FLAG_TARGET   (1 << 3)
++/* This is a magic instruction that needs fixing up.  */
++#define FLAG_EXIT     (1 << 4)
++#define MAX_EXITS     5
++
++static void
++bad_opcode(const char *name, uint32_t op)
++{
++    error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name);
++}
++
+/* Mark len bytes as scanned.  Returns insn_size + len.  Reports an error
++   if these bytes have already been scanned.  */
++static int
++eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
++{
++    while (len > 0) {
++        /* This should never occur in sane code.  */
++        if (flags[insn + insn_size] & FLAG_SCANNED)
++            error ("Overlapping instructions in %s", name);
++        flags[insn + insn_size] |= FLAG_SCANNED;
++        insn_size++;
++        len--;
++    }
++    return insn_size;
++}
++
++static void
++trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn,
++                 int len)
++{
++    uint8_t *ptr;
++    uint8_t op;
++    int modrm;
++    int is_prefix;
++    int op_size;
++    int addr_size;
++    int insn_size;
++    int is_ret;
++    int is_condjmp;
++    int is_jmp;
++    int is_exit;
++    int is_pcrel;
++    int immed;
++    int seen_rexw;
++    int32_t disp;
++
++    ptr = start_p + insn;
++    /* nonzero if this insn has a ModR/M byte.  */
++    modrm = 1;
++    /* The size of the immediate value in this instruction.  */
++    immed = 0;
++    /* The operand size.  */
++    op_size = 4;
++    /* The address size */
++    addr_size = 4;
++    /* The total length of this instruction.  */
++    insn_size = 0;
++    is_prefix = 1;
++    is_ret = 0;
++    is_condjmp = 0;
++    is_jmp = 0;
++    is_exit = 0;
++    seen_rexw = 0;
++    is_pcrel = 0;
++
++    while (is_prefix) {
++        op = ptr[insn_size];
++        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++        is_prefix = 0;
++        switch (op >> 4) {
++        case 0:
++        case 1:
++        case 2:
++        case 3:
++            if (op == 0x0f) {
++                /* two-byte opcode.  */
++                op = ptr[insn_size];
++                insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++                switch (op >> 4) {
++                case 0:
++                    if ((op & 0xf) > 3)
++                      modrm = 0;
++                    break;
++                case 1: /* vector move or prefetch */
++                case 2: /* various moves and vector compares.  */
++                case 4: /* cmov */
++                case 5: /* vector instructions */
++                case 6:
++                case 13:
++                case 14:
++                case 15:
++                    break;
++                case 7: /* mmx */
++                    if (op & 0x77) /* emms */
++                      modrm = 0;
++                    break;
++                case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
++                    modrm = 0;
++                    break;
++                case 8: /* long conditional jump */
++                    is_condjmp = 1;
++                    immed = op_size;
++                    modrm = 0;
++                    break;
++                case 9: /* setcc */
++                    break;
++                case 10:
++                    switch (op & 0x7) {
++                    case 0: /* push fs/gs */
++                    case 1: /* pop fs/gs */
++                    case 2: /* cpuid/rsm */
++                        modrm = 0;
++                        break;
++                    case 4: /* shld/shrd immediate */
++                        immed = 1;
++                        break;
++                    default: /* Normal instructions with a ModR/M byte.  */
++                        break;
++                    }
++                    break;
++                case 11:
++                    switch (op & 0xf) {
++                    case 10: /* bt, bts, btr, btc */
++                        immed = 1;
++                        break;
++                    default:
++                        /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
++                           undefined, and movsx */
++                        break;
++                    }
++                    break;
++                case 12:
++                    if (op & 8) {
++                        /* bswap */
++                        modrm = 0;
++                    } else {
++                        switch (op & 0x7) {
++                        case 2:
++                        case 4:
++                        case 5:
++                        case 6:
++                            immed = 1;
++                            break;
++                        default:
++                            break;
++                        }
++                    }
++                    break;
++                }
++            } else if ((op & 0x07) <= 0x3) {
+                /* General arithmetic ax.  */
++            } else if ((op & 0x07) <= 0x5) {
++                /* General arithmetic ax, immediate.  */
++                if (op & 0x01)
++                    immed = op_size;
++                else
++                    immed = 1;
++                modrm = 0;
++            } else if ((op & 0x23) == 0x22) {
++                /* Segment prefix.  */
++                is_prefix = 1;
++            } else {
++                /* Segment register push/pop or DAA/AAA/DAS/AAS.  */
++                modrm = 0;
++            }
++            break;
++
++#if defined(HOST_X86_64)
++        case 4: /* rex prefix.  */
++            is_prefix = 1;
++            /* The address/operand size is actually 64-bit, but the immediate
++               values in the instruction are still 32-bit.  */
++            op_size = 4;
++            addr_size = 4;
++            if (op & 8)
++                seen_rexw = 1;
++            break;
++#else
++        case 4: /* inc/dec register.  */
++#endif
++        case 5: /* push/pop general register.  */
++            modrm = 0;
++            break;
++
++        case 6:
++            switch (op & 0x0f) {
++            case 0: /* pusha */
++            case 1: /* popa */
++                modrm = 0;
++                break;
++            case 2: /* bound */
++            case 3: /* arpl */
++                break;
++            case 4: /* FS */
++            case 5: /* GS */
++                is_prefix = 1;
++                break;
++            case 6: /* opcode size prefix.  */
++                op_size = 2;
++                is_prefix = 1;
++                break;
++            case 7: /* Address size prefix.  */
++                addr_size = 2;
++                is_prefix = 1;
++                break;
++            case 8: /* push immediate */
++                immed = op_size;
++                modrm = 0;
++                break;
++            case 10: /* push 8-bit immediate */
++                immed = 1;
++                modrm = 0;
++                break;
++            case 9: /* imul immediate */
++                immed = op_size;
++                break;
++            case 11: /* imul 8-bit immediate */
++                immed = 1;
++                break;
++            case 12: /* insb */
++            case 13: /* insw */
++            case 14: /* outsb */
++            case 15: /* outsw */
++                modrm = 0;
++                break;
++            }
++            break;
++
++        case 7: /* Short conditional jump.  */
++            is_condjmp = 1;
++            immed = 1;
++            modrm = 0;
++            break;
++          
++        case 8:
++            if ((op & 0xf) <= 3) {
++                /* arithmetic immediate.  */
++                if ((op & 3) == 1)
++                    immed = op_size;
++                else
++                    immed = 1;
++            }
++            /* else test, xchg, mov, lea or pop general.  */
++            break;
++
++        case 9:
++            /* Various single-byte opcodes with no modrm byte.  */
++            modrm = 0;
++            if (op == 10) {
++                /* Call */
++                immed = 4;
++            }
++            break;
++
++        case 10:
++            switch ((op & 0xe) >> 1) {
+            case 0: /* mov absolute immediate.  */
++            case 1:
++                if (seen_rexw)
++                    immed = 8;
++                else
++                    immed = addr_size;
++                break;
++            case 4: /* test immediate.  */
++                if (op & 1)
++                    immed = op_size;
++                else
++                    immed = 1;
++                break;
++            default: /* Various string ops.  */
++                break;
++            }
++            modrm = 0;
++            break;
++
++        case 11: /* move immediate to register */
++            if (op & 8) {
++                if (seen_rexw)
++                    immed = 8;
++                else
++                    immed = op_size;
++            } else {
++                immed = 1;
++            }
++            modrm = 0;
++            break;
++
++          case 12:
++            switch (op & 0xf) {
++            case 0: /* shift immediate */
++            case 1:
++                immed = 1;
++                break;
++            case 2: /* ret immediate */
++                immed = 2;
++                modrm = 0;
++                bad_opcode(name, op);
++                break;
++            case 3: /* ret */
++                modrm = 0;
++                is_ret = 1;
++            case 4: /* les */
++            case 5: /* lds */
++                break;
++            case 6: /* mov immediate byte */
++                immed = 1;
++                break;
++            case 7: /* mov immediate */
++                immed = op_size;
++                break;
++            case 8: /* enter */
++                /* TODO: Is this right?  */
++                immed = 3;
++                modrm = 0;
++                break;
++            case 10: /* retf immediate */
++                immed = 2;
++                modrm = 0;
++                bad_opcode(name, op);
++                break;
++            case 13: /* int */
++                immed = 1;
++                modrm = 0;
++                break;
++            case 11: /* retf */
++            case 15: /* iret */
++                modrm = 0;
++                bad_opcode(name, op);
++                break;
++            default: /* leave, int3 or into */
++                modrm = 0;
++                break;
++            }
++            break;
++
++        case 13:
++            if ((op & 0xf) >= 8) {
++                /* Coprocessor escape.  For our purposes this is just a normal
++                   instruction with a ModR/M byte.  */
++            } else if ((op & 0xf) >= 4) {
++                /* AAM, AAD or XLAT */
++                modrm = 0;
++            }
++            /* else shift instruction */
++            break;
++
++        case 14:
++            switch ((op & 0xc) >> 2) {
++            case 0: /* loop or jcxz */
++                is_condjmp = 1;
++                immed = 1;
++                break;
++            case 1: /* in/out immed */
++                immed = 1;
++                break;
++            case 2: /* call or jmp */
++                switch (op & 3) {
++                case 0: /* call */
++                    immed = op_size;
++                    break;
++                case 1: /* long jump */
++                    immed = 4;
++                    is_jmp = 1;
++                    break;
++                case 2: /* far jmp */
++                    bad_opcode(name, op);
++                    break;
++                case 3: /* short jmp */
++                    immed = 1;
++                    is_jmp = 1;
++                    break;
++                }
++                break;
++            case 3: /* in/out register */
++                break;
++            }
++            modrm = 0;
++            break;
++
++        case 15:
++            switch ((op & 0xe) >> 1) {
++            case 0:
++            case 1:
++                is_prefix = 1;
++                break;
++            case 2:
++            case 4:
++            case 5:
++            case 6:
++                modrm = 0;
++                /* Some privileged insns are used as markers.  */
++                switch (op) {
++                case 0xf4: /* hlt: Exit translation block.  */
++                    is_exit = 1;
++                    break;
++                case 0xfa: /* cli: Jump to label.  */
++                    is_exit = 1;
++                    immed = 4;
++                    break;
++                case 0xfb: /* sti: TB patch jump.  */
+                    /* Mark the insn for patching, but continue scanning.  */
++                    flags[insn] |= FLAG_EXIT;
++                    immed = 4;
++                    break;
++                }
++                break;
++            case 3: /* unary grp3 */
++                if ((ptr[insn_size] & 0x38) == 0) {
++                    if (op == 0xf7)
++                        immed = op_size;
++                    else
++                        immed = 1; /* test immediate */
++                }
++                break;
++            case 7: /* inc/dec grp4/5 */
++                /* TODO: This includes indirect jumps.  We should fail if we
++                   encounter one of these. */
++                break;
++            }
++            break;
++        }
++    }
++
++    if (modrm) {
++        if (addr_size != 4)
++            error("16-bit addressing mode used in %s", name);
++
++        disp = 0;
++        modrm = ptr[insn_size];
++        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++        modrm &= 0xc7;
++        switch ((modrm & 0xc0) >> 6) {
++        case 0:
++            if (modrm == 5)
++              disp = 4;
++            break;
++        case 1:
++            disp = 1;
++            break;
++        case 2:
++            disp = 4;
++            break;
++        }
++        if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
++            /* SIB byte */
++            if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
++                disp = 4;
++                is_pcrel = 1;
++            }
++            insn_size = eat_bytes(name, flags, insn, insn_size, 1);
++        }
++        insn_size = eat_bytes(name, flags, insn, insn_size, disp);
++    }
++    insn_size = eat_bytes(name, flags, insn, insn_size, immed);
++    if (is_condjmp || is_jmp) {
++        if (immed == 1) {
++            disp = (int8_t)*(ptr + insn_size - 1);
++        } else {
++            disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
++                   | (((int32_t)*(ptr + insn_size - 2)) << 16)
++                   | (((int32_t)*(ptr + insn_size - 3)) << 8)
++                   | *(ptr + insn_size - 4);
++        }
++        disp += insn_size;
++        /* Jumps to external symbols point to the address of the offset
++           before relocation.  */
++        /* ??? These are probably a tailcall.  We could fix them up by
++           replacing them with jmp to EOB + call, but it's easier to just
++           prevent the compiler generating them.  */
++        if (disp == 1)
++            error("Unconditional jump (sibcall?) in %s", name);
++        disp += insn;
++        if (disp < 0 || disp > len)
++            error("Jump outside instruction in %s", name);
++
++        if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
++            error("Overlapping instructions in %s", name);
++
++        flags[disp] |= (FLAG_INSN | FLAG_TARGET);
++        is_pcrel = 1; 
++    }
++    if (is_pcrel) {
++        /* Mark the following insn as a jump target.  This will stop
++           this instruction being moved.  */
++        flags[insn + insn_size] |= FLAG_TARGET;
++    }
++    if (is_ret)
++      flags[insn] |= FLAG_RET;
++
++    if (is_exit)
++      flags[insn] |= FLAG_EXIT;
++
++    if (!(is_jmp || is_ret || is_exit))
++      flags[insn + insn_size] |= FLAG_INSN;
++}
++
++/* Scan a function body.  Returns the position of the return sequence.
++   Sets *patch_bytes to the number of bytes that need to be copied from that
++   location.  If no patching is required (ie. the return is the last insn)
++   *patch_bytes will be set to -1.  *plen is the number of code bytes to copy.
++ */
++static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
++                         int *patch_bytes, int *exit_addrs)
++{
++    char *flags;
++    int more;
++    int insn;
++    int retpos;
++    int bytes;
++    int num_exits;
++    int len;
++    int last_insn;
++
++    len = *plen;
++    flags = malloc(len + 1);
++    memset(flags, 0, len + 1);
++    flags[0] |= FLAG_INSN;
++    more = 1;
++    while (more) {
++        more = 0;
++        for (insn = 0; insn < len; insn++) {
++            if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
++                trace_i386_insn(name, start_p, flags, insn, len);
++                more = 1;
++            }
++        }
++    }
++
++    /* Strip any unused code at the end of the function.  */
++    while (len > 0 && flags[len - 1] == 0)
++      len--;
++
++    retpos = -1;
++    num_exits = 0;
++    last_insn = 0;
++    for (insn = 0; insn < len; insn++) {
++        if (flags[insn] & FLAG_RET) {
++            /* ??? In theory it should be possible to handle multiple return
++               points.  In practice it's not worth the effort.  */
++            if (retpos != -1)
++                error("Multiple return instructions in %s", name);
++            retpos = insn;
++        }
++        if (flags[insn] & FLAG_EXIT) {
++            if (num_exits == MAX_EXITS)
++                error("Too many block exits in %s", name);
++            exit_addrs[num_exits] = insn;
++            num_exits++;
++        }
++        if (flags[insn] & FLAG_INSN)
++            last_insn = insn;
++    }
++
++    exit_addrs[num_exits] = -1;
++    if (retpos == -1) {
++        if (num_exits == 0) {
++            error ("No return instruction found in %s", name);
++        } else {
++            retpos = len;
++            last_insn = len;
++        }
++    }
++    
++    /* If the return instruction is the last instruction we can just 
++       remove it.  */
++    if (retpos == last_insn)
++        *patch_bytes = -1;
++    else
++        *patch_bytes = 0;
++
++    /* Back up over any nop instructions.  */
++    while (retpos > 0
++           && (flags[retpos] & FLAG_TARGET) == 0
++           && (flags[retpos - 1] & FLAG_INSN) != 0
++           && start_p[retpos - 1] == 0x90) {
++        retpos--;
++    }
++
++    if (*patch_bytes == -1) {
++        *plen = retpos;
++        free (flags);
++        return retpos;
++    }
++    *plen = len;
++
+    /* The ret is in the middle of the function.  Find four more bytes so
+       that the ret can be replaced by a jmp. */
++    /* ??? Use a short jump where possible. */
++    bytes = 4;
++    insn = retpos + 1;
++    /* We can clobber everything up to the next jump target.  */
++    while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
++        insn++;
++        bytes--;
++    }
++    if (bytes > 0) {
++        /* ???: Strip out nop blocks.  */
++        /* We can't do the replacement without clobbering anything important.
+           Copy preceding instruction(s) to give us some space.  */
++        while (retpos > 0) {
++            /* If this byte is the target of a jmp we can't move it.  */
++            if (flags[retpos] & FLAG_TARGET)
++                break;
++
++            (*patch_bytes)++;
++            bytes--;
++            retpos--;
++
++            /* Break out of the loop if we have enough space and this is either 
++               the first byte of an instruction or a pad byte.  */
++            if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
++                && bytes <= 0) {
++                break;
++            }
++        }
++    }
++
++    if (bytes > 0)
++        error("Unable to replace ret with jmp in %s\n", name);
++
++    free(flags);
++    return retpos;
++}
++
++#endif
++
+ #define MAX_ARGS 3
+ 
+ /* generate op code */
+@@ -1356,6 +1996,11 @@ void gen_code(const char *name, host_ulo
+     uint8_t args_present[MAX_ARGS];
+     const char *sym_name, *p;
+     EXE_RELOC *rel;
++#if defined(HOST_I386) || defined(HOST_X86_64)
++    int patch_bytes;
++    int retpos;
++    int exit_addrs[MAX_EXITS];
++#endif
+ 
+     /* Compute exact size excluding prologue and epilogue instructions.
+      * Increment start_offset to skip epilogue instructions, then compute
+@@ -1366,33 +2011,12 @@ void gen_code(const char *name, host_ulo
+     p_end = p_start + size;
+     start_offset = offset;
+ #if defined(HOST_I386) || defined(HOST_X86_64)
+-#ifdef CONFIG_FORMAT_COFF
+-    {
+-        uint8_t *p;
+-        p = p_end - 1;
+-        if (p == p_start)
+-            error("empty code for %s", name);
+-        while (*p != 0xc3) {
+-            p--;
+-            if (p <= p_start)
+-                error("ret or jmp expected at the end of %s", name);
+-        }
+-        copy_size = p - p_start;
+-    }
+-#else
+     {
+         int len;
+         len = p_end - p_start;
+-        if (len == 0)
+-            error("empty code for %s", name);
+-        if (p_end[-1] == 0xc3) {
+-            len--;
+-        } else {
+-            error("ret or jmp expected at the end of %s", name);
+-        }
++        retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
+         copy_size = len;
+     }
+-#endif    
+ #elif defined(HOST_PPC)
+     {
+         uint8_t *p;
+@@ -1559,6 +2183,13 @@ void gen_code(const char *name, host_ulo
+     }
+ 
+     if (gen_switch == 2) {
++#if defined(HOST_I386) || defined(HOST_X86_64)
++        if (patch_bytes != -1)
++            copy_size += patch_bytes;
++#ifdef DEBUG_OP
++        copy_size += 2;
++#endif
++#endif
+         fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
+     } else if (gen_switch == 1) {
+ 
+@@ -1761,7 +2392,43 @@ void gen_code(const char *name, host_ulo
+ #error unsupport object format
+ #endif
+                 }
++               }
++                /* Replace the marker instructions with the actual opcodes.  */
++                for (i = 0; exit_addrs[i] != -1; i++) {
++                    int op;
++                    switch (p_start[exit_addrs[i]])
++                      {
++                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
++                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
++                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
++                      default: error("Internal error");
++                      }
++                    fprintf(outfile, 
++                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
++                            exit_addrs[i], op);
+                 }
++                /* Fix up the return instruction.  */
++                if (patch_bytes != -1) {
++                    if (patch_bytes) {
++                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
++                                "gen_code_ptr + %d, %d);\n",
++                                copy_size, retpos, patch_bytes);
++                    }
++                    fprintf(outfile,
++                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
++                            retpos);
++                    fprintf(outfile,
++                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
++                            retpos + 1, copy_size - (retpos + 5));
++                    
++                    copy_size += patch_bytes;
++                }
++#ifdef DEBUG_OP
++                fprintf(outfile,
++                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
++                        copy_size);
++                copy_size += 2;
++#endif
+             }
+ #elif defined(HOST_X86_64)
+             {
+@@ -1793,6 +2460,42 @@ void gen_code(const char *name, host_ulo
+                     }
+                 }
+                 }
++                /* Replace the marker instructions with the actual opcodes.  */
++                for (i = 0; exit_addrs[i] != -1; i++) {
++                    int op;
++                    switch (p_start[exit_addrs[i]])
++                      {
++                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
++                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
++                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
++                      default: error("Internal error");
++                      }
++                    fprintf(outfile, 
++                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
++                            exit_addrs[i], op);
++                }
++                /* Fix up the return instruction.  */
++                if (patch_bytes != -1) {
++                    if (patch_bytes) {
++                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
++                                "gen_code_ptr + %d, %d);\n",
++                                copy_size, retpos, patch_bytes);
++                    }
++                    fprintf(outfile,
++                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
++                            retpos);
++                    fprintf(outfile,
++                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
++                            retpos + 1, copy_size - (retpos + 5));
++                    
++                    copy_size += patch_bytes;
++                }
++#ifdef DEBUG_OP
++                fprintf(outfile,
++                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
++                        copy_size);
++                copy_size += 2;
++#endif
+             }
+ #elif defined(HOST_PPC)
+             {
+--- qemu-0.7.0/exec-all.h.gcc4	2005-04-27 22:52:05.000000000 +0200
++++ qemu-0.7.0/exec-all.h	2005-06-02 21:41:51.000000000 +0200
+@@ -335,14 +335,15 @@ do {\
+ 
+ #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
+ 
+-/* we patch the jump instruction directly */
++/* we patch the jump instruction directly.  Use sti in place of the actual
++   jmp instruction so that dyngen can patch in the correct result.  */
+ #define GOTO_TB(opname, tbparam, n)\
+ do {\
+     asm volatile (".section .data\n"\
+ 		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
+ 		  ".long 1f\n"\
+ 		  ASM_PREVIOUS_SECTION \
+-                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
++                  "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\
+ 		  "1:\n");\
+ } while (0)
+ 
+2005-11-11  Gwenole Beauchesne  <gbeauchesne@mandriva.com>
+
+	* Check for stack clobbers in functions using GOTO_LABEL_PARAM().
+
+--- qemu-0.7.2/dyngen.c.dyngen-check-stack-clobbers	2005-11-11 16:26:33.000000000 +0100
++++ qemu-0.7.2/dyngen.c	2005-11-11 17:30:29.000000000 +0100
+@@ -1414,6 +1414,9 @@ int arm_emit_ldr_info(const char *name, 
+ #define FLAG_TARGET   (1 << 3)
+ /* This is a magic instruction that needs fixing up.  */
+ #define FLAG_EXIT     (1 << 4)
++/* This instruction clobbers the stack pointer.  */
++/* XXX only supports push, pop, add/sub $imm,%esp  */
++#define FLAG_STACK    (1 << 5)
+ #define MAX_EXITS     5
+ 
+ static void
+@@ -1454,6 +1457,7 @@ trace_i386_insn (const char *name, uint8
+     int is_jmp;
+     int is_exit;
+     int is_pcrel;
++    int is_stack;
+     int immed;
+     int seen_rexw;
+     int32_t disp;
+@@ -1476,6 +1480,7 @@ trace_i386_insn (const char *name, uint8
+     is_exit = 0;
+     seen_rexw = 0;
+     is_pcrel = 0;
++    is_stack = 0;
+ 
+     while (is_prefix) {
+         op = ptr[insn_size];
+@@ -1522,6 +1527,7 @@ trace_i386_insn (const char *name, uint8
+                     switch (op & 0x7) {
+                     case 0: /* push fs/gs */
+                     case 1: /* pop fs/gs */
++                        is_stack = 1;
+                     case 2: /* cpuid/rsm */
+                         modrm = 0;
+                         break;
+@@ -1594,6 +1600,7 @@ trace_i386_insn (const char *name, uint8
+ #endif
+         case 5: /* push/pop general register.  */
+             modrm = 0;
++            is_stack = 1;
+             break;
+ 
+         case 6:
+@@ -1601,6 +1608,7 @@ trace_i386_insn (const char *name, uint8
+             case 0: /* pusha */
+             case 1: /* popa */
+                 modrm = 0;
++                is_stack = 1;
+                 break;
+             case 2: /* bound */
+             case 3: /* arpl */
+@@ -1620,10 +1628,12 @@ trace_i386_insn (const char *name, uint8
+             case 8: /* push immediate */
+                 immed = op_size;
+                 modrm = 0;
++                is_stack = 1;
+                 break;
+             case 10: /* push 8-bit immediate */
+                 immed = 1;
+                 modrm = 0;
++                is_stack = 1;
+                 break;
+             case 9: /* imul immediate */
+                 immed = op_size;
+@@ -1653,8 +1663,22 @@ trace_i386_insn (const char *name, uint8
+                     immed = op_size;
+                 else
+                     immed = 1;
++                if (op == 0x81 || op == 0x83) {
++                    /* add, sub */
++                    op = ptr[insn_size];
++                    switch ((op >> 3) & 7) {
++                    case 0:
++                    case 5:
++                        is_stack = (op & 7) == 4;
++                        break;
++                    }
++                }
+             }
+-            /* else test, xchg, mov, lea or pop general.  */
++            else if ((op & 0xf) == 0xf) {
++                /* pop general.  */
++                is_stack = 1;
++            }
++            /* else test, xchg, mov, lea.  */
+             break;
+ 
+         case 9:
+@@ -1904,6 +1928,9 @@ trace_i386_insn (const char *name, uint8
+     if (is_exit)
+       flags[insn] |= FLAG_EXIT;
+ 
++    if (is_stack)
++      flags[insn] |= FLAG_STACK;
++
+     if (!(is_jmp || is_ret || is_exit))
+       flags[insn + insn_size] |= FLAG_INSN;
+ }
+@@ -1924,6 +1951,7 @@ static int trace_i386_op(const char * na
+     int num_exits;
+     int len;
+     int last_insn;
++    int stack_clobbered;
+ 
+     len = *plen;
+     flags = malloc(len + 1);
+@@ -1947,6 +1975,7 @@ static int trace_i386_op(const char * na
+     retpos = -1;
+     num_exits = 0;
+     last_insn = 0;
++    stack_clobbered = 0;
+     for (insn = 0; insn < len; insn++) {
+         if (flags[insn] & FLAG_RET) {
+             /* ??? In theory it should be possible to handle multiple return
+@@ -1956,6 +1985,8 @@ static int trace_i386_op(const char * na
+             retpos = insn;
+         }
+         if (flags[insn] & FLAG_EXIT) {
++            if (stack_clobbered)
++                error("Stack clobbered in %s", name);
+             if (num_exits == MAX_EXITS)
+                 error("Too many block exits in %s", name);
+             exit_addrs[num_exits] = insn;
+@@ -1963,6 +1994,8 @@ static int trace_i386_op(const char * na
+         }
+         if (flags[insn] & FLAG_INSN)
+             last_insn = insn;
++        if (flags[insn] & FLAG_STACK)
++            stack_clobbered = 1;
+     }
+ 
+     exit_addrs[num_exits] = -1;
+2005-11-11  Gwenole Beauchesne  <gbeauchesne@mandriva.com>
+
+	* Globally save %ebx, %esi, %edi on entry to generated
+	function. This avoids some register spills in synthetic opcodes.
+	NOTE: this also easily fixes gcc4 compiled qemu-system-x86_64 on x86.
+
+--- qemu-0.7.2/cpu-exec.c.gcc4-opts	2005-09-04 19:11:31.000000000 +0200
++++ qemu-0.7.2/cpu-exec.c	2005-11-11 17:40:47.000000000 +0100
+@@ -561,6 +561,15 @@ int cpu_exec(CPUState *env1)
+                               : /* no outputs */
+                               : "r" (gen_func)
+                               : "r1", "r2", "r3", "r8", "r9", "r10", "r12", "r14");
++#elif defined(TARGET_X86_64) && defined(__i386__)
++                asm volatile ("push %%ebx\n"
++                              "push %%esi\n"
++                              "push %%edi\n"
++                              "call *%0\n"
++                              "pop %%edi\n"
++                              "pop %%esi\n"
++                              "pop %%ebx\n"
++                              : : "r" (gen_func) : "ebx", "esi", "edi");
+ #elif defined(TARGET_I386) && defined(USE_CODE_COPY)
+ {
+     if (!(tb->cflags & CF_CODE_COPY)) {
+--- qemu-0.7.2/Makefile.target.gcc4-opts	2005-11-11 16:26:33.000000000 +0100
++++ qemu-0.7.2/Makefile.target	2005-11-11 17:59:56.000000000 +0100
+@@ -65,6 +65,10 @@ OP_CFLAGS+= -falign-functions=0 -fno-gcs
+ else
+ OP_CFLAGS+= -malign-functions=0
+ endif
++ifeq ($(TARGET_ARCH), x86_64)
++# XXX globally save %ebx, %esi, %edi on entry to generated function
++OP_CFLAGS+= -fcall-used-ebx -fcall-used-esi -fcall-used-edi
++endif
+ 
+ ifdef TARGET_GPROF
+ USE_I386_LD=y
+2005-10-28  Gwenole Beauchesne  <gbeauchesne@mandriva.com>
+
+	* Various additional hacks for GCC4.
+
+--- qemu-0.7.2/target-i386/ops_sse.h.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
++++ qemu-0.7.2/target-i386/ops_sse.h	2005-10-28 10:09:21.000000000 +0200
+@@ -34,6 +34,12 @@
+ #define Q(n) XMM_Q(n)
+ #define SUFFIX _xmm
+ #endif
++#if defined(__i386__) && __GNUC__ >= 4
++#define RegCopy(d, s) __builtin_memcpy(&(d), &(s), sizeof(d))
++#endif
++#ifndef RegCopy
++#define RegCopy(d, s) d = s
++#endif
+ 
+ void OPPROTO glue(op_psrlw, SUFFIX)(void)
+ {
+@@ -570,7 +576,7 @@ void OPPROTO glue(op_pshufw, SUFFIX) (vo
+     r.W(1) = s->W((order >> 2) & 3);
+     r.W(2) = s->W((order >> 4) & 3);
+     r.W(3) = s->W((order >> 6) & 3);
+-    *d = r;
++    RegCopy(*d, r);
+ }
+ #else
+ void OPPROTO op_shufps(void)
+--- qemu-0.7.2/target-i386/helper.c.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
++++ qemu-0.7.2/target-i386/helper.c	2005-10-28 10:09:21.000000000 +0200
+@@ -3130,8 +3130,15 @@ void helper_fxrstor(target_ulong ptr, in
+         nb_xmm_regs = 8 << data64;
+         addr = ptr + 0xa0;
+         for(i = 0; i < nb_xmm_regs; i++) {
++#if defined(__i386__) && __GNUC__ >= 4
++            env->xmm_regs[i].XMM_L(0) = ldl(addr);
++            env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
++            env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
++            env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
++#else
+             env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+             env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
++#endif
+             addr += 16;
+         }
+     }
+--- qemu-0.7.2/cpu-all.h.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
++++ qemu-0.7.2/cpu-all.h	2005-10-28 10:09:21.000000000 +0200
+@@ -339,7 +339,13 @@
+ 
+ static inline void stq_le_p(void *ptr, uint64_t v)
+ {
++#if defined(__i386__) && __GNUC__ >= 4
++    const union { uint64_t v; uint32_t p[2]; } x = { .v = v };
++    ((uint32_t *)ptr)[0] = x.p[0];
++    ((uint32_t *)ptr)[1] = x.p[1];
++#else
+     *(uint64_t *)ptr = v;
++#endif
+ }
+ 
+ /* float access */
+--- qemu-0.7.2/softmmu_header.h.gcc4-hacks	2005-10-28 10:08:08.000000000 +0200
++++ qemu-0.7.2/softmmu_header.h	2005-10-28 10:09:21.000000000 +0200
+@@ -104,7 +104,7 @@
+ void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE v, int is_user);
+ 
+ #if (DATA_SIZE <= 4) && (TARGET_LONG_BITS == 32) && defined(__i386__) && \
+-    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU)
++    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU) && (__GNUC__ < 4)
+ 
+ #define CPU_TLB_ENTRY_BITS 4
+ 
+@@ -131,7 +131,7 @@ static inline RES_TYPE glue(glue(ld, USU
+                   "m" (*(uint32_t *)offsetof(CPUState, tlb_read[CPU_MEM_INDEX][0].address)),
+                   "i" (CPU_MEM_INDEX),
+                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
+-                  : "%eax", "%ecx", "%edx", "memory", "cc");
++                  : "%eax", "%edx", "memory", "cc");
+     return res;
+ }
+ 
+@@ -178,13 +178,14 @@ static inline int glue(glue(lds, SUFFIX)
+                   "m" (*(uint32_t *)offsetof(CPUState, tlb_read[CPU_MEM_INDEX][0].address)),
+                   "i" (CPU_MEM_INDEX),
+                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
+-                  : "%eax", "%ecx", "%edx", "memory", "cc");
++                  : "%eax", "%edx", "memory", "cc");
+     return res;
+ }
+ #endif
+ 
+-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
++static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE val)
+ {
++    RES_TYPE v = val;
+     asm volatile ("movl %0, %%edx\n"
+                   "movl %0, %%eax\n"
+                   "shrl %3, %%edx\n"
+@@ -236,16 +237,14 @@
+                   "2:\n"
+                   : 
+                   : "r" (ptr), 
+-/* NOTE: 'q' would be needed as constraint, but we could not use it
+-   with T1 ! */
+-                  "r" (v), 
++                  "q" (v), 
+                   "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
+                   "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
+                   "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
+                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_write)),
+                   "i" (CPU_MEM_INDEX),
+                   "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
+-                  : "%eax", "%ecx", "%edx", "memory", "cc");
++                  : "%eax", "%edx", "memory", "cc");
+ }
+ 
+ /* TODO: handle 64-bit access sizes and addresses */