view sources/patches/qemu-gcc-4-all.patch @ 74:752ca43084d7

Added tag 0.0.1 for changeset 8b75dee28f40
author Rob Landley <rob@landley.net>
date Fri, 29 Dec 2006 20:00:27 -0500
parents 65cd203a5b34
children
line wrap: on
line source

Patches needed to get qemu (tested up through 0.8.1) to build under GCC 4.x.

2005-06-02  Gwenole Beauchesne  <gbeauchesne@mandriva.com>

	* dyngen.c (trace_i386_insn): Fix push/imul case with 8-bit
	immediate.

2005-05-11  Paul Brook  <paul@codesourcery.com>

	* gcc4 host support.

--- qemu-0.7.0/target-ppc/exec.h.gcc4	2005-04-27 22:52:05.000000000 +0200
+++ qemu-0.7.0/target-ppc/exec.h	2005-06-02 21:41:51.000000000 +0200
@@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3);
 #define FT1 (env->ft1)
 #define FT2 (env->ft2)
 
-#if defined (DEBUG_OP)
-#define RETURN() __asm__ __volatile__("nop");
-#else
-#define RETURN() __asm__ __volatile__("");
-#endif
+#define RETURN() FORCE_RET()
 
 #include "cpu.h"
 #include "exec-all.h"
--- qemu-0.7.0/dyngen-exec.h.gcc4	2005-04-27 22:52:05.000000000 +0200
+++ qemu-0.7.0/dyngen-exec.h	2005-06-02 21:41:51.000000000 +0200
@@ -155,7 +155,12 @@ extern int printf(const char *, ...);
 #endif
 
 /* force GCC to generate only one epilog at the end of the function */
+#if defined(__i386__) || defined(__x86_64__)
+/* Also add 4 bytes of padding so that we can replace the ret with a jmp.  */
+#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
+#else
 #define FORCE_RET() asm volatile ("");
+#endif
 
 #ifndef OPPROTO
 #define OPPROTO
@@ -205,12 +210,19 @@ extern int __op_jmp0, __op_jmp1, __op_jm
 #endif
 
 #ifdef __i386__
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* Dyngen will replace hlt instructions with a ret instruction.  Inserting a
+   ret directly would confuse dyngen.  */
+#define EXIT_TB() asm volatile ("hlt")
+/* Dyngen will replace cli with 0x9e (jmp). 
+   We generate the offset manually.  */
+#define GOTO_LABEL_PARAM(n) \
+  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
 #endif
 #ifdef __x86_64__
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* The same as i386.  */
+#define EXIT_TB() asm volatile ("hlt")
+#define GOTO_LABEL_PARAM(n) \
+  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
 #endif
 #ifdef __powerpc__
 #define EXIT_TB() asm volatile ("blr")
--- qemu-0.7.0/dyngen.c.gcc4	2005-04-27 22:52:05.000000000 +0200
+++ qemu-0.7.0/dyngen.c	2005-06-02 22:25:06.000000000 +0200
@@ -32,6 +32,8 @@
 
 #include "config-host.h"
 
+//#define DEBUG_OP
+
 /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
    compilation */
 #if defined(CONFIG_WIN32)
@@ -1343,6 +1345,644 @@ int arm_emit_ldr_info(const char *name, 
 #endif
 
 
+#if defined(HOST_I386) || defined(HOST_X86_64)
+
+/* This byte is the first byte of an instruction.  */
+#define FLAG_INSN     (1 << 0)
+/* This byte has been processed as part of an instruction.  */
+#define FLAG_SCANNED  (1 << 1)
+/* This instruction is a return instruction.  Gcc cometimes generates prefix
+   bytes, so may be more than one byte long.  */
+#define FLAG_RET      (1 << 2)
+/* This is either the target of a jump, or the preceeding instruction uses
+   a pc-relative offset.  */
+#define FLAG_TARGET   (1 << 3)
+/* This is a magic instruction that needs fixing up.  */
+#define FLAG_EXIT     (1 << 4)
+#define MAX_EXITS     5
+
+static void
+bad_opcode(const char *name, uint32_t op)
+{
+    error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name);
+}
+
+/* Mark len bytes as scanned,  Returns insn_size + len.  Reports an error
+   if these bytes have already been scanned.  */
+static int
+eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
+{
+    while (len > 0) {
+        /* This should never occur in sane code.  */
+        if (flags[insn + insn_size] & FLAG_SCANNED)
+            error ("Overlapping instructions in %s", name);
+        flags[insn + insn_size] |= FLAG_SCANNED;
+        insn_size++;
+        len--;
+    }
+    return insn_size;
+}
+
+static void
+trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn,
+                 int len)
+{
+    uint8_t *ptr;
+    uint8_t op;
+    int modrm;
+    int is_prefix;
+    int op_size;
+    int addr_size;
+    int insn_size;
+    int is_ret;
+    int is_condjmp;
+    int is_jmp;
+    int is_exit;
+    int is_pcrel;
+    int immed;
+    int seen_rexw;
+    int32_t disp;
+
+    ptr = start_p + insn;
+    /* nonzero if this insn has a ModR/M byte.  */
+    modrm = 1;
+    /* The size of the immediate value in this instruction.  */
+    immed = 0;
+    /* The operand size.  */
+    op_size = 4;
+    /* The address size */
+    addr_size = 4;
+    /* The total length of this instruction.  */
+    insn_size = 0;
+    is_prefix = 1;
+    is_ret = 0;
+    is_condjmp = 0;
+    is_jmp = 0;
+    is_exit = 0;
+    seen_rexw = 0;
+    is_pcrel = 0;
+
+    while (is_prefix) {
+        op = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        is_prefix = 0;
+        switch (op >> 4) {
+        case 0:
+        case 1:
+        case 2:
+        case 3:
+            if (op == 0x0f) {
+                /* two-byte opcode.  */
+                op = ptr[insn_size];
+                insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+                switch (op >> 4) {
+                case 0:
+                    if ((op & 0xf) > 3)
+                      modrm = 0;
+                    break;
+                case 1: /* vector move or prefetch */
+                case 2: /* various moves and vector compares.  */
+                case 4: /* cmov */
+                case 5: /* vector instructions */
+                case 6:
+                case 13:
+                case 14:
+                case 15:
+                    break;
+                case 7: /* mmx */
+                    if (op & 0x77) /* emms */
+                      modrm = 0;
+                    break;
+                case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
+                    modrm = 0;
+                    break;
+                case 8: /* long conditional jump */
+                    is_condjmp = 1;
+                    immed = op_size;
+                    modrm = 0;
+                    break;
+                case 9: /* setcc */
+                    break;
+                case 10:
+                    switch (op & 0x7) {
+                    case 0: /* push fs/gs */
+                    case 1: /* pop fs/gs */
+                    case 2: /* cpuid/rsm */
+                        modrm = 0;
+                        break;
+                    case 4: /* shld/shrd immediate */
+                        immed = 1;
+                        break;
+                    default: /* Normal instructions with a ModR/M byte.  */
+                        break;
+                    }
+                    break;
+                case 11:
+                    switch (op & 0xf) {
+                    case 10: /* bt, bts, btr, btc */
+                        immed = 1;
+                        break;
+                    default:
+                        /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
+                           undefined, and movsx */
+                        break;
+                    }
+                    break;
+                case 12:
+                    if (op & 8) {
+                        /* bswap */
+                        modrm = 0;
+                    } else {
+                        switch (op & 0x7) {
+                        case 2:
+                        case 4:
+                        case 5:
+                        case 6:
+                            immed = 1;
+                            break;
+                        default:
+                            break;
+                        }
+                    }
+                    break;
+                }
+            } else if ((op & 0x07) <= 0x3) {
+                /* General arithmentic ax.  */
+            } else if ((op & 0x07) <= 0x5) {
+                /* General arithmetic ax, immediate.  */
+                if (op & 0x01)
+                    immed = op_size;
+                else
+                    immed = 1;
+                modrm = 0;
+            } else if ((op & 0x23) == 0x22) {
+                /* Segment prefix.  */
+                is_prefix = 1;
+            } else {
+                /* Segment register push/pop or DAA/AAA/DAS/AAS.  */
+                modrm = 0;
+            }
+            break;
+
+#if defined(HOST_X86_64)
+        case 4: /* rex prefix.  */
+            is_prefix = 1;
+            /* The address/operand size is actually 64-bit, but the immediate
+               values in the instruction are still 32-bit.  */
+            op_size = 4;
+            addr_size = 4;
+            if (op & 8)
+                seen_rexw = 1;
+            break;
+#else
+        case 4: /* inc/dec register.  */
+#endif
+        case 5: /* push/pop general register.  */
+            modrm = 0;
+            break;
+
+        case 6:
+            switch (op & 0x0f) {
+            case 0: /* pusha */
+            case 1: /* popa */
+                modrm = 0;
+                break;
+            case 2: /* bound */
+            case 3: /* arpl */
+                break;
+            case 4: /* FS */
+            case 5: /* GS */
+                is_prefix = 1;
+                break;
+            case 6: /* opcode size prefix.  */
+                op_size = 2;
+                is_prefix = 1;
+                break;
+            case 7: /* Address size prefix.  */
+                addr_size = 2;
+                is_prefix = 1;
+                break;
+            case 8: /* push immediate */
+                immed = op_size;
+                modrm = 0;
+                break;
+            case 10: /* push 8-bit immediate */
+                immed = 1;
+                modrm = 0;
+                break;
+            case 9: /* imul immediate */
+                immed = op_size;
+                break;
+            case 11: /* imul 8-bit immediate */
+                immed = 1;
+                break;
+            case 12: /* insb */
+            case 13: /* insw */
+            case 14: /* outsb */
+            case 15: /* outsw */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 7: /* Short conditional jump.  */
+            is_condjmp = 1;
+            immed = 1;
+            modrm = 0;
+            break;
+          
+        case 8:
+            if ((op & 0xf) <= 3) {
+                /* arithmetic immediate.  */
+                if ((op & 3) == 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+            }
+            /* else test, xchg, mov, lea or pop general.  */
+            break;
+
+        case 9:
+            /* Various single-byte opcodes with no modrm byte.  */
+            modrm = 0;
+            if (op == 10) {
+                /* Call */
+                immed = 4;
+            }
+            break;
+
+        case 10:
+            switch ((op & 0xe) >> 1) {
+            case 0: /* mov absoliute immediate.  */
+            case 1:
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = addr_size;
+                break;
+            case 4: /* test immediate.  */
+                if (op & 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+                break;
+            default: /* Various string ops.  */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 11: /* move immediate to register */
+            if (op & 8) {
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = op_size;
+            } else {
+                immed = 1;
+            }
+            modrm = 0;
+            break;
+
+          case 12:
+            switch (op & 0xf) {
+            case 0: /* shift immediate */
+            case 1:
+                immed = 1;
+                break;
+            case 2: /* ret immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 3: /* ret */
+                modrm = 0;
+                is_ret = 1;
+            case 4: /* les */
+            case 5: /* lds */
+                break;
+            case 6: /* mov immediate byte */
+                immed = 1;
+                break;
+            case 7: /* mov immediate */
+                immed = op_size;
+                break;
+            case 8: /* enter */
+                /* TODO: Is this right?  */
+                immed = 3;
+                modrm = 0;
+                break;
+            case 10: /* retf immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 13: /* int */
+                immed = 1;
+                modrm = 0;
+                break;
+            case 11: /* retf */
+            case 15: /* iret */
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            default: /* leave, int3 or into */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 13:
+            if ((op & 0xf) >= 8) {
+                /* Coprocessor escape.  For our purposes this is just a normal
+                   instruction with a ModR/M byte.  */
+            } else if ((op & 0xf) >= 4) {
+                /* AAM, AAD or XLAT */
+                modrm = 0;
+            }
+            /* else shift instruction */
+            break;
+
+        case 14:
+            switch ((op & 0xc) >> 2) {
+            case 0: /* loop or jcxz */
+                is_condjmp = 1;
+                immed = 1;
+                break;
+            case 1: /* in/out immed */
+                immed = 1;
+                break;
+            case 2: /* call or jmp */
+                switch (op & 3) {
+                case 0: /* call */
+                    immed = op_size;
+                    break;
+                case 1: /* long jump */
+                    immed = 4;
+                    is_jmp = 1;
+                    break;
+                case 2: /* far jmp */
+                    bad_opcode(name, op);
+                    break;
+                case 3: /* short jmp */
+                    immed = 1;
+                    is_jmp = 1;
+                    break;
+                }
+                break;
+            case 3: /* in/out register */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 15:
+            switch ((op & 0xe) >> 1) {
+            case 0:
+            case 1:
+                is_prefix = 1;
+                break;
+            case 2:
+            case 4:
+            case 5:
+            case 6:
+                modrm = 0;
+                /* Some privileged insns are used as markers.  */
+                switch (op) {
+                case 0xf4: /* hlt: Exit translation block.  */
+                    is_exit = 1;
+                    break;
+                case 0xfa: /* cli: Jump to label.  */
+                    is_exit = 1;
+                    immed = 4;
+                    break;
+                case 0xfb: /* sti: TB patch jump.  */
+                    /* Mark the insn for patching, but continue sscanning.  */
+                    flags[insn] |= FLAG_EXIT;
+                    immed = 4;
+                    break;
+                }
+                break;
+            case 3: /* unary grp3 */
+                if ((ptr[insn_size] & 0x38) == 0) {
+                    if (op == 0xf7)
+                        immed = op_size;
+                    else
+                        immed = 1; /* test immediate */
+                }
+                break;
+            case 7: /* inc/dec grp4/5 */
+                /* TODO: This includes indirect jumps.  We should fail if we
+                   encounter one of these. */
+                break;
+            }
+            break;
+        }
+    }
+
+    if (modrm) {
+        if (addr_size != 4)
+            error("16-bit addressing mode used in %s", name);
+
+        disp = 0;
+        modrm = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        modrm &= 0xc7;
+        switch ((modrm & 0xc0) >> 6) {
+        case 0:
+            if (modrm == 5)
+              disp = 4;
+            break;
+        case 1:
+            disp = 1;
+            break;
+        case 2:
+            disp = 4;
+            break;
+        }
+        if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
+            /* SIB byte */
+            if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
+                disp = 4;
+                is_pcrel = 1;
+            }
+            insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        }
+        insn_size = eat_bytes(name, flags, insn, insn_size, disp);
+    }
+    insn_size = eat_bytes(name, flags, insn, insn_size, immed);
+    if (is_condjmp || is_jmp) {
+        if (immed == 1) {
+            disp = (int8_t)*(ptr + insn_size - 1);
+        } else {
+            disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
+                   | (((int32_t)*(ptr + insn_size - 2)) << 16)
+                   | (((int32_t)*(ptr + insn_size - 3)) << 8)
+                   | *(ptr + insn_size - 4);
+        }
+        disp += insn_size;
+        /* Jumps to external symbols point to the address of the offset
+           before relocation.  */
+        /* ??? These are probably a tailcall.  We could fix them up by
+           replacing them with jmp to EOB + call, but it's easier to just
+           prevent the compiler generating them.  */
+        if (disp == 1)
+            error("Unconditional jump (sibcall?) in %s", name);
+        disp += insn;
+        if (disp < 0 || disp > len)
+            error("Jump outside instruction in %s", name);
+
+        if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
+            error("Overlapping instructions in %s", name);
+
+        flags[disp] |= (FLAG_INSN | FLAG_TARGET);
+        is_pcrel = 1; 
+    }
+    if (is_pcrel) {
+        /* Mark the following insn as a jump target.  This will stop
+           this instruction being moved.  */
+        flags[insn + insn_size] |= FLAG_TARGET;
+    }
+    if (is_ret)
+      flags[insn] |= FLAG_RET;
+
+    if (is_exit)
+      flags[insn] |= FLAG_EXIT;
+
+    if (!(is_jmp || is_ret || is_exit))
+      flags[insn + insn_size] |= FLAG_INSN;
+}
+
+/* Scan a function body.  Returns the position of the return sequence.
+   Sets *patch_bytes to the number of bytes that need to be copied from that
+   location.  If no patching is required (ie. the return is the last insn)
+   *patch_bytes will be set to -1.  *plen is the number of code bytes to copy.
+ */
+static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
+                         int *patch_bytes, int *exit_addrs)
+{
+    char *flags;
+    int more;
+    int insn;
+    int retpos;
+    int bytes;
+    int num_exits;
+    int len;
+    int last_insn;
+
+    len = *plen;
+    flags = malloc(len + 1);
+    memset(flags, 0, len + 1);
+    flags[0] |= FLAG_INSN;
+    more = 1;
+    while (more) {
+        more = 0;
+        for (insn = 0; insn < len; insn++) {
+            if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
+                trace_i386_insn(name, start_p, flags, insn, len);
+                more = 1;
+            }
+        }
+    }
+
+    /* Strip any unused code at the end of the function.  */
+    while (len > 0 && flags[len - 1] == 0)
+      len--;
+
+    retpos = -1;
+    num_exits = 0;
+    last_insn = 0;
+    for (insn = 0; insn < len; insn++) {
+        if (flags[insn] & FLAG_RET) {
+            /* ??? In theory it should be possible to handle multiple return
+               points.  In practice it's not worth the effort.  */
+            if (retpos != -1)
+                error("Multiple return instructions in %s", name);
+            retpos = insn;
+        }
+        if (flags[insn] & FLAG_EXIT) {
+            if (num_exits == MAX_EXITS)
+                error("Too many block exits in %s", name);
+            exit_addrs[num_exits] = insn;
+            num_exits++;
+        }
+        if (flags[insn] & FLAG_INSN)
+            last_insn = insn;
+    }
+
+    exit_addrs[num_exits] = -1;
+    if (retpos == -1) {
+        if (num_exits == 0) {
+            error ("No return instruction found in %s", name);
+        } else {
+            retpos = len;
+            last_insn = len;
+        }
+    }
+    
+    /* If the return instruction is the last instruction we can just 
+       remove it.  */
+    if (retpos == last_insn)
+        *patch_bytes = -1;
+    else
+        *patch_bytes = 0;
+
+    /* Back up over any nop instructions.  */
+    while (retpos > 0
+           && (flags[retpos] & FLAG_TARGET) == 0
+           && (flags[retpos - 1] & FLAG_INSN) != 0
+           && start_p[retpos - 1] == 0x90) {
+        retpos--;
+    }
+
+    if (*patch_bytes == -1) {
+        *plen = retpos;
+        free (flags);
+        return retpos;
+    }
+    *plen = len;
+
+    /* The ret is in the middle of the function.  Find four more bytes that
+       so the ret can be replaced by a jmp. */
+    /* ??? Use a short jump where possible. */
+    bytes = 4;
+    insn = retpos + 1;
+    /* We can clobber everything up to the next jump target.  */
+    while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
+        insn++;
+        bytes--;
+    }
+    if (bytes > 0) {
+        /* ???: Strip out nop blocks.  */
+        /* We can't do the replacement without clobbering anything important.
+           Copy preceeding instructions(s) to give us some space.  */
+        while (retpos > 0) {
+            /* If this byte is the target of a jmp we can't move it.  */
+            if (flags[retpos] & FLAG_TARGET)
+                break;
+
+            (*patch_bytes)++;
+            bytes--;
+            retpos--;
+
+            /* Break out of the loop if we have enough space and this is either 
+               the first byte of an instruction or a pad byte.  */
+            if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
+                && bytes <= 0) {
+                break;
+            }
+        }
+    }
+
+    if (bytes > 0)
+        error("Unable to replace ret with jmp in %s\n", name);
+
+    free(flags);
+    return retpos;
+}
+
+#endif
+
 #define MAX_ARGS 3
 
 /* generate op code */
@@ -1356,6 +1996,11 @@ void gen_code(const char *name, host_ulo
     uint8_t args_present[MAX_ARGS];
     const char *sym_name, *p;
     EXE_RELOC *rel;
+#if defined(HOST_I386) || defined(HOST_X86_64)
+    int patch_bytes;
+    int retpos;
+    int exit_addrs[MAX_EXITS];
+#endif
 
     /* Compute exact size excluding prologue and epilogue instructions.
      * Increment start_offset to skip epilogue instructions, then compute
@@ -1366,33 +2011,12 @@ void gen_code(const char *name, host_ulo
     p_end = p_start + size;
     start_offset = offset;
 #if defined(HOST_I386) || defined(HOST_X86_64)
-#ifdef CONFIG_FORMAT_COFF
-    {
-        uint8_t *p;
-        p = p_end - 1;
-        if (p == p_start)
-            error("empty code for %s", name);
-        while (*p != 0xc3) {
-            p--;
-            if (p <= p_start)
-                error("ret or jmp expected at the end of %s", name);
-        }
-        copy_size = p - p_start;
-    }
-#else
     {
         int len;
         len = p_end - p_start;
-        if (len == 0)
-            error("empty code for %s", name);
-        if (p_end[-1] == 0xc3) {
-            len--;
-        } else {
-            error("ret or jmp expected at the end of %s", name);
-        }
+        retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
         copy_size = len;
     }
-#endif    
 #elif defined(HOST_PPC)
     {
         uint8_t *p;
@@ -1559,6 +2183,13 @@ void gen_code(const char *name, host_ulo
     }
 
     if (gen_switch == 2) {
+#if defined(HOST_I386) || defined(HOST_X86_64)
+        if (patch_bytes != -1)
+            copy_size += patch_bytes;
+#ifdef DEBUG_OP
+        copy_size += 2;
+#endif
+#endif
         fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
     } else if (gen_switch == 1) {
 
@@ -1761,7 +2392,43 @@ void gen_code(const char *name, host_ulo
 #error unsupport object format
 #endif
                 }
+               }
+                /* Replace the marker instructions with the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
                 }
+                /* Fix up the return instruction.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) {
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile,
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile,
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes;
+                }
+#ifdef DEBUG_OP
+                fprintf(outfile,
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_X86_64)
             {
@@ -1793,6 +2460,42 @@ void gen_code(const char *name, host_ulo
                     }
                 }
                 }
+                /* Replace the marker instructions with the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
+                }
+                /* Fix up the return instruction.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) {
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile,
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile,
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes;
+                }
+#ifdef DEBUG_OP
+                fprintf(outfile,
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_PPC)
             {
--- qemu-0.7.0/exec-all.h.gcc4	2005-04-27 22:52:05.000000000 +0200
+++ qemu-0.7.0/exec-all.h	2005-06-02 21:41:51.000000000 +0200
@@ -335,14 +335,15 @@ do {\
 
 #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
 
-/* we patch the jump instruction directly */
+/* we patch the jump instruction directly.  Use sti in place of the actual
+   jmp instruction so that dyngen can patch in the correct result.  */
 #define GOTO_TB(opname, tbparam, n)\
 do {\
     asm volatile (".section .data\n"\
 		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
 		  ".long 1f\n"\
 		  ASM_PREVIOUS_SECTION \
-                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
+                  "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\
 		  "1:\n");\
 } while (0)
 
2005-11-11  Gwenole Beauchesne  <gbeauchesne@mandriva.com>

	* Check for stack clobbers in functions using GOTO_LABEL_PARAM().

--- qemu-0.7.2/dyngen.c.dyngen-check-stack-clobbers	2005-11-11 16:26:33.000000000 +0100
+++ qemu-0.7.2/dyngen.c	2005-11-11 17:30:29.000000000 +0100
@@ -1414,6 +1414,9 @@ int arm_emit_ldr_info(const char *name, 
 #define FLAG_TARGET   (1 << 3)
 /* This is a magic instruction that needs fixing up.  */
 #define FLAG_EXIT     (1 << 4)
+/* This instruction clobbers the stack pointer.  */
+/* XXX only supports push, pop, add/sub $imm,%esp  */
+#define FLAG_STACK    (1 << 5)
 #define MAX_EXITS     5
 
 static void
@@ -1454,6 +1457,7 @@ trace_i386_insn (const char *name, uint8
     int is_jmp;
     int is_exit;
     int is_pcrel;
+    int is_stack;
     int immed;
     int seen_rexw;
     int32_t disp;
@@ -1476,6 +1480,7 @@ trace_i386_insn (const char *name, uint8
     is_exit = 0;
     seen_rexw = 0;
     is_pcrel = 0;
+    is_stack = 0;
 
     while (is_prefix) {
         op = ptr[insn_size];
@@ -1522,6 +1527,7 @@ trace_i386_insn (const char *name, uint8
                     switch (op & 0x7) {
                     case 0: /* push fs/gs */
                     case 1: /* pop fs/gs */
+                        is_stack = 1;
                     case 2: /* cpuid/rsm */
                         modrm = 0;
                         break;
@@ -1594,6 +1600,7 @@ trace_i386_insn (const char *name, uint8
 #endif
         case 5: /* push/pop general register.  */
             modrm = 0;
+            is_stack = 1;
             break;
 
         case 6:
@@ -1601,6 +1608,7 @@ trace_i386_insn (const char *name, uint8
             case 0: /* pusha */
             case 1: /* popa */
                 modrm = 0;
+                is_stack = 1;
                 break;
             case 2: /* bound */
             case 3: /* arpl */
@@ -1620,10 +1628,12 @@ trace_i386_insn (const char *name, uint8
             case 8: /* push immediate */
                 immed = op_size;
                 modrm = 0;
+                is_stack = 1;
                 break;
             case 10: /* push 8-bit immediate */
                 immed = 1;
                 modrm = 0;
+                is_stack = 1;
                 break;
             case 9: /* imul immediate */
                 immed = op_size;
@@ -1653,8 +1663,22 @@ trace_i386_insn (const char *name, uint8
                     immed = op_size;
                 else
                     immed = 1;
+                if (op == 0x81 || op == 0x83) {
+                    /* add, sub */
+                    op = ptr[insn_size];
+                    switch ((op >> 3) & 7) {
+                    case 0:
+                    case 5:
+                        is_stack = (op & 7) == 4;
+                        break;
+                    }
+                }
             }
-            /* else test, xchg, mov, lea or pop general.  */
+            else if ((op & 0xf) == 0xf) {
+                /* pop general.  */
+                is_stack = 1;
+            }
+            /* else test, xchg, mov, lea.  */
             break;
 
         case 9:
@@ -1904,6 +1928,9 @@ trace_i386_insn (const char *name, uint8
     if (is_exit)
       flags[insn] |= FLAG_EXIT;
 
+    if (is_stack)
+      flags[insn] |= FLAG_STACK;
+
     if (!(is_jmp || is_ret || is_exit))
       flags[insn + insn_size] |= FLAG_INSN;
 }
@@ -1924,6 +1951,7 @@ static int trace_i386_op(const char * na
     int num_exits;
     int len;
     int last_insn;
+    int stack_clobbered;
 
     len = *plen;
     flags = malloc(len + 1);
@@ -1947,6 +1975,7 @@ static int trace_i386_op(const char * na
     retpos = -1;
     num_exits = 0;
     last_insn = 0;
+    stack_clobbered = 0;
     for (insn = 0; insn < len; insn++) {
         if (flags[insn] & FLAG_RET) {
             /* ??? In theory it should be possible to handle multiple return
@@ -1956,6 +1985,8 @@ static int trace_i386_op(const char * na
             retpos = insn;
         }
         if (flags[insn] & FLAG_EXIT) {
+            if (stack_clobbered)
+                error("Stack clobbered in %s", name);
             if (num_exits == MAX_EXITS)
                 error("Too many block exits in %s", name);
             exit_addrs[num_exits] = insn;
@@ -1963,6 +1994,8 @@ static int trace_i386_op(const char * na
         }
         if (flags[insn] & FLAG_INSN)
             last_insn = insn;
+        if (flags[insn] & FLAG_STACK)
+            stack_clobbered = 1;
     }
 
     exit_addrs[num_exits] = -1;
2005-11-11  Gwenole Beauchesne  <gbeauchesne@mandriva.com>

	* Globaaly save %ebx, %esi, %edi on entry to generated
	function. This avoids some register spills in synthetic opcodes.
	NOTE: this also easily fixes gcc4 compiled qemu-system-x86_64 on x86.

--- qemu-0.7.2/cpu-exec.c.gcc4-opts	2005-09-04 19:11:31.000000000 +0200
+++ qemu-0.7.2/cpu-exec.c	2005-11-11 17:40:47.000000000 +0100
@@ -561,6 +561,15 @@ int cpu_exec(CPUState *env1)
                               : /* no outputs */
                               : "r" (gen_func)
                               : "r1", "r2", "r3", "r8", "r9", "r10", "r12", "r14");
+#elif defined(TARGET_X86_64) && defined(__i386__)
+                asm volatile ("push %%ebx\n"
+                              "push %%esi\n"
+                              "push %%edi\n"
+                              "call *%0\n"
+                              "pop %%edi\n"
+                              "pop %%esi\n"
+                              "pop %%ebx\n"
+                              : : "r" (gen_func) : "ebx", "esi", "edi");
 #elif defined(TARGET_I386) && defined(USE_CODE_COPY)
 {
     if (!(tb->cflags & CF_CODE_COPY)) {
--- qemu-0.7.2/Makefile.target.gcc4-opts	2005-11-11 16:26:33.000000000 +0100
+++ qemu-0.7.2/Makefile.target	2005-11-11 17:59:56.000000000 +0100
@@ -65,6 +65,10 @@ OP_CFLAGS+= -falign-functions=0 -fno-gcs
 else
 OP_CFLAGS+= -malign-functions=0
 endif
+ifeq ($(TARGET_ARCH), x86_64)
+# XXX globally save %ebx, %esi, %edi on entry to generated function
+OP_CFLAGS+= -fcall-used-ebx -fcall-used-esi -fcall-used-edi
+endif
 
 ifdef TARGET_GPROF
 USE_I386_LD=y
2005-10-28  Gwenole Beauchesne  <gbeauchesne@mandriva.com>

	* Various additional hacks for GCC4.

--- qemu-0.7.2/target-i386/ops_sse.h.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
+++ qemu-0.7.2/target-i386/ops_sse.h	2005-10-28 10:09:21.000000000 +0200
@@ -34,6 +34,12 @@
 #define Q(n) XMM_Q(n)
 #define SUFFIX _xmm
 #endif
+#if defined(__i386__) && __GNUC__ >= 4
+#define RegCopy(d, s) __builtin_memcpy(&(d), &(s), sizeof(d))
+#endif
+#ifndef RegCopy
+#define RegCopy(d, s) d = s
+#endif
 
 void OPPROTO glue(op_psrlw, SUFFIX)(void)
 {
@@ -570,7 +576,7 @@ void OPPROTO glue(op_pshufw, SUFFIX) (vo
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
-    *d = r;
+    RegCopy(*d, r);
 }
 #else
 void OPPROTO op_shufps(void)
--- qemu-0.7.2/target-i386/helper.c.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
+++ qemu-0.7.2/target-i386/helper.c	2005-10-28 10:09:21.000000000 +0200
@@ -3130,8 +3130,15 @@ void helper_fxrstor(target_ulong ptr, in
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
+#if defined(__i386__) && __GNUC__ >= 4
+            env->xmm_regs[i].XMM_L(0) = ldl(addr);
+            env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+            env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+            env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+#else
             env->xmm_regs[i].XMM_Q(0) = ldq(addr);
             env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#endif
             addr += 16;
         }
     }
--- qemu-0.7.2/cpu-all.h.gcc4-hacks	2005-09-04 19:11:31.000000000 +0200
+++ qemu-0.7.2/cpu-all.h	2005-10-28 10:09:21.000000000 +0200
@@ -339,7 +339,13 @@
 
 static inline void stq_le_p(void *ptr, uint64_t v)
 {
+#if defined(__i386__) && __GNUC__ >= 4
+    const union { uint64_t v; uint32_t p[2]; } x = { .v = v };
+    ((uint32_t *)ptr)[0] = x.p[0];
+    ((uint32_t *)ptr)[1] = x.p[1];
+#else
     *(uint64_t *)ptr = v;
+#endif
 }
 
 /* float access */
--- qemu-0.7.2/softmmu_header.h.gcc4-hacks	2005-10-28 10:08:08.000000000 +0200
+++ qemu-0.7.2/softmmu_header.h	2005-10-28 10:09:21.000000000 +0200
@@ -104,7 +104,7 @@
 void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE v, int is_user);
 
 #if (DATA_SIZE <= 4) && (TARGET_LONG_BITS == 32) && defined(__i386__) && \
-    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU)
+    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU) && (__GNUC__ < 4)
 
 #define CPU_TLB_ENTRY_BITS 4
 
@@ -131,7 +131,7 @@ static inline RES_TYPE glue(glue(ld, USU
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_read[CPU_MEM_INDEX][0].address)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
     return res;
 }
 
@@ -178,13 +178,14 @@ static inline int glue(glue(lds, SUFFIX)
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_read[CPU_MEM_INDEX][0].address)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
     return res;
 }
 #endif
 
-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE val)
 {
+    RES_TYPE v = val;
     asm volatile ("movl %0, %%edx\n"
                   "movl %0, %%eax\n"
                   "shrl %3, %%edx\n"
@@ -236,16 +237,14 @@
                   "2:\n"
                   : 
                   : "r" (ptr), 
-/* NOTE: 'q' would be needed as constraint, but we could not use it
-   with T1 ! */
-                  "r" (v), 
+                  "q" (v), 
                   "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_write)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
 }
 
 /* TODO: handle 64-bit access sizes and addresses */