------------------------------------------------------------------------
r7235 | blueswir1 | 2009-04-23 13:42:30 -0500 (Thu, 23 Apr 2009) | 1 line
Changed paths:
   M /trunk/hw/xen_console.c

Fix typo (misplaced closing parenthesis in the strcmp() call), thanks to Andreas Faerber for spotting
 ------------------------------------------------------------------------

Index: hw/xen_console.c
===================================================================
--- hw/xen_console.c	(revision 7234)
+++ hw/xen_console.c	(revision 7235)
@@ -189,7 +189,7 @@
     free(dom);
 
     type = xenstore_read_str(con->console, "type");
-    if (!type || strcmp(type, "ioemu" != 0)) {
+    if (!type || strcmp(type, "ioemu") != 0) {
 	xen_be_printf(xendev, 1, "not for me (type=%s)\n", type);
 	return -1;
     }

 ------------------------------------------------------------------------
r7234 | blueswir1 | 2009-04-23 13:29:47 -0500 (Thu, 23 Apr 2009) | 1 line
Changed paths:
   M /trunk/hw/xen_console.c
   M /trunk/hw/xen_disk.c
   M /trunk/hw/xen_domainbuild.c

Use a more natural operand order in comparisons (variable first, constant second)
 ------------------------------------------------------------------------

Index: hw/xen_disk.c
===================================================================
--- hw/xen_disk.c	(revision 7233)
+++ hw/xen_disk.c	(revision 7234)
@@ -179,7 +179,7 @@
     switch (ioreq->req.operation) {
     case BLKIF_OP_READ:
 	ioreq->prot = PROT_WRITE; /* to memory */
-	if (BLKIF_OP_READ != ioreq->req.operation && blkdev->mode[0] != 'w') {
+        if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
 	    xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
 	    goto err;
 	}
@@ -513,7 +513,7 @@
 
     if (use_aio)
         blk_send_response_all(blkdev);
-    while ((rc != rp)) {
+    while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc))
             break;
Index: hw/xen_domainbuild.c
===================================================================
--- hw/xen_domainbuild.c	(revision 7233)
+++ hw/xen_domainbuild.c	(revision 7234)
@@ -137,7 +137,7 @@
     int rc;
 
     rc = xc_domain_getinfo(xen_xc, xen_domid, 1, &info);
-    if ((1 != rc) || (info.domid != xen_domid)) {
+    if ((rc != 1) || (info.domid != xen_domid)) {
         qemu_log("xen: domain %d is gone\n", xen_domid);
         goto quit;
     }
@@ -186,7 +186,7 @@
         rc = read(fd[0], &byte, 1);
         switch (rc) {
         case -1:
-            if (EINTR == errno)
+            if (errno == EINTR)
                 continue;
             qemu_log("%s: Huh? read error: %s\n", __FUNCTION__, strerror(errno));
             qemu_running = 0;
Index: hw/xen_console.c
===================================================================
--- hw/xen_console.c	(revision 7233)
+++ hw/xen_console.c	(revision 7234)
@@ -189,7 +189,7 @@
     free(dom);
 
     type = xenstore_read_str(con->console, "type");
-    if (!type || 0 != strcmp(type, "ioemu")) {
+    if (!type || strcmp(type, "ioemu" != 0)) {
 	xen_be_printf(xendev, 1, "not for me (type=%s)\n", type);
 	return -1;
     }

 ------------------------------------------------------------------------
r7233 | aliguori | 2009-04-23 08:16:56 -0500 (Thu, 23 Apr 2009) | 8 lines
Changed paths:
   M /trunk/target-i386/op_helper.c

Fix i386-linux-user build (Laurent Desnogues)

This broke due to r7230.

Signed-off-by: Laurent Desnogues 
Signed-off-by: Anthony Liguori 


 ------------------------------------------------------------------------

Index: target-i386/op_helper.c
===================================================================
--- target-i386/op_helper.c	(revision 7232)
+++ target-i386/op_helper.c	(revision 7233)
@@ -1191,6 +1191,7 @@
         EIP = next_eip;
 }
 
+#if !defined(CONFIG_USER_ONLY)
 static void handle_even_inj(int intno, int is_int, int error_code,
 		int is_hw, int rm)
 {
@@ -1209,6 +1210,7 @@
 	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
     }
 }
+#endif
 
 /*
  * Begin execution of an interruption. is_int is TRUE if coming from
@@ -1250,8 +1252,10 @@
         }
     }
     if (env->cr[0] & CR0_PE_MASK) {
+#if !defined(CONFIG_USER_ONLY)
         if (env->hflags & HF_SVMI_MASK)
             handle_even_inj(intno, is_int, error_code, is_hw, 0);
+#endif
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
@@ -1261,15 +1265,19 @@
             do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
         }
     } else {
+#if !defined(CONFIG_USER_ONLY)
         if (env->hflags & HF_SVMI_MASK)
             handle_even_inj(intno, is_int, error_code, is_hw, 1);
+#endif
         do_interrupt_real(intno, is_int, error_code, next_eip);
     }
 
+#if !defined(CONFIG_USER_ONLY)
     if (env->hflags & HF_SVMI_MASK) {
 	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
 	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
     }
+#endif
 }
 
 /* This should come from sysemu.h - if we could include it here... */

 ------------------------------------------------------------------------
r7232 | aliguori | 2009-04-22 15:20:29 -0500 (Wed, 22 Apr 2009) | 5 lines
Changed paths:
   M /trunk/monitor.c

monitor: Fix warning in do_info_numa (Jan Kiszka)

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: monitor.c
===================================================================
--- monitor.c	(revision 7231)
+++ monitor.c	(revision 7232)
@@ -1411,7 +1411,7 @@
 
 static void do_info_numa(Monitor *mon)
 {
-    int i, j;
+    int i;
     CPUState *env;
 
     monitor_printf(mon, "%d nodes\n", nb_numa_nodes);

 ------------------------------------------------------------------------
r7231 | aliguori | 2009-04-22 15:20:22 -0500 (Wed, 22 Apr 2009) | 16 lines
Changed paths:
   M /trunk/hw/mc146818rtc.c

Change RTC time drift IRQ re-injection (Gleb Natapov)

Currently IRQs are reinjected as soon as they are acknowledged to
the RTC, but Windows sometimes does the acknowledgement in a loop with
global interrupts disabled, waiting for the interrupt to be cleared, and
it does not mask the RTC vector in the PIC/APIC while doing this. In such
a situation interrupt injection always fails and the RTC interrupt is
never cleared.

Instead of reinjecting coalesced IRQs on acknowledgement, the patch below
reinjects them by accelerating the RTC clock a bit. This way the RTC
interrupt is not constantly re-raised immediately after a coalesced interrupt.

Signed-off-by: Gleb Natapov 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/mc146818rtc.c
===================================================================
--- hw/mc146818rtc.c	(revision 7230)
+++ hw/mc146818rtc.c	(revision 7231)
@@ -73,6 +73,7 @@
 #ifdef TARGET_I386
     uint32_t irq_coalesced;
     uint32_t period;
+    QEMUTimer *coalesced_timer;
 #endif
     QEMUTimer *second_timer;
     QEMUTimer *second_timer2;
@@ -93,6 +94,37 @@
 static void rtc_set_time(RTCState *s);
 static void rtc_copy_date(RTCState *s);
 
+#ifdef TARGET_I386
+static void rtc_coalesced_timer_update(RTCState *s)
+{
+    if (s->irq_coalesced == 0) {
+        qemu_del_timer(s->coalesced_timer);
+    } else {
+        /* divide each RTC interval to 2 - 8 smaller intervals */
+        int c = MIN(s->irq_coalesced, 7) + 1; 
+        int64_t next_clock = qemu_get_clock(vm_clock) +
+		muldiv64(s->period / c, ticks_per_sec, 32768);
+        qemu_mod_timer(s->coalesced_timer, next_clock);
+    }
+}
+
+static void rtc_coalesced_timer(void *opaque)
+{
+    RTCState *s = opaque;
+
+    if (s->irq_coalesced != 0) {
+        apic_reset_irq_delivered();
+        s->cmos_data[RTC_REG_C] |= 0xc0;
+        rtc_irq_raise(s->irq);
+        if (apic_get_irq_delivered()) {
+            s->irq_coalesced--;
+        }
+    }
+
+    rtc_coalesced_timer_update(s);
+}
+#endif
+
 static void rtc_timer_update(RTCState *s, int64_t current_time)
 {
     int period_code, period;
@@ -138,14 +170,18 @@
     RTCState *s = opaque;
 
     rtc_timer_update(s, s->next_periodic_time);
+    if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
+        s->cmos_data[RTC_REG_C] |= 0xc0;
 #ifdef TARGET_I386
-    if ((s->cmos_data[RTC_REG_C] & 0xc0) && rtc_td_hack) {
-        s->irq_coalesced++;
-        return;
-    }
+        if(rtc_td_hack) {
+            apic_reset_irq_delivered();
+            rtc_irq_raise(s->irq);
+            if (!apic_get_irq_delivered()) {
+                s->irq_coalesced++;
+                rtc_coalesced_timer_update(s);
+            }
+        } else
 #endif
-    if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
-        s->cmos_data[RTC_REG_C] |= 0xc0;
         rtc_irq_raise(s->irq);
     }
     if (s->cmos_data[RTC_REG_B] & REG_B_SQWE) {
@@ -415,15 +451,6 @@
         case RTC_REG_C:
             ret = s->cmos_data[s->cmos_index];
             qemu_irq_lower(s->irq);
-#ifdef TARGET_I386
-            if(s->irq_coalesced) {
-                apic_reset_irq_delivered();
-                qemu_irq_raise(s->irq);
-                if (apic_get_irq_delivered())
-                    s->irq_coalesced--;
-                break;
-            }
-#endif
             s->cmos_data[RTC_REG_C] = 0x00;
             break;
         default:
@@ -536,6 +563,7 @@
 
     s->irq_coalesced = qemu_get_be32(f);
     s->period = qemu_get_be32(f);
+    rtc_coalesced_timer_update(s);
     return 0;
 }
 #endif
@@ -558,6 +586,10 @@
 
     s->periodic_timer = qemu_new_timer(vm_clock,
                                        rtc_periodic_timer, s);
+#ifdef TARGET_I386
+    if (rtc_td_hack)
+        s->coalesced_timer = qemu_new_timer(vm_clock, rtc_coalesced_timer, s);
+#endif
     s->second_timer = qemu_new_timer(vm_clock,
                                      rtc_update_second, s);
     s->second_timer2 = qemu_new_timer(vm_clock,

 ------------------------------------------------------------------------
r7230 | aliguori | 2009-04-22 15:20:07 -0500 (Wed, 22 Apr 2009) | 8 lines
Changed paths:
   M /trunk/target-i386/op_helper.c

put valid data into exit_int_info if needed (Gleb Natapov)

If a fault happens during event delivery, exit_int_info should contain
valid info about the event on VM exit.

Signed-off-by: Gleb Natapov 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: target-i386/op_helper.c
===================================================================
--- target-i386/op_helper.c	(revision 7229)
+++ target-i386/op_helper.c	(revision 7230)
@@ -595,6 +595,21 @@
         return 0xffff;
 }
 
+static int exeption_has_error_code(int intno)
+{
+        switch(intno) {
+        case 8:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 17:
+            return 1;
+        }
+	return 0;
+}
+
 #ifdef TARGET_X86_64
 #define SET_ESP(val, sp_mask)\
 do {\
@@ -650,19 +665,8 @@
     uint32_t old_eip, sp_mask;
 
     has_error_code = 0;
-    if (!is_int && !is_hw) {
-        switch(intno) {
-        case 8:
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 17:
-            has_error_code = 1;
-            break;
-        }
-    }
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
     if (is_int)
         old_eip = next_eip;
     else
@@ -886,19 +890,8 @@
     target_ulong old_eip, esp, offset;
 
     has_error_code = 0;
-    if (!is_int && !is_hw) {
-        switch(intno) {
-        case 8:
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 17:
-            has_error_code = 1;
-            break;
-        }
-    }
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
     if (is_int)
         old_eip = next_eip;
     else
@@ -1198,6 +1191,25 @@
         EIP = next_eip;
 }
 
+static void handle_even_inj(int intno, int is_int, int error_code,
+		int is_hw, int rm)
+{
+    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    if (!(event_inj & SVM_EVTINJ_VALID)) {
+	    int type;
+	    if (is_int)
+		    type = SVM_EVTINJ_TYPE_SOFT;
+	    else
+		    type = SVM_EVTINJ_TYPE_EXEPT;
+	    event_inj = intno | type | SVM_EVTINJ_VALID;
+	    if (!rm && exeption_has_error_code(intno)) {
+		    event_inj |= SVM_EVTINJ_VALID_ERR;
+		    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
+	    }
+	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
+    }
+}
+
 /*
  * Begin execution of an interruption. is_int is TRUE if coming from
  * the int instruction. next_eip is the EIP value AFTER the interrupt
@@ -1238,6 +1250,8 @@
         }
     }
     if (env->cr[0] & CR0_PE_MASK) {
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 0);
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
@@ -1247,8 +1261,15 @@
             do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
         }
     } else {
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 1);
         do_interrupt_real(intno, is_int, error_code, next_eip);
     }
+
+    if (env->hflags & HF_SVMI_MASK) {
+	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+    }
 }
 
 /* This should come from sysemu.h - if we could include it here... */
@@ -4994,7 +5015,6 @@
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
         uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
         /* FIXME: need to implement valid_err */
@@ -5332,6 +5352,11 @@
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
 
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
+
     env->hflags2 &= ~HF2_GIF_MASK;
     /* FIXME: Resets the current ASID register to zero (host ASID). */
 

 ------------------------------------------------------------------------
r7229 | aliguori | 2009-04-22 15:20:00 -0500 (Wed, 22 Apr 2009) | 19 lines
Changed paths:
   M /trunk/block-qcow.c
   M /trunk/block-qcow2.c
   M /trunk/block-raw-posix.c
   M /trunk/block.c
   M /trunk/block_int.h
   M /trunk/hw/ide.c

implement qemu_blockalign (Stefano Stabellini)

This patch adds a buffer_alignment field to BlockDriverState and
implements a qemu_blockalign function that uses that field to allocate a
memory-aligned buffer to be used by the block driver.
buffer_alignment is initialized to 512, but each block driver can set
a different value (at the moment none of them does).
This patch modifies ide.c, block-qcow.c, block-qcow2.c and block.c to
use qemu_blockalign instead of qemu_memalign.
There is only one place left that still uses qemu_memalign to allocate
buffers used by block drivers, namely posix-aio-compat:handle_aiocb_rw,
because it is not possible to get the BlockDriverState from that
function. However, I think it is not important because posix-aio-compat
already deals with driver-specific code, so it is supposed to know its
own needs.

Signed-off-by: Stefano Stabellini 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block_int.h
===================================================================
--- block_int.h	(revision 7228)
+++ block_int.h	(revision 7229)
@@ -145,6 +145,9 @@
     /* Whether the disk can expand beyond total_sectors */
     int growable;
 
+    /* the memory alignment required for the buffers handled by this driver */
+    int buffer_alignment;
+
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
     int cyls, heads, secs, translation;
@@ -173,6 +176,8 @@
                         BlockDriverCompletionFunc *cb, void *opaque);
 void qemu_aio_release(void *p);
 
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+
 extern BlockDriverState *bdrv_first;
 
 #endif /* BLOCK_INT_H */
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c	(revision 7228)
+++ block-raw-posix.c	(revision 7229)
@@ -165,7 +165,7 @@
     s->fd = fd;
     s->aligned_buf = NULL;
     if ((flags & BDRV_O_NOCACHE)) {
-        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
+        s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
         if (s->aligned_buf == NULL) {
             ret = -errno;
             close(fd);
Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7228)
+++ block-qcow2.c	(revision 7229)
@@ -1412,7 +1412,7 @@
     acb->sector_num = sector_num;
     acb->qiov = qiov;
     if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
         if (is_write)
             qemu_iovec_to_buffer(qiov, acb->buf);
     } else {
Index: block.c
===================================================================
--- block.c	(revision 7228)
+++ block.c	(revision 7229)
@@ -362,6 +362,8 @@
     bs->is_temporary = 0;
     bs->encrypted = 0;
     bs->valid_key = 0;
+    /* buffer_alignment defaulted to 512, drivers can change this value */
+    bs->buffer_alignment = 512;
 
     if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
@@ -1390,7 +1392,7 @@
     acb = qemu_aio_get(bs, cb, opaque);
     acb->is_write = is_write;
     acb->qiov = qiov;
-    acb->bounce = qemu_memalign(512, qiov->size);
+    acb->bounce = qemu_blockalign(bs, qiov->size);
 
     if (!acb->bh)
         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
@@ -1640,3 +1642,8 @@
         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
     return NULL;
 }
+
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
+}
Index: block-qcow.c
===================================================================
--- block-qcow.c	(revision 7228)
+++ block-qcow.c	(revision 7229)
@@ -641,7 +641,7 @@
     acb->sector_num = sector_num;
     acb->qiov = qiov;
     if (qiov->niov > 1)
-        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
     else
         acb->buf = (uint8_t *)qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
@@ -736,7 +736,7 @@
     acb->sector_num = sector_num;
     acb->qiov = qiov;
     if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
         qemu_iovec_to_buffer(qiov, acb->buf);
     } else {
         acb->buf = (uint8_t *)qiov->iov->iov_base;
Index: hw/ide.c
===================================================================
--- hw/ide.c	(revision 7228)
+++ hw/ide.c	(revision 7229)
@@ -2788,11 +2788,11 @@
 
     for(i = 0; i < 2; i++) {
         s = ide_state + i;
-        s->io_buffer = qemu_memalign(512, IDE_DMA_BUF_SECTORS*512 + 4);
         if (i == 0)
             s->bs = hd0;
         else
             s->bs = hd1;
+        s->io_buffer = qemu_blockalign(s->bs, IDE_DMA_BUF_SECTORS*512 + 4);
         if (s->bs) {
             bdrv_get_geometry(s->bs, &nb_sectors);
             bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);

 ------------------------------------------------------------------------
r7228 | aliguori | 2009-04-22 10:19:53 -0500 (Wed, 22 Apr 2009) | 6 lines
Changed paths:
   M /trunk/hw/xen_machine_pv.c
   M /trunk/qemu-options.hx
   M /trunk/sysemu.h
   M /trunk/vl.c

xen: add -vga xenfb option, configure xenfb (Gerd Hoffmann)


Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7227)
+++ vl.c	(revision 7228)
@@ -216,6 +216,7 @@
 int cirrus_vga_enabled = 1;
 int std_vga_enabled = 0;
 int vmsvga_enabled = 0;
+int xenfb_enabled = 0;
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
 int graphic_height = 768;
@@ -4225,12 +4226,15 @@
     cirrus_vga_enabled = 0;
     std_vga_enabled = 0;
     vmsvga_enabled = 0;
+    xenfb_enabled = 0;
     if (strstart(p, "std", &opts)) {
         std_vga_enabled = 1;
     } else if (strstart(p, "cirrus", &opts)) {
         cirrus_vga_enabled = 1;
     } else if (strstart(p, "vmware", &opts)) {
         vmsvga_enabled = 1;
+    } else if (strstart(p, "xenfb", &opts)) {
+        xenfb_enabled = 1;
     } else if (!strstart(p, "none", &opts)) {
     invalid_vga:
         fprintf(stderr, "Unknown vga type: %s\n", p);
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7227)
+++ qemu-options.hx	(revision 7228)
@@ -465,7 +465,7 @@
 ETEXI
 
 DEF("vga", HAS_ARG, QEMU_OPTION_vga,
-    "-vga [std|cirrus|vmware|none]\n"
+    "-vga [std|cirrus|vmware|xenfb|none]\n"
     "                select video card type\n")
 STEXI
 @item -vga @var{type}
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7227)
+++ sysemu.h	(revision 7228)
@@ -88,6 +88,7 @@
 extern int cirrus_vga_enabled;
 extern int std_vga_enabled;
 extern int vmsvga_enabled;
+extern int xenfb_enabled;
 extern int graphic_width;
 extern int graphic_height;
 extern int graphic_depth;
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7227)
+++ hw/xen_machine_pv.c	(revision 7228)
@@ -82,6 +82,12 @@
     xen_be_register("qdisk", &xen_blkdev_ops);
     xen_be_register("qnic", &xen_netdev_ops);
 
+    /* configure framebuffer */
+    if (xenfb_enabled) {
+        xen_config_dev_vfb(0, "vnc");
+        xen_config_dev_vkbd(0);
+    }
+
     /* configure disks */
     for (i = 0; i < 16; i++) {
         index = drive_get_index(IF_XEN, 0, i);

 ------------------------------------------------------------------------
r7227 | aliguori | 2009-04-22 10:19:48 -0500 (Wed, 22 Apr 2009) | 5 lines
Changed paths:
   M /trunk/vl.c

simplify vga selection (Gerd Hoffmann)

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7226)
+++ vl.c	(revision 7227)
@@ -4222,23 +4222,16 @@
 {
     const char *opts;
 
+    cirrus_vga_enabled = 0;
+    std_vga_enabled = 0;
+    vmsvga_enabled = 0;
     if (strstart(p, "std", &opts)) {
         std_vga_enabled = 1;
-        cirrus_vga_enabled = 0;
-        vmsvga_enabled = 0;
     } else if (strstart(p, "cirrus", &opts)) {
         cirrus_vga_enabled = 1;
-        std_vga_enabled = 0;
-        vmsvga_enabled = 0;
     } else if (strstart(p, "vmware", &opts)) {
-        cirrus_vga_enabled = 0;
-        std_vga_enabled = 0;
         vmsvga_enabled = 1;
-    } else if (strstart(p, "none", &opts)) {
-        cirrus_vga_enabled = 0;
-        std_vga_enabled = 0;
-        vmsvga_enabled = 0;
-    } else {
+    } else if (!strstart(p, "none", &opts)) {
     invalid_vga:
         fprintf(stderr, "Unknown vga type: %s\n", p);
         exit(1);

 ------------------------------------------------------------------------
r7226 | aliguori | 2009-04-22 10:19:44 -0500 (Wed, 22 Apr 2009) | 9 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/configure
   M /trunk/hw/xen_backend.h
   M /trunk/hw/xen_devconfig.c
   A /trunk/hw/xen_domainbuild.c
   A /trunk/hw/xen_domainbuild.h
   M /trunk/hw/xen_machine_pv.c

xen: pv domain builder. (Gerd Hoffmann)

This adds domain building support for paravirtual domains to qemu.
This allows booting xen guests directly with qemu, without Xend
and the management stack.

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7225)
+++ Makefile.target	(revision 7226)
@@ -561,7 +561,7 @@
 endif
 
 # xen backend driver support
-XEN_OBJS := xen_machine_pv.o xen_backend.o xen_devconfig.o
+XEN_OBJS := xen_machine_pv.o xen_backend.o xen_devconfig.o xen_domainbuild.o
 XEN_OBJS += xen_console.o xenfb.o xen_disk.o xen_nic.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
Index: configure
===================================================================
--- configure	(revision 7225)
+++ configure	(revision 7226)
@@ -1634,7 +1634,7 @@
   echo "#define CONFIG_BLUEZ 1" >> $config_h
 fi
 if test "$xen" = "yes" ; then
-  echo "XEN_LIBS=-lxenstore -lxenctrl" >> $config_mak
+  echo "XEN_LIBS=-lxenstore -lxenctrl -lxenguest" >> $config_mak
 fi
 if test "$aio" = "yes" ; then
   echo "#define CONFIG_AIO 1" >> $config_h
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7225)
+++ hw/xen_machine_pv.c	(revision 7226)
@@ -27,6 +27,7 @@
 #include "sysemu.h"
 #include "boards.h"
 #include "xen_backend.h"
+#include "xen_domainbuild.h"
 
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_EMULATE;
@@ -57,6 +58,24 @@
         fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
         exit(1);
     }
+
+    switch (xen_mode) {
+    case XEN_ATTACH:
+        /* nothing to do, xend handles everything */
+        break;
+    case XEN_CREATE:
+        if (xen_domain_build_pv(kernel_filename, initrd_filename,
+                                kernel_cmdline) < 0) {
+            fprintf(stderr, "xen pv domain creation failed\n");
+            exit(1);
+        }
+        break;
+    case XEN_EMULATE:
+        fprintf(stderr, "xen emulation not implemented (yet)\n");
+        exit(1);
+        break;
+    }
+
     xen_be_register("console", &xen_console_ops);
     xen_be_register("vkbd", &xen_kbdmouse_ops);
     xen_be_register("vfb", &xen_framebuffer_ops);
Index: hw/xen_domainbuild.c
===================================================================
--- hw/xen_domainbuild.c	(revision 0)
+++ hw/xen_domainbuild.c	(revision 7226)
@@ -0,0 +1,294 @@
+#include 
+#include "xen_backend.h"
+#include "xen_domainbuild.h"
+#include "sysemu.h"
+#include "qemu-timer.h"
+
+#include 
+
+static int xenstore_domain_mkdir(char *path)
+{   /* Create xenstore dir 'path' plus its "device", "control" and "error" children; returns 0, or -1 after logging the first failing xenstore call. */
+    struct xs_permissions perms_ro[] = {{
+            .id    = 0, /* set owner: dom0 */
+        },{
+            .id    = xen_domid,
+            .perms = XS_PERM_READ, /* domain xen_domid gets read access only */
+        }};
+    struct xs_permissions perms_rw[] = {{
+            .id    = 0, /* set owner: dom0 */
+        },{
+            .id    = xen_domid,
+            .perms = XS_PERM_READ | XS_PERM_WRITE, /* domain xen_domid may read and write */
+        }};
+    const char *writable[] = { "device", "control", "error", NULL }; /* NULL-terminated list of read-write children */
+    char subpath[256];
+    int i;
+
+    if (!xs_mkdir(xenstore, 0, path)) {
+        fprintf(stderr, "%s: xs_mkdir %s: failed\n", __FUNCTION__, path);
+	return -1;
+    }
+    if (!xs_set_permissions(xenstore, 0, path, perms_ro, 2)) { /* the directory itself uses the read-only set */
+        fprintf(stderr, "%s: xs_set_permissions failed\n", __FUNCTION__);
+	return -1;
+    }
+
+    for (i = 0; writable[i]; i++) {
+        snprintf(subpath, sizeof(subpath), "%s/%s", path, writable[i]); /* NOTE(review): truncation of long paths is not detected */
+        if (!xs_mkdir(xenstore, 0, subpath)) {
+            fprintf(stderr, "%s: xs_mkdir %s: failed\n", __FUNCTION__, subpath);
+            return -1;
+        }
+        if (!xs_set_permissions(xenstore, 0, subpath, perms_rw, 2)) {
+            fprintf(stderr, "%s: xs_set_permissions failed\n", __FUNCTION__);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+int xenstore_domain_init1(const char *kernel, const char *ramdisk,
+                          const char *cmdline)
+{   /* Write the static xenstore entries (image, name, memory, cpus) for domain xen_domid; returns 0, or -1 if the domain path cannot be obtained. */
+    char *dom, uuid_string[42], vm[256], path[256];
+    int i;
+
+    snprintf(uuid_string, sizeof(uuid_string), UUID_FMT,
+             qemu_uuid[0], qemu_uuid[1], qemu_uuid[2], qemu_uuid[3],
+             qemu_uuid[4], qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
+             qemu_uuid[8], qemu_uuid[9], qemu_uuid[10], qemu_uuid[11],
+             qemu_uuid[12], qemu_uuid[13], qemu_uuid[14], qemu_uuid[15]);
+    dom = xs_get_domain_path(xenstore, xen_domid);
+    if (!dom)
+        return -1; /* without a domain path every dom-relative write below would go to a bogus node */
+    snprintf(vm,  sizeof(vm),  "/vm/%s", uuid_string);
+    xenstore_domain_mkdir(dom); /* still best effort: failures are logged by the helper */
+    xenstore_write_str(vm, "image/ostype",  "linux");
+    if (kernel)
+        xenstore_write_str(vm, "image/kernel",  kernel);
+    if (ramdisk)
+        xenstore_write_str(vm, "image/ramdisk", ramdisk);
+    if (cmdline)
+        xenstore_write_str(vm, "image/cmdline", cmdline);
+
+    /* name + id */
+    xenstore_write_str(vm,  "name",   qemu_name ? qemu_name : "no-name");
+    xenstore_write_str(vm,  "uuid",   uuid_string);
+    xenstore_write_str(dom, "name",   qemu_name ? qemu_name : "no-name");
+    xenstore_write_int(dom, "domid",  xen_domid);
+    xenstore_write_str(dom, "vm",     vm);
+
+    /* memory */
+    xenstore_write_int(dom, "memory/target", ram_size >> 10);  // kB
+    xenstore_write_int(vm, "memory",         ram_size >> 20);  // MB
+    xenstore_write_int(vm, "maxmem",         ram_size >> 20);  // MB
+
+    /* cpus */
+    for (i = 0; i < smp_cpus; i++) {
+        snprintf(path, sizeof(path), "cpu/%d/availability", i);
+        xenstore_write_str(dom, path, "online");
+    }
+    xenstore_write_int(vm, "vcpu_avail",  smp_cpus);
+    xenstore_write_int(vm, "vcpus",       smp_cpus);
+
+    /* vnc password */
+    xenstore_write_str(vm, "vncpassword", "" /* FIXME */);
+
+    free(dom);
+    return 0;
+}
+
+int xenstore_domain_init2(int xenstore_port, int xenstore_mfn,
+                          int console_port, int console_mfn)
+{   /* Introduce domain xen_domid to xenstore and publish its store/console ring refs and ports; returns 0, or -1 if the domain path cannot be obtained. */
+    char *dom = xs_get_domain_path(xenstore, xen_domid);
+
+    if (!dom)
+        return -1; /* nothing sensible can be written without the domain path */
+    /* signal new domain */
+    xs_introduce_domain(xenstore,
+                        xen_domid,
+                        xenstore_mfn,
+                        xenstore_port);
+
+    /* xenstore */
+    xenstore_write_int(dom, "store/ring-ref",   xenstore_mfn);
+    xenstore_write_int(dom, "store/port",       xenstore_port);
+
+    /* console */
+    xenstore_write_str(dom, "console/type",     "ioemu");
+    xenstore_write_int(dom, "console/limit",    128 * 1024);
+    xenstore_write_int(dom, "console/ring-ref", console_mfn);
+    xenstore_write_int(dom, "console/port",     console_port);
+    xen_config_dev_console(0); /* create the xenbus entries for console device 0 */
+
+    free(dom);
+    return 0;
+}
+
+/* ------------------------------------------------------------- */
+
+static QEMUTimer *xen_poll;
+
+/* check domain state once per second */
+static void xen_domain_poll(void *opaque)
+{   /* Timer callback (opaque is unused): re-arms itself while the domain is alive, otherwise requests a qemu shutdown. */
+    struct xc_dominfo info;
+    int rc;
+
+    rc = xc_domain_getinfo(xen_xc, xen_domid, 1, &info); /* ask for at most one entry, starting at xen_domid */
+    if ((1 != rc) || (info.domid != xen_domid)) { /* no entry, or the entry returned belongs to another domain */
+        qemu_log("xen: domain %d is gone\n", xen_domid);
+        goto quit;
+    }
+    if (info.dying) {
+        qemu_log("xen: domain %d is dying (%s%s)\n", xen_domid,
+                 info.crashed  ? "crashed"  : "",
+                 info.shutdown ? "shutdown" : ""); /* both flags set prints "crashedshutdown" with no separator */
+        goto quit;
+    }
+
+    qemu_mod_timer(xen_poll, qemu_get_clock(rt_clock) + 1000); /* presumably 1000 ms, matching "once per second" above -- confirm rt_clock units */
+    return;
+
+quit:
+    qemu_system_shutdown_request();
+    return;
+}
+
+static void xen_domain_watcher(void)
+{   /* Fork a child that blocks on a pipe to the parent and destroys domain xen_domid once that pipe reports EOF or an error; the parent returns right away. */
+    int qemu_running = 1;
+    int fd[2], i, n, rc;
+    char byte;
+
+    /* no pipe -> the child's read() would fail at once and kill the domain */
+    if (pipe(fd) != 0 || fork() != 0)
+        return; /* pipe failed, fork failed, or we are the parent */
+
+    /* close all file handles, except stdin/out/err,
+     * our watch pipe and the xen interface handle */
+    n = getdtablesize();
+    for (i = 3; i < n; i++) {
+        if (i == fd[0])
+            continue;
+        if (i == xen_xc)
+            continue;
+        close(i); /* this also drops the child's copy of the write end fd[1] */
+    }
+
+    /* ignore term signals */
+    signal(SIGINT,  SIG_IGN);
+    signal(SIGTERM, SIG_IGN);
+
+    /* wait for qemu exiting */
+    while (qemu_running) {
+        rc = read(fd[0], &byte, 1);
+        switch (rc) {
+        case -1:
+            if (EINTR == errno)
+                continue; /* interrupted by a signal: retry the read */
+            qemu_log("%s: Huh? read error: %s\n", __FUNCTION__, strerror(errno));
+            qemu_running = 0;
+            break;
+        case 0:
+            /* EOF -> qemu exited */
+            qemu_running = 0;
+            break;
+        default:
+            qemu_log("%s: Huh? data on the watch pipe?\n", __FUNCTION__);
+            break;
+        }
+    }
+
+    /* cleanup */
+    qemu_log("%s: destroy domain %d\n", __FUNCTION__, xen_domid);
+    xc_domain_destroy(xen_xc, xen_domid);
+    _exit(0); /* leave without running the atexit handlers inherited from the parent */
+}
+
+/* normal cleanup */
+static void xen_domain_cleanup(void)
+{   /* Remove the xenstore subtree of domain xen_domid and release the domain from xenstore; registered with atexit() by xen_domain_build_pv. */
+    char *dom;
+
+    dom = xs_get_domain_path(xenstore, xen_domid);
+    if (dom) { /* skip the removal when the path lookup failed */
+        xs_rm(xenstore, 0, dom);
+        free(dom);
+    }
+    xs_release_domain(xenstore, xen_domid);
+}
+
+int xen_domain_build_pv(const char *kernel, const char *ramdisk,
+                        const char *cmdline)
+{   /* Create a domain, fill in xenstore, build the kernel image into it, unpause it and start the liveness poll; returns 0 on success, -1 on the first checked failure. */
+    uint32_t ssidref = 0;
+    uint32_t flags = 0;
+    xen_domain_handle_t uuid;
+    unsigned int xenstore_port = 0, console_port = 0;
+    unsigned long xenstore_mfn = 0, console_mfn = 0;
+    int rc;
+
+    memcpy(uuid, qemu_uuid, sizeof(uuid)); /* the domain handle is taken from qemu_uuid */
+    rc = xc_domain_create(xen_xc, ssidref, uuid, flags, &xen_domid); /* stores the new id in the global xen_domid */
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_domain_create() failed\n");
+        goto err;
+    }
+    qemu_log("xen: created domain %d\n", xen_domid);
+    atexit(xen_domain_cleanup); /* remove the xenstore entries on normal exit */
+    xen_domain_watcher(); /* forks the child that destroys the domain once qemu is gone */
+
+    xenstore_domain_init1(kernel, ramdisk, cmdline); /* NOTE(review): return value is ignored */
+
+    rc = xc_domain_max_vcpus(xen_xc, xen_domid, smp_cpus);
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_domain_max_vcpus() failed\n");
+        goto err;
+    }
+
+#if 0
+    rc = xc_domain_setcpuweight(xen_xc, xen_domid, 256);
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_domain_setcpuweight() failed\n");
+        goto err;
+    }
+#endif
+
+    rc = xc_domain_setmaxmem(xen_xc, xen_domid, ram_size >> 10); /* same kB shift as "memory/target" in xenstore_domain_init1 */
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_domain_setmaxmem() failed\n");
+        goto err;
+    }
+
+    xenstore_port = xc_evtchn_alloc_unbound(xen_xc, xen_domid, 0); /* NOTE(review): result is not checked for failure */
+    console_port = xc_evtchn_alloc_unbound(xen_xc, xen_domid, 0); /* NOTE(review): result is not checked for failure */
+
+    rc = xc_linux_build(xen_xc, xen_domid, ram_size >> 20,
+                        kernel, ramdisk, cmdline,
+                        0, flags,
+                        xenstore_port, &xenstore_mfn,
+                        console_port, &console_mfn);
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_linux_build() failed\n");
+        goto err;
+    }
+
+    xenstore_domain_init2(xenstore_port, xenstore_mfn,
+                          console_port, console_mfn); /* NOTE(review): unsigned long mfns are passed to int parameters; return value ignored */
+
+    qemu_log("xen: unpausing domain %d\n", xen_domid);
+    rc = xc_domain_unpause(xen_xc, xen_domid);
+    if (rc < 0) {
+        fprintf(stderr, "xen: xc_domain_unpause() failed\n");
+        goto err;
+    }
+
+    xen_poll = qemu_new_timer(rt_clock, xen_domain_poll, NULL);
+    qemu_mod_timer(xen_poll, qemu_get_clock(rt_clock) + 1000); /* first liveness check; xen_domain_poll re-arms itself */
+    return 0;
+
+err:
+    return -1; /* no rollback here; a domain that was already created is not destroyed by this function */
+}
Index: hw/xen_domainbuild.h
===================================================================
--- hw/xen_domainbuild.h	(revision 0)
+++ hw/xen_domainbuild.h	(revision 7226)
@@ -0,0 +1,13 @@
+#ifndef QEMU_HW_XEN_DOMAINBUILD_H
+#define QEMU_HW_XEN_DOMAINBUILD_H 1
+
+#include "xen_common.h"
+
+int xenstore_domain_init1(const char *kernel, const char *ramdisk,
+                          const char *cmdline);
+int xenstore_domain_init2(int xenstore_port, int xenstore_mfn,
+                          int console_port, int console_mfn);
+int xen_domain_build_pv(const char *kernel, const char *ramdisk,
+                        const char *cmdline);
+
+#endif /* QEMU_HW_XEN_DOMAINBUILD_H */
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7225)
+++ hw/xen_backend.h	(revision 7226)
@@ -100,5 +100,8 @@
 void xen_config_cleanup(void);
 int xen_config_dev_blk(DriveInfo *disk);
 int xen_config_dev_nic(NICInfo *nic);
+int xen_config_dev_vfb(int vdev, const char *type);
+int xen_config_dev_vkbd(int vdev);
+int xen_config_dev_console(int vdev);
 
 #endif /* QEMU_HW_XEN_BACKEND_H */
Index: hw/xen_devconfig.c
===================================================================
--- hw/xen_devconfig.c	(revision 7225)
+++ hw/xen_devconfig.c	(revision 7226)
@@ -140,3 +140,32 @@
     /* common stuff */
     return xen_config_dev_all(fe, be);
 }
+
+int xen_config_dev_vfb(int vdev, const char *type)
+{   /* Create the xenstore frontend/backend entries for "vfb" device number vdev; returns the result of xen_config_dev_all(). */
+    char fe[256], be[256];
+
+    xen_config_dev_dirs("vfb", "vfb", vdev, fe, be, sizeof(fe)); /* fills fe/be with the two directory paths */
+
+    /* backend */
+    xenstore_write_str(be, "type",  type);
+
+    /* common stuff */
+    return xen_config_dev_all(fe, be);
+}
+
+int xen_config_dev_vkbd(int vdev)
+{   /* Create the xenstore frontend/backend entries for "vkbd" device number vdev; returns the result of xen_config_dev_all(). */
+    char fe[256], be[256];
+
+    xen_config_dev_dirs("vkbd", "vkbd", vdev, fe, be, sizeof(fe)); /* fills fe/be with the two directory paths */
+    return xen_config_dev_all(fe, be);
+}
+
+int xen_config_dev_console(int vdev)
+{   /* Create the xenstore frontend/backend entries for "console" device number vdev; returns the result of xen_config_dev_all(). */
+    char fe[256], be[256];
+
+    xen_config_dev_dirs("console", "console", vdev, fe, be, sizeof(fe)); /* fills fe/be with the two directory paths */
+    return xen_config_dev_all(fe, be);
+}

 ------------------------------------------------------------------------
r7225 | aliguori | 2009-04-22 10:19:39 -0500 (Wed, 22 Apr 2009) | 13 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/xen_backend.c
   M /trunk/hw/xen_backend.h
   A /trunk/hw/xen_devconfig.c
   M /trunk/hw/xen_machine_pv.c

xen: blk & nic configuration via cmd line. (Gerd Hoffmann)

This patch makes qemu create backend and frontend device entries in
xenstore for devices configured on the command line.  It will use
qdisk and qnic backend names, so the qemu internal backends will
be used.

Disks can be created using -drive if=xen,file=...
Nics can be created using -net nic,macaddr=...

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7224)
+++ Makefile.target	(revision 7225)
@@ -561,7 +561,7 @@
 endif
 
 # xen backend driver support
-XEN_OBJS := xen_machine_pv.o xen_backend.o
+XEN_OBJS := xen_machine_pv.o xen_backend.o xen_devconfig.o
 XEN_OBJS += xen_console.o xenfb.o xen_disk.o xen_nic.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7224)
+++ hw/xen_machine_pv.c	(revision 7225)
@@ -39,6 +39,7 @@
 			const char *cpu_model)
 {
     CPUState *env;
+    int i, index;
 
     /* Initialize a dummy CPU */
     if (cpu_model == NULL) {
@@ -62,6 +63,24 @@
     xen_be_register("qdisk", &xen_blkdev_ops);
     xen_be_register("qnic", &xen_netdev_ops);
 
+    /* configure disks */
+    for (i = 0; i < 16; i++) {
+        index = drive_get_index(IF_XEN, 0, i);
+        if (index == -1)
+            continue;
+        xen_config_dev_blk(drives_table + index);
+    }
+
+    /* configure nics */
+    for (i = 0; i < nb_nics; i++) {
+        if (!nd_table[i].model || 0 != strcmp(nd_table[i].model, "xen"))
+            continue;
+        xen_config_dev_nic(nd_table + i);
+    }
+
+    /* config cleanup hook */
+    atexit(xen_config_cleanup);
+
     /* setup framebuffer */
     xen_init_display(xen_domid);
 }
Index: hw/xen_backend.c
===================================================================
--- hw/xen_backend.c	(revision 7224)
+++ hw/xen_backend.c	(revision 7225)
@@ -45,6 +45,7 @@
 /* public */
 int xen_xc;
 struct xs_handle *xenstore = NULL;
+const char *xen_protocol;
 
 /* private */
 static TAILQ_HEAD(XenDeviceHead, XenDevice) xendevs = TAILQ_HEAD_INITIALIZER(xendevs);
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7224)
+++ hw/xen_backend.h	(revision 7225)
@@ -3,6 +3,8 @@
 
 #include "xen_common.h"
 #include "sysemu.h"
+#include "net.h"
+#include "block_int.h"
 
 /* ------------------------------------------------------------- */
 
@@ -56,6 +58,7 @@
 /* variables */
 extern int xen_xc;
 extern struct xs_handle *xenstore;
+extern const char *xen_protocol;
 
 /* xenstore helper functions */
 int xenstore_write_str(const char *base, const char *node, const char *val);
@@ -93,4 +96,9 @@
 
 void xen_init_display(int domid);
 
+/* configuration (aka xenbus setup) */
+void xen_config_cleanup(void);
+int xen_config_dev_blk(DriveInfo *disk);
+int xen_config_dev_nic(NICInfo *nic);
+
 #endif /* QEMU_HW_XEN_BACKEND_H */
Index: hw/xen_devconfig.c
===================================================================
--- hw/xen_devconfig.c	(revision 0)
+++ hw/xen_devconfig.c	(revision 7225)
@@ -0,0 +1,142 @@
+#include "xen_backend.h"
+
+/* ------------------------------------------------------------- */
+
+struct xs_dirs { /* one xenstore directory to remove in xen_config_cleanup() */
+    char *xs_dir; /* path string; set by xen_config_cleanup_dir() and never freed in this file */
+    TAILQ_ENTRY(xs_dirs) list;
+};
+static TAILQ_HEAD(xs_dirs_head, xs_dirs) xs_cleanup = TAILQ_HEAD_INITIALIZER(xs_cleanup); /* all recorded directories, in creation order */
+
+static void xen_config_cleanup_dir(char *dir)
+{   /* Append 'dir' to the cleanup list; the pointer is stored as-is, so the caller must pass a string that stays valid. */
+    struct xs_dirs *d;
+
+    d = qemu_malloc(sizeof(*d)); /* result is used without a NULL check */
+    d->xs_dir = dir;
+    TAILQ_INSERT_TAIL(&xs_cleanup, d, list);
+}
+
+void xen_config_cleanup(void)
+{   /* Remove every recorded xenstore directory; the list entries themselves are left allocated. */
+    struct xs_dirs *d;
+
+    TAILQ_FOREACH(d, &xs_cleanup, list) {
+	xs_rm(xenstore, 0, d->xs_dir); /* return value ignored: removal is best effort */
+    }
+}
+
+/* ------------------------------------------------------------- */
+
+static int xen_config_dev_mkdir(char *dev, int p)
+{   /* Create xenstore dir 'dev', record it for cleanup and give domain xen_domid permissions 'p'; returns 0, or -1 if a xenstore call fails. */
+    struct xs_permissions perms[2] = {{
+            .id    = 0, /* set owner: dom0 */
+        },{
+            .id    = xen_domid,
+            .perms = p,
+        }};
+
+    if (!xs_mkdir(xenstore, 0, dev)) {
+	xen_be_printf(NULL, 0, "xs_mkdir %s: failed\n", dev);
+	return -1;
+    }
+    xen_config_cleanup_dir(qemu_strdup(dev)); /* the copy is owned by the cleanup list; recorded before permissions are set */
+
+    if (!xs_set_permissions(xenstore, 0, dev, perms, 2)) {
+	xen_be_printf(NULL, 0, "xs_set_permissions %s: failed\n", dev);
+	return -1;
+    }
+    return 0;
+}
+
+static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev,
+			       char *fe, char *be, int len)
+{   /* Build the frontend path into 'fe' and the backend path into 'be' (each at most 'len' bytes) and create both directories; always returns 0. */
+    char *dom;
+
+    dom = xs_get_domain_path(xenstore, xen_domid); /* NOTE(review): result is not checked for NULL */
+    snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev);
+    free(dom);
+
+    dom = xs_get_domain_path(xenstore, 0); /* the backend lives under domain 0's path */
+    snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev);
+    free(dom);
+
+    xen_config_dev_mkdir(fe, XS_PERM_READ | XS_PERM_WRITE); /* return value ignored */
+    xen_config_dev_mkdir(be, XS_PERM_READ); /* return value ignored */
+    return 0;
+}
+
+static int xen_config_dev_all(char *fe, char *be)
+{   /* Write the entries shared by all device types into frontend dir 'fe' and backend dir 'be', cross-linking them; always returns 0. */
+    /* frontend */
+    if (xen_protocol) /* only written when a protocol string has been set */
+        xenstore_write_str(fe, "protocol", xen_protocol);
+
+    xenstore_write_int(fe, "state",           XenbusStateInitialising);
+    xenstore_write_int(fe, "backend-id",      0);
+    xenstore_write_str(fe, "backend",         be);
+
+    /* backend */
+    xenstore_write_str(be, "domain",          qemu_name ? qemu_name : "no-name");
+    xenstore_write_int(be, "online",          1);
+    xenstore_write_int(be, "state",           XenbusStateInitialising);
+    xenstore_write_int(be, "frontend-id",     xen_domid);
+    xenstore_write_str(be, "frontend",        fe);
+
+    return 0; /* the results of the individual writes are not checked */
+}
+
+/* ------------------------------------------------------------- */
+
+int xen_config_dev_blk(DriveInfo *disk)
+{   /* Name the drive "xvd<letter>" and create its "vbd" frontend / "qdisk" backend xenstore entries; returns the result of xen_config_dev_all(). */
+    char fe[256], be[256];
+    int vdev = 202 * 256 + 16 * disk->unit; /* presumably block major 202 with 16 minors per disk -- confirm */
+    int cdrom = disk->bdrv->type == BDRV_TYPE_CDROM;
+    const char *devtype = cdrom ? "cdrom" : "disk";
+    const char *mode    = cdrom ? "r"     : "w"; /* cdroms are exported with mode "r", everything else with "w" */
+
+    snprintf(disk->bdrv->device_name, sizeof(disk->bdrv->device_name),
+	     "xvd%c", 'a' + disk->unit); /* overwrites the drive's device_name */
+    xen_be_printf(NULL, 1, "config disk %d [%s]: %s\n",
+                  disk->unit, disk->bdrv->device_name, disk->bdrv->filename);
+    xen_config_dev_dirs("vbd", "qdisk", vdev, fe, be, sizeof(fe));
+
+    /* frontend */
+    xenstore_write_int(fe, "virtual-device",  vdev);
+    xenstore_write_str(fe, "device-type",     devtype);
+
+    /* backend */
+    xenstore_write_str(be, "dev",             disk->bdrv->device_name);
+    xenstore_write_str(be, "type",            "file");
+    xenstore_write_str(be, "params",          disk->bdrv->filename);
+    xenstore_write_str(be, "mode",            mode);
+
+    /* common stuff */
+    return xen_config_dev_all(fe, be);
+}
+
+int xen_config_dev_nic(NICInfo *nic)
+{   /* Create the "vif" frontend / "qnic" backend xenstore entries for 'nic', using its vlan id as device number; returns the result of xen_config_dev_all(). */
+    char fe[256], be[256];
+    char mac[20];
+
+    snprintf(mac, sizeof(mac), "%02x:%02x:%02x:%02x:%02x:%02x",
+	     nic->macaddr[0], nic->macaddr[1], nic->macaddr[2],
+	     nic->macaddr[3], nic->macaddr[4], nic->macaddr[5]);
+    xen_be_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", nic->vlan->id, mac);
+    xen_config_dev_dirs("vif", "qnic", nic->vlan->id, fe, be, sizeof(fe));
+
+    /* frontend */
+    xenstore_write_int(fe, "handle",     nic->vlan->id);
+    xenstore_write_str(fe, "mac",        mac);
+
+    /* backend */
+    xenstore_write_int(be, "handle",     nic->vlan->id);
+    xenstore_write_str(be, "mac",        mac);
+
+    /* common stuff */
+    return xen_config_dev_all(fe, be);
+}

 ------------------------------------------------------------------------
r7224 | aliguori | 2009-04-22 10:19:35 -0500 (Wed, 22 Apr 2009) | 15 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/xen_backend.h
   M /trunk/hw/xen_machine_pv.c
   A /trunk/hw/xen_nic.c

xen: add net backend driver. (Gerd Hoffmann)

This patch adds a network interface backend driver to qemu.  It is a pure
userspace implementation using the gntdev interface.  It uses "qnic" as
backend name in xenstore so it doesn't interfere with the netback
backend (aka "vnif").

The network backend is hooked into the corresponding qemu vlan, i.e.
vif 0 is hooked into vlan 0.  To make the packets actually arrive
somewhere you additionally have to link the vlan to the outside world
using the usual qemu command line options such as "-net tap,...".

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7223)
+++ Makefile.target	(revision 7224)
@@ -562,7 +562,7 @@
 
 # xen backend driver support
 XEN_OBJS := xen_machine_pv.o xen_backend.o
-XEN_OBJS += xen_console.o xenfb.o xen_disk.o
+XEN_OBJS += xen_console.o xenfb.o xen_disk.o xen_nic.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
   LIBS += $(XEN_LIBS)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7223)
+++ hw/xen_machine_pv.c	(revision 7224)
@@ -60,6 +60,7 @@
     xen_be_register("vkbd", &xen_kbdmouse_ops);
     xen_be_register("vfb", &xen_framebuffer_ops);
     xen_be_register("qdisk", &xen_blkdev_ops);
+    xen_be_register("qnic", &xen_netdev_ops);
 
     /* setup framebuffer */
     xen_init_display(xen_domid);
Index: hw/xen_nic.c
===================================================================
--- hw/xen_nic.c	(revision 0)
+++ hw/xen_nic.c	(revision 7224)
@@ -0,0 +1,406 @@
+/*
+ *  xen paravirt network card backend
+ *
+ *  (c) Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "hw.h"
+#include "net.h"
+#include "qemu-char.h"
+#include "xen_backend.h"
+
+/* ------------------------------------------------------------- */
+
+struct XenNetDev { /* per-device state of the network backend */
+    struct XenDevice      xendev;  /* must be first */
+    char                  *mac;         /* MAC string read from the backend's "mac" node; released in net_free() */
+    int                   tx_work;      /* bumped by net_tx_response() when more tx requests showed up */
+    int                   tx_ring_ref;  /* frontend's "tx-ring-ref" value */
+    int                   rx_ring_ref;  /* frontend's "rx-ring-ref" value */
+    struct netif_tx_sring *txs;         /* mapped tx ring page, NULL while unmapped */
+    struct netif_rx_sring *rxs;         /* mapped rx ring page, NULL while unmapped */
+    netif_tx_back_ring_t  tx_ring;
+    netif_rx_back_ring_t  rx_ring;
+    VLANClientState       *vs;          /* qemu vlan client created in net_init() */
+};
+
+/* ------------------------------------------------------------- */
+
+static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
+{   /* Queue one tx response carrying the id of request 'txp' and status 'st', then push it and notify the frontend if required. */
+    RING_IDX i = netdev->tx_ring.rsp_prod_pvt;
+    netif_tx_response_t *resp;
+    int notify;
+
+    resp = RING_GET_RESPONSE(&netdev->tx_ring, i);
+    resp->id     = txp->id;
+    resp->status = st;
+
+#if 0
+    if (txp->flags & NETTXF_extra_info)
+	RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL;
+#endif
+
+    netdev->tx_ring.rsp_prod_pvt = ++i;
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
+    if (notify)
+	xen_be_send_notify(&netdev->xendev);
+
+    if (i == netdev->tx_ring.req_cons) { /* every consumed request has been answered */
+	int more_to_do;
+	RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do);
+	if (more_to_do)
+	    netdev->tx_work++; /* makes net_tx_packets() run another pass */
+    }
+}
+
+static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end)
+{   /* Answer request 'txp' with NETIF_RSP_ERROR; 'end' is only used by the disabled branch below. */
+#if 0
+    /*
+     * Hmm, why netback fails everything in the ring?
+     * Should we do that even when not supporting SG and TSO?
+     */
+    RING_IDX cons = netdev->tx_ring.req_cons;
+
+    do {
+	make_tx_response(netif, txp, NETIF_RSP_ERROR);
+	if (cons >= end)
+	    break;
+	txp = RING_GET_REQUEST(&netdev->tx_ring, cons++);
+    } while (1);
+    netdev->tx_ring.req_cons = cons;
+    netif_schedule_work(netif);
+    netif_put(netif);
+#else
+    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
+#endif
+}
+
+static void net_tx_packets(struct XenNetDev *netdev)
+{   /* Drain the tx ring: validate each request, map its grant, hand the packet to the qemu vlan and answer it; repeats while responses flagged more work. */
+    netif_tx_request_t txreq;
+    RING_IDX rc, rp;
+    void *page;
+    void *tmpbuf = NULL; /* bounce buffer for checksum fix-ups, allocated on first use */
+
+    for (;;) {
+        rc = netdev->tx_ring.req_cons;
+        rp = netdev->tx_ring.sring->req_prod;
+        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+        while (rc != rp) {
+            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc))
+                break;
+            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq)); /* work on a private copy of the shared request */
+            netdev->tx_ring.req_cons = ++rc;
+
+#if 1
+            /* should not happen in theory, we don't announce the *
+             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
+            if (txreq.flags & NETTXF_extra_info) {
+                xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+            if (txreq.flags & NETTXF_more_data) {
+                xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+#endif
+
+            if (txreq.size < 14) { /* 14 is presumably the ethernet header length -- confirm */
+                xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size);
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+
+            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { /* the packet must fit inside the single mapped page */
+                xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+
+            xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
+                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
+                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
+                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
+                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
+                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");
+
+            page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                           netdev->xendev.dom,
+                                           txreq.gref, PROT_READ);
+            if (page == NULL) {
+                xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n",
+                              txreq.gref);
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+            if (txreq.flags & NETTXF_csum_blank) {
+                /* have read-only mapping -> can't fill checksum in-place */
+                if (!tmpbuf) /* sized by the XC_PAGE_SIZE bound checked above; qemu_malloc is expected not to return NULL */
+                    tmpbuf = qemu_malloc(XC_PAGE_SIZE);
+                memcpy(tmpbuf, page + txreq.offset, txreq.size);
+                net_checksum_calculate(tmpbuf, txreq.size);
+                qemu_send_packet(netdev->vs, tmpbuf, txreq.size);
+            } else {
+                qemu_send_packet(netdev->vs, page + txreq.offset, txreq.size);
+            }
+            xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
+            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
+        }
+        if (!netdev->tx_work) /* set by net_tx_response() when new requests arrived meanwhile */
+            break;
+        netdev->tx_work = 0;
+    }
+    qemu_free(tmpbuf); /* pairs with qemu_malloc above; tmpbuf may still be NULL */
+}
+
+/* ------------------------------------------------------------- */
+
+static void net_rx_response(struct XenNetDev *netdev,
+			    netif_rx_request_t *req, int8_t st,
+			    uint16_t offset, uint16_t size,
+			    uint16_t flags)
+{   /* Queue one rx response for request 'req', then push it and notify the frontend if required. */
+    RING_IDX i = netdev->rx_ring.rsp_prod_pvt;
+    netif_rx_response_t *resp;
+    int notify;
+
+    resp = RING_GET_RESPONSE(&netdev->rx_ring, i);
+    resp->offset     = offset;
+    resp->flags      = flags;
+    resp->id         = req->id;
+    resp->status     = (int16_t)size; /* the status field carries the packet size ... */
+    if (st < 0)
+	resp->status = (int16_t)st; /* ... unless 'st' is negative, in which case it carries 'st' */
+
+    xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n",
+		  i, resp->status, resp->flags);
+
+    netdev->rx_ring.rsp_prod_pvt = ++i;
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
+    if (notify)
+	xen_be_send_notify(&netdev->xendev);
+}
+
+#define NET_IP_ALIGN 2
+
+static int net_rx_ok(void *opaque)
+{   /* Returns 1 when the backend is connected and the frontend has posted at least one rx request, else 0. */
+    struct XenNetDev *netdev = opaque;
+    RING_IDX rc, rp;
+
+    if (netdev->xendev.be_state != XenbusStateConnected)
+	return 0;
+
+    rc = netdev->rx_ring.req_cons;
+    rp = netdev->rx_ring.sring->req_prod;
+    xen_rmb(); /* same barrier as in net_rx_packet(), taken after reading req_prod */
+
+    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
+	xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n",
+		      __FUNCTION__, rc, rp);
+	return 0;
+    }
+    return 1;
+}
+
+static void net_rx_packet(void *opaque, const uint8_t *buf, int size)
+{   /* Copy one packet of 'size' bytes from 'buf' into the next rx buffer granted by the frontend; the packet is dropped when no buffer is posted or it does not fit. */
+    struct XenNetDev *netdev = opaque;
+    netif_rx_request_t rxreq;
+    RING_IDX rc, rp;
+    void *page;
+
+    if (netdev->xendev.be_state != XenbusStateConnected)
+        return;
+
+    rc = netdev->rx_ring.req_cons;
+    rp = netdev->rx_ring.sring->req_prod;
+    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
+        xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n");
+        return;
+    }
+    if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { /* data is stored NET_IP_ALIGN bytes into a one-page buffer */
+        xen_be_printf(&netdev->xendev, 0, "packet too big (%d > %ld)\n",
+                      size, XC_PAGE_SIZE - NET_IP_ALIGN);
+        return; /* dropped without consuming a request */
+    }
+
+    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq)); /* work on a private copy of the shared request */
+    netdev->rx_ring.req_cons = ++rc;
+
+    page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                   netdev->xendev.dom,
+                                   rxreq.gref, PROT_WRITE);
+    if (page == NULL) {
+        xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n",
+                      rxreq.gref);
+        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
+        return;
+    }
+    memcpy(page + NET_IP_ALIGN, buf, size);
+    xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
+    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0); /* tells the frontend where the data starts and how long it is */
+}
+
+/* ------------------------------------------------------------- */
+
+static int net_init(struct XenDevice *xendev)
+{   /* Read the MAC from xenstore, attach a client to the qemu vlan numbered like the device and advertise the rx features; returns 0, or -1 while no MAC is available. */
+    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    VLANState *vlan;
+
+    /* read xenstore entries */
+    if (netdev->mac == NULL)
+	netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
+
+    /* do we have all we need? */
+    if (netdev->mac == NULL)
+	return -1;
+
+    vlan = qemu_find_vlan(netdev->xendev.dev); /* the device number selects the vlan */
+    netdev->vs = qemu_new_vlan_client(vlan, "xen", NULL,
+                                      net_rx_packet, net_rx_ok, NULL,
+                                      netdev);
+    snprintf(netdev->vs->info_str, sizeof(netdev->vs->info_str),
+             "nic: xenbus vif macaddr=%s", netdev->mac);
+
+    /* fill info */
+    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
+    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);
+
+    return 0;
+}
+
+static int net_connect(struct XenDevice *xendev)
+{   /* Read ring refs and event channel from the frontend, map both rings and bind the event channel; returns 0 on success, -1 on any failure. */
+    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    int rx_copy;
+
+    if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
+                             &netdev->tx_ring_ref) == -1)
+        return -1;
+    if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
+                             &netdev->rx_ring_ref) == -1)
+        return -1; /* was "return 1", which did not match the other two failure paths */
+    if (xenstore_read_fe_int(&netdev->xendev, "event-channel",
+                             &netdev->xendev.remote_port) == -1)
+        return -1;
+
+    if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1)
+        rx_copy = 0; /* a missing node counts as "not requested" */
+    if (rx_copy == 0) {
+        xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n");
+        return -1;
+    }
+
+    netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                          netdev->xendev.dom,
+                                          netdev->tx_ring_ref,
+                                          PROT_READ | PROT_WRITE);
+    netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                          netdev->xendev.dom,
+                                          netdev->rx_ring_ref,
+                                          PROT_READ | PROT_WRITE);
+    if (!netdev->txs || !netdev->rxs)
+        return -1; /* a ring that did get mapped is unmapped later by net_disconnect() */
+    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
+    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
+
+    xen_be_bind_evtchn(&netdev->xendev);
+
+    xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
+                  "remote port %d, local port %d\n",
+                  netdev->tx_ring_ref, netdev->rx_ring_ref,
+                  netdev->xendev.remote_port, netdev->xendev.local_port);
+    return 0;
+}
+
+static void net_disconnect(struct XenDevice *xendev)
+{   /* Unbind the event channel, unmap whichever rings are mapped and delete the vlan client; each pointer is reset to NULL afterwards. */
+    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+
+    xen_be_unbind_evtchn(&netdev->xendev);
+
+    if (netdev->txs) {
+	xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
+	netdev->txs = NULL;
+    }
+    if (netdev->rxs) {
+	xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
+	netdev->rxs = NULL;
+    }
+    if (netdev->vs) {
+        qemu_del_vlan_client(netdev->vs);
+        netdev->vs = NULL;
+    }
+}
+
+static void net_event(struct XenDevice *xendev)
+{   /* Event hook (see xen_netdev_ops.event): process pending tx requests. */
+    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    net_tx_packets(netdev);
+}
+
+static int net_free(struct XenDevice *xendev)
+{   /* Release the MAC string held by the device; always returns 0. */
+    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+
+    qemu_free(netdev->mac); /* netdev->mac is not reset to NULL afterwards */
+    return 0;
+}
+
+/* ------------------------------------------------------------- */
+
+struct XenDevOps xen_netdev_ops = { /* callback table of the network backend; registered under the name "qnic" in xen_machine_pv.c */
+    .size       = sizeof(struct XenNetDev), /* size of the per-device state struct */
+    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
+    .init       = net_init,
+    .connect    = net_connect,
+    .event      = net_event,
+    .disconnect = net_disconnect,
+    .free       = net_free,
+};
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7223)
+++ hw/xen_backend.h	(revision 7224)
@@ -89,6 +89,7 @@
 extern struct XenDevOps xen_kbdmouse_ops;     /* xen_framebuffer.c */
 extern struct XenDevOps xen_framebuffer_ops;  /* xen_framebuffer.c */
 extern struct XenDevOps xen_blkdev_ops;       /* xen_disk.c        */
+extern struct XenDevOps xen_netdev_ops;       /* xen_nic.c         */
 
 void xen_init_display(int domid);
 

 ------------------------------------------------------------------------
r7223 | aliguori | 2009-04-22 10:19:30 -0500 (Wed, 22 Apr 2009) | 10 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/xen_backend.h
   A /trunk/hw/xen_blkif.h
   A /trunk/hw/xen_disk.c
   M /trunk/hw/xen_machine_pv.c
   M /trunk/sysemu.h
   M /trunk/vl.c

xen: add block device backend driver. (Gerd Hoffmann)

This patch adds a block device backend driver to qemu.  It is a pure
userspace implementation using the gntdev interface.  It uses "qdisk" as
backend name in xenstore so it doesn't interfere with the other existing
backends (blkback aka "vbd" and tapdisk aka "tap").

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7222)
+++ Makefile.target	(revision 7223)
@@ -562,7 +562,7 @@
 
 # xen backend driver support
 XEN_OBJS := xen_machine_pv.o xen_backend.o
-XEN_OBJS += xen_console.o xenfb.o
+XEN_OBJS += xen_console.o xenfb.o xen_disk.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
   LIBS += $(XEN_LIBS)
Index: vl.c
===================================================================
--- vl.c	(revision 7222)
+++ vl.c	(revision 7223)
@@ -2373,7 +2373,10 @@
         } else if (!strcmp(buf, "virtio")) {
             type = IF_VIRTIO;
             max_devs = 0;
-        } else {
+	} else if (!strcmp(buf, "xen")) {
+	    type = IF_XEN;
+            max_devs = 0;
+	} else {
             fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
             return -1;
 	}
@@ -2587,6 +2590,7 @@
     switch(type) {
     case IF_IDE:
     case IF_SCSI:
+    case IF_XEN:
         switch(media) {
 	case MEDIA_DISK:
             if (cyls != 0) {
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7222)
+++ sysemu.h	(revision 7223)
@@ -131,7 +131,7 @@
 #endif
 
 typedef enum {
-    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
+    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN
 } BlockInterfaceType;
 
 typedef enum {
Index: hw/xen_disk.c
===================================================================
--- hw/xen_disk.c	(revision 0)
+++ hw/xen_disk.c	(revision 7223)
@@ -0,0 +1,779 @@
+/*
+ *  xen paravirt block device backend
+ *
+ *  (c) Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
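+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <time.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+#include <xs.h>
+#include <xenctrl.h>
+#include <xen/io/xenbus.h>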
+
+#include "hw.h"
+#include "block_int.h"
+#include "qemu-char.h"
+#include "xen_blkif.h"
+#include "xen_backend.h"
+
+/* ------------------------------------------------------------- */
+
+static int syncwrite    = 0;
+static int batch_maps   = 0;
+
+static int max_requests = 32;
+static int use_aio      = 1;
+
+/* ------------------------------------------------------------- */
+
+#define BLOCK_SIZE  512
+#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
+
+struct ioreq {
+    blkif_request_t     req;
+    int16_t             status;
+
+    /* parsed request */
+    off_t               start;
+    QEMUIOVector        v;
+    int                 presync;
+    int                 postsync;
+
+    /* grant mapping */
+    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int                 prot;
+    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    void                *pages;
+
+    /* aio status */
+    int                 aio_inflight;
+    int                 aio_errors;
+
+    struct XenBlkDev    *blkdev;
+    LIST_ENTRY(ioreq)   list;
+};
+
+struct XenBlkDev {
+    struct XenDevice    xendev;  /* must be first */
+    char                *params;
+    char                *mode;
+    char                *type;
+    char                *dev;
+    char                *devtype;
+    const char          *fileproto;
+    const char          *filename;
+    int                 ring_ref;
+    void                *sring;
+    int64_t             file_blk;
+    int64_t             file_size;
+    int                 protocol;
+    blkif_back_rings_t  rings;
+    int                 more_work;
+    int                 cnt_map;
+
+    /* request lists */
+    LIST_HEAD(inflight_head, ioreq) inflight;
+    LIST_HEAD(finished_head, ioreq) finished;
+    LIST_HEAD(freelist_head, ioreq) freelist;
+    int                 requests_total;
+    int                 requests_inflight;
+    int                 requests_finished;
+
+    /* qemu block driver */
+    int                 index;
+    BlockDriverState    *bs;
+    QEMUBH              *bh;
+};
+
+/* ------------------------------------------------------------- */
+
+static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
+{
+    struct ioreq *ioreq = NULL;
+
+    if (LIST_EMPTY(&blkdev->freelist)) {
+	if (blkdev->requests_total >= max_requests)
+	    goto out;
+	/* allocate new struct */
+	ioreq = qemu_mallocz(sizeof(*ioreq));
+	ioreq->blkdev = blkdev;
+	blkdev->requests_total++;
+        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+    } else {
+	/* get one from freelist */
+	ioreq = LIST_FIRST(&blkdev->freelist);
+	LIST_REMOVE(ioreq, list);
+        qemu_iovec_reset(&ioreq->v);
+    }
+    LIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
+    blkdev->requests_inflight++;
+
+out:
+    return ioreq;
+}
+
+static void ioreq_finish(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
+    LIST_REMOVE(ioreq, list);
+    LIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
+    blkdev->requests_inflight--;
+    blkdev->requests_finished++;
+}
+
+static void ioreq_release(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
+    LIST_REMOVE(ioreq, list);
+    memset(ioreq, 0, sizeof(*ioreq));
+    ioreq->blkdev = blkdev;
+    LIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
+    blkdev->requests_finished--;
+}
+
+/*
+ * translate request into iovec + start offset
+ * do sanity checks along the way
+ */
+static int ioreq_parse(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+    uintptr_t mem;
+    size_t len;
+    int i;
+
+    xen_be_printf(&blkdev->xendev, 3,
+		  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
+		  ioreq->req.operation, ioreq->req.nr_segments,
+		  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_READ:
+	ioreq->prot = PROT_WRITE; /* to memory */
+	if (BLKIF_OP_READ != ioreq->req.operation && blkdev->mode[0] != 'w') {
+	    xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
+	    goto err;
+	}
+	break;
+    case BLKIF_OP_WRITE_BARRIER:
+	if (!syncwrite)
+	    ioreq->presync = ioreq->postsync = 1;
+	/* fall through */
+    case BLKIF_OP_WRITE:
+	ioreq->prot = PROT_READ; /* from memory */
+	if (syncwrite)
+	    ioreq->postsync = 1;
+	break;
+    default:
+	xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
+		      ioreq->req.operation);
+	goto err;
+    };
+
+    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
+    for (i = 0; i < ioreq->req.nr_segments; i++) {
+	if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	    xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
+	    goto err;
+	}
+	if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
+	    xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
+	    goto err;
+	}
+	if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
+	    xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
+	    goto err;
+	}
+
+	ioreq->domids[i] = blkdev->xendev.dom;
+	ioreq->refs[i]   = ioreq->req.seg[i].gref;
+
+	mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
+	len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
+        qemu_iovec_add(&ioreq->v, (void*)mem, len);
+    }
+    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
+	xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
+	goto err;
+    }
+    return 0;
+
+err:
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
+static void ioreq_unmap(struct ioreq *ioreq)
+{
+    int gnt = ioreq->blkdev->xendev.gnttabdev;
+    int i;
+
+    if (ioreq->v.niov == 0)
+        return;
+    if (batch_maps) {
+	if (!ioreq->pages)
+	    return;
+	if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0)
+	    xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
+			  strerror(errno));
+	ioreq->blkdev->cnt_map -= ioreq->v.niov;
+	ioreq->pages = NULL;
+    } else {
+	for (i = 0; i < ioreq->v.niov; i++) {
+	    if (!ioreq->page[i])
+		continue;
+	    if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0)
+		xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
+			      strerror(errno));
+	    ioreq->blkdev->cnt_map--;
+	    ioreq->page[i] = NULL;
+	}
+    }
+}
+
+static int ioreq_map(struct ioreq *ioreq)
+{
+    int gnt = ioreq->blkdev->xendev.gnttabdev;
+    int i;
+
+    if (ioreq->v.niov == 0)
+        return 0;
+    if (batch_maps) {
+	ioreq->pages = xc_gnttab_map_grant_refs
+	    (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
+	if (ioreq->pages == NULL) {
+	    xen_be_printf(&ioreq->blkdev->xendev, 0,
+			  "can't map %d grant refs (%s, %d maps)\n",
+			  ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
+	    return -1;
+	}
+	for (i = 0; i < ioreq->v.niov; i++)
+	    ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
+		(uintptr_t)ioreq->v.iov[i].iov_base;
+	ioreq->blkdev->cnt_map += ioreq->v.niov;
+    } else  {
+	for (i = 0; i < ioreq->v.niov; i++) {
+	    ioreq->page[i] = xc_gnttab_map_grant_ref
+		(gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
+	    if (ioreq->page[i] == NULL) {
+		xen_be_printf(&ioreq->blkdev->xendev, 0,
+			      "can't map grant ref %d (%s, %d maps)\n",
+			      ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
+		ioreq_unmap(ioreq);
+		return -1;
+	    }
+	    ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
+	    ioreq->blkdev->cnt_map++;
+	}
+    }
+    return 0;
+}
+
+static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+    int i, rc, len = 0;
+    off_t pos;
+
+    if (ioreq_map(ioreq) == -1)
+	goto err;
+    if (ioreq->presync)
+	bdrv_flush(blkdev->bs);
+
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_READ:
+	pos = ioreq->start;
+	for (i = 0; i < ioreq->v.niov; i++) {
+	    rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
+			   ioreq->v.iov[i].iov_base,
+			   ioreq->v.iov[i].iov_len / BLOCK_SIZE);
+	    if (rc != 0) {
+		xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
+			      ioreq->v.iov[i].iov_base,
+			      ioreq->v.iov[i].iov_len);
+		goto err;
+	    }
+	    len += ioreq->v.iov[i].iov_len;
+	    pos += ioreq->v.iov[i].iov_len;
+	}
+	break;
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_WRITE_BARRIER:
+	pos = ioreq->start;
+	for (i = 0; i < ioreq->v.niov; i++) {
+	    rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
+			    ioreq->v.iov[i].iov_base,
+			    ioreq->v.iov[i].iov_len / BLOCK_SIZE);
+	    if (rc != 0) {
+		xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
+			      ioreq->v.iov[i].iov_base,
+			      ioreq->v.iov[i].iov_len);
+		goto err;
+	    }
+	    len += ioreq->v.iov[i].iov_len;
+	    pos += ioreq->v.iov[i].iov_len;
+	}
+	break;
+    default:
+	/* unknown operation (shouldn't happen -- parse catches this) */
+	goto err;
+    }
+
+    if (ioreq->postsync)
+	bdrv_flush(blkdev->bs);
+    ioreq->status = BLKIF_RSP_OKAY;
+
+    ioreq_unmap(ioreq);
+    ioreq_finish(ioreq);
+    return 0;
+
+err:
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
+static void qemu_aio_complete(void *opaque, int ret)
+{
+    struct ioreq *ioreq = opaque;
+
+    if (ret != 0) {
+        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
+                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
+        ioreq->aio_errors++;
+    }
+
+    ioreq->aio_inflight--;
+    if (ioreq->aio_inflight > 0)
+        return;
+
+    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
+    ioreq_unmap(ioreq);
+    ioreq_finish(ioreq);
+    qemu_bh_schedule(ioreq->blkdev->bh);
+}
+
+static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
+    if (ioreq_map(ioreq) == -1)
+	goto err;
+
+    ioreq->aio_inflight++;
+    if (ioreq->presync)
+	bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
+
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_READ:
+        ioreq->aio_inflight++;
+        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
+                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
+                       qemu_aio_complete, ioreq);
+	break;
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_WRITE_BARRIER:
+        ioreq->aio_inflight++;
+        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
+                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
+                        qemu_aio_complete, ioreq);
+	break;
+    default:
+	/* unknown operation (shouldn't happen -- parse catches this) */
+	goto err;
+    }
+
+    if (ioreq->postsync)
+	bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
+    qemu_aio_complete(ioreq, 0);
+
+    return 0;
+
+err:
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
+static int blk_send_response_one(struct ioreq *ioreq)
+{
+    struct XenBlkDev  *blkdev = ioreq->blkdev;
+    int               send_notify   = 0;
+    int               have_requests = 0;
+    blkif_response_t  resp;
+    void              *dst;
+
+    resp.id        = ioreq->req.id;
+    resp.operation = ioreq->req.operation;
+    resp.status    = ioreq->status;
+
+    /* Place on the response ring for the relevant domain. */
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+	dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
+	break;
+    case BLKIF_PROTOCOL_X86_32:
+	dst = RING_GET_RESPONSE(&blkdev->rings.x86_32, blkdev->rings.x86_32.rsp_prod_pvt);
+	break;
+    case BLKIF_PROTOCOL_X86_64:
+	dst = RING_GET_RESPONSE(&blkdev->rings.x86_64, blkdev->rings.x86_64.rsp_prod_pvt);
+	break;
+    default:
+	dst = NULL;
+    }
+    memcpy(dst, &resp, sizeof(resp));
+    blkdev->rings.common.rsp_prod_pvt++;
+
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
+    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
+	/*
+	 * Tail check for pending requests. Allows frontend to avoid
+	 * notifications if requests are already in flight (lower
+	 * overheads and promotes batching).
+	 */
+	RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
+    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
+	have_requests = 1;
+    }
+
+    if (have_requests)
+	blkdev->more_work++;
+    return send_notify;
+}
+
+/* walk finished list, send outstanding responses, free requests */
+static void blk_send_response_all(struct XenBlkDev *blkdev)
+{
+    struct ioreq *ioreq;
+    int send_notify = 0;
+
+    while (!LIST_EMPTY(&blkdev->finished)) {
+        ioreq = LIST_FIRST(&blkdev->finished);
+	send_notify += blk_send_response_one(ioreq);
+	ioreq_release(ioreq);
+    }
+    if (send_notify)
+	xen_be_send_notify(&blkdev->xendev);
+}
+
+static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
+{
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+	memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
+	       sizeof(ioreq->req));
+	break;
+    case BLKIF_PROTOCOL_X86_32:
+	blkif_get_x86_32_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_32, rc));
+	break;
+    case BLKIF_PROTOCOL_X86_64:
+	blkif_get_x86_64_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_64, rc));
+	break;
+    }
+    return 0;
+}
+
+static void blk_handle_requests(struct XenBlkDev *blkdev)
+{
+    RING_IDX rc, rp;
+    struct ioreq *ioreq;
+
+    blkdev->more_work = 0;
+
+    rc = blkdev->rings.common.req_cons;
+    rp = blkdev->rings.common.sring->req_prod;
+    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    if (use_aio)
+        blk_send_response_all(blkdev);
+    while ((rc != rp)) {
+        /* pull request from ring */
+        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc))
+            break;
+        ioreq = ioreq_start(blkdev);
+        if (ioreq == NULL) {
+            blkdev->more_work++;
+            break;
+        }
+        blk_get_request(blkdev, ioreq, rc);
+        blkdev->rings.common.req_cons = ++rc;
+
+        /* parse them */
+        if (ioreq_parse(ioreq) != 0) {
+            if (blk_send_response_one(ioreq))
+                xen_be_send_notify(&blkdev->xendev);
+            ioreq_release(ioreq);
+            continue;
+        }
+
+        if (use_aio) {
+            /* run i/o in aio mode */
+            ioreq_runio_qemu_aio(ioreq);
+        } else {
+            /* run i/o in sync mode */
+            ioreq_runio_qemu_sync(ioreq);
+        }
+    }
+    if (!use_aio)
+        blk_send_response_all(blkdev);
+
+    if (blkdev->more_work && blkdev->requests_inflight < max_requests)
+        qemu_bh_schedule(blkdev->bh);
+}
+
+/* ------------------------------------------------------------- */
+
+static void blk_bh(void *opaque)
+{
+    struct XenBlkDev *blkdev = opaque;
+    blk_handle_requests(blkdev);
+}
+
+static void blk_alloc(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    LIST_INIT(&blkdev->inflight);
+    LIST_INIT(&blkdev->finished);
+    LIST_INIT(&blkdev->freelist);
+    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
+    if (xen_mode != XEN_EMULATE)
+        batch_maps = 1;
+}
+
+static int blk_init(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    int mode, qflags, have_barriers, info = 0;
+    char *h;
+
+    /* read xenstore entries */
+    if (blkdev->params == NULL) {
+	blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
+        h = strchr(blkdev->params, ':');
+	if (h != NULL) {
+	    blkdev->fileproto = blkdev->params;
+	    blkdev->filename  = h+1;
+	    *h = 0;
+	} else {
+	    blkdev->fileproto = "";
+	    blkdev->filename  = blkdev->params;
+	}
+    }
+    if (blkdev->mode == NULL)
+	blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
+    if (blkdev->type == NULL)
+	blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
+    if (blkdev->dev == NULL)
+	blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
+    if (blkdev->devtype == NULL)
+	blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
+
+    /* do we have all we need? */
+    if (blkdev->params == NULL ||
+	blkdev->mode == NULL   ||
+	blkdev->type == NULL   ||
+	blkdev->dev == NULL)
+	return -1;
+
+    /* read-only ? */
+    if (strcmp(blkdev->mode, "w") == 0) {
+	mode   = O_RDWR;
+	qflags = BDRV_O_RDWR;
+    } else {
+	mode   = O_RDONLY;
+	qflags = BDRV_O_RDONLY;
+	info  |= VDISK_READONLY;
+    }
+
+    /* cdrom ? */
+    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom"))
+	info  |= VDISK_CDROM;
+
+    /* init qemu block driver */
+    blkdev->index = (blkdev->xendev.dev - 202 * 256) / 16;
+    blkdev->index = drive_get_index(IF_XEN, 0, blkdev->index);
+    if (blkdev->index == -1) {
+        /* setup via xenbus -> create new block driver instance */
+        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
+	blkdev->bs = bdrv_new(blkdev->dev);
+	if (blkdev->bs) {
+	    if (bdrv_open2(blkdev->bs, blkdev->filename, qflags,
+                           bdrv_find_format(blkdev->fileproto)) != 0) {
+		bdrv_delete(blkdev->bs);
+		blkdev->bs = NULL;
+	    }
+	}
+	if (!blkdev->bs)
+	    return -1;
+    } else {
+        /* setup via qemu cmdline -> already setup for us */
+        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
+	blkdev->bs = drives_table[blkdev->index].bdrv;
+    }
+    blkdev->file_blk  = BLOCK_SIZE;
+    blkdev->file_size = bdrv_getlength(blkdev->bs);
+    if (blkdev->file_size < 0) {
+        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
+                      (int)blkdev->file_size, strerror(-blkdev->file_size),
+                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
+	blkdev->file_size = 0;
+    }
+    have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0;
+
+    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
+		  " size %" PRId64 " (%" PRId64 " MB)\n",
+		  blkdev->type, blkdev->fileproto, blkdev->filename,
+		  blkdev->file_size, blkdev->file_size >> 20);
+
+    /* fill info */
+    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers);
+    xenstore_write_be_int(&blkdev->xendev, "info",            info);
+    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
+    xenstore_write_be_int(&blkdev->xendev, "sectors",
+			  blkdev->file_size / blkdev->file_blk);
+    return 0;
+}
+
+static int blk_connect(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1)
+	return -1;
+    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
+                             &blkdev->xendev.remote_port) == -1)
+	return -1;
+
+    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
+    if (blkdev->xendev.protocol) {
+        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0)
+            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
+        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0)
+            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
+    }
+
+    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
+					    blkdev->xendev.dom,
+					    blkdev->ring_ref,
+					    PROT_READ | PROT_WRITE);
+    if (!blkdev->sring)
+	return -1;
+    blkdev->cnt_map++;
+
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+    {
+	blkif_sring_t *sring_native = blkdev->sring;
+	BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
+	break;
+    }
+    case BLKIF_PROTOCOL_X86_32:
+    {
+	blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;
+	BACK_RING_INIT(&blkdev->rings.x86_32, sring_x86_32, XC_PAGE_SIZE);
+	break;
+    }
+    case BLKIF_PROTOCOL_X86_64:
+    {
+	blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;
+	BACK_RING_INIT(&blkdev->rings.x86_64, sring_x86_64, XC_PAGE_SIZE);
+	break;
+    }
+    }
+
+    xen_be_bind_evtchn(&blkdev->xendev);
+
+    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
+		  "remote port %d, local port %d\n",
+		  blkdev->xendev.protocol, blkdev->ring_ref,
+		  blkdev->xendev.remote_port, blkdev->xendev.local_port);
+    return 0;
+}
+
+static void blk_disconnect(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    if (blkdev->bs) {
+        if (blkdev->index == -1) {
+            /* close/delete only if we created it ourself */
+            bdrv_close(blkdev->bs);
+            bdrv_delete(blkdev->bs);
+        }
+	blkdev->bs = NULL;
+    }
+    xen_be_unbind_evtchn(&blkdev->xendev);
+
+    if (blkdev->sring) {
+	xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
+	blkdev->cnt_map--;
+	blkdev->sring = NULL;
+    }
+}
+
+static int blk_free(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    struct ioreq *ioreq;
+
+    while (!LIST_EMPTY(&blkdev->freelist)) {
+	ioreq = LIST_FIRST(&blkdev->freelist);
+        LIST_REMOVE(ioreq, list);
+        qemu_iovec_destroy(&ioreq->v);
+	qemu_free(ioreq);
+    }
+
+    qemu_free(blkdev->params);
+    qemu_free(blkdev->mode);
+    qemu_free(blkdev->type);
+    qemu_free(blkdev->dev);
+    qemu_free(blkdev->devtype);
+    qemu_bh_delete(blkdev->bh);
+    return 0;
+}
+
+static void blk_event(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    qemu_bh_schedule(blkdev->bh);
+}
+
+struct XenDevOps xen_blkdev_ops = {
+    .size       = sizeof(struct XenBlkDev),
+    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
+    .alloc      = blk_alloc,
+    .init       = blk_init,
+    .connect    = blk_connect,
+    .disconnect = blk_disconnect,
+    .event      = blk_event,
+    .free       = blk_free,
+};
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7222)
+++ hw/xen_machine_pv.c	(revision 7223)
@@ -59,6 +59,7 @@
     xen_be_register("console", &xen_console_ops);
     xen_be_register("vkbd", &xen_kbdmouse_ops);
     xen_be_register("vfb", &xen_framebuffer_ops);
+    xen_be_register("qdisk", &xen_blkdev_ops);
 
     /* setup framebuffer */
     xen_init_display(xen_domid);
Index: hw/xen_blkif.h
===================================================================
--- hw/xen_blkif.h	(revision 0)
+++ hw/xen_blkif.h	(revision 7223)
@@ -0,0 +1,103 @@
+#ifndef __XEN_BLKIF_H__
+#define __XEN_BLKIF_H__
+
+#include <xen/io/ring.h>
+#include <xen/io/blkif.h>
+#include <xen/io/protocols.h>
+
+/* Not a real protocol.  Used to generate ring structs which contain
+ * the elements common to all protocols only.  This way we get a
+ * compiler-checkable way to use common struct elements, so we can
+ * avoid using switch(protocol) in a number of places.  */
+struct blkif_common_request {
+	char dummy;
+};
+struct blkif_common_response {
+	char dummy;
+};
+
+/* i386 protocol version */
+#pragma pack(push, 4)
+struct blkif_x86_32_request {
+	uint8_t        operation;    /* BLKIF_OP_???                         */
+	uint8_t        nr_segments;  /* number of segments                   */
+	blkif_vdev_t   handle;       /* only for read/write requests         */
+	uint64_t       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_32_response {
+	uint64_t        id;              /* copied from request */
+	uint8_t         operation;       /* copied from request */
+	int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_x86_32_request blkif_x86_32_request_t;
+typedef struct blkif_x86_32_response blkif_x86_32_response_t;
+#pragma pack(pop)
+
+/* x86_64 protocol version */
+struct blkif_x86_64_request {
+	uint8_t        operation;    /* BLKIF_OP_???                         */
+	uint8_t        nr_segments;  /* number of segments                   */
+	blkif_vdev_t   handle;       /* only for read/write requests         */
+	uint64_t       __attribute__((__aligned__(8))) id;
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_64_response {
+	uint64_t       __attribute__((__aligned__(8))) id;
+	uint8_t         operation;       /* copied from request */
+	int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_x86_64_request blkif_x86_64_request_t;
+typedef struct blkif_x86_64_response blkif_x86_64_response_t;
+
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
+
+union blkif_back_rings {
+	blkif_back_ring_t        native;
+	blkif_common_back_ring_t common;
+	blkif_x86_32_back_ring_t x86_32;
+	blkif_x86_64_back_ring_t x86_64;
+};
+typedef union blkif_back_rings blkif_back_rings_t;
+
+enum blkif_protocol {
+	BLKIF_PROTOCOL_NATIVE = 1,
+	BLKIF_PROTOCOL_X86_32 = 2,
+	BLKIF_PROTOCOL_X86_64 = 3,
+};
+
+static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+	if (n > src->nr_segments)
+		n = src->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->seg[i] = src->seg[i];
+}
+
+static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+	if (n > src->nr_segments)
+		n = src->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->seg[i] = src->seg[i];
+}
+
+#endif /* __XEN_BLKIF_H__ */
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7222)
+++ hw/xen_backend.h	(revision 7223)
@@ -2,6 +2,7 @@
 #define QEMU_HW_XEN_BACKEND_H 1
 
 #include "xen_common.h"
+#include "sysemu.h"
 
 /* ------------------------------------------------------------- */
 
@@ -87,6 +88,7 @@
 extern struct XenDevOps xen_console_ops;      /* xen_console.c     */
 extern struct XenDevOps xen_kbdmouse_ops;     /* xen_framebuffer.c */
 extern struct XenDevOps xen_framebuffer_ops;  /* xen_framebuffer.c */
+extern struct XenDevOps xen_blkdev_ops;       /* xen_disk.c        */
 
 void xen_init_display(int domid);
 

 ------------------------------------------------------------------------
r7222 | aliguori | 2009-04-22 10:19:25 -0500 (Wed, 22 Apr 2009) | 10 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/xen_backend.h
   M /trunk/hw/xen_machine_pv.c
   A /trunk/hw/xenfb.c

xen: add framebuffer backend driver (Gerd Hoffmann)

This patch adds a framebuffer (and kbd+mouse) backend driver.  It
is based on current xen-unstable code.  It has been changed to make
use of the common backend driver code.  It also has been changed to
compile with xen headers older than release 3.3.

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7221)
+++ Makefile.target	(revision 7222)
@@ -562,7 +562,7 @@
 
 # xen backend driver support
 XEN_OBJS := xen_machine_pv.o xen_backend.o
-XEN_OBJS += xen_console.o
+XEN_OBJS += xen_console.o xenfb.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
   LIBS += $(XEN_LIBS)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7221)
+++ hw/xen_machine_pv.c	(revision 7222)
@@ -57,6 +57,11 @@
         exit(1);
     }
     xen_be_register("console", &xen_console_ops);
+    xen_be_register("vkbd", &xen_kbdmouse_ops);
+    xen_be_register("vfb", &xen_framebuffer_ops);
+
+    /* setup framebuffer */
+    xen_init_display(xen_domid);
 }
 
 QEMUMachine xenpv_machine = {
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7221)
+++ hw/xen_backend.h	(revision 7222)
@@ -85,5 +85,9 @@
 
 /* actual backend drivers */
 extern struct XenDevOps xen_console_ops;      /* xen_console.c     */
+extern struct XenDevOps xen_kbdmouse_ops;     /* xen_framebuffer.c */
+extern struct XenDevOps xen_framebuffer_ops;  /* xen_framebuffer.c */
 
+void xen_init_display(int domid);
+
 #endif /* QEMU_HW_XEN_BACKEND_H */
Index: hw/xenfb.c
===================================================================
--- hw/xenfb.c	(revision 0)
+++ hw/xenfb.c	(revision 7222)
@@ -0,0 +1,1013 @@
+/*
+ *  xen paravirt framebuffer backend
+ *
+ *  Copyright IBM, Corp. 2005-2006
+ *  Copyright Red Hat, Inc. 2006-2008
+ *
+ *  Authors:
+ *       Anthony Liguori ,
+ *       Markus Armbruster ,
+ *       Daniel P. Berrange ,
+ *       Pat Campbell ,
+ *       Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hw.h"
+#include "sysemu.h"
+#include "console.h"
+#include "qemu-char.h"
+#include "xen_backend.h"
+
+#ifndef BTN_LEFT
+#define BTN_LEFT 0x110 /* from <linux/input.h> */
+#endif
+
+/* -------------------------------------------------------------------- */
+
+struct common {
+    struct XenDevice  xendev;  /* must be first */
+    void              *page;
+    DisplayState      *ds;
+};
+
+struct XenInput {
+    struct common c;
+    int abs_pointer_wanted; /* Whether guest supports absolute pointer */
+    int button_state;       /* Last seen pointer button state */
+    int extended;
+    QEMUPutMouseEntry *qmouse;
+};
+
+#define UP_QUEUE 8
+
+struct XenFB {
+    struct common     c;
+    size_t            fb_len;
+    int               row_stride;
+    int               depth;
+    int               width;
+    int               height;
+    int               offset;
+    void              *pixels;
+    int               fbpages;
+    int               feature_update;
+    int               refresh_period;
+    int               bug_trigger;
+    int               have_console;
+    int               do_resize;
+
+    struct {
+	int x,y,w,h;
+    } up_rects[UP_QUEUE];
+    int               up_count;
+    int               up_fullscreen;
+};
+
+/* -------------------------------------------------------------------- */
+
+static int common_bind(struct common *c)
+{
+    int mfn;
+
+    if (xenstore_read_fe_int(&c->xendev, "page-ref", &mfn) == -1)
+	return -1;
+    if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
+	return -1;
+
+    c->page = xc_map_foreign_range(xen_xc, c->xendev.dom,
+				   XC_PAGE_SIZE,
+				   PROT_READ | PROT_WRITE, mfn);
+    if (c->page == NULL)
+	return -1;
+
+    xen_be_bind_evtchn(&c->xendev);
+    xen_be_printf(&c->xendev, 1, "ring mfn %d, remote-port %d, local-port %d\n",
+		  mfn, c->xendev.remote_port, c->xendev.local_port);
+
+    return 0;
+}
+
+static void common_unbind(struct common *c)
+{
+    xen_be_unbind_evtchn(&c->xendev);
+    if (c->page) {
+	munmap(c->page, XC_PAGE_SIZE);
+	c->page = NULL;
+    }
+}
+
+/* -------------------------------------------------------------------- */
+
+#if 0
+/*
+ * These two tables are not needed any more, but left in here
+ * intentionally as documentation, to show how scancode2linux[]
+ * was generated.
+ *
+ * Tables to map from scancode to Linux input layer keycode.
+ * Scancodes are hardware-specific.  These maps assume a
+ * standard AT or PS/2 keyboard which is what QEMU feeds us.
+ */
+const unsigned char atkbd_set2_keycode[512] = {
+
+     0, 67, 65, 63, 61, 59, 60, 88,  0, 68, 66, 64, 62, 15, 41,117,
+     0, 56, 42, 93, 29, 16,  2,  0,  0,  0, 44, 31, 30, 17,  3,  0,
+     0, 46, 45, 32, 18,  5,  4, 95,  0, 57, 47, 33, 20, 19,  6,183,
+     0, 49, 48, 35, 34, 21,  7,184,  0,  0, 50, 36, 22,  8,  9,185,
+     0, 51, 37, 23, 24, 11, 10,  0,  0, 52, 53, 38, 39, 25, 12,  0,
+     0, 89, 40,  0, 26, 13,  0,  0, 58, 54, 28, 27,  0, 43,  0, 85,
+     0, 86, 91, 90, 92,  0, 14, 94,  0, 79,124, 75, 71,121,  0,  0,
+    82, 83, 80, 76, 77, 72,  1, 69, 87, 78, 81, 74, 55, 73, 70, 99,
+
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    217,100,255,  0, 97,165,  0,  0,156,  0,  0,  0,  0,  0,  0,125,
+    173,114,  0,113,  0,  0,  0,126,128,  0,  0,140,  0,  0,  0,127,
+    159,  0,115,  0,164,  0,  0,116,158,  0,150,166,  0,  0,  0,142,
+    157,  0,  0,  0,  0,  0,  0,  0,155,  0, 98,  0,  0,163,  0,  0,
+    226,  0,  0,  0,  0,  0,  0,  0,  0,255, 96,  0,  0,  0,143,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,107,  0,105,102,  0,  0,112,
+    110,111,108,112,106,103,  0,119,  0,118,109,  0, 99,104,119,  0,
+
+};
+
+const unsigned char atkbd_unxlate_table[128] = {
+
+      0,118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85,102, 13,
+     21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27,
+     35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42,
+     50, 49, 58, 65, 73, 74, 89,124, 17, 41, 88,  5,  6,  4, 12,  3,
+     11,  2, 10,  1,  9,119,126,108,117,125,123,107,115,116,121,105,
+    114,122,112,113,127, 96, 97,120,  7, 15, 23, 31, 39, 47, 55, 63,
+     71, 79, 86, 94,  8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87,111,
+     19, 25, 57, 81, 83, 92, 95, 98, 99,100,101,103,104,106,109,110
+
+};
+#endif
+
+/*
+ * for (i = 0; i < 128; i++) {
+ *     scancode2linux[i] = atkbd_set2_keycode[atkbd_unxlate_table[i]];
+ *     scancode2linux[i | 0x80] = atkbd_set2_keycode[atkbd_unxlate_table[i] | 0x80];
+ * }
+ */
+static const unsigned char scancode2linux[512] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+     80, 81, 82, 83, 99,  0, 86, 87, 88,117,  0,  0, 95,183,184,185,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     93,  0,  0, 89,  0,  0, 85, 91, 90, 92,  0, 94,  0,124,121,  0,
+
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    165,  0,  0,  0,  0,  0,  0,  0,  0,163,  0,  0, 96, 97,  0,  0,
+    113,140,164,  0,166,  0,  0,  0,  0,  0,255,  0,  0,  0,114,  0,
+    115,  0,150,  0,  0, 98,255, 99,100,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,119,119,102,103,104,  0,105,112,106,118,107,
+    108,109,110,111,  0,  0,  0,  0,  0,  0,  0,125,126,127,116,142,
+      0,  0,  0,143,  0,217,156,173,128,159,158,157,155,226,  0,112,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
+
+/* Send an event to the keyboard frontend driver */
+static int xenfb_kbd_event(struct XenInput *xenfb,
+			   union xenkbd_in_event *event)
+{
+    struct xenkbd_page *page = xenfb->c.page;
+    uint32_t prod;
+
+    if (xenfb->c.xendev.be_state != XenbusStateConnected)
+	return 0;
+    if (!page)
+        return 0;
+
+    prod = page->in_prod;
+    if (prod - page->in_cons == XENKBD_IN_RING_LEN) {
+	errno = EAGAIN;
+	return -1;
+    }
+
+    xen_mb();		/* ensure ring space available */
+    XENKBD_IN_RING_REF(page, prod) = *event;
+    xen_wmb();		/* ensure ring contents visible */
+    page->in_prod = prod + 1;
+    return xen_be_send_notify(&xenfb->c.xendev);
+}
+
+/* Send a keyboard (or mouse button) event */
+static int xenfb_send_key(struct XenInput *xenfb, bool down, int keycode)
+{
+    union xenkbd_in_event event;
+
+    memset(&event, 0, XENKBD_IN_EVENT_SIZE);
+    event.type = XENKBD_TYPE_KEY;
+    event.key.pressed = down ? 1 : 0;
+    event.key.keycode = keycode;
+
+    return xenfb_kbd_event(xenfb, &event);
+}
+
+/* Send a relative mouse movement event */
+static int xenfb_send_motion(struct XenInput *xenfb,
+			     int rel_x, int rel_y, int rel_z)
+{
+    union xenkbd_in_event event;
+
+    memset(&event, 0, XENKBD_IN_EVENT_SIZE);
+    event.type = XENKBD_TYPE_MOTION;
+    event.motion.rel_x = rel_x;
+    event.motion.rel_y = rel_y;
+#if __XEN_LATEST_INTERFACE_VERSION__ >= 0x00030207
+    event.motion.rel_z = rel_z;
+#endif
+
+    return xenfb_kbd_event(xenfb, &event);
+}
+
+/* Send an absolute mouse movement event */
+static int xenfb_send_position(struct XenInput *xenfb,
+			       int abs_x, int abs_y, int z)
+{
+    union xenkbd_in_event event;
+
+    memset(&event, 0, XENKBD_IN_EVENT_SIZE);
+    event.type = XENKBD_TYPE_POS;
+    event.pos.abs_x = abs_x;
+    event.pos.abs_y = abs_y;
+#if __XEN_LATEST_INTERFACE_VERSION__ == 0x00030207
+    event.pos.abs_z = z;
+#endif
+#if __XEN_LATEST_INTERFACE_VERSION__ >= 0x00030208
+    event.pos.rel_z = z;
+#endif
+
+    return xenfb_kbd_event(xenfb, &event);
+}
+
+/*
+ * Send a key event from the client to the guest OS
+ * QEMU gives us a raw scancode from an AT / PS/2 style keyboard.
+ * We have to turn this into a Linux Input layer keycode.
+ *
+ * Extra complexity from the fact that with extended scancodes
+ * (like those produced by arrow keys) this method gets called
+ * twice, but we only want to send a single event. So we have to
+ * track the '0xe0' scancode state & collapse the extended keys
+ * as needed.
+ *
+ * Wish we could just send scancodes straight to the guest which
+ * already has code for dealing with this...
+ */
+static void xenfb_key_event(void *opaque, int scancode)
+{
+    struct XenInput *xenfb = opaque;
+    int down = 1;
+
+    if (scancode == 0xe0) {
+	xenfb->extended = 1;
+	return;
+    } else if (scancode & 0x80) {
+	scancode &= 0x7f;
+	down = 0;
+    }
+    if (xenfb->extended) {
+	scancode |= 0x80;
+	xenfb->extended = 0;
+    }
+    xenfb_send_key(xenfb, down, scancode2linux[scancode]);
+}
+
+/*
+ * Send a mouse event from the client to the guest OS
+ *
+ * The QEMU mouse can be in either relative, or absolute mode.
+ * Movement is sent separately from button state, which has to
+ * be encoded as virtual key events. We also don't actually get
+ * given any button up/down events, so have to track changes in
+ * the button state.
+ */
+static void xenfb_mouse_event(void *opaque,
+			      int dx, int dy, int dz, int button_state)
+{
+    struct XenInput *xenfb = opaque;
+    int dw = ds_get_width(xenfb->c.ds);
+    int dh = ds_get_height(xenfb->c.ds);
+    int i;
+
+    if (xenfb->abs_pointer_wanted)
+	xenfb_send_position(xenfb,
+			    dx * (dw - 1) / 0x7fff,
+			    dy * (dh - 1) / 0x7fff,
+			    dz);
+    else
+	xenfb_send_motion(xenfb, dx, dy, dz);
+
+    for (i = 0 ; i < 8 ; i++) {
+	int lastDown = xenfb->button_state & (1 << i);
+	int down = button_state & (1 << i);
+	if (down == lastDown)
+	    continue;
+
+	if (xenfb_send_key(xenfb, down, BTN_LEFT+i) < 0)
+	    return;
+    }
+    xenfb->button_state = button_state;
+}
+
+static int input_init(struct XenDevice *xendev)
+{
+    struct XenInput *in = container_of(xendev, struct XenInput, c.xendev);
+
+    if (!in->c.ds) {
+        xen_be_printf(xendev, 1, "ds not set (yet)\n");
+	return -1;
+    }
+
+    xenstore_write_be_int(xendev, "feature-abs-pointer", 1);
+    return 0;
+}
+
+static int input_connect(struct XenDevice *xendev)
+{
+    struct XenInput *in = container_of(xendev, struct XenInput, c.xendev);
+    int rc;
+
+    if (xenstore_read_fe_int(xendev, "request-abs-pointer",
+                             &in->abs_pointer_wanted) == -1)
+	in->abs_pointer_wanted = 0;
+
+    rc = common_bind(&in->c);
+    if (rc != 0)
+	return rc;
+
+    qemu_add_kbd_event_handler(xenfb_key_event, in);
+    in->qmouse = qemu_add_mouse_event_handler(xenfb_mouse_event, in,
+					      in->abs_pointer_wanted,
+					      "Xen PVFB Mouse");
+    return 0;
+}
+
+static void input_disconnect(struct XenDevice *xendev)
+{
+    struct XenInput *in = container_of(xendev, struct XenInput, c.xendev);
+
+    if (in->qmouse) {
+	qemu_remove_mouse_event_handler(in->qmouse);
+	in->qmouse = NULL;
+    }
+    qemu_add_kbd_event_handler(NULL, NULL);
+    common_unbind(&in->c);
+}
+
+static void input_event(struct XenDevice *xendev)
+{
+    struct XenInput *xenfb = container_of(xendev, struct XenInput, c.xendev);
+    struct xenkbd_page *page = xenfb->c.page;
+
+    /* We don't understand any keyboard events, so just ignore them. */
+    if (page->out_prod == page->out_cons)
+	return;
+    page->out_cons = page->out_prod;
+    xen_be_send_notify(&xenfb->c.xendev);
+}
+
+/* -------------------------------------------------------------------- */
+
+static void xenfb_copy_mfns(int mode, int count, unsigned long *dst, void *src)
+{
+    uint32_t *src32 = src;
+    uint64_t *src64 = src;
+    int i;
+
+    for (i = 0; i < count; i++)
+	dst[i] = (mode == 32) ? src32[i] : src64[i];
+}
+
+static int xenfb_map_fb(struct XenFB *xenfb)
+{
+    struct xenfb_page *page = xenfb->c.page;
+    char *protocol = xenfb->c.xendev.protocol;
+    int n_fbdirs;
+    unsigned long *pgmfns = NULL;
+    unsigned long *fbmfns = NULL;
+    void *map, *pd;
+    int mode, ret = -1;
+
+    /* default to native */
+    pd = page->pd;
+    mode = sizeof(unsigned long) * 8;
+
+    if (!protocol) {
+	/*
+	 * Undefined protocol, some guesswork needed.
+	 *
+	 * Old frontends which don't set the protocol use
+	 * one page directory only, thus pd[1] must be zero.
+	 * pd[1] of the 32bit struct layout and the lower
+	 * 32 bits of pd[0] of the 64bit struct layout have
+	 * the same location, so we can check that ...
+	 */
+	uint32_t *ptr32 = NULL;
+	uint32_t *ptr64 = NULL;
+#if defined(__i386__)
+	ptr32 = (void*)page->pd;
+	ptr64 = ((void*)page->pd) + 4;
+#elif defined(__x86_64__)
+	ptr32 = ((void*)page->pd) - 4;
+	ptr64 = (void*)page->pd;
+#endif
+	if (ptr32) {
+	    if (ptr32[1] == 0) {
+		mode = 32;
+		pd   = ptr32;
+	    } else {
+		mode = 64;
+		pd   = ptr64;
+	    }
+	}
+#if defined(__x86_64__)
+    } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
+	/* 64bit dom0, 32bit domU */
+	mode = 32;
+	pd   = ((void*)page->pd) - 4;
+#elif defined(__i386__)
+    } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
+	/* 32bit dom0, 64bit domU */
+	mode = 64;
+	pd   = ((void*)page->pd) + 4;
+#endif
+    }
+
+    if (xenfb->pixels) {
+        munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+        xenfb->pixels = NULL;
+    }
+
+    xenfb->fbpages = (xenfb->fb_len + (XC_PAGE_SIZE - 1)) / XC_PAGE_SIZE;
+    n_fbdirs = xenfb->fbpages * mode / 8;
+    n_fbdirs = (n_fbdirs + (XC_PAGE_SIZE - 1)) / XC_PAGE_SIZE;
+
+    pgmfns = qemu_mallocz(sizeof(unsigned long) * n_fbdirs);
+    fbmfns = qemu_mallocz(sizeof(unsigned long) * xenfb->fbpages);
+
+    xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
+    map = xc_map_foreign_pages(xen_xc, xenfb->c.xendev.dom,
+			       PROT_READ, pgmfns, n_fbdirs);
+    if (map == NULL)
+	goto out;
+    xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
+    munmap(map, n_fbdirs * XC_PAGE_SIZE);
+
+    xenfb->pixels = xc_map_foreign_pages(xen_xc, xenfb->c.xendev.dom,
+					 PROT_READ | PROT_WRITE, fbmfns, xenfb->fbpages);
+    if (xenfb->pixels == NULL)
+	goto out;
+
+    ret = 0; /* all is fine */
+
+out:
+    qemu_free(pgmfns);
+    qemu_free(fbmfns);
+    return ret;
+}
+
+static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
+			      int width, int height, int depth,
+			      size_t fb_len, int offset, int row_stride)
+{
+    size_t mfn_sz = sizeof(*((struct xenfb_page *)0)->pd);
+    size_t pd_len = sizeof(((struct xenfb_page *)0)->pd) / mfn_sz;
+    size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
+    size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+    int max_width, max_height;
+
+    if (fb_len_lim > fb_len_max) {
+	xen_be_printf(&xenfb->c.xendev, 0, "fb size limit %zu exceeds %zu, corrected\n",
+		      fb_len_lim, fb_len_max);
+	fb_len_lim = fb_len_max;
+    }
+    if (fb_len_lim && fb_len > fb_len_lim) {
+	xen_be_printf(&xenfb->c.xendev, 0, "frontend fb size %zu limited to %zu\n",
+		      fb_len, fb_len_lim);
+	fb_len = fb_len_lim;
+    }
+    if (depth != 8 && depth != 16 && depth != 24 && depth != 32) {
+	xen_be_printf(&xenfb->c.xendev, 0, "can't handle frontend fb depth %d\n",
+		      depth);
+	return -1;
+    }
+    if (row_stride <= 0 || row_stride > fb_len) {
+	xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend stride %d\n", row_stride);
+	return -1;
+    }
+    max_width = row_stride / (depth / 8);
+    if (width < 0 || width > max_width) {
+	xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend width %d limited to %d\n",
+		      width, max_width);
+	width = max_width;
+    }
+    if (offset < 0 || offset >= fb_len) {
+	xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend offset %d (max %zu)\n",
+		      offset, fb_len - 1);
+	return -1;
+    }
+    max_height = (fb_len - offset) / row_stride;
+    if (height < 0 || height > max_height) {
+	xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend height %d limited to %d\n",
+		      height, max_height);
+	height = max_height;
+    }
+    xenfb->fb_len = fb_len;
+    xenfb->row_stride = row_stride;
+    xenfb->depth = depth;
+    xenfb->width = width;
+    xenfb->height = height;
+    xenfb->offset = offset;
+    xenfb->up_fullscreen = 1;
+    xenfb->do_resize = 1;
+    xen_be_printf(&xenfb->c.xendev, 1, "framebuffer %dx%dx%d offset %d stride %d\n",
+		  width, height, depth, offset, row_stride);
+    return 0;
+}
+
+/* A convenient function for munging pixels between different depths */
+#define BLT(SRC_T,DST_T,RSB,GSB,BSB,RDB,GDB,BDB)                        \
+    for (line = y ; line < (y+h) ; line++) {				\
+	SRC_T *src = (SRC_T *)(xenfb->pixels				\
+			       + xenfb->offset				\
+			       + (line * xenfb->row_stride)		\
+			       + (x * xenfb->depth / 8));		\
+	DST_T *dst = (DST_T *)(data					\
+			       + (line * linesize)			\
+			       + (x * bpp / 8));			\
+	int col;							\
+	const int RSS = 32 - (RSB + GSB + BSB);				\
+	const int GSS = 32 - (GSB + BSB);				\
+	const int BSS = 32 - (BSB);					\
+	const uint32_t RSM = (~0U) << (32 - RSB);			\
+	const uint32_t GSM = (~0U) << (32 - GSB);			\
+	const uint32_t BSM = (~0U) << (32 - BSB);			\
+	const int RDS = 32 - (RDB + GDB + BDB);				\
+	const int GDS = 32 - (GDB + BDB);				\
+	const int BDS = 32 - (BDB);					\
+	const uint32_t RDM = (~0U) << (32 - RDB);			\
+	const uint32_t GDM = (~0U) << (32 - GDB);			\
+	const uint32_t BDM = (~0U) << (32 - BDB);			\
+	for (col = x ; col < (x+w) ; col++) {				\
+	    uint32_t spix = *src;					\
+	    *dst = (((spix << RSS) & RSM & RDM) >> RDS) |		\
+		(((spix << GSS) & GSM & GDM) >> GDS) |			\
+		(((spix << BSS) & BSM & BDM) >> BDS);			\
+	    src = (SRC_T *) ((unsigned long) src + xenfb->depth / 8);	\
+	    dst = (DST_T *) ((unsigned long) dst + bpp / 8);		\
+	}								\
+    }
+
+
+/*
+ * This copies data from the guest framebuffer region, into QEMU's
+ * displaysurface. qemu uses 16 or 32 bpp.  In case the pv framebuffer
+ * uses something else we must convert and copy, otherwise we can
+ * supply the buffer directly and do nothing here.
+ */
+static void xenfb_guest_copy(struct XenFB *xenfb, int x, int y, int w, int h)
+{
+    int line, oops = 0;
+    int bpp = ds_get_bits_per_pixel(xenfb->c.ds);
+    int linesize = ds_get_linesize(xenfb->c.ds);
+    uint8_t *data = ds_get_data(xenfb->c.ds);
+
+    if (!is_buffer_shared(xenfb->c.ds->surface)) {
+        switch (xenfb->depth) {
+        case 8:
+            if (bpp == 16) {
+                BLT(uint8_t, uint16_t,   3, 3, 2,   5, 6, 5);
+            } else if (bpp == 32) {
+                BLT(uint8_t, uint32_t,   3, 3, 2,   8, 8, 8);
+            } else {
+                oops = 1;
+            }
+            break;
+        case 24:
+            if (bpp == 16) {
+                BLT(uint32_t, uint16_t,  8, 8, 8,   5, 6, 5);
+            } else if (bpp == 32) {
+                BLT(uint32_t, uint32_t,  8, 8, 8,   8, 8, 8);
+            } else {
+                oops = 1;
+            }
+            break;
+        default:
+            oops = 1;
+	}
+    }
+    if (oops) /* should not happen */
+        xen_be_printf(&xenfb->c.xendev, 0, "%s: oops: convert %d -> %d bpp?\n",
+                      __FUNCTION__, xenfb->depth, bpp);
+
+    dpy_update(xenfb->c.ds, x, y, w, h);
+}
+
+#ifdef XENFB_TYPE_REFRESH_PERIOD
+static int xenfb_queue_full(struct XenFB *xenfb)
+{
+    struct xenfb_page *page = xenfb->c.page;
+    uint32_t cons, prod;
+
+    if (!page)
+        return 1;
+
+    prod = page->in_prod;
+    cons = page->in_cons;
+    return prod - cons == XENFB_IN_RING_LEN;
+}
+
+static void xenfb_send_event(struct XenFB *xenfb, union xenfb_in_event *event)
+{
+    uint32_t prod;
+    struct xenfb_page *page = xenfb->c.page;
+
+    prod = page->in_prod;
+    /* caller ensures !xenfb_queue_full() */
+    xen_mb();                   /* ensure ring space available */
+    XENFB_IN_RING_REF(page, prod) = *event;
+    xen_wmb();                  /* ensure ring contents visible */
+    page->in_prod = prod + 1;
+
+    xen_be_send_notify(&xenfb->c.xendev);
+}
+
+static void xenfb_send_refresh_period(struct XenFB *xenfb, int period)
+{
+    union xenfb_in_event event;
+
+    memset(&event, 0, sizeof(event));
+    event.type = XENFB_TYPE_REFRESH_PERIOD;
+    event.refresh_period.period = period;
+    xenfb_send_event(xenfb, &event);
+}
+#endif
+
+/*
+ * Periodic update of display.
+ * Also transmit the refresh interval to the frontend.
+ *
+ * Never ever do any qemu display operations
+ * (resize, screen update) outside this function.
+ * Our screen might be inactive.  When asked for
+ * an update we know it is active.
+ */
+static void xenfb_update(void *opaque)
+{
+    struct XenFB *xenfb = opaque;
+    struct DisplayChangeListener *l;
+    int i;
+
+    if (xenfb->c.xendev.be_state != XenbusStateConnected)
+        return;
+
+    if (xenfb->feature_update) {
+#ifdef XENFB_TYPE_REFRESH_PERIOD
+        int period = 99999999;
+        int idle = 1;
+
+	if (xenfb_queue_full(xenfb))
+	    return;
+
+        for (l = xenfb->c.ds->listeners; l != NULL; l = l->next) {
+            if (l->idle)
+                continue;
+            idle = 0;
+            if (!l->gui_timer_interval) {
+                if (period > GUI_REFRESH_INTERVAL)
+                    period = GUI_REFRESH_INTERVAL;
+            } else {
+                if (period > l->gui_timer_interval)
+                    period = l->gui_timer_interval;
+            }
+        }
+        if (idle)
+	    period = XENFB_NO_REFRESH;
+
+	if (xenfb->refresh_period != period) {
+	    xenfb_send_refresh_period(xenfb, period);
+	    xenfb->refresh_period = period;
+            xen_be_printf(&xenfb->c.xendev, 1, "refresh period: %d\n", period);
+	}
+#else
+	; /* nothing */
+#endif
+    } else {
+	/* we don't get update notifications, thus use the
+	 * sledge hammer approach ... */
+	xenfb->up_fullscreen = 1;
+    }
+
+    /* resize if needed */
+    if (xenfb->do_resize) {
+        xenfb->do_resize = 0;
+        switch (xenfb->depth) {
+        case 16:
+        case 32:
+            /* console.c supported depth -> buffer can be used directly */
+            qemu_free_displaysurface(xenfb->c.ds);
+            xenfb->c.ds->surface = qemu_create_displaysurface_from
+                (xenfb->width, xenfb->height, xenfb->depth,
+                 xenfb->row_stride, xenfb->pixels + xenfb->offset);
+            break;
+        default:
+            /* we must convert stuff */
+            qemu_resize_displaysurface(xenfb->c.ds, xenfb->width, xenfb->height);
+            break;
+        }
+        xen_be_printf(&xenfb->c.xendev, 1, "update: resizing: %dx%d @ %d bpp%s\n",
+                      xenfb->width, xenfb->height, xenfb->depth,
+                      is_buffer_shared(xenfb->c.ds->surface) ? " (shared)" : "");
+        dpy_resize(xenfb->c.ds);
+        xenfb->up_fullscreen = 1;
+    }
+
+    /* run queued updates */
+    if (xenfb->up_fullscreen) {
+	xen_be_printf(&xenfb->c.xendev, 3, "update: fullscreen\n");
+	xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height);
+    } else if (xenfb->up_count) {
+	xen_be_printf(&xenfb->c.xendev, 3, "update: %d rects\n", xenfb->up_count);
+	for (i = 0; i < xenfb->up_count; i++)
+	    xenfb_guest_copy(xenfb,
+			     xenfb->up_rects[i].x,
+			     xenfb->up_rects[i].y,
+			     xenfb->up_rects[i].w,
+			     xenfb->up_rects[i].h);
+    } else {
+	xen_be_printf(&xenfb->c.xendev, 3, "update: nothing\n");
+    }
+    xenfb->up_count = 0;
+    xenfb->up_fullscreen = 0;
+}
+
+/* QEMU display state changed, so refresh the framebuffer copy */
+static void xenfb_invalidate(void *opaque)
+{
+    struct XenFB *xenfb = opaque;
+    xenfb->up_fullscreen = 1;
+}
+
+static void xenfb_handle_events(struct XenFB *xenfb)
+{
+    uint32_t prod, cons;
+    struct xenfb_page *page = xenfb->c.page;
+
+    prod = page->out_prod;
+    if (prod == page->out_cons)
+	return;
+    xen_rmb();		/* ensure we see ring contents up to prod */
+    for (cons = page->out_cons; cons != prod; cons++) {
+	union xenfb_out_event *event = &XENFB_OUT_RING_REF(page, cons);
+	int x, y, w, h;
+
+	switch (event->type) {
+	case XENFB_TYPE_UPDATE:
+	    if (xenfb->up_count == UP_QUEUE)
+		xenfb->up_fullscreen = 1;
+	    if (xenfb->up_fullscreen)
+		break;
+	    x = MAX(event->update.x, 0);
+	    y = MAX(event->update.y, 0);
+	    w = MIN(event->update.width, xenfb->width - x);
+	    h = MIN(event->update.height, xenfb->height - y);
+	    if (w < 0 || h < 0) {
+                xen_be_printf(&xenfb->c.xendev, 1, "bogus update ignored\n");
+		break;
+	    }
+	    if (x != event->update.x ||
+                y != event->update.y ||
+		w != event->update.width ||
+		h != event->update.height) {
+                xen_be_printf(&xenfb->c.xendev, 1, "bogus update clipped\n");
+	    }
+	    if (w == xenfb->width && h > xenfb->height / 2) {
+		/* scroll detector: updated more than 50% of the lines,
+		 * don't bother keeping track of the rectangles then */
+		xenfb->up_fullscreen = 1;
+	    } else {
+		xenfb->up_rects[xenfb->up_count].x = x;
+		xenfb->up_rects[xenfb->up_count].y = y;
+		xenfb->up_rects[xenfb->up_count].w = w;
+		xenfb->up_rects[xenfb->up_count].h = h;
+		xenfb->up_count++;
+	    }
+	    break;
+#ifdef XENFB_TYPE_RESIZE
+	case XENFB_TYPE_RESIZE:
+	    if (xenfb_configure_fb(xenfb, xenfb->fb_len,
+				   event->resize.width,
+				   event->resize.height,
+				   event->resize.depth,
+				   xenfb->fb_len,
+				   event->resize.offset,
+				   event->resize.stride) < 0)
+		break;
+	    xenfb_invalidate(xenfb);
+	    break;
+#endif
+	}
+    }
+    xen_mb();		/* ensure we're done with ring contents */
+    page->out_cons = cons;
+}
+
+static int fb_init(struct XenDevice *xendev)
+{
+    struct XenFB *fb = container_of(xendev, struct XenFB, c.xendev);
+
+    fb->refresh_period = -1;
+
+#ifdef XENFB_TYPE_RESIZE
+    xenstore_write_be_int(xendev, "feature-resize", 1);
+#endif
+    return 0;
+}
+
+static int fb_connect(struct XenDevice *xendev)
+{
+    struct XenFB *fb = container_of(xendev, struct XenFB, c.xendev);
+    struct xenfb_page *fb_page;
+    int videoram;
+    int rc;
+
+    if (xenstore_read_fe_int(xendev, "videoram", &videoram) == -1)
+	videoram = 0;
+
+    rc = common_bind(&fb->c);
+    if (rc != 0)
+	return rc;
+
+    fb_page = fb->c.page;
+    rc = xenfb_configure_fb(fb, videoram * 1024 * 1024U,
+			    fb_page->width, fb_page->height, fb_page->depth,
+			    fb_page->mem_length, 0, fb_page->line_length);
+    if (rc != 0)
+	return rc;
+
+    rc = xenfb_map_fb(fb);
+    if (rc != 0)
+	return rc;
+
+#if 0  /* handled in xen_init_display() for now */
+    if (!fb->have_console) {
+        fb->c.ds = graphic_console_init(xenfb_update,
+                                        xenfb_invalidate,
+                                        NULL,
+                                        NULL,
+                                        fb);
+        fb->have_console = 1;
+    }
+#endif
+
+    if (xenstore_read_fe_int(xendev, "feature-update", &fb->feature_update) == -1)
+	fb->feature_update = 0;
+    if (fb->feature_update)
+	xenstore_write_be_int(xendev, "request-update", 1);
+
+    xen_be_printf(xendev, 1, "feature-update=%d, videoram=%d\n",
+		  fb->feature_update, videoram);
+    return 0;
+}
+
+static void fb_disconnect(struct XenDevice *xendev)
+{
+    struct XenFB *fb = container_of(xendev, struct XenFB, c.xendev);
+
+    /*
+     * FIXME: qemu can't un-init gfx display (yet?).
+     *   Replacing the framebuffer with anonymous shared memory
+     *   instead.  This releases the guest pages and keeps qemu happy.
+     */
+    fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
+                      -1, 0);
+    common_unbind(&fb->c);
+    fb->feature_update = 0;
+    fb->bug_trigger    = 0;
+}
+
+static void fb_frontend_changed(struct XenDevice *xendev, const char *node)
+{
+    struct XenFB *fb = container_of(xendev, struct XenFB, c.xendev);
+
+    /*
+     * Set state to Connected *again* once the frontend switched
+     * to connected.  We must trigger the watch a second time to
+     * work around a frontend bug.
+     */
+    if (fb->bug_trigger == 0 && strcmp(node, "state") == 0 &&
+        xendev->fe_state == XenbusStateConnected &&
+        xendev->be_state == XenbusStateConnected) {
+        xen_be_printf(xendev, 2, "re-trigger connected (frontend bug)\n");
+        xen_be_set_state(xendev, XenbusStateConnected);
+        fb->bug_trigger = 1; /* only once */
+    }
+}
+
+static void fb_event(struct XenDevice *xendev)
+{
+    struct XenFB *xenfb = container_of(xendev, struct XenFB, c.xendev);
+
+    xenfb_handle_events(xenfb);
+    xen_be_send_notify(&xenfb->c.xendev);
+}
+
+/* -------------------------------------------------------------------- */
+
+struct XenDevOps xen_kbdmouse_ops = {
+    .size       = sizeof(struct XenInput),
+    .init       = input_init,
+    .connect    = input_connect,
+    .disconnect = input_disconnect,
+    .event      = input_event,
+};
+
+struct XenDevOps xen_framebuffer_ops = {
+    .size       = sizeof(struct XenFB),
+    .init       = fb_init,
+    .connect    = fb_connect,
+    .disconnect = fb_disconnect,
+    .event      = fb_event,
+    .frontend_changed = fb_frontend_changed,
+};
+
+/*
+ * FIXME/TODO: Kill this.
+ * Temporarily needed while DisplayState reorganization is in flight.
+ */
+void xen_init_display(int domid)
+{
+    struct XenDevice *xfb, *xin;
+    struct XenFB *fb;
+    struct XenInput *in;
+    int i = 0;
+
+wait_more:
+    i++;
+    main_loop_wait(10); /* milliseconds */
+    xfb = xen_be_find_xendev("vfb", domid, 0);
+    xin = xen_be_find_xendev("vkbd", domid, 0);
+    if (!xfb || !xin) {
+        if (i < 256)
+            goto wait_more;
+        xen_be_printf(NULL, 1, "displaystate setup failed\n");
+        return;
+    }
+
+    /* vfb */
+    fb = container_of(xfb, struct XenFB, c.xendev);
+    fb->c.ds = graphic_console_init(xenfb_update,
+                                    xenfb_invalidate,
+                                    NULL,
+                                    NULL,
+                                    fb);
+    fb->have_console = 1;
+
+    /* vkbd */
+    in = container_of(xin, struct XenInput, c.xendev);
+    in->c.ds = fb->c.ds;
+
+    /* retry ->init() */
+    xen_be_check_state(xin);
+    xen_be_check_state(xfb);
+}

 ------------------------------------------------------------------------
r7221 | aliguori | 2009-04-22 10:19:19 -0500 (Wed, 22 Apr 2009) | 9 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/xen_backend.h
   A /trunk/hw/xen_console.c
   M /trunk/hw/xen_machine_pv.c

xen: add console backend driver. (Gerd Hoffmann)

This patch adds a xenconsole backend driver.  It is based on current
xen-unstable code.  It has been changed to make use of the common
backend driver code.

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7220)
+++ Makefile.target	(revision 7221)
@@ -562,6 +562,7 @@
 
 # xen backend driver support
 XEN_OBJS := xen_machine_pv.o xen_backend.o
+XEN_OBJS += xen_console.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
   LIBS += $(XEN_LIBS)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7220)
+++ hw/xen_machine_pv.c	(revision 7221)
@@ -56,6 +56,7 @@
         fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
         exit(1);
     }
+    xen_be_register("console", &xen_console_ops);
 }
 
 QEMUMachine xenpv_machine = {
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 7220)
+++ hw/xen_backend.h	(revision 7221)
@@ -83,4 +83,7 @@
 void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...)
     __attribute__ ((format(printf, 3, 4)));
 
+/* actual backend drivers */
+extern struct XenDevOps xen_console_ops;      /* xen_console.c     */
+
 #endif /* QEMU_HW_XEN_BACKEND_H */
Index: hw/xen_console.c
===================================================================
--- hw/xen_console.c	(revision 0)
+++ hw/xen_console.c	(revision 7221)
@@ -0,0 +1,270 @@
+/*
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori 
+ *
+ *  Copyright (C) Red Hat 2007
+ *
+ *  Xen Console
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hw.h"
+#include "sysemu.h"
+#include "qemu-char.h"
+#include "xen_backend.h"
+
+struct buffer {
+    uint8_t *data;
+    size_t consumed;
+    size_t size;
+    size_t capacity;
+    size_t max_capacity;
+};
+
+struct XenConsole {
+    struct XenDevice  xendev;  /* must be first */
+    struct buffer     buffer;
+    char              console[XEN_BUFSIZE];
+    int               ring_ref;
+    void              *sring;
+    CharDriverState   *chr;
+    int               backlog;
+};
+
+static void buffer_append(struct XenConsole *con)
+{
+    struct buffer *buffer = &con->buffer;
+    XENCONS_RING_IDX cons, prod, size;
+    struct xencons_interface *intf = con->sring;
+
+    cons = intf->out_cons;
+    prod = intf->out_prod;
+    xen_mb();
+
+    size = prod - cons;
+    if ((size == 0) || (size > sizeof(intf->out)))
+	return;
+
+    if ((buffer->capacity - buffer->size) < size) {
+	buffer->capacity += (size + 1024);
+	buffer->data = qemu_realloc(buffer->data, buffer->capacity);
+    }
+
+    while (cons != prod)
+	buffer->data[buffer->size++] = intf->out[
+	    MASK_XENCONS_IDX(cons++, intf->out)];
+
+    xen_mb();
+    intf->out_cons = cons;
+    xen_be_send_notify(&con->xendev);
+
+    if (buffer->max_capacity &&
+	buffer->size > buffer->max_capacity) {
+	/* Discard the middle of the data. */
+
+	size_t over = buffer->size - buffer->max_capacity;
+	uint8_t *maxpos = buffer->data + buffer->max_capacity;
+
+	memmove(maxpos - over, maxpos, over);
+	buffer->data = qemu_realloc(buffer->data, buffer->max_capacity);
+	buffer->size = buffer->capacity = buffer->max_capacity;
+
+	if (buffer->consumed > buffer->max_capacity - over)
+	    buffer->consumed = buffer->max_capacity - over;
+    }
+}
+
+static void buffer_advance(struct buffer *buffer, size_t len)
+{
+    buffer->consumed += len;
+    if (buffer->consumed == buffer->size) {
+	buffer->consumed = 0;
+	buffer->size = 0;
+    }
+}
+
+static int ring_free_bytes(struct XenConsole *con)
+{
+    struct xencons_interface *intf = con->sring;
+    XENCONS_RING_IDX cons, prod, space;
+
+    cons = intf->in_cons;
+    prod = intf->in_prod;
+    xen_mb();
+
+    space = prod - cons;
+    if (space > sizeof(intf->in))
+	return 0; /* ring is screwed: ignore it */
+
+    return (sizeof(intf->in) - space);
+}
+
+static int xencons_can_receive(void *opaque)
+{
+    struct XenConsole *con = opaque;
+    return ring_free_bytes(con);
+}
+
+static void xencons_receive(void *opaque, const uint8_t *buf, int len)
+{
+    struct XenConsole *con = opaque;
+    struct xencons_interface *intf = con->sring;
+    XENCONS_RING_IDX prod;
+    int i, max;
+
+    max = ring_free_bytes(con);
+    /* The can_receive() func limits this, but check again anyway */
+    if (max < len)
+	len = max;
+
+    prod = intf->in_prod;
+    for (i = 0; i < len; i++) {
+	intf->in[MASK_XENCONS_IDX(prod++, intf->in)] =
+	    buf[i];
+    }
+    xen_wmb();
+    intf->in_prod = prod;
+    xen_be_send_notify(&con->xendev);
+}
+
+static void xencons_send(struct XenConsole *con)
+{
+    ssize_t len, size;
+
+    size = con->buffer.size - con->buffer.consumed;
+    if (con->chr)
+        len = qemu_chr_write(con->chr, con->buffer.data + con->buffer.consumed,
+                             size);
+    else
+        len = size;
+    if (len < 1) {
+	if (!con->backlog) {
+	    con->backlog = 1;
+	    xen_be_printf(&con->xendev, 1, "backlog piling up, nobody listening?\n");
+	}
+    } else {
+	buffer_advance(&con->buffer, len);
+	if (con->backlog && len == size) {
+	    con->backlog = 0;
+	    xen_be_printf(&con->xendev, 1, "backlog is gone\n");
+	}
+    }
+}
+
+/* -------------------------------------------------------------------- */
+
+static int con_init(struct XenDevice *xendev)
+{
+    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+    char *type, *dom;
+
+    /* setup */
+    dom = xs_get_domain_path(xenstore, con->xendev.dom);
+    snprintf(con->console, sizeof(con->console), "%s/console", dom);
+    free(dom);
+
+    type = xenstore_read_str(con->console, "type");
+    if (!type || 0 != strcmp(type, "ioemu")) {
+	xen_be_printf(xendev, 1, "not for me (type=%s)\n", type);
+	return -1;
+    }
+
+    if (!serial_hds[con->xendev.dev])
+	xen_be_printf(xendev, 1, "WARNING: serial line %d not configured\n",
+                      con->xendev.dev);
+    else
+        con->chr = serial_hds[con->xendev.dev];
+
+    return 0;
+}
+
+static int con_connect(struct XenDevice *xendev)
+{
+    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+    int limit;
+
+    if (xenstore_read_int(con->console, "ring-ref", &con->ring_ref) == -1)
+	return -1;
+    if (xenstore_read_int(con->console, "port", &con->xendev.remote_port) == -1)
+	return -1;
+    if (xenstore_read_int(con->console, "limit", &limit) == 0)
+	con->buffer.max_capacity = limit;
+
+    con->sring = xc_map_foreign_range(xen_xc, con->xendev.dom,
+				      XC_PAGE_SIZE,
+				      PROT_READ|PROT_WRITE,
+				      con->ring_ref);
+    if (!con->sring)
+	return -1;
+
+    xen_be_bind_evtchn(&con->xendev);
+    if (con->chr)
+        qemu_chr_add_handlers(con->chr, xencons_can_receive, xencons_receive,
+                              NULL, con);
+
+    xen_be_printf(xendev, 1, "ring mfn %d, remote port %d, local port %d, limit %zd\n",
+		  con->ring_ref,
+		  con->xendev.remote_port,
+		  con->xendev.local_port,
+		  con->buffer.max_capacity);
+    return 0;
+}
+
+static void con_disconnect(struct XenDevice *xendev)
+{
+    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+
+    if (con->chr)
+        qemu_chr_add_handlers(con->chr, NULL, NULL, NULL, NULL);
+    xen_be_unbind_evtchn(&con->xendev);
+
+    if (con->sring) {
+	munmap(con->sring, XC_PAGE_SIZE);
+	con->sring = NULL;
+    }
+}
+
+static void con_event(struct XenDevice *xendev)
+{
+    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+
+    buffer_append(con);
+    if (con->buffer.size - con->buffer.consumed)
+	xencons_send(con);
+}
+
+/* -------------------------------------------------------------------- */
+
+struct XenDevOps xen_console_ops = {
+    .size       = sizeof(struct XenConsole),
+    .flags      = DEVOPS_FLAG_IGNORE_STATE,
+    .init       = con_init,
+    .connect    = con_connect,
+    .event      = con_event,
+    .disconnect = con_disconnect,
+};

 ------------------------------------------------------------------------
r7220 | aliguori | 2009-04-22 10:19:15 -0500 (Wed, 22 Apr 2009) | 11 lines
Changed paths:
   M /trunk/Makefile.target
   A /trunk/hw/xen_backend.c
   A /trunk/hw/xen_backend.h
   A /trunk/hw/xen_common.h
   M /trunk/hw/xen_machine_pv.c

xen: backend driver core (Gerd Hoffmann)

This patch adds infrastructure for xen backend drivers living in qemu,
so drivers don't need to implement common stuff on their own.  It's
mostly xenbus management stuff: some functions to access xenstore,
setting up xenstore watches, callbacks on device discovery and state
changes, handle event channel, ...

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7219)
+++ Makefile.target	(revision 7220)
@@ -561,7 +561,7 @@
 endif
 
 # xen backend driver support
-XEN_OBJS := xen_machine_pv.o
+XEN_OBJS := xen_machine_pv.o xen_backend.o
 ifeq ($(CONFIG_XEN), yes)
   OBJS += $(XEN_OBJS)
   LIBS += $(XEN_LIBS)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 7219)
+++ hw/xen_machine_pv.c	(revision 7220)
@@ -26,7 +26,7 @@
 #include "pc.h"
 #include "sysemu.h"
 #include "boards.h"
-#include "xen.h"
+#include "xen_backend.h"
 
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_EMULATE;
@@ -50,6 +50,12 @@
     }
     env = cpu_init(cpu_model);
     env->halted = 1;
+
+    /* Initialize backend core & drivers */
+    if (xen_be_init() != 0) {
+        fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
+        exit(1);
+    }
 }
 
 QEMUMachine xenpv_machine = {
Index: hw/xen_common.h
===================================================================
--- hw/xen_common.h	(revision 0)
+++ hw/xen_common.h	(revision 7220)
@@ -0,0 +1,34 @@
+#ifndef QEMU_HW_XEN_COMMON_H
+#define QEMU_HW_XEN_COMMON_H 1
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "hw.h"
+#include "xen.h"
+#include "sys-queue.h"   /* BSD list implementation */
+
+/*
+ * tweaks needed to build with different xen versions
+ *  0x00030205 -> 3.1.0
+ *  0x00030207 -> 3.2.0
+ *  0x00030208 -> unstable
+ */
+#include 
+#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030205
+# define evtchn_port_or_error_t int
+#endif
+#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030207
+# define xc_map_foreign_pages xc_map_foreign_batch
+#endif
+#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030208
+# define xen_mb()  mb()
+# define xen_rmb() rmb()
+# define xen_wmb() wmb()
+#endif
+
+#endif /* QEMU_HW_XEN_COMMON_H */
Index: hw/xen_backend.c
===================================================================
--- hw/xen_backend.c	(revision 0)
+++ hw/xen_backend.c	(revision 7220)
@@ -0,0 +1,713 @@
+/*
+ *  xen backend driver infrastructure
+ *  (c) 2008 Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * TODO: add some xenbus / xenstore concepts overview here.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "hw.h"
+#include "qemu-char.h"
+#include "xen_backend.h"
+
+/* ------------------------------------------------------------- */
+
+/* public */
+int xen_xc;
+struct xs_handle *xenstore = NULL;
+
+/* private */
+static TAILQ_HEAD(XenDeviceHead, XenDevice) xendevs = TAILQ_HEAD_INITIALIZER(xendevs);
+static int debug = 0;
+
+/* ------------------------------------------------------------- */
+
+int xenstore_write_str(const char *base, const char *node, const char *val)
+{
+    char abspath[XEN_BUFSIZE];
+
+    snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
+    if (!xs_write(xenstore, 0, abspath, val, strlen(val)))
+	return -1;
+    return 0;
+}
+
+char *xenstore_read_str(const char *base, const char *node)
+{
+    char abspath[XEN_BUFSIZE];
+    unsigned int len;
+    char *str, *ret = NULL;
+
+    snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
+    str = xs_read(xenstore, 0, abspath, &len);
+    if (str != NULL) {
+        /* move to qemu-allocated memory to make sure
+         * callers can safely qemu_free() stuff. */
+        ret = qemu_strdup(str);
+        free(str);
+    }
+    return ret;
+}
+
+int xenstore_write_int(const char *base, const char *node, int ival)
+{
+    char val[32];
+
+    snprintf(val, sizeof(val), "%d", ival);
+    return xenstore_write_str(base, node, val);
+}
+
+int xenstore_read_int(const char *base, const char *node, int *ival)
+{
+    char *val;
+    int rc = -1;
+
+    val = xenstore_read_str(base, node);
+    if (val && 1 == sscanf(val, "%d", ival))
+	rc = 0;
+    qemu_free(val);
+    return rc;
+}
+
+int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val)
+{
+    return xenstore_write_str(xendev->be, node, val);
+}
+
+int xenstore_write_be_int(struct XenDevice *xendev, const char *node, int ival)
+{
+    return xenstore_write_int(xendev->be, node, ival);
+}
+
+char *xenstore_read_be_str(struct XenDevice *xendev, const char *node)
+{
+    return xenstore_read_str(xendev->be, node);
+}
+
+int xenstore_read_be_int(struct XenDevice *xendev, const char *node, int *ival)
+{
+    return xenstore_read_int(xendev->be, node, ival);
+}
+
+char *xenstore_read_fe_str(struct XenDevice *xendev, const char *node)
+{
+    return xenstore_read_str(xendev->fe, node);
+}
+
+int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival)
+{
+    return xenstore_read_int(xendev->fe, node, ival);
+}
+
+/* ------------------------------------------------------------- */
+
+const char *xenbus_strstate(enum xenbus_state state)
+{
+	static const char *const name[] = {
+		[ XenbusStateUnknown      ] = "Unknown",
+		[ XenbusStateInitialising ] = "Initialising",
+		[ XenbusStateInitWait     ] = "InitWait",
+		[ XenbusStateInitialised  ] = "Initialised",
+		[ XenbusStateConnected    ] = "Connected",
+		[ XenbusStateClosing      ] = "Closing",
+		[ XenbusStateClosed	  ] = "Closed",
+	};
+	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
+int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state)
+{
+    int rc;
+
+    rc = xenstore_write_be_int(xendev, "state", state);
+    if (rc < 0)
+	return rc;
+    xen_be_printf(xendev, 1, "backend state: %s -> %s\n",
+		  xenbus_strstate(xendev->be_state), xenbus_strstate(state));
+    xendev->be_state = state;
+    return 0;
+}
+
+/* ------------------------------------------------------------- */
+
+struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev)
+{
+    struct XenDevice *xendev;
+
+    TAILQ_FOREACH(xendev, &xendevs, next) {
+	if (xendev->dom != dom)
+	    continue;
+	if (xendev->dev != dev)
+	    continue;
+	if (strcmp(xendev->type, type) != 0)
+	    continue;
+	return xendev;
+    }
+    return NULL;
+}
+
+/*
+ * get xen backend device, allocate a new one if it doesn't exist.
+ */
+static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev,
+                                           struct XenDevOps *ops)
+{
+    struct XenDevice *xendev;
+    char *dom0;
+
+    xendev = xen_be_find_xendev(type, dom, dev);
+    if (xendev)
+	return xendev;
+
+    /* init new xendev */
+    xendev = qemu_mallocz(ops->size);
+    xendev->type  = type;
+    xendev->dom   = dom;
+    xendev->dev   = dev;
+    xendev->ops   = ops;
+
+    dom0 = xs_get_domain_path(xenstore, 0);
+    snprintf(xendev->be, sizeof(xendev->be), "%s/backend/%s/%d/%d",
+	     dom0, xendev->type, xendev->dom, xendev->dev);
+    snprintf(xendev->name, sizeof(xendev->name), "%s-%d",
+	     xendev->type, xendev->dev);
+    free(dom0);
+
+    xendev->debug      = debug;
+    xendev->local_port = -1;
+
+    xendev->evtchndev = xc_evtchn_open();
+    if (xendev->evtchndev < 0) {
+	xen_be_printf(NULL, 0, "can't open evtchn device\n");
+	qemu_free(xendev);
+	return NULL;
+    }
+    fcntl(xc_evtchn_fd(xendev->evtchndev), F_SETFD, FD_CLOEXEC);
+
+    if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
+	xendev->gnttabdev = xc_gnttab_open();
+	if (xendev->gnttabdev < 0) {
+	    xen_be_printf(NULL, 0, "can't open gnttab device\n");
+	    xc_evtchn_close(xendev->evtchndev);
+	    qemu_free(xendev);
+	    return NULL;
+	}
+    } else {
+	xendev->gnttabdev = -1;
+    }
+
+    TAILQ_INSERT_TAIL(&xendevs, xendev, next);
+
+    if (xendev->ops->alloc)
+	xendev->ops->alloc(xendev);
+
+    return xendev;
+}
+
+/*
+ * release xen backend device.
+ */
+static struct XenDevice *xen_be_del_xendev(int dom, int dev)
+{
+    struct XenDevice *xendev, *xnext;
+
+    /*
+     * This is pretty much like TAILQ_FOREACH(xendev, &xendevs, next) but
+     * we save the next pointer in xnext because we might free xendev.
+     */
+    xnext = xendevs.tqh_first;
+    while (xnext) {
+        xendev = xnext;
+        xnext = xendev->next.tqe_next;
+
+	if (xendev->dom != dom)
+	    continue;
+	if (xendev->dev != dev && dev != -1)
+	    continue;
+
+	if (xendev->ops->free)
+	    xendev->ops->free(xendev);
+
+	if (xendev->fe) {
+	    char token[XEN_BUFSIZE];
+	    snprintf(token, sizeof(token), "fe:%p", xendev);
+	    xs_unwatch(xenstore, xendev->fe, token);
+	    qemu_free(xendev->fe);
+	}
+
+	if (xendev->evtchndev >= 0)
+	    xc_evtchn_close(xendev->evtchndev);
+	if (xendev->gnttabdev >= 0)
+	    xc_gnttab_close(xendev->gnttabdev);
+
+	TAILQ_REMOVE(&xendevs, xendev, next);
+	qemu_free(xendev);
+    }
+    return NULL;
+}
+
+/*
+ * Sync internal data structures on xenstore updates.
+ * Node specifies the changed field.  node = NULL means
+ * update all fields (used for initialization).
+ */
+static void xen_be_backend_changed(struct XenDevice *xendev, const char *node)
+{
+    if (node == NULL  ||  strcmp(node, "online") == 0) {
+	if (xenstore_read_be_int(xendev, "online", &xendev->online) == -1)
+	    xendev->online = 0;
+    }
+
+    if (node) {
+	xen_be_printf(xendev, 2, "backend update: %s\n", node);
+	if (xendev->ops->backend_changed)
+	    xendev->ops->backend_changed(xendev, node);
+    }
+}
+
+static void xen_be_frontend_changed(struct XenDevice *xendev, const char *node)
+{
+    int fe_state;
+
+    if (node == NULL  ||  strcmp(node, "state") == 0) {
+	if (xenstore_read_fe_int(xendev, "state", &fe_state) == -1)
+	    fe_state = XenbusStateUnknown;
+	if (xendev->fe_state != fe_state)
+	    xen_be_printf(xendev, 1, "frontend state: %s -> %s\n",
+			  xenbus_strstate(xendev->fe_state),
+			  xenbus_strstate(fe_state));
+	xendev->fe_state = fe_state;
+    }
+    if (node == NULL  ||  strcmp(node, "protocol") == 0) {
+	qemu_free(xendev->protocol);
+	xendev->protocol = xenstore_read_fe_str(xendev, "protocol");
+	if (xendev->protocol)
+	    xen_be_printf(xendev, 1, "frontend protocol: %s\n", xendev->protocol);
+    }
+
+    if (node) {
+	xen_be_printf(xendev, 2, "frontend update: %s\n", node);
+	if (xendev->ops->frontend_changed)
+	    xendev->ops->frontend_changed(xendev, node);
+    }
+}
+
+/* ------------------------------------------------------------- */
+/* Check for possible state transitions and perform them.        */
+
+/*
+ * Initial xendev setup.  Read frontend path, register watch for it.
+ * Should succeed once xend finished setting up the backend device.
+ *
+ * Also sets initial state (-> Initialising) when done, which
+ * only affects the xendev->be_state variable as xenbus should
+ * already have been put into that state by xend.
+ */
+static int xen_be_try_setup(struct XenDevice *xendev)
+{
+    char token[XEN_BUFSIZE];
+    int be_state;
+
+    if (xenstore_read_be_int(xendev, "state", &be_state) == -1) {
+	xen_be_printf(xendev, 0, "reading backend state failed\n");
+	return -1;
+    }
+
+    if (be_state != XenbusStateInitialising) {
+	xen_be_printf(xendev, 0, "initial backend state is wrong (%s)\n",
+		      xenbus_strstate(be_state));
+	return -1;
+    }
+
+    xendev->fe = xenstore_read_be_str(xendev, "frontend");
+    if (xendev->fe == NULL) {
+	xen_be_printf(xendev, 0, "reading frontend path failed\n");
+	return -1;
+    }
+
+    /* setup frontend watch */
+    snprintf(token, sizeof(token), "fe:%p", xendev);
+    if (!xs_watch(xenstore, xendev->fe, token)) {
+	xen_be_printf(xendev, 0, "watching frontend path (%s) failed\n",
+		      xendev->fe);
+	return -1;
+    }
+    xen_be_set_state(xendev, XenbusStateInitialising);
+
+    xen_be_backend_changed(xendev, NULL);
+    xen_be_frontend_changed(xendev, NULL);
+    return 0;
+}
+
+/*
+ * Try to initialize xendev.  Prepare everything the backend can do
+ * without synchronizing with the frontend.  Fakes hotplug-status.  No
+ * hotplug involved here because this is about userspace drivers, thus
+ * there are no kernel backend devices which could invoke hotplug.
+ *
+ * Goes to InitWait on success.
+ */
+static int xen_be_try_init(struct XenDevice *xendev)
+{
+    int rc = 0;
+
+    if (!xendev->online) {
+	xen_be_printf(xendev, 1, "not online\n");
+	return -1;
+    }
+
+    if (xendev->ops->init)
+	rc = xendev->ops->init(xendev);
+    if (rc != 0) {
+	xen_be_printf(xendev, 1, "init() failed\n");
+	return rc;
+    }
+
+    xenstore_write_be_str(xendev, "hotplug-status", "connected");
+    xen_be_set_state(xendev, XenbusStateInitWait);
+    return 0;
+}
+
+/*
+ * Try to connect xendev.  Depends on the frontend being ready
+ * for it (shared ring and evtchn info in xenstore, state being
+ * Initialised or Connected).
+ *
+ * Goes to Connected on success.
+ */
+static int xen_be_try_connect(struct XenDevice *xendev)
+{
+    int rc = 0;
+
+    if (xendev->fe_state != XenbusStateInitialised  &&
+	xendev->fe_state != XenbusStateConnected) {
+	if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) {
+	    xen_be_printf(xendev, 2, "frontend not ready, ignoring\n");
+	} else {
+	    xen_be_printf(xendev, 2, "frontend not ready (yet)\n");
+	    return -1;
+	}
+    }
+
+    if (xendev->ops->connect)
+	rc = xendev->ops->connect(xendev);
+    if (rc != 0) {
+	xen_be_printf(xendev, 0, "connect() failed\n");
+	return rc;
+    }
+
+    xen_be_set_state(xendev, XenbusStateConnected);
+    return 0;
+}
+
+/*
+ * Teardown connection.
+ *
+ * Goes to the requested state (Closing or Closed) when done.
+ */
+static void xen_be_disconnect(struct XenDevice *xendev, enum xenbus_state state)
+{
+    if (xendev->be_state != XenbusStateClosing &&
+        xendev->be_state != XenbusStateClosed  &&
+        xendev->ops->disconnect)
+	xendev->ops->disconnect(xendev);
+    if (xendev->be_state != state)
+        xen_be_set_state(xendev, state);
+}
+
+/*
+ * Try to reset xendev, for reconnection by another frontend instance.
+ */
+static int xen_be_try_reset(struct XenDevice *xendev)
+{
+    if (xendev->fe_state != XenbusStateInitialising)
+        return -1;
+
+    xen_be_printf(xendev, 1, "device reset (for re-connect)\n");
+    xen_be_set_state(xendev, XenbusStateInitialising);
+    return 0;
+}
+
+/*
+ * state change dispatcher function
+ */
+void xen_be_check_state(struct XenDevice *xendev)
+{
+    int rc = 0;
+
+    /* frontend may request shutdown from almost anywhere */
+    if (xendev->fe_state == XenbusStateClosing ||
+	xendev->fe_state == XenbusStateClosed) {
+	xen_be_disconnect(xendev, xendev->fe_state);
+	return;
+    }
+
+    /* check for possible backend state transitions */
+    for (;;) {
+	switch (xendev->be_state) {
+	case XenbusStateUnknown:
+	    rc = xen_be_try_setup(xendev);
+	    break;
+	case XenbusStateInitialising:
+	    rc = xen_be_try_init(xendev);
+	    break;
+	case XenbusStateInitWait:
+	    rc = xen_be_try_connect(xendev);
+	    break;
+        case XenbusStateClosed:
+            rc = xen_be_try_reset(xendev);
+            break;
+	default:
+	    rc = -1;
+	}
+	if (rc != 0)
+	    break;
+    }
+}
+
+/* ------------------------------------------------------------- */
+
+static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
+{
+    struct XenDevice *xendev;
+    char path[XEN_BUFSIZE], token[XEN_BUFSIZE];
+    char **dev = NULL, *dom0;
+    unsigned int cdev, j;
+
+    /* setup watch */
+    dom0 = xs_get_domain_path(xenstore, 0);
+    snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops);
+    snprintf(path, sizeof(path), "%s/backend/%s/%d", dom0, type, dom);
+    free(dom0);
+    if (!xs_watch(xenstore, path, token)) {
+	xen_be_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", path);
+	return -1;
+    }
+
+    /* look for backends */
+    dev = xs_directory(xenstore, 0, path, &cdev);
+    if (!dev)
+	return 0;
+    for (j = 0; j < cdev; j++) {
+	xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
+	if (xendev == NULL)
+	    continue;
+	xen_be_check_state(xendev);
+    }
+    free(dev);
+    return 0;
+}
+
+static void xenstore_update_be(char *watch, char *type, int dom,
+			       struct XenDevOps *ops)
+{
+    struct XenDevice *xendev;
+    char path[XEN_BUFSIZE], *dom0;
+    unsigned int len, dev;
+
+    dom0 = xs_get_domain_path(xenstore, 0);
+    len = snprintf(path, sizeof(path), "%s/backend/%s/%d", dom0, type, dom);
+    free(dom0);
+    if (strncmp(path, watch, len) != 0)
+	return;
+    if (sscanf(watch+len, "/%u/%255s", &dev, path) != 2) {
+	strcpy(path, "");
+	if (sscanf(watch+len, "/%u", &dev) != 1)
+	    dev = -1;
+    }
+    if (dev == -1)
+	return;
+
+    if (0) {
+	/* FIXME: detect devices being deleted from xenstore ... */
+	xen_be_del_xendev(dom, dev);
+    }
+
+    xendev = xen_be_get_xendev(type, dom, dev, ops);
+    if (xendev != NULL) {
+	xen_be_backend_changed(xendev, path);
+	xen_be_check_state(xendev);
+    }
+}
+
+static void xenstore_update_fe(char *watch, struct XenDevice *xendev)
+{
+    char *node;
+    unsigned int len;
+
+    len = strlen(xendev->fe);
+    if (strncmp(xendev->fe, watch, len) != 0)
+	return;
+    if (watch[len] != '/')
+	return;
+    node = watch + len + 1;
+
+    xen_be_frontend_changed(xendev, node);
+    xen_be_check_state(xendev);
+}
+
+static void xenstore_update(void *unused)
+{
+    char **vec = NULL;
+    intptr_t type, ops, ptr;
+    unsigned int dom, count;
+
+    vec = xs_read_watch(xenstore, &count);
+    if (vec == NULL)
+	goto cleanup;
+
+    if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR,
+               &type, &dom, &ops) == 3)
+	xenstore_update_be(vec[XS_WATCH_PATH], (void*)type, dom, (void*)ops);
+    if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1)
+	xenstore_update_fe(vec[XS_WATCH_PATH], (void*)ptr);
+
+cleanup:
+    qemu_free(vec);
+}
+
+static void xen_be_evtchn_event(void *opaque)
+{
+    struct XenDevice *xendev = opaque;
+    evtchn_port_t port;
+
+    port = xc_evtchn_pending(xendev->evtchndev);
+    if (port != xendev->local_port) {
+	xen_be_printf(xendev, 0, "xc_evtchn_pending returned %d (expected %d)\n",
+		      port, xendev->local_port);
+	return;
+    }
+    xc_evtchn_unmask(xendev->evtchndev, port);
+
+    if (xendev->ops->event)
+	xendev->ops->event(xendev);
+}
+
+/* -------------------------------------------------------------------- */
+
+int xen_be_init(void)
+{
+    xenstore = xs_daemon_open();
+    if (!xenstore) {
+	xen_be_printf(NULL, 0, "can't connect to xenstored\n");
+	return -1;
+    }
+
+    if (qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL) < 0)
+	goto err;
+
+    xen_xc = xc_interface_open();
+    if (xen_xc == -1) {
+	xen_be_printf(NULL, 0, "can't open xen interface\n");
+	goto err;
+    }
+    return 0;
+
+err:
+    qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL);
+    xs_daemon_close(xenstore);
+    xenstore = NULL;
+
+    return -1;
+}
+
+int xen_be_register(const char *type, struct XenDevOps *ops)
+{
+    return xenstore_scan(type, xen_domid, ops);
+}
+
+int xen_be_bind_evtchn(struct XenDevice *xendev)
+{
+    if (xendev->local_port != -1)
+	return 0;
+    xendev->local_port = xc_evtchn_bind_interdomain
+	(xendev->evtchndev, xendev->dom, xendev->remote_port);
+    if (xendev->local_port == -1) {
+	xen_be_printf(xendev, 0, "xc_evtchn_bind_interdomain failed\n");
+	return -1;
+    }
+    xen_be_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
+    qemu_set_fd_handler(xc_evtchn_fd(xendev->evtchndev),
+			xen_be_evtchn_event, NULL, xendev);
+    return 0;
+}
+
+void xen_be_unbind_evtchn(struct XenDevice *xendev)
+{
+    if (xendev->local_port == -1)
+	return;
+    qemu_set_fd_handler(xc_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
+    xc_evtchn_unbind(xendev->evtchndev, xendev->local_port);
+    xen_be_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
+    xendev->local_port = -1;
+}
+
+int xen_be_send_notify(struct XenDevice *xendev)
+{
+    return xc_evtchn_notify(xendev->evtchndev, xendev->local_port);
+}
+
+/*
+ * msg_level:
+ *  0 == errors (stderr + logfile).
+ *  1 == informative debug messages (logfile only).
+ *  2 == noisy debug messages (logfile only).
+ *  3 == will flood your log (logfile only).
+ */
+void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...)
+{
+    va_list args;
+
+    if (xendev) {
+        if (msg_level > xendev->debug)
+            return;
+        qemu_log("xen be: %s: ", xendev->name);
+        if (msg_level == 0)
+            fprintf(stderr, "xen be: %s: ", xendev->name);
+    } else {
+        if (msg_level > debug)
+            return;
+        qemu_log("xen be core: ");
+        if (msg_level == 0)
+            fprintf(stderr, "xen be core: ");
+    }
+    va_start(args, fmt);
+    qemu_log_vprintf(fmt, args);
+    va_end(args);
+    if (msg_level == 0) {
+        va_start(args, fmt);
+        vfprintf(stderr, fmt, args);
+        va_end(args);
+    }
+    qemu_log_flush();
+}
Index: hw/xen_backend.h
===================================================================
--- hw/xen_backend.h	(revision 0)
+++ hw/xen_backend.h	(revision 7220)
@@ -0,0 +1,86 @@
+#ifndef QEMU_HW_XEN_BACKEND_H
+#define QEMU_HW_XEN_BACKEND_H 1
+
+#include "xen_common.h"
+
+/* ------------------------------------------------------------- */
+
+#define XEN_BUFSIZE 1024
+
+struct XenDevice;
+
+/* driver uses grant tables  ->  open gntdev device (xendev->gnttabdev) */
+#define DEVOPS_FLAG_NEED_GNTDEV   1
+/* don't expect frontend doing correct state transitions (aka console quirk) */
+#define DEVOPS_FLAG_IGNORE_STATE  2
+
+struct XenDevOps {
+    size_t    size;
+    uint32_t  flags;
+    void      (*alloc)(struct XenDevice *xendev);
+    int       (*init)(struct XenDevice *xendev);
+    int       (*connect)(struct XenDevice *xendev);
+    void      (*event)(struct XenDevice *xendev);
+    void      (*disconnect)(struct XenDevice *xendev);
+    int       (*free)(struct XenDevice *xendev);
+    void      (*backend_changed)(struct XenDevice *xendev, const char *node);
+    void      (*frontend_changed)(struct XenDevice *xendev, const char *node);
+};
+
+struct XenDevice {
+    const char         *type;
+    int                dom;
+    int                dev;
+    char               name[64];
+    int                debug;
+
+    enum xenbus_state  be_state;
+    enum xenbus_state  fe_state;
+    int                online;
+    char               be[XEN_BUFSIZE];
+    char               *fe;
+    char               *protocol;
+    int                remote_port;
+    int                local_port;
+
+    int                evtchndev;
+    int                gnttabdev;
+
+    struct XenDevOps   *ops;
+    TAILQ_ENTRY(XenDevice) next;
+};
+
+/* ------------------------------------------------------------- */
+
+/* variables */
+extern int xen_xc;
+extern struct xs_handle *xenstore;
+
+/* xenstore helper functions */
+int xenstore_write_str(const char *base, const char *node, const char *val);
+int xenstore_write_int(const char *base, const char *node, int ival);
+char *xenstore_read_str(const char *base, const char *node);
+int xenstore_read_int(const char *base, const char *node, int *ival);
+
+int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val);
+int xenstore_write_be_int(struct XenDevice *xendev, const char *node, int ival);
+char *xenstore_read_be_str(struct XenDevice *xendev, const char *node);
+int xenstore_read_be_int(struct XenDevice *xendev, const char *node, int *ival);
+char *xenstore_read_fe_str(struct XenDevice *xendev, const char *node);
+int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival);
+
+const char *xenbus_strstate(enum xenbus_state state);
+struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev);
+void xen_be_check_state(struct XenDevice *xendev);
+
+/* xen backend driver bits */
+int xen_be_init(void);
+int xen_be_register(const char *type, struct XenDevOps *ops);
+int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state);
+int xen_be_bind_evtchn(struct XenDevice *xendev);
+void xen_be_unbind_evtchn(struct XenDevice *xendev);
+int xen_be_send_notify(struct XenDevice *xendev);
+void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...)
+    __attribute__ ((format(printf, 3, 4)));
+
+#endif /* QEMU_HW_XEN_BACKEND_H */

 ------------------------------------------------------------------------
r7219 | aliguori | 2009-04-22 10:19:10 -0500 (Wed, 22 Apr 2009) | 9 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/configure
   M /trunk/hw/boards.h
   A /trunk/hw/xen.h
   A /trunk/hw/xen_machine_pv.c
   M /trunk/qemu-options.hx
   M /trunk/target-i386/machine.c
   M /trunk/vl.c

xen: groundwork for xen support (Gerd Hoffmann)

- configure script and build system changes.
- wind up new machine type.
- add -xen-* command line options.

Signed-off-by: Gerd Hoffmann 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7218)
+++ Makefile.target	(revision 7219)
@@ -560,6 +560,13 @@
 LIBS += $(CONFIG_BLUEZ_LIBS)
 endif
 
+# xen backend driver support
+XEN_OBJS := xen_machine_pv.o
+ifeq ($(CONFIG_XEN), yes)
+  OBJS += $(XEN_OBJS)
+  LIBS += $(XEN_LIBS)
+endif
+
 # SCSI layer
 OBJS+= lsi53c895a.o esp.o
 
Index: vl.c
===================================================================
--- vl.c	(revision 7218)
+++ vl.c	(revision 7219)
@@ -139,6 +139,7 @@
 #include "hw/baum.h"
 #include "hw/bt.h"
 #include "hw/smbios.h"
+#include "hw/xen.h"
 #include "bt-host.h"
 #include "net.h"
 #include "monitor.h"
@@ -5046,6 +5047,17 @@
                 run_as = optarg;
                 break;
 #endif
+#ifdef CONFIG_XEN
+            case QEMU_OPTION_xen_domid:
+                xen_domid = atoi(optarg);
+                break;
+            case QEMU_OPTION_xen_create:
+                xen_mode = XEN_CREATE;
+                break;
+            case QEMU_OPTION_xen_attach:
+                xen_mode = XEN_ATTACH;
+                break;
+#endif
             }
         }
     }
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7218)
+++ qemu-options.hx	(revision 7219)
@@ -1348,6 +1348,17 @@
 if KVM support is enabled when compiling.
 ETEXI
 
+#ifdef CONFIG_XEN
+DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
+    "-xen-domid id   specify xen guest domain id\n")
+DEF("xen-create", 0, QEMU_OPTION_xen_create,
+    "-xen-create     create domain using xen hypercalls, bypassing xend\n"
+    "                warning: should not be used when xend is in use\n")
+DEF("xen-attach", 0, QEMU_OPTION_xen_attach,
+    "-xen-attach     attach to existing xen domain\n"
+    "                xend will use this when starting qemu\n")
+#endif
+
 DEF("no-reboot", 0, QEMU_OPTION_no_reboot, \
     "-no-reboot      exit instead of rebooting\n")
 STEXI
Index: configure
===================================================================
--- configure	(revision 7218)
+++ configure	(revision 7219)
@@ -191,6 +191,7 @@
 blobs="yes"
 fdt="yes"
 sdl_x11="no"
+xen="yes"
 pkgversion=""
 
 # OS specific
@@ -421,6 +422,8 @@
   ;;
   --disable-kqemu) kqemu="no"
   ;;
+  --disable-xen) xen="no"
+  ;;
   --disable-brlapi) brlapi="no"
   ;;
   --disable-bluez) bluez="no"
@@ -586,6 +589,7 @@
 echo "  --audio-card-list=LIST   set list of emulated audio cards [$audio_card_list]"
 echo "                           Available cards: $audio_possible_cards"
 echo "  --enable-mixemu          enable mixer emulation"
+echo "  --disable-xen            disable xen backend driver support"
 echo "  --disable-brlapi         disable BrlAPI"
 echo "  --disable-vnc-tls        disable TLS encryption for VNC server"
 echo "  --disable-vnc-sasl       disable SASL encryption for VNC server"
@@ -802,6 +806,22 @@
 fi
 
 ##########################################
+# xen probe
+
+if test "$xen" = "yes" ; then
+cat > $TMPC <<EOF
+#include <xenctrl.h>
+#include <xs.h>
+int main(void) { xs_daemon_open; xc_interface_open; }
+EOF
+   if $cc $ARCH_CFLAGS -c -o $TMPO $TMPC -lxenstore -lxenctrl 2> /dev/null ; then
+      :
+   else
+      xen="no"
+   fi
+fi
+
+##########################################
 # SDL probe
 
 sdl_too_old=no
@@ -1296,6 +1316,7 @@
     echo "Target Sparc Arch $sparc_cpu"
 fi
 echo "kqemu support     $kqemu"
+echo "xen support       $xen"
 echo "brlapi support    $brlapi"
 echo "Documentation     $build_docs"
 [ ! -z "$uname_release" ] && \
@@ -1612,6 +1633,9 @@
   echo "CONFIG_BLUEZ_LIBS=$bluez_libs" >> $config_mak
   echo "#define CONFIG_BLUEZ 1" >> $config_h
 fi
+if test "$xen" = "yes" ; then
+  echo "XEN_LIBS=-lxenstore -lxenctrl" >> $config_mak
+fi
 if test "$aio" = "yes" ; then
   echo "#define CONFIG_AIO 1" >> $config_h
   echo "CONFIG_AIO=yes" >> $config_mak
@@ -1777,6 +1801,11 @@
       echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
       echo "#define CONFIG_KVM 1" >> $config_h
     fi
+    if test "$xen" = "yes" -a "$target_softmmu" = "yes";
+    then
+      echo "CONFIG_XEN=yes" >> $config_mak
+      echo "#define CONFIG_XEN 1" >> $config_h
+    fi
   ;;
   x86_64)
     echo "TARGET_ARCH=x86_64" >> $config_mak
@@ -1793,6 +1822,11 @@
       echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
       echo "#define CONFIG_KVM 1" >> $config_h
     fi
+    if test "$xen" = "yes" -a "$target_softmmu" = "yes"
+    then
+      echo "CONFIG_XEN=yes" >> $config_mak
+      echo "#define CONFIG_XEN 1" >> $config_h
+    fi
   ;;
   alpha)
     echo "TARGET_ARCH=alpha" >> $config_mak
Index: target-i386/machine.c
===================================================================
--- target-i386/machine.c	(revision 7218)
+++ target-i386/machine.c	(revision 7219)
@@ -9,6 +9,9 @@
 {
     qemu_register_machine(&pc_machine);
     qemu_register_machine(&isapc_machine);
+#ifdef CONFIG_XEN
+    qemu_register_machine(&xenpv_machine);
+#endif
 }
 
 static void cpu_put_seg(QEMUFile *f, SegmentCache *dt)
Index: hw/xen_machine_pv.c
===================================================================
--- hw/xen_machine_pv.c	(revision 0)
+++ hw/xen_machine_pv.c	(revision 7219)
@@ -0,0 +1,60 @@
+/*
+ * QEMU Xen PV Machine
+ *
+ * Copyright (c) 2007 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "sysemu.h"
+#include "boards.h"
+#include "xen.h"
+
+uint32_t xen_domid;
+enum xen_mode xen_mode = XEN_EMULATE;
+
+static void xen_init_pv(ram_addr_t ram_size, int vga_ram_size,
+			const char *boot_device,
+			const char *kernel_filename,
+			const char *kernel_cmdline,
+			const char *initrd_filename,
+			const char *cpu_model)
+{
+    CPUState *env;
+
+    /* Initialize a dummy CPU */
+    if (cpu_model == NULL) {
+#ifdef TARGET_X86_64
+        cpu_model = "qemu64";
+#else
+        cpu_model = "qemu32";
+#endif
+    }
+    env = cpu_init(cpu_model);
+    env->halted = 1;
+}
+
+QEMUMachine xenpv_machine = {
+    .name = "xenpv",
+    .desc = "Xen Para-virtualized PC",
+    .init = xen_init_pv,
+    .max_cpus = 1,
+};
Index: hw/xen.h
===================================================================
--- hw/xen.h	(revision 0)
+++ hw/xen.h	(revision 7219)
@@ -0,0 +1,20 @@
+#ifndef QEMU_HW_XEN_H
+#define QEMU_HW_XEN_H 1
+/*
+ * public xen header
+ *   stuff needed outside xen-*.c, i.e. interfaces to qemu.
+ *   must not depend on any xen headers being present in
+ *   /usr/include/xen, so it can be included unconditionally.
+ */
+
+/* xen-machine.c */
+enum xen_mode {
+    XEN_EMULATE = 0,  // xen emulation, using xenner (default)
+    XEN_CREATE,       // create xen domain
+    XEN_ATTACH        // attach to xen domain created by xend
+};
+
+extern uint32_t xen_domid;
+extern enum xen_mode xen_mode;
+
+#endif /* QEMU_HW_XEN_H */
Index: hw/boards.h
===================================================================
--- hw/boards.h	(revision 7218)
+++ hw/boards.h	(revision 7219)
@@ -32,6 +32,9 @@
 extern QEMUMachine pc_machine;
 extern QEMUMachine isapc_machine;
 
+/* xen_machine.c */
+extern QEMUMachine xenpv_machine;
+
 /* ppc.c */
 extern QEMUMachine prep_machine;
 extern QEMUMachine core99_machine;

 ------------------------------------------------------------------------
r7218 | aliguori | 2009-04-22 08:05:35 -0500 (Wed, 22 Apr 2009) | 4 lines
Changed paths:
   M /trunk/.gitignore

update .gitignore: add qemu-io (Gerd Hoffmann)

Signed-off-by: Gerd Hoffmann 

 ------------------------------------------------------------------------

Index: .gitignore
===================================================================
--- .gitignore	(revision 7217)
+++ .gitignore	(revision 7218)
@@ -16,6 +16,7 @@
 qemu-nbd
 qemu-nbd.8
 qemu-nbd.pod
+qemu-io
 .gdbinit
 *.a
 *.aux

 ------------------------------------------------------------------------
r7217 | aliguori | 2009-04-21 18:12:02 -0500 (Tue, 21 Apr 2009) | 10 lines
Changed paths:
   M /trunk/block-qcow2.c

qcow2: Add plausibility check for L1/L2 entries (Kevin Wolf)

From: Kevin Wolf 

All L1 and L2 entries must point at the start of a cluster. If there is some
offset into the cluster, the entry is corrupted.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7216)
+++ block-qcow2.c	(revision 7217)
@@ -2666,6 +2666,13 @@
                 errors += inc_refcounts(bs, refcount_table,
                               refcount_table_size,
                               offset, s->cluster_size);
+
+                /* Correct offsets are cluster aligned */
+                if (offset & (s->cluster_size - 1)) {
+                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+                        "properly aligned; L2 entry corrupted.\n", offset);
+                    errors++;
+                }
             }
         }
     }
@@ -2734,6 +2741,13 @@
                           l2_offset,
                           s->cluster_size);
 
+            /* L2 tables are cluster aligned */
+            if (l2_offset & (s->cluster_size - 1)) {
+                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+                    "cluster aligned; L1 entry corrupted\n", l2_offset);
+                errors++;
+            }
+
             /* Process and check L2 entries */
             ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
                 l2_offset, check_copied);

 ------------------------------------------------------------------------
r7216 | aliguori | 2009-04-21 18:11:58 -0500 (Tue, 21 Apr 2009) | 11 lines
Changed paths:
   M /trunk/block-qcow2.c

qcow2: Refcount checking code cleanup (Kevin Wolf)

This is purely cosmetical changes to make the code easier to read. Move L2
table processing from a deeply nested block to its own function, add some
comments.

Patch v2: Fix misplaced bracket causing false positives

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7215)
+++ block-qcow2.c	(revision 7216)
@@ -2603,6 +2603,90 @@
     return errors;
 }
 
+/*
+ * Increases the refcount in the given refcount table for all clusters
+ * referenced in the L2 table. While doing so, performs some checks on L2
+ * entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l2(BlockDriverState *bs,
+    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+    int check_copied)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table, offset;
+    int i, l2_size, nb_csectors, refcount;
+    int errors = 0;
+
+    /* Read L2 table from disk */
+    l2_size = s->l2_size * sizeof(uint64_t);
+    l2_table = qemu_malloc(l2_size);
+
+    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+        goto fail;
+
+    /* Do the actual checks */
+    for(i = 0; i < s->l2_size; i++) {
+        offset = be64_to_cpu(l2_table[i]);
+        if (offset != 0) {
+            if (offset & QCOW_OFLAG_COMPRESSED) {
+                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+                if (offset & QCOW_OFLAG_COPIED) {
+                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+                        "copied flag must never be set for compressed "
+                        "clusters\n", offset >> s->cluster_bits);
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    errors++;
+                }
+
+                /* Mark cluster as used */
+                nb_csectors = ((offset >> s->csize_shift) &
+                               s->csize_mask) + 1;
+                offset &= s->cluster_offset_mask;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset & ~511, nb_csectors * 512);
+            } else {
+                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+                if (check_copied) {
+                    uint64_t entry = offset;
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    refcount = get_refcount(bs, offset >> s->cluster_bits);
+                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
+                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+                            PRIx64 " refcount=%d\n", entry, refcount);
+                        errors++;
+                    }
+                }
+
+                /* Mark cluster as used */
+                offset &= ~QCOW_OFLAG_COPIED;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset, s->cluster_size);
+            }
+        }
+    }
+
+    qemu_free(l2_table);
+    return errors;
+
+fail:
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+    qemu_free(l2_table);
+    return -EIO;
+}
+
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
 static int check_refcounts_l1(BlockDriverState *bs,
                               uint16_t *refcount_table,
                               int refcount_table_size,
@@ -2610,16 +2694,17 @@
                               int check_copied)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
-    int l2_size, i, j, nb_csectors, refcount;
+    uint64_t *l1_table, l2_offset, l1_size2;
+    int i, refcount, ret;
     int errors = 0;
 
-    l2_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
 
+    /* Mark L1 table as used */
     errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                   l1_table_offset, l1_size2);
 
+    /* Read L1 table entries from disk */
     l1_table = qemu_malloc(l1_size2);
     if (bdrv_pread(s->hd, l1_table_offset,
                    l1_table, l1_size2) != l1_size2)
@@ -2627,68 +2712,43 @@
     for(i = 0;i < l1_size; i++)
         be64_to_cpus(&l1_table[i]);
 
-    l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
+    /* Do the actual checks */
     for(i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
         if (l2_offset) {
+            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
             if (check_copied) {
-                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
+                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+                    >> s->cluster_bits);
                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                     fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                         " refcount=%d\n", l2_offset, refcount);
                     errors++;
                 }
             }
+
+            /* Mark L2 table as used */
             l2_offset &= ~QCOW_OFLAG_COPIED;
-            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
-                goto fail;
-            for(j = 0; j < s->l2_size; j++) {
-                offset = be64_to_cpu(l2_table[j]);
-                if (offset != 0) {
-                    if (offset & QCOW_OFLAG_COMPRESSED) {
-                        if (offset & QCOW_OFLAG_COPIED) {
-                            fprintf(stderr, "ERROR: cluster %" PRId64 ": "
-                                "copied flag must never be set for compressed "
-                                "clusters\n", offset >> s->cluster_bits);
-                            offset &= ~QCOW_OFLAG_COPIED;
-                            errors++;
-                        }
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        offset &= s->cluster_offset_mask;
-                        errors += inc_refcounts(bs, refcount_table,
-                                      refcount_table_size,
-                                      offset & ~511, nb_csectors * 512);
-                    } else {
-                        if (check_copied) {
-                            refcount = get_refcount(bs, (offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
-                            if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) {
-                                fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
-                                    PRIx64 " refcount=%d\n", offset, refcount);
-                                errors++;
-                            }
-                        }
-                        offset &= ~QCOW_OFLAG_COPIED;
-                        errors += inc_refcounts(bs, refcount_table,
-                                      refcount_table_size,
-                                      offset, s->cluster_size);
-                    }
-                }
-            }
             errors += inc_refcounts(bs, refcount_table,
                           refcount_table_size,
                           l2_offset,
                           s->cluster_size);
+
+            /* Process and check L2 entries */
+            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
+                l2_offset, check_copied);
+            if (ret < 0) {
+                goto fail;
+            }
+            errors += ret;
         }
     }
     qemu_free(l1_table);
-    qemu_free(l2_table);
     return errors;
- fail:
+
+fail:
     fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
     qemu_free(l1_table);
-    qemu_free(l2_table);
     return -EIO;
 }
 
@@ -2715,6 +2775,7 @@
     errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   0, s->cluster_size);
 
+    /* current L1 table */
     ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
                        s->l1_table_offset, s->l1_size, 1);
     if (ret < 0) {

 ------------------------------------------------------------------------
r7215 | aliguori | 2009-04-21 18:11:53 -0500 (Tue, 21 Apr 2009) | 10 lines
Changed paths:
   M /trunk/qemu-img.c

Introduce qemu-img check subcommand (Kevin Wolf)

From: Kevin Wolf 

Now that block drivers can provide check functions, expose them through
qemu-img.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: qemu-img.c
===================================================================
--- qemu-img.c	(revision 7214)
+++ qemu-img.c	(revision 7215)
@@ -58,6 +58,7 @@
            "QEMU disk image utility\n"
            "\n"
            "Command syntax:\n"
+           "  check [-f fmt] filename\n"
            "  create [-e] [-6] [-F fmt] [-b base_image] [-f fmt] filename [size]\n"
            "  commit [-f fmt] filename\n"
            "  convert [-c] [-e] [-6] [-f fmt] [-O output_fmt] [-B output_base_image] filename [filename2 [...]] output_filename\n"
@@ -315,6 +316,65 @@
     return 0;
 }
 
+static int img_check(int argc, char **argv)
+{
+    int c, ret;
+    const char *filename, *fmt;
+    BlockDriver *drv;
+    BlockDriverState *bs;
+
+    fmt = NULL;
+    for(;;) {
+        c = getopt(argc, argv, "f:h");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        }
+    }
+    if (optind >= argc)
+        help();
+    filename = argv[optind++];
+
+    bs = bdrv_new("");
+    if (!bs)
+        error("Not enough memory");
+    if (fmt) {
+        drv = bdrv_find_format(fmt);
+        if (!drv)
+            error("Unknown file format '%s'", fmt);
+    } else {
+        drv = NULL;
+    }
+    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+        error("Could not open '%s'", filename);
+    }
+    ret = bdrv_check(bs);
+    switch(ret) {
+    case 0:
+        printf("No errors were found on the image.\n");
+        break;
+    case -ENOTSUP:
+        error("This image format does not support checks");
+        break;
+    default:
+        if (ret < 0) {
+            error("An error occurred during the check");
+        } else {
+            printf("%d errors were found on the image.\n", ret);
+        }
+        break;
+    }
+
+    bdrv_delete(bs);
+    return 0;
+}
+
 static int img_commit(int argc, char **argv)
 {
     int c, ret;
@@ -888,6 +948,8 @@
     argc--; argv++;
     if (!strcmp(cmd, "create")) {
         img_create(argc, argv);
+    } else if (!strcmp(cmd, "check")) {
+        img_check(argc, argv);
     } else if (!strcmp(cmd, "commit")) {
         img_commit(argc, argv);
     } else if (!strcmp(cmd, "convert")) {

 ------------------------------------------------------------------------
r7214 | aliguori | 2009-04-21 18:11:50 -0500 (Tue, 21 Apr 2009) | 10 lines
Changed paths:
   M /trunk/block-qcow2.c
   M /trunk/block.c
   M /trunk/block.h
   M /trunk/block_int.h

Introduce bdrv_check (Kevin Wolf)

From: Kevin Wolf 

Introduce a new bdrv_check function pointer for block drivers. Modify qcow2 to
return an error status in check_refcounts(), so it can implement bdrv_check.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block_int.h
===================================================================
--- block_int.h	(revision 7213)
+++ block_int.h	(revision 7214)
@@ -102,6 +102,9 @@
                         const char *backing_file, const char *backing_format,
                         int flags);
 
+    /* Returns number of errors in image, -errno for internal errors */
+    int (*bdrv_check)(BlockDriverState* bs);
+
     struct BlockDriver *next;
 };
 
Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7213)
+++ block-qcow2.c	(revision 7214)
@@ -177,9 +177,7 @@
 static int64_t alloc_bytes(BlockDriverState *bs, int size);
 static void free_clusters(BlockDriverState *bs,
                           int64_t offset, int64_t size);
-#ifdef DEBUG_ALLOC
-static void check_refcounts(BlockDriverState *bs);
-#endif
+static int check_refcounts(BlockDriverState *bs);
 
 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -2564,8 +2562,14 @@
     }
 }
 
-#ifdef DEBUG_ALLOC
-static void inc_refcounts(BlockDriverState *bs,
+/*
+ * Increases the refcount for a range of clusters in a given refcount table.
+ * This is used to construct a temporary refcount table out of L1 and L2 tables
+ * which can be compared to the refcount table saved in the image.
+ *
+ * Returns the number of errors in the image that were found
+ */
+static int inc_refcounts(BlockDriverState *bs,
                           uint16_t *refcount_table,
                           int refcount_table_size,
                           int64_t offset, int64_t size)
@@ -2573,9 +2577,10 @@
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
     int k;
+    int errors = 0;
 
     if (size <= 0)
-        return;
+        return 0;
 
     start = offset & ~(s->cluster_size - 1);
     last = (offset + size - 1) & ~(s->cluster_size - 1);
@@ -2585,13 +2590,17 @@
         if (k < 0 || k >= refcount_table_size) {
             fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
                 cluster_offset);
+            errors++;
         } else {
             if (++refcount_table[k] == 0) {
                 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
                     "\n", cluster_offset);
+                errors++;
             }
         }
     }
+
+    return errors;
 }
 
 static int check_refcounts_l1(BlockDriverState *bs,
@@ -2603,11 +2612,12 @@
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
     int l2_size, i, j, nb_csectors, refcount;
+    int errors = 0;
 
     l2_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
 
-    inc_refcounts(bs, refcount_table, refcount_table_size,
+    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                   l1_table_offset, l1_size2);
 
     l1_table = qemu_malloc(l1_size2);
@@ -2627,6 +2637,7 @@
                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                     fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                         " refcount=%d\n", l2_offset, refcount);
+                    errors++;
                 }
             }
             l2_offset &= ~QCOW_OFLAG_COPIED;
@@ -2641,11 +2652,12 @@
                                 "copied flag must never be set for compressed "
                                 "clusters\n", offset >> s->cluster_bits);
                             offset &= ~QCOW_OFLAG_COPIED;
+                            errors++;
                         }
                         nb_csectors = ((offset >> s->csize_shift) &
                                        s->csize_mask) + 1;
                         offset &= s->cluster_offset_mask;
-                        inc_refcounts(bs, refcount_table,
+                        errors += inc_refcounts(bs, refcount_table,
                                       refcount_table_size,
                                       offset & ~511, nb_csectors * 512);
                     } else {
@@ -2654,16 +2666,17 @@
                             if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) {
                                 fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
                                     PRIx64 " refcount=%d\n", offset, refcount);
+                                errors++;
                             }
                         }
                         offset &= ~QCOW_OFLAG_COPIED;
-                        inc_refcounts(bs, refcount_table,
+                        errors += inc_refcounts(bs, refcount_table,
                                       refcount_table_size,
                                       offset, s->cluster_size);
                     }
                 }
             }
-            inc_refcounts(bs, refcount_table,
+            errors += inc_refcounts(bs, refcount_table,
                           refcount_table_size,
                           l2_offset,
                           s->cluster_size);
@@ -2671,7 +2684,7 @@
     }
     qemu_free(l1_table);
     qemu_free(l2_table);
-    return 0;
+    return errors;
  fail:
     fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
     qemu_free(l1_table);
@@ -2679,24 +2692,35 @@
     return -EIO;
 }
 
-static void check_refcounts(BlockDriverState *bs)
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occured.
+ */
+static int check_refcounts(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     int64_t size;
     int nb_clusters, refcount1, refcount2, i;
     QCowSnapshot *sn;
     uint16_t *refcount_table;
+    int ret, errors = 0;
 
     size = bdrv_getlength(s->hd);
     nb_clusters = size_to_clusters(s, size);
     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
 
     /* header */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   0, s->cluster_size);
 
-    check_refcounts_l1(bs, refcount_table, nb_clusters,
+    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
                        s->l1_table_offset, s->l1_size, 1);
+    if (ret < 0) {
+        return ret;
+    }
+    errors += ret;
 
     /* snapshots */
     for(i = 0; i < s->nb_snapshots; i++) {
@@ -2704,18 +2728,18 @@
         check_refcounts_l1(bs, refcount_table, nb_clusters,
                            sn->l1_table_offset, sn->l1_size, 0);
     }
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   s->snapshots_offset, s->snapshots_size);
 
     /* refcount data */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   s->refcount_table_offset,
                   s->refcount_table_size * sizeof(uint64_t));
     for(i = 0; i < s->refcount_table_size; i++) {
         int64_t offset;
         offset = s->refcount_table[i];
         if (offset != 0) {
-            inc_refcounts(bs, refcount_table, nb_clusters,
+            errors += inc_refcounts(bs, refcount_table, nb_clusters,
                           offset, s->cluster_size);
         }
     }
@@ -2724,14 +2748,23 @@
     for(i = 0; i < nb_clusters; i++) {
         refcount1 = get_refcount(bs, i);
         refcount2 = refcount_table[i];
-        if (refcount1 != refcount2)
+        if (refcount1 != refcount2) {
             fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
                    i, refcount1, refcount2);
+            errors++;
+        }
     }
 
     qemu_free(refcount_table);
+
+    return errors;
 }
 
+static int qcow_check(BlockDriverState *bs)
+{
+    return check_refcounts(bs);
+}
+
 #if 0
 static void dump_refcounts(BlockDriverState *bs)
 {
@@ -2751,7 +2784,6 @@
     }
 }
 #endif
-#endif
 
 static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf,
                            int64_t pos, int size)
@@ -2806,4 +2838,5 @@
     .bdrv_get_buffer    = qcow_get_buffer,
 
     .bdrv_create2 = qcow_create2,
+    .bdrv_check = qcow_check,
 };
Index: block.c
===================================================================
--- block.c	(revision 7213)
+++ block.c	(revision 7214)
@@ -506,6 +506,20 @@
     qemu_free(bs);
 }
 
+/*
+ * Run consistency checks on an image
+ *
+ * Returns the number of errors or -errno when an internal error occurs
+ */
+int bdrv_check(BlockDriverState *bs)
+{
+    if (bs->drv->bdrv_check == NULL) {
+        return -ENOTSUP;
+    }
+
+    return bs->drv->bdrv_check(bs);
+}
+
 /* commit COW file into the raw image */
 int bdrv_commit(BlockDriverState *bs)
 {
Index: block.h
===================================================================
--- block.h	(revision 7213)
+++ block.h	(revision 7214)
@@ -73,6 +73,7 @@
 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
                BlockDriver *drv);
 void bdrv_close(BlockDriverState *bs);
+int bdrv_check(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,

 ------------------------------------------------------------------------
r7213 | aliguori | 2009-04-21 18:11:45 -0500 (Tue, 21 Apr 2009) | 13 lines
Changed paths:
   M /trunk/block-qcow2.c

qcow2: Fix warnings in check_refcount() (Kevin Wolf)

From: Kevin Wolf 

This code is currently only compiled when DEBUG_ALLOC is defined, so you
usually don't see compiler warnings on it. This patch series wants to enable
the code, so fix the format string warnings first.

While we're at it, let's print error messages to stderr.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7212)
+++ block-qcow2.c	(revision 7213)
@@ -2583,10 +2583,12 @@
         cluster_offset += s->cluster_size) {
         k = cluster_offset >> s->cluster_bits;
         if (k < 0 || k >= refcount_table_size) {
-            printf("ERROR: invalid cluster offset=0x%llx\n", cluster_offset);
+            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+                cluster_offset);
         } else {
             if (++refcount_table[k] == 0) {
-                printf("ERROR: overflow cluster offset=0x%llx\n", cluster_offset);
+                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+                    "\n", cluster_offset);
             }
         }
     }
@@ -2623,8 +2625,8 @@
             if (check_copied) {
                 refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
-                    printf("ERROR OFLAG_COPIED: l2_offset=%llx refcount=%d\n",
-                           l2_offset, refcount);
+                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+                        " refcount=%d\n", l2_offset, refcount);
                 }
             }
             l2_offset &= ~QCOW_OFLAG_COPIED;
@@ -2635,8 +2637,9 @@
                 if (offset != 0) {
                     if (offset & QCOW_OFLAG_COMPRESSED) {
                         if (offset & QCOW_OFLAG_COPIED) {
-                            printf("ERROR: cluster %lld: copied flag must never be set for compressed clusters\n",
-                                   offset >> s->cluster_bits);
+                            fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+                                "copied flag must never be set for compressed "
+                                "clusters\n", offset >> s->cluster_bits);
                             offset &= ~QCOW_OFLAG_COPIED;
                         }
                         nb_csectors = ((offset >> s->csize_shift) &
@@ -2649,8 +2652,8 @@
                         if (check_copied) {
                             refcount = get_refcount(bs, (offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
                             if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) {
-                                printf("ERROR OFLAG_COPIED: offset=%llx refcount=%d\n",
-                                       offset, refcount);
+                                fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+                                    PRIx64 " refcount=%d\n", offset, refcount);
                             }
                         }
                         offset &= ~QCOW_OFLAG_COPIED;
@@ -2670,7 +2673,7 @@
     qemu_free(l2_table);
     return 0;
  fail:
-    printf("ERROR: I/O error in check_refcounts_l1\n");
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
     qemu_free(l1_table);
     qemu_free(l2_table);
     return -EIO;
@@ -2722,7 +2725,7 @@
         refcount1 = get_refcount(bs, i);
         refcount2 = refcount_table[i];
         if (refcount1 != refcount2)
-            printf("ERROR cluster %d refcount=%d reference=%d\n",
+            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
                    i, refcount1, refcount2);
     }
 

 ------------------------------------------------------------------------
r7212 | aliguori | 2009-04-21 17:31:41 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/hw/fw_cfg.h
   M /trunk/hw/pc.c
   A /trunk/pc-bios/bios-pq/0014_add-srat-acpi-table-support.patch
   M /trunk/pc-bios/bios-pq/series
   M /trunk/pc-bios/bios.bin

sending NUMA topology to BIOS (Andre Przywara)

uses the QEMU firmware configuration interface to send the NUMA
topology to the BIOS, which has to set up the tables. Only one firmware
configuration channel is used.

Signed-off-by: Andre Przywara 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/fw_cfg.h
===================================================================
--- hw/fw_cfg.h	(revision 7211)
+++ hw/fw_cfg.h	(revision 7212)
@@ -14,6 +14,7 @@
 #define FW_CFG_INITRD_ADDR      0x0a
 #define FW_CFG_INITRD_SIZE      0x0b
 #define FW_CFG_BOOT_DEVICE      0x0c
+#define FW_CFG_NUMA             0x0d
 #define FW_CFG_MAX_ENTRY        0x10
 
 #define FW_CFG_WRITE_CHANNEL    0x4000
Index: hw/pc.c
===================================================================
--- hw/pc.c	(revision 7211)
+++ hw/pc.c	(revision 7212)
@@ -424,11 +424,15 @@
     }
 }
 
+extern uint64_t node_cpumask[MAX_NODES];
+
 static void bochs_bios_init(void)
 {
     void *fw_cfg;
     uint8_t *smbios_table;
     size_t smbios_len;
+    uint64_t *numa_fw_cfg;
+    int i, j;
 
     register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
@@ -451,6 +455,26 @@
     if (smbios_table)
         fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
                          smbios_table, smbios_len);
+
+    /* allocate memory for the NUMA channel: one (64bit) word for the number
+     * of nodes, one word for each VCPU->node and one word for each node to
+     * hold the amount of memory.
+     */
+    numa_fw_cfg = qemu_mallocz((1 + smp_cpus + nb_numa_nodes) * 8);
+    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < nb_numa_nodes; j++) {
+            if (node_cpumask[j] & (1 << i)) {
+                numa_fw_cfg[i + 1] = cpu_to_le64(j);
+                break;
+            }
+        }
+    }
+    for (i = 0; i < nb_numa_nodes; i++) {
+        numa_fw_cfg[smp_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+    }
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
+                     (1 + smp_cpus + nb_numa_nodes) * 8);
 }
 
 /* Generate an initial boot sector which sets state and jump to
Index: pc-bios/bios.bin
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: pc-bios/bios-pq/series
===================================================================
--- pc-bios/bios-pq/series	(revision 7211)
+++ pc-bios/bios-pq/series	(revision 7212)
@@ -11,3 +11,4 @@
 0011_read-additional-acpi-tables-from-a-vm.patch
 0012-load-smbios-entries-and-files-from-qemu.patch
 0013_fix-non-acpi-timer-interrupt-routing.patch
+0014_add-srat-acpi-table-support.patch
Index: pc-bios/bios-pq/0014_add-srat-acpi-table-support.patch
===================================================================
--- pc-bios/bios-pq/0014_add-srat-acpi-table-support.patch	(revision 0)
+++ pc-bios/bios-pq/0014_add-srat-acpi-table-support.patch	(revision 7212)
@@ -0,0 +1,305 @@
+add SRAT ACPI table support (Andre Przywara)
+
+Take NUMA topology info from the QEMU firmware configuration interface
+(number of nodes, node for each (V)CPU and amount of memory) and build
+a SRAT table describing this topology for the guest OS. Handles more than
+4 GB of RAM by including a hole for 32bit PCI memory mapping.
+
+Signed-off-by: Andre Przywara 
+Signed-off-by: Anthony Liguori 
+
+diff --git a/bios/rombios32.c b/bios/rombios32.c
+index 49dfd62..d8f6d4e 100644
+--- a/bios/rombios32.c
++++ b/bios/rombios32.c
+@@ -450,6 +450,11 @@ int pm_sci_int;
+ unsigned long bios_table_cur_addr;
+ unsigned long bios_table_end_addr;
+
++static inline uint64_t le64_to_cpu(uint64_t x)
++{
++    return x;
++}
++
+ void wrmsr_smp(uint32_t index, uint64_t val)
+ {
+     static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
+@@ -468,6 +473,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
+ #define QEMU_CFG_SIGNATURE  0x00
+ #define QEMU_CFG_ID         0x01
+ #define QEMU_CFG_UUID       0x02
++#define QEMU_CFG_NUMA       0x0D
+ #define QEMU_CFG_ARCH_LOCAL     0x8000
+ #define QEMU_CFG_ACPI_TABLES  (QEMU_CFG_ARCH_LOCAL + 0)
+ #define QEMU_CFG_SMBIOS_ENTRIES  (QEMU_CFG_ARCH_LOCAL + 1)
+@@ -529,6 +535,14 @@ static uint16_t smbios_entries(void)
+
+     return cnt;
+ }
++
++uint64_t qemu_cfg_get64 (void)
++{
++    uint64_t ret;
++
++    qemu_cfg_read((uint8_t*)&ret, 8);
++    return le64_to_cpu(ret);
++}
+ #endif
+
+ void cpu_probe(void)
+@@ -1281,7 +1295,7 @@ struct rsdt_descriptor_rev1
+ {
+ 	ACPI_TABLE_HEADER_DEF                           /* ACPI common table header */
+ #ifdef BX_QEMU
+-	uint32_t                             table_offset_entry [4]; /* Array of pointers to other */
++	uint32_t                             table_offset_entry [5]; /* Array of pointers to other */
+ #else
+ 	uint32_t                             table_offset_entry [3]; /* Array of pointers to other */
+ #endif
+@@ -1389,7 +1403,7 @@ struct multiple_apic_table
+ } __attribute__((__packed__));
+
+
+-/* Values for Type in APIC_HEADER_DEF */
++/* Values for Type in APIC sub-headers */
+
+ #define APIC_PROCESSOR          0
+ #define APIC_IO                 1
+@@ -1402,18 +1416,18 @@ struct multiple_apic_table
+ #define APIC_XRUPT_SOURCE       8
+ #define APIC_RESERVED           9           /* 9 and greater are reserved */
+
+-/*
+- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
+- */
+-#define APIC_HEADER_DEF                     /* Common APIC sub-structure header */\
++#define ACPI_SUB_HEADER_DEF                 /* Common ACPI sub-structure header */\
+ 	uint8_t                              type; \
+ 	uint8_t                              length;
+
++/*
++ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
++ */
+ /* Sub-structures for MADT */
+
+ struct madt_processor_apic
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                              processor_id;           /* ACPI processor id */
+ 	uint8_t                              local_apic_id;          /* Processor's local APIC id */
+ #if 0
+@@ -1424,6 +1438,43 @@ struct madt_processor_apic
+ #endif
+ } __attribute__((__packed__));
+
++/*
++ * SRAT (NUMA topology description) table
++ */
++
++#define SRAT_PROCESSOR          0
++#define SRAT_MEMORY             1
++
++struct system_resource_affinity_table
++{
++    ACPI_TABLE_HEADER_DEF
++    uint32_t    reserved1;
++    uint32_t    reserved2[2];
++};
++
++struct srat_processor_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity_lo;
++    uint8_t     local_apic_id;
++    uint32_t    flags;
++    uint8_t     local_sapic_eid;
++    uint8_t     proximity_hi[3];
++    uint32_t    reserved;
++};
++
++struct srat_memory_affinity
++{
++    ACPI_SUB_HEADER_DEF
++    uint8_t     proximity[4];
++    uint16_t    reserved1;
++    uint32_t    base_addr_low,base_addr_high;
++    uint32_t    length_low,length_high;
++    uint32_t    reserved2;
++    uint32_t    flags;
++    uint32_t    reserved3[2];
++};
++
+ #ifdef BX_QEMU
+ /*
+  *  * ACPI 2.0 Generic Address Space definition.
+@@ -1452,7 +1503,7 @@ struct acpi_20_hpet {
+
+ struct madt_io_apic
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                              io_apic_id;             /* I/O APIC ID */
+ 	uint8_t                              reserved;               /* Reserved - must be zero */
+ 	uint32_t                             address;                /* APIC physical address */
+@@ -1463,7 +1514,7 @@ struct madt_io_apic
+ #ifdef BX_QEMU
+ struct madt_int_override
+ {
+-	APIC_HEADER_DEF
++	ACPI_SUB_HEADER_DEF
+ 	uint8_t                bus;     /* Identifies ISA Bus */
+ 	uint8_t                source;  /* Bus-relative interrupt source */
+ 	uint32_t               gsi;     /* GSI that source will signal */
+@@ -1567,6 +1618,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
+     return ssdt_ptr - ssdt;
+ }
+
++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
++    uint64_t base, uint64_t len, int node, int enabled)
++{
++     numamem->type = SRAT_MEMORY;
++     numamem->length = sizeof(*numamem);
++     memset (numamem->proximity, 0 ,4);
++     numamem->proximity[0] = node;
++     numamem->flags = cpu_to_le32(!!enabled);
++     numamem->base_addr_low = base & 0xFFFFFFFF;
++     numamem->base_addr_high = base >> 32;
++     numamem->length_low = len & 0xFFFFFFFF;
++     numamem->length_high = len >> 32;
++     return;
++}
++
+ /* base_addr must be a multiple of 4KB */
+ void acpi_bios_init(void)
+ {
+@@ -1577,12 +1643,15 @@ void acpi_bios_init(void)
+     struct multiple_apic_table *madt;
+     uint8_t *dsdt, *ssdt;
+ #ifdef BX_QEMU
++    struct system_resource_affinity_table *srat;
+     struct acpi_20_hpet *hpet;
+     uint32_t hpet_addr;
+ #endif
+     uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
+     uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
++    uint32_t srat_addr,srat_size;
+     uint16_t i, external_tables;
++    int nb_numa_nodes;
+
+     /* reserve memory space for tables */
+ #ifdef BX_USE_EBDA_TABLES
+@@ -1624,6 +1693,25 @@ void acpi_bios_init(void)
+     ssdt_addr = addr;
+     ssdt = (void *)(addr);
+     addr += acpi_build_processor_ssdt(ssdt);
++#ifdef BX_QEMU
++    qemu_cfg_select(QEMU_CFG_NUMA);
++    nb_numa_nodes = qemu_cfg_get64();
++#else
++    nb_numa_nodes = 0;
++#endif
++    if (nb_numa_nodes > 0) {
++        addr = (addr + 7) & ~7;
++        srat_addr = addr;
++        srat_size = sizeof(*srat) +
++            sizeof(struct srat_processor_affinity) * smp_cpus +
++            sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
++        srat = (void *)(addr);
++        addr += srat_size;
++    } else {
++        srat_addr = addr;
++        srat = (void*)(addr);
++        srat_size = 0;
++    }
+
+     addr = (addr + 7) & ~7;
+     madt_addr = addr;
+@@ -1733,6 +1821,69 @@ void acpi_bios_init(void)
+
+     memset(rsdt, 0, rsdt_size);
+ #ifdef BX_QEMU
++    /* SRAT */
++    if (nb_numa_nodes > 0) {
++        struct srat_processor_affinity *core;
++        struct srat_memory_affinity *numamem;
++        int slots;
++        uint64_t mem_len, mem_base, next_base = 0, curnode;
++
++        qemu_cfg_select(QEMU_CFG_NUMA);
++        qemu_cfg_get64();
++        memset (srat, 0 , srat_size);
++        srat->reserved1=1;
++ 
++        core = (void*)(srat + 1);
++        for (i = 0; i < smp_cpus; ++i) {
++             core->type = SRAT_PROCESSOR;
++             core->length = sizeof(*core);
++             core->local_apic_id = i;
++             curnode = qemu_cfg_get64();
++             core->proximity_lo = curnode;
++             memset (core->proximity_hi, 0, 3);
++             core->local_sapic_eid = 0;
++             if (i < smp_cpus)
++                 core->flags = cpu_to_le32(1);
++             else
++                 core->flags = 0;
++             core++;
++        }
++
++        /* the memory map is a bit tricky, it contains at least one hole
++         * from 640k-1M and possibly another one from 3.5G-4G.
++         */
++        numamem = (void*)core; slots = 0;
++        acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
++        next_base = 1024 * 1024; numamem++;slots++;
++        for (i = 1; i < nb_numa_nodes + 1; ++i) {
++            mem_base = next_base;
++            mem_len = qemu_cfg_get64();
++            if (i == 1) mem_len -= 1024 * 1024;
++            next_base = mem_base + mem_len;
++ 
++            /* Cut out the PCI hole */
++            if (mem_base <= ram_size && next_base > ram_size) {
++                mem_len -= next_base - ram_size;
++                if (mem_len > 0) {
++                    acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++                    numamem++; slots++;
++                }
++                mem_base = 1ULL << 32;
++                mem_len = next_base - ram_size;
++                next_base += (1ULL << 32) - ram_size;
++            }
++            acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++            numamem++; slots++;
++        }
++        for (; slots < nb_numa_nodes + 2; slots++) {
++            acpi_build_srat_memory(numamem, 0, 0, 0, 0);
++            numamem++;
++        }
++
++         acpi_build_table_header((struct acpi_table_header *)srat,
++                                "SRAT", srat_size, 1);
++    }
++
+     /* HPET */
+     memset(hpet, 0, sizeof(*hpet));
+     /* Note timer_block_id value must be kept in sync with value advertised by
+@@ -1761,9 +1912,11 @@ void acpi_bios_init(void)
+     rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
+ #ifdef BX_QEMU
+     rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
++    if (nb_numa_nodes > 0)
++        rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
+ #endif
+-    acpi_build_table_header((struct acpi_table_header *)rsdt,
+-                            "RSDT", rsdt_size, 1);
++    acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
++        rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
+
+     acpi_tables_size = addr - base_addr;
+
+-- 
+1.6.1.3
+
+

 ------------------------------------------------------------------------
r7211 | aliguori | 2009-04-21 17:30:47 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/monitor.c

add info numa command to monitor (Andre Przywara)

adds an "info numa" command to the monitor to output the current
topology. Since NUMA is advertised via static ACPI tables, no changes are
possible during runtime.

Signed-off-by: Andre Przywara 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: monitor.c
===================================================================
--- monitor.c	(revision 7210)
+++ monitor.c	(revision 7211)
@@ -1409,6 +1409,25 @@
 #endif
 }
 
+static void do_info_numa(Monitor *mon)
+{
+    int i, j;
+    CPUState *env;
+
+    monitor_printf(mon, "%d nodes\n", nb_numa_nodes);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        monitor_printf(mon, "node %d cpus:", i);
+        for (env = first_cpu; env != NULL; env = env->next_cpu) {
+            if (env->numa_node == i) {
+                monitor_printf(mon, " %d", env->cpu_index);
+            }
+        }
+        monitor_printf(mon, "\n");
+        monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
+            node_mem[i] >> 20);
+    }
+}
+
 #ifdef CONFIG_PROFILER
 
 int64_t kqemu_time;
@@ -1792,6 +1811,8 @@
       "", "show KQEMU information", },
     { "kvm", "", do_info_kvm,
       "", "show KVM information", },
+    { "numa", "", do_info_numa,
+      "", "show NUMA information", },
     { "usb", "", usb_info,
       "", "show guest USB devices", },
     { "usbhost", "", usb_host_info,

 ------------------------------------------------------------------------
r7210 | aliguori | 2009-04-21 17:30:27 -0500 (Tue, 21 Apr 2009) | 11 lines
Changed paths:
   M /trunk/cpu-defs.h
   M /trunk/exec.c
   M /trunk/qemu-options.hx
   M /trunk/sysemu.h
   M /trunk/vl.c

added -numa cmdline parameter parser (Andre Przywara)

adds a -numa command line parameter and sets a QEMU global array with
the memory sizes. The CPU-to-node assignment is written into the
CPUState. If no specific values for memory and CPUs are given,
all resources will be split equally across all nodes.
This code currently supports only up to 64 virtual CPUs.

Signed-off-by: Andre Przywara 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7209)
+++ vl.c	(revision 7210)
@@ -265,6 +265,10 @@
 int nb_drives_opt;
 struct drive_opt drives_opt[MAX_DRIVES];
 
+int nb_numa_nodes;
+uint64_t node_mem[MAX_NODES];
+uint64_t node_cpumask[MAX_NODES];
+
 static CPUState *cur_cpu;
 static CPUState *next_cpu;
 static int event_pending = 1;
@@ -1865,12 +1869,12 @@
 }
 #endif
 
-const char *get_opt_name(char *buf, int buf_size, const char *p)
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
 {
     char *q;
 
     q = buf;
-    while (*p != '\0' && *p != '=') {
+    while (*p != '\0' && *p != delim) {
         if (q && (q - buf) < buf_size - 1)
             *q++ = *p;
         p++;
@@ -1910,7 +1914,7 @@
 
     p = str;
     for(;;) {
-        p = get_opt_name(option, sizeof(option), p);
+        p = get_opt_name(option, sizeof(option), p, '=');
         if (*p != '=')
             break;
         p++;
@@ -1935,7 +1939,7 @@
 
     p = str;
     while (*p != '\0') {
-        p = get_opt_name(buf, buf_size, p);
+        p = get_opt_name(buf, buf_size, p, '=');
         if (*p != '=')
             return -1;
         p++;
@@ -2628,6 +2632,62 @@
     return drives_table_idx;
 }
 
+static void numa_add(const char *optarg)
+{
+    char option[128];
+    char *endptr;
+    unsigned long long value, endvalue;
+    int nodenr;
+
+    optarg = get_opt_name(option, 128, optarg, ',') + 1;
+    if (!strcmp(option, "node")) {
+        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
+            nodenr = nb_numa_nodes;
+        } else {
+            nodenr = strtoull(option, NULL, 10);
+        }
+
+        if (get_param_value(option, 128, "mem", optarg) == 0) {
+            node_mem[nodenr] = 0;
+        } else {
+            value = strtoull(option, &endptr, 0);
+            switch (*endptr) {
+            case 0: case 'M': case 'm':
+                value <<= 20;
+                break;
+            case 'G': case 'g':
+                value <<= 30;
+                break;
+            }
+            node_mem[nodenr] = value;
+        }
+        if (get_param_value(option, 128, "cpus", optarg) == 0) {
+            node_cpumask[nodenr] = 0;
+        } else {
+            value = strtoull(option, &endptr, 10);
+            if (value >= 64) {
+                value = 63;
+                fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
+            } else {
+                if (*endptr == '-') {
+                    endvalue = strtoull(endptr+1, &endptr, 10);
+                    if (endvalue >= 63) {
+                        endvalue = 62;
+                        fprintf(stderr,
+                            "only 63 CPUs in NUMA mode supported.\n");
+                    }
+                    value = (1 << (endvalue + 1)) - (1 << value);
+                } else {
+                    value = 1 << value;
+                }
+            }
+            node_cpumask[nodenr] = value;
+        }
+        nb_numa_nodes++;
+    }
+    return;
+}
+
 /***********************************************************/
 /* USB devices */
 
@@ -4290,6 +4350,7 @@
     const char *chroot_dir = NULL;
     const char *run_as = NULL;
 #endif
+    CPUState *env;
 
     qemu_cache_utils_init(envp);
 
@@ -4353,12 +4414,18 @@
         virtio_consoles[i] = NULL;
     virtio_console_index = 0;
 
+    for (i = 0; i < MAX_NODES; i++) {
+        node_mem[i] = 0;
+        node_cpumask[i] = 0;
+    }
+
     usb_devices_index = 0;
 
     nb_net_clients = 0;
     nb_bt_opts = 0;
     nb_drives = 0;
     nb_drives_opt = 0;
+    nb_numa_nodes = 0;
     hda_index = -1;
 
     nb_nics = 0;
@@ -4508,6 +4575,13 @@
 			             ",trans=none" : "");
                 }
                 break;
+            case QEMU_OPTION_numa:
+                if (nb_numa_nodes >= MAX_NODES) {
+                    fprintf(stderr, "qemu: too many NUMA nodes\n");
+                    exit(1);
+                }
+                numa_add(optarg);
+                break;
             case QEMU_OPTION_nographic:
                 nographic = 1;
                 break;
@@ -5211,6 +5285,48 @@
         }
     }
 
+    if (nb_numa_nodes > 0) {
+        int i;
+
+        if (nb_numa_nodes > smp_cpus) {
+            nb_numa_nodes = smp_cpus;
+        }
+
+        /* If no memory size if given for any node, assume the default case
+         * and distribute the available memory equally across all nodes
+         */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_mem[i] != 0)
+                break;
+        }
+        if (i == nb_numa_nodes) {
+            uint64_t usedmem = 0;
+
+            /* On Linux, the each node's border has to be 8MB aligned,
+             * the final node gets the rest.
+             */
+            for (i = 0; i < nb_numa_nodes - 1; i++) {
+                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
+                usedmem += node_mem[i];
+            }
+            node_mem[i] = ram_size - usedmem;
+        }
+
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_cpumask[i] != 0)
+                break;
+        }
+        /* assigning the VCPUs round-robin is easier to implement, guest OSes
+         * must cope with this anyway, because there are BIOSes out there in
+         * real machines which also use this scheme.
+         */
+        if (i == nb_numa_nodes) {
+            for (i = 0; i < smp_cpus; i++) {
+                node_cpumask[i % nb_numa_nodes] |= 1 << i;
+            }
+        }
+    }
+
     if (kvm_enabled()) {
         int ret;
 
@@ -5274,6 +5390,15 @@
     machine->init(ram_size, vga_ram_size, boot_devices,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_cpumask[i] & (1 << env->cpu_index)) {
+                env->numa_node = i;
+            }
+        }
+    }
+
     current_machine = machine;
 
     /* Set KVM's vcpu state to qemu's initial CPUState. */
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7209)
+++ qemu-options.hx	(revision 7210)
@@ -47,6 +47,14 @@
 to 4.
 ETEXI
 
+DEF("numa", HAS_ARG, QEMU_OPTION_numa,
+    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n")
+STEXI
+@item -numa @var{opts}
+Simulate a multi node NUMA system. If mem and cpus are omitted, resources
+are split equally.
+ETEXI
+
 DEF("fda", HAS_ARG, QEMU_OPTION_fda,
     "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n")
 DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "")
Index: exec.c
===================================================================
--- exec.c	(revision 7209)
+++ exec.c	(revision 7210)
@@ -554,6 +554,7 @@
         cpu_index++;
     }
     env->cpu_index = cpu_index;
+    env->numa_node = 0;
     TAILQ_INIT(&env->breakpoints);
     TAILQ_INIT(&env->watchpoints);
     *penv = env;
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7209)
+++ sysemu.h	(revision 7210)
@@ -108,6 +108,10 @@
 extern int kqemu_allowed;
 #endif
 
+#define MAX_NODES 64
+extern int nb_numa_nodes;
+extern uint64_t node_mem[MAX_NODES];
+
 #define MAX_OPTION_ROMS 16
 extern const char *option_rom[MAX_OPTION_ROMS];
 extern int nb_option_roms;
@@ -248,7 +252,7 @@
 void do_usb_del(Monitor *mon, const char *devname);
 void usb_info(Monitor *mon);
 
-const char *get_opt_name(char *buf, int buf_size, const char *p);
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim);
 const char *get_opt_value(char *buf, int buf_size, const char *p);
 int get_param_value(char *buf, int buf_size,
                     const char *tag, const char *str);
Index: cpu-defs.h
===================================================================
--- cpu-defs.h	(revision 7209)
+++ cpu-defs.h	(revision 7210)
@@ -205,6 +205,7 @@
                                                                         \
     CPUState *next_cpu; /* next CPU sharing TB cache */                 \
     int cpu_index; /* CPU index (informative) */                        \
+    int numa_node; /* NUMA node this cpu is belonging to  */            \
     int running; /* Nonzero if cpu is currently running(usermode).  */  \
     /* user data */                                                     \
     void *opaque;                                                       \

 ------------------------------------------------------------------------
r7209 | malc | 2009-04-21 17:26:22 -0500 (Tue, 21 Apr 2009) | 3 lines
Changed paths:
   M /trunk/disas.c

Safety net for the cases where disassembler/translator disagree over instruction decoding

Noticed by Mark Karpeles.
 ------------------------------------------------------------------------

Index: disas.c
===================================================================
--- disas.c	(revision 7208)
+++ disas.c	(revision 7209)
@@ -222,6 +222,13 @@
 	fprintf(out, "\n");
 	if (count < 0)
 	    break;
+        if (size < count) {
+            fprintf(out,
+                    "Disassembler disagrees with translator over instruction "
+                    "decoding\n"
+                    "Please report this to qemu-devel@nongnu.org\n");
+            break;
+        }
     }
 }
 

 ------------------------------------------------------------------------
r7208 | aliguori | 2009-04-21 15:49:11 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/net.c

net: Prevent multiple slirp instances (Jan Kiszka)

The slirp stack is full of global variables which prevents instantiating
it more than once. Catch this during net_slirp_init to prevent more harm
later on.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7207)
+++ net.c	(revision 7208)
@@ -544,15 +544,27 @@
     slirp_input(buf, size);
 }
 
+static int slirp_in_use;
+
+static void net_slirp_cleanup(VLANClientState *vc)
+{
+    slirp_in_use = 0;
+}
+
 static int net_slirp_init(VLANState *vlan, const char *model, const char *name)
 {
+    if (slirp_in_use) {
+        /* slirp only supports a single instance so far */
+        return -1;
+    }
     if (!slirp_inited) {
         slirp_inited = 1;
         slirp_init(slirp_restrict, slirp_ip);
     }
     slirp_vc = qemu_new_vlan_client(vlan, model, name,
-                                    slirp_receive, NULL, NULL, NULL);
+                                    slirp_receive, NULL, net_slirp_cleanup, NULL);
     slirp_vc->info_str[0] = '\0';
+    slirp_in_use = 1;
     return 0;
 }
 

 ------------------------------------------------------------------------
r7204 | aliguori | 2009-04-21 14:56:44 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/monitor.c
   M /trunk/net.c
   M /trunk/net.h
   M /trunk/qemu-options.hx
   M /trunk/vl.c

slirp: Enhance host-guest redirection setup (Jan Kiszka)

Allow to establish a TCP/UDP connection redirection also via a monitor
command 'host_net_redir'. Moreover, assume TCP as connection type if
that parameter is omitted.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7203)
+++ vl.c	(revision 7204)
@@ -4589,7 +4589,7 @@
                 break;
 #endif
             case QEMU_OPTION_redir:
-                net_slirp_redir(optarg);
+                net_slirp_redir(NULL, optarg);
                 break;
 #endif
             case QEMU_OPTION_bt:
Index: net.c
===================================================================
--- net.c	(revision 7203)
+++ net.c	(revision 7204)
@@ -556,11 +556,11 @@
     return 0;
 }
 
-void net_slirp_redir(const char *redir_str)
+void net_slirp_redir(Monitor *mon, const char *redir_str)
 {
     int is_udp;
     char buf[256], *r;
-    const char *p;
+    const char *p, *errmsg;
     struct in_addr guest_addr;
     int host_port, guest_port;
 
@@ -571,41 +571,48 @@
 
     p = redir_str;
     if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
-        goto fail;
-    if (!strcmp(buf, "tcp")) {
+        goto fail_syntax;
+    if (!strcmp(buf, "tcp") || buf[0] == '\0') {
         is_udp = 0;
     } else if (!strcmp(buf, "udp")) {
         is_udp = 1;
     } else {
-        goto fail;
+        goto fail_syntax;
     }
 
     if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
-        goto fail;
+        goto fail_syntax;
     host_port = strtol(buf, &r, 0);
     if (r == buf)
-        goto fail;
+        goto fail_syntax;
 
     if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
-        goto fail;
+        goto fail_syntax;
     if (buf[0] == '\0') {
         pstrcpy(buf, sizeof(buf), "10.0.2.15");
     }
     if (!inet_aton(buf, &guest_addr))
-        goto fail;
+        goto fail_syntax;
 
     guest_port = strtol(p, &r, 0);
     if (r == p)
-        goto fail;
+        goto fail_syntax;
 
     if (slirp_redir(is_udp, host_port, guest_addr, guest_port) < 0) {
-        fprintf(stderr, "qemu: could not set up redirection\n");
-        exit(1);
+        errmsg = "could not set up redirection\n";
+        goto fail;
     }
     return;
+
+ fail_syntax:
+    errmsg = "invalid redirection format\n";
  fail:
-    fprintf(stderr, "qemu: syntax: -redir [tcp|udp]:host-port:[guest-host]:guest-port\n");
-    exit(1);
+    if (mon) {
+        monitor_printf(mon, errmsg);
+    } else {
+        fprintf(stderr, "qemu: %s", errmsg);
+        exit(1);
+    }
 }
 
 #ifndef _WIN32
Index: net.h
===================================================================
--- net.h	(revision 7203)
+++ net.h	(revision 7204)
@@ -112,7 +112,7 @@
 void net_client_uninit(NICInfo *nd);
 int net_client_parse(const char *str);
 void net_slirp_smb(const char *exported_dir);
-void net_slirp_redir(const char *redir_str);
+void net_slirp_redir(Monitor *mon, const char *redir_str);
 void net_cleanup(void);
 int slirp_is_inited(void);
 void net_client_check(void);
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7203)
+++ qemu-options.hx	(revision 7204)
@@ -943,7 +943,7 @@
 connections to the host port @var{host-port} to the guest
 @var{guest-host} on guest port @var{guest-port}. If @var{guest-host}
 is not specified, its value is 10.0.2.15 (default address given by the
-built-in DHCP server).
+built-in DHCP server). If no connection type is specified, TCP is used.
 
 For example, to redirect host X11 connection from screen 1 to guest
 screen 0, use the following:
Index: monitor.c
===================================================================
--- monitor.c	(revision 7203)
+++ monitor.c	(revision 7204)
@@ -1735,6 +1735,10 @@
       "tap|user|socket|vde|dump [options]", "add host VLAN client" },
     { "host_net_remove", "is", net_host_device_remove,
       "vlan_id name", "remove host VLAN client" },
+#ifdef CONFIG_SLIRP
+    { "host_net_redir", "s", net_slirp_redir,
+      "[tcp|udp]:host-port:[guest-host]:guest-port", "redirect TCP or UDP connections from host to guest (requires -net user)" },
+#endif
     { "balloon", "i", do_balloon,
       "target", "request VM to change it's memory allocation (in MB)" },
     { "set_link", "ss", do_set_link,

 ------------------------------------------------------------------------
r7203 | aliguori | 2009-04-21 14:56:41 -0500 (Tue, 21 Apr 2009) | 10 lines
Changed paths:
   M /trunk/net.c
   M /trunk/net.h

net: Untangle nested qemu_send_packet (Jan Kiszka)

Queue packets that are sent during an ongoing packet delivery. This
ensures that packets will always arrive in their logical order at each
client of a VLAN. Currently, slirp generates such immediate replies, and
e.g. packet-sniffing clients on the same VLAN may get confused.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7202)
+++ net.c	(revision 7203)
@@ -403,22 +403,46 @@
     return 0;
 }
 
-void qemu_send_packet(VLANClientState *vc1, const uint8_t *buf, int size)
+static void
+qemu_deliver_packet(VLANClientState *sender, const uint8_t *buf, int size)
 {
-    VLANState *vlan = vc1->vlan;
     VLANClientState *vc;
 
-    if (vc1->link_down)
+    for (vc = sender->vlan->first_client; vc != NULL; vc = vc->next) {
+        if (vc != sender && !vc->link_down) {
+            vc->fd_read(vc->opaque, buf, size);
+        }
+    }
+}
+
+void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size)
+{
+    VLANState *vlan = vc->vlan;
+    VLANPacket *packet;
+
+    if (vc->link_down)
         return;
 
 #ifdef DEBUG_NET
     printf("vlan %d send:\n", vlan->id);
     hex_dump(stdout, buf, size);
 #endif
-    for(vc = vlan->first_client; vc != NULL; vc = vc->next) {
-        if (vc != vc1 && !vc->link_down) {
-            vc->fd_read(vc->opaque, buf, size);
+    if (vlan->delivering) {
+        packet = qemu_malloc(sizeof(VLANPacket) + size);
+        packet->next = vlan->send_queue;
+        packet->sender = vc;
+        packet->size = size;
+        memcpy(packet->data, buf, size);
+        vlan->send_queue = packet;
+    } else {
+        vlan->delivering = 1;
+        qemu_deliver_packet(vc, buf, size);
+        while ((packet = vlan->send_queue) != NULL) {
+            qemu_deliver_packet(packet->sender, packet->data, packet->size);
+            vlan->send_queue = packet->next;
+            qemu_free(packet);
         }
+        vlan->delivering = 0;
     }
 }
 
Index: net.h
===================================================================
--- net.h	(revision 7202)
+++ net.h	(revision 7203)
@@ -29,11 +29,22 @@
     char info_str[256];
 };
 
+typedef struct VLANPacket VLANPacket;
+
+struct VLANPacket {
+    struct VLANPacket *next;
+    VLANClientState *sender;
+    int size;
+    uint8_t data[0];
+};
+
 struct VLANState {
     int id;
     VLANClientState *first_client;
     struct VLANState *next;
     unsigned int nb_guest_devs, nb_host_devs;
+    VLANPacket *send_queue;
+    int delivering;
 };
 
 VLANState *qemu_find_vlan(int id);

 ------------------------------------------------------------------------
r7202 | aliguori | 2009-04-21 14:56:36 -0500 (Tue, 21 Apr 2009) | 8 lines
Changed paths:
   M /trunk/monitor.c

monitor: Allow host_net_add/remove for all targets (Jan Kiszka)

There is nothing x86-specific in host_net_add/remove, so allow them for
all targets.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: monitor.c
===================================================================
--- monitor.c	(revision 7201)
+++ monitor.c	(revision 7202)
@@ -1730,11 +1730,11 @@
                                         "add drive to PCI storage controller" },
     { "pci_add", "sss", pci_device_hot_add, "pci_addr=auto|[[:]:] nic|storage [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" },
     { "pci_del", "s", pci_device_hot_remove, "pci_addr=[[:]:]", "hot remove PCI device" },
+#endif
     { "host_net_add", "ss?", net_host_device_add,
       "tap|user|socket|vde|dump [options]", "add host VLAN client" },
     { "host_net_remove", "is", net_host_device_remove,
       "vlan_id name", "remove host VLAN client" },
-#endif
     { "balloon", "i", do_balloon,
       "target", "request VM to change it's memory allocation (in MB)" },
     { "set_link", "ss", do_set_link,

 ------------------------------------------------------------------------
r7201 | aliguori | 2009-04-21 14:56:32 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/monitor.c
   M /trunk/net.c

monitor: Improve host_net_add (Jan Kiszka)

Fix the documentation of the host_net_add monitor command and allow the
user to pass no options at all. Moreover, inform the user on the
monitor terminal if a request failed.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7200)
+++ net.c	(revision 7201)
@@ -2042,7 +2042,9 @@
         monitor_printf(mon, "invalid host network device %s\n", device);
         return;
     }
-    net_client_init(device, opts);
+    if (net_client_init(device, opts ? : "") < 0) {
+        monitor_printf(mon, "adding host network device %s failed\n", device);
+    }
 }
 
 void net_host_device_remove(Monitor *mon, int vlan_id, const char *device)
Index: monitor.c
===================================================================
--- monitor.c	(revision 7200)
+++ monitor.c	(revision 7201)
@@ -1730,8 +1730,8 @@
                                         "add drive to PCI storage controller" },
     { "pci_add", "sss", pci_device_hot_add, "pci_addr=auto|[[:]:] nic|storage [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" },
     { "pci_del", "s", pci_device_hot_remove, "pci_addr=[[:]:]", "hot remove PCI device" },
-    { "host_net_add", "ss", net_host_device_add,
-      "[tap,user,socket,vde,dump] options", "add host VLAN client" },
+    { "host_net_add", "ss?", net_host_device_add,
+      "tap|user|socket|vde|dump [options]", "add host VLAN client" },
     { "host_net_remove", "is", net_host_device_remove,
       "vlan_id name", "remove host VLAN client" },
 #endif

 ------------------------------------------------------------------------
r7200 | aliguori | 2009-04-21 14:56:28 -0500 (Tue, 21 Apr 2009) | 17 lines
Changed paths:
   M /trunk/monitor.c
   M /trunk/net.c
   M /trunk/qemu-options.hx

net: Add support for capturing VLANs (Jan Kiszka)

This patch is derived from Tristan Gingold's patch. It adds a new VLAN
client type that writes all traffic on the VLAN it is attached to into a
pcap file. Such a file can then be analyzed offline with Wireshark or
tcpdump.

Besides rebasing and some minor cleanups, the major differences to the
original version are:
 - support for enabling/disabling via the monitor (host_net_add/remove)
 - no special ordering of VLAN client list, qemu_send_packet now takes
   care of properly ordered packets
 - 64k default capturing limit (I hate tcpdump's default)

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7199)
+++ net.c	(revision 7200)
@@ -118,6 +118,7 @@
 #include "qemu-char.h"
 #include "audio/audio.h"
 #include "qemu_socket.h"
+#include "qemu-log.h"
 
 #if defined(CONFIG_SLIRP)
 #include "libslirp.h"
@@ -1558,6 +1559,106 @@
 
 }
 
+typedef struct DumpState {
+    VLANClientState *pcap_vc;
+    int fd;
+    int pcap_caplen;
+} DumpState;
+
+#define PCAP_MAGIC 0xa1b2c3d4
+
+struct pcap_file_hdr {
+    uint32_t magic;
+    uint16_t version_major;
+    uint16_t version_minor;
+    int32_t thiszone;
+    uint32_t sigfigs;
+    uint32_t snaplen;
+    uint32_t linktype;
+};
+
+struct pcap_sf_pkthdr {
+    struct {
+        int32_t tv_sec;
+        int32_t tv_usec;
+    } ts;
+    uint32_t caplen;
+    uint32_t len;
+};
+
+static void dump_receive(void *opaque, const uint8_t *buf, int size)
+{
+    DumpState *s = opaque;
+    struct pcap_sf_pkthdr hdr;
+    int64_t ts;
+    int caplen;
+
+    /* Early return in case of previous error. */
+    if (s->fd < 0) {
+        return;
+    }
+
+    ts = muldiv64 (qemu_get_clock(vm_clock),1000000, ticks_per_sec);
+    caplen = size > s->pcap_caplen ? s->pcap_caplen : size;
+
+    hdr.ts.tv_sec = ts / 1000000000LL;
+    hdr.ts.tv_usec = ts % 1000000;
+    hdr.caplen = caplen;
+    hdr.len = size;
+    if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
+        write(s->fd, buf, caplen) != caplen) {
+        qemu_log("-net dump write error - stop dump\n");
+        close(s->fd);
+        s->fd = -1;
+    }
+}
+
+static void net_dump_cleanup(VLANClientState *vc)
+{
+    DumpState *s = vc->opaque;
+
+    close(s->fd);
+    qemu_free(s);
+}
+
+static int net_dump_init(VLANState *vlan, const char *device,
+                         const char *name, const char *filename, int len)
+{
+    struct pcap_file_hdr hdr;
+    DumpState *s;
+
+    s = qemu_malloc(sizeof(DumpState));
+
+    s->fd = open(filename, O_CREAT | O_WRONLY, 0644);
+    if (s->fd < 0) {
+        fprintf(stderr, "-net dump: can't open %s\n", filename);
+        return -1;
+    }
+
+    s->pcap_caplen = len;
+
+    hdr.magic = PCAP_MAGIC;
+    hdr.version_major = 2;
+    hdr.version_minor = 4;
+    hdr.thiszone = 0;
+    hdr.sigfigs = 0;
+    hdr.snaplen = s->pcap_caplen;
+    hdr.linktype = 1;
+
+    if (write(s->fd, &hdr, sizeof(hdr)) < sizeof(hdr)) {
+        perror("-net dump write error");
+        close(s->fd);
+        qemu_free(s);
+        return -1;
+    }
+
+    s->pcap_vc = qemu_new_vlan_client(vlan, device, name, dump_receive, NULL,
+                                      net_dump_cleanup, s);
+    snprintf(s->pcap_vc->info_str, sizeof(s->pcap_vc->info_str),
+             "dump to %s (len=%d)", filename, len);
+    return 0;
+}
+
 /* find or alloc a new VLAN */
 VLANState *qemu_find_vlan(int id)
 {
@@ -1883,7 +1984,17 @@
 	ret = net_vde_init(vlan, device, name, vde_sock, vde_port, vde_group, vde_mode);
     } else
 #endif
-    {
+    if (!strcmp(device, "dump")) {
+        int len = 65536;
+
+        if (get_param_value(buf, sizeof(buf), "len", p) > 0) {
+            len = strtol(buf, NULL, 0);
+        }
+        if (!get_param_value(buf, sizeof(buf), "file", p)) {
+            snprintf(buf, sizeof(buf), "qemu-vlan%d.pcap", vlan_id);
+        }
+        ret = net_dump_init(vlan, device, name, buf, len);
+    } else {
         fprintf(stderr, "Unknown network device: %s\n", device);
         ret = -1;
         goto out;
@@ -1908,7 +2019,7 @@
 static int net_host_check_device(const char *device)
 {
     int i;
-    const char *valid_param_list[] = { "tap", "socket"
+    const char *valid_param_list[] = { "tap", "socket", "dump"
 #ifdef CONFIG_SLIRP
                                        ,"user"
 #endif
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7199)
+++ qemu-options.hx	(revision 7200)
@@ -745,6 +745,8 @@
     "                Use group 'groupname' and mode 'octalmode' to change default\n"
     "                ownership and permissions for communication port.\n"
 #endif
+    "-net dump[,vlan=n][,file=f][,len=n]\n"
+    "                dump traffic on vlan 'n' to file 'f' (max n bytes per packet)\n"
     "-net none       use it alone to have zero network devices; if no -net option\n"
     "                is provided, the default is '-net nic -net user'\n")
 STEXI
@@ -865,6 +867,11 @@
 qemu linux.img -net nic -net vde,sock=/tmp/myswitch
 @end example
 
+@item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
+Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
+At most @var{len} bytes (64k by default) per packet are stored. The file format is
+libpcap, so it can be analyzed with tools such as tcpdump or Wireshark.
+
 @item -net none
 Indicate that no network devices should be configured. It is used to
 override the default configuration (@option{-net nic -net user}) which
Index: monitor.c
===================================================================
--- monitor.c	(revision 7199)
+++ monitor.c	(revision 7200)
@@ -1731,7 +1731,7 @@
     { "pci_add", "sss", pci_device_hot_add, "pci_addr=auto|[[:]:] nic|storage [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" },
     { "pci_del", "s", pci_device_hot_remove, "pci_addr=[[:]:]", "hot remove PCI device" },
     { "host_net_add", "ss", net_host_device_add,
-      "[tap,user,socket,vde] options", "add host VLAN client" },
+      "[tap,user,socket,vde,dump] options", "add host VLAN client" },
     { "host_net_remove", "is", net_host_device_remove,
       "vlan_id name", "remove host VLAN client" },
 #endif

 ------------------------------------------------------------------------
r7199 | aliguori | 2009-04-21 14:56:23 -0500 (Tue, 21 Apr 2009) | 5 lines
Changed paths:
   M /trunk/vl.c

Allow empty params for check_params (Jan Kiszka)

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7198)
+++ vl.c	(revision 7199)
@@ -1934,7 +1934,7 @@
     int i;
 
     p = str;
-    for(;;) {
+    while (*p != '\0') {
         p = get_opt_name(buf, buf_size, p);
         if (*p != '=')
             return -1;

 ------------------------------------------------------------------------
r7198 | aliguori | 2009-04-21 14:56:20 -0500 (Tue, 21 Apr 2009) | 15 lines
Changed paths:
   M /trunk/slirp/bootp.c
   M /trunk/slirp/bootp.h

slirp: Handle DHCP requests for specific IP (Jan Kiszka)

This adds proper handling of the ciaddr field as well as the "Requested
IP Address" option to slirp's DHCP server. If the client requests an
invalid or used IP, a NAK reply is sent, if it requests a specific but
valid IP, this is now respected.

NAK'ing invalid IPs is specifically useful when changing the slirp IP
range via '-net user,ip=...' while the client saved its previously used
address and tries to reacquire it. Now this will be NAK'ed and the
client will start a new discovery round.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: slirp/bootp.c
===================================================================
--- slirp/bootp.c	(revision 7197)
+++ slirp/bootp.c	(revision 7198)
@@ -66,6 +66,24 @@
     return bc;
 }
 
+static BOOTPClient *request_addr(const struct in_addr *paddr,
+                                 const uint8_t *macaddr)
+{
+    uint32_t req_addr = ntohl(paddr->s_addr);
+    uint32_t spec_addr = ntohl(special_addr.s_addr);
+    BOOTPClient *bc;
+
+    if (req_addr >= (spec_addr | START_ADDR) &&
+        req_addr < (spec_addr | (NB_ADDR + START_ADDR))) {
+        bc = &bootp_clients[(req_addr & 0xff) - START_ADDR];
+        if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) {
+            bc->allocated = 1;
+            return bc;
+        }
+    }
+    return NULL;
+}
+
 static BOOTPClient *find_addr(struct in_addr *paddr, const uint8_t *macaddr)
 {
     BOOTPClient *bc;
@@ -83,18 +101,17 @@
     return bc;
 }
 
-static void dhcp_decode(const uint8_t *buf, int size,
-                        int *pmsg_type)
+static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type,
+                        const struct in_addr **preq_addr)
 {
     const uint8_t *p, *p_end;
     int len, tag;
 
     *pmsg_type = 0;
+    *preq_addr = NULL;
 
-    p = buf;
-    p_end = buf + size;
-    if (size < 5)
-        return;
+    p = bp->bp_vend;
+    p_end = p + DHCP_OPT_LEN;
     if (memcmp(p, rfc1533_cookie, 4) != 0)
         return;
     p += 4;
@@ -109,34 +126,46 @@
             if (p >= p_end)
                 break;
             len = *p++;
-            dprintf("dhcp: tag=0x%02x len=%d\n", tag, len);
+            dprintf("dhcp: tag=%d len=%d\n", tag, len);
 
             switch(tag) {
             case RFC2132_MSG_TYPE:
                 if (len >= 1)
                     *pmsg_type = p[0];
                 break;
+            case RFC2132_REQ_ADDR:
+                if (len >= 4)
+                    *preq_addr = (struct in_addr *)p;
+                break;
             default:
                 break;
             }
             p += len;
         }
     }
+    if (*pmsg_type == DHCPREQUEST && !*preq_addr && bp->bp_ciaddr.s_addr) {
+        *preq_addr = &bp->bp_ciaddr;
+    }
 }
 
-static void bootp_reply(struct bootp_t *bp)
+static void bootp_reply(const struct bootp_t *bp)
 {
-    BOOTPClient *bc;
+    BOOTPClient *bc = NULL;
     struct mbuf *m;
     struct bootp_t *rbp;
     struct sockaddr_in saddr, daddr;
     struct in_addr dns_addr;
+    const struct in_addr *preq_addr;
     int dhcp_msg_type, val;
     uint8_t *q;
 
     /* extract exact DHCP msg type */
-    dhcp_decode(bp->bp_vend, DHCP_OPT_LEN, &dhcp_msg_type);
-    dprintf("bootp packet op=%d msgtype=%d\n", bp->bp_op, dhcp_msg_type);
+    dhcp_decode(bp, &dhcp_msg_type, &preq_addr);
+    dprintf("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type);
+    if (preq_addr)
+        dprintf(" req_addr=%08x\n", ntohl(preq_addr->s_addr));
+    else
+        dprintf("\n");
 
     if (dhcp_msg_type == 0)
         dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */
@@ -155,13 +184,29 @@
     memset(rbp, 0, sizeof(struct bootp_t));
 
     if (dhcp_msg_type == DHCPDISCOVER) {
-    new_addr:
-        bc = get_new_addr(&daddr.sin_addr);
+        if (preq_addr) {
+            bc = request_addr(preq_addr, client_ethaddr);
+            if (bc) {
+                daddr.sin_addr = *preq_addr;
+            }
+        }
         if (!bc) {
-            dprintf("no address left\n");
-            return;
+         new_addr:
+            bc = get_new_addr(&daddr.sin_addr);
+            if (!bc) {
+                dprintf("no address left\n");
+                return;
+            }
         }
         memcpy(bc->macaddr, client_ethaddr, 6);
+    } else if (preq_addr) {
+        bc = request_addr(preq_addr, client_ethaddr);
+        if (bc) {
+            daddr.sin_addr = *preq_addr;
+            memcpy(bc->macaddr, client_ethaddr, 6);
+        } else {
+            daddr.sin_addr.s_addr = 0;
+        }
     } else {
         bc = find_addr(&daddr.sin_addr, bp->bp_hwaddr);
         if (!bc) {
@@ -171,12 +216,6 @@
         }
     }
 
-    if (bootp_filename)
-        snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s",
-                 bootp_filename);
-
-    dprintf("offered addr=%08x\n", ntohl(daddr.sin_addr.s_addr));
-
     saddr.sin_addr.s_addr = htonl(ntohl(special_addr.s_addr) | CTL_ALIAS);
     saddr.sin_port = htons(BOOTP_SERVER);
 
@@ -191,24 +230,29 @@
     rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */
     rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */
 
-    daddr.sin_addr.s_addr = 0xffffffffu;
-
     q = rbp->bp_vend;
     memcpy(q, rfc1533_cookie, 4);
     q += 4;
 
-    if (dhcp_msg_type == DHCPDISCOVER) {
-        *q++ = RFC2132_MSG_TYPE;
-        *q++ = 1;
-        *q++ = DHCPOFFER;
-    } else if (dhcp_msg_type == DHCPREQUEST) {
-        *q++ = RFC2132_MSG_TYPE;
-        *q++ = 1;
-        *q++ = DHCPACK;
-    }
+    if (bc) {
+        dprintf("%s addr=%08x\n",
+                (dhcp_msg_type == DHCPDISCOVER) ? "offered" : "ack'ed",
+                ntohl(daddr.sin_addr.s_addr));
 
-    if (dhcp_msg_type == DHCPDISCOVER ||
-        dhcp_msg_type == DHCPREQUEST) {
+        if (dhcp_msg_type == DHCPDISCOVER) {
+            *q++ = RFC2132_MSG_TYPE;
+            *q++ = 1;
+            *q++ = DHCPOFFER;
+        } else /* DHCPREQUEST */ {
+            *q++ = RFC2132_MSG_TYPE;
+            *q++ = 1;
+            *q++ = DHCPACK;
+        }
+
+        if (bootp_filename)
+            snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s",
+                     bootp_filename);
+
         *q++ = RFC2132_SRV_ID;
         *q++ = 4;
         memcpy(q, &saddr.sin_addr, 4);
@@ -247,9 +291,24 @@
             memcpy(q, slirp_hostname, val);
             q += val;
         }
+    } else {
+        static const char nak_msg[] = "requested address not available";
+
+        dprintf("nak'ed addr=%08x\n", ntohl(preq_addr->s_addr));
+
+        *q++ = RFC2132_MSG_TYPE;
+        *q++ = 1;
+        *q++ = DHCPNAK;
+
+        *q++ = RFC2132_MESSAGE;
+        *q++ = sizeof(nak_msg) - 1;
+        memcpy(q, nak_msg, sizeof(nak_msg) - 1);
+        q += sizeof(nak_msg) - 1;
     }
     *q++ = RFC1533_END;
 
+    daddr.sin_addr.s_addr = 0xffffffffu;
+
     m->m_len = sizeof(struct bootp_t) -
         sizeof(struct ip) - sizeof(struct udphdr);
     udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY);
Index: slirp/bootp.h
===================================================================
--- slirp/bootp.h	(revision 7197)
+++ slirp/bootp.h	(revision 7198)
@@ -63,6 +63,7 @@
 #define RFC2132_MSG_TYPE	53
 #define RFC2132_SRV_ID		54
 #define RFC2132_PARAM_LIST	55
+#define RFC2132_MESSAGE		56
 #define RFC2132_MAX_SIZE	57
 #define RFC2132_RENEWAL_TIME    58
 #define RFC2132_REBIND_TIME     59
@@ -71,6 +72,7 @@
 #define DHCPOFFER		2
 #define DHCPREQUEST		3
 #define DHCPACK			5
+#define DHCPNAK			6
 
 #define RFC1533_VENDOR_MAJOR	0
 #define RFC1533_VENDOR_MINOR	0

 ------------------------------------------------------------------------
r7197 | aliguori | 2009-04-21 14:56:15 -0500 (Tue, 21 Apr 2009) | 10 lines
Changed paths:
   M /trunk/net.c

net: Add parameter checks for VLAN clients (Jan Kiszka)

This aims at helping the user to find typos or other mistakes in
parameter lists passed for VLAN client initialization. The existing
parsing infrastructure does not allow a leaner approach, but this is
better than nothing IMHO.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7196)
+++ net.c	(revision 7197)
@@ -1622,6 +1622,9 @@
 
 int net_client_init(const char *device, const char *p)
 {
+    static const char * const fd_params[] = {
+        "vlan", "name", "fd", NULL
+    };
     char buf[1024];
     int vlan_id, ret;
     VLANState *vlan;
@@ -1637,10 +1640,18 @@
         name = strdup(buf);
     }
     if (!strcmp(device, "nic")) {
+        static const char * const nic_params[] = {
+            "vlan", "name", "macaddr", "model", NULL
+        };
         NICInfo *nd;
         uint8_t *macaddr;
         int idx = nic_get_free_idx();
 
+        if (check_params(buf, sizeof(buf), nic_params, p) < 0) {
+            fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                    buf, p);
+            return -1;
+        }
         if (idx == -1 || nb_nics >= MAX_NICS) {
             fprintf(stderr, "Too Many NICs\n");
             ret = -1;
@@ -1674,12 +1685,24 @@
         ret = idx;
     } else
     if (!strcmp(device, "none")) {
+        if (*p != '\0') {
+            fprintf(stderr, "qemu: 'none' takes no parameters\n");
+            return -1;
+        }
         /* does nothing. It is needed to signal that no network cards
            are wanted */
         ret = 0;
     } else
 #ifdef CONFIG_SLIRP
     if (!strcmp(device, "user")) {
+        static const char * const slirp_params[] = {
+            "vlan", "name", "hostname", "restrict", "ip", NULL
+        };
+        if (check_params(buf, sizeof(buf), slirp_params, p) < 0) {
+            fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                    buf, p);
+            return -1;
+        }
         if (get_param_value(buf, sizeof(buf), "hostname", p)) {
             pstrcpy(slirp_hostname, sizeof(slirp_hostname), buf);
         }
@@ -1721,7 +1744,16 @@
 #endif
 #ifdef _WIN32
     if (!strcmp(device, "tap")) {
+        static const char * const tap_params[] = {
+            "vlan", "name", "ifname", NULL
+        };
         char ifname[64];
+
+        if (check_params(buf, sizeof(buf), tap_params, p) < 0) {
+            fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                    buf, p);
+            return -1;
+        }
         if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) {
             fprintf(stderr, "tap: no interface name\n");
             ret = -1;
@@ -1738,11 +1770,24 @@
         int fd;
         vlan->nb_host_devs++;
         if (get_param_value(buf, sizeof(buf), "fd", p) > 0) {
+            if (check_params(buf, sizeof(buf), fd_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             fd = strtol(buf, NULL, 0);
             fcntl(fd, F_SETFL, O_NONBLOCK);
             net_tap_fd_init(vlan, device, name, fd);
             ret = 0;
         } else {
+            static const char * const tap_params[] = {
+                "vlan", "name", "ifname", "script", "downscript", NULL
+            };
+            if (check_params(buf, sizeof(buf), tap_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) {
                 ifname[0] = '\0';
             }
@@ -1759,15 +1804,44 @@
     if (!strcmp(device, "socket")) {
         if (get_param_value(buf, sizeof(buf), "fd", p) > 0) {
             int fd;
+            if (check_params(buf, sizeof(buf), fd_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             fd = strtol(buf, NULL, 0);
             ret = -1;
             if (net_socket_fd_init(vlan, device, name, fd, 1))
                 ret = 0;
         } else if (get_param_value(buf, sizeof(buf), "listen", p) > 0) {
+            static const char * const listen_params[] = {
+                "vlan", "name", "listen", NULL
+            };
+            if (check_params(buf, sizeof(buf), listen_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             ret = net_socket_listen_init(vlan, device, name, buf);
         } else if (get_param_value(buf, sizeof(buf), "connect", p) > 0) {
+            static const char * const connect_params[] = {
+                "vlan", "name", "connect", NULL
+            };
+            if (check_params(buf, sizeof(buf), connect_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             ret = net_socket_connect_init(vlan, device, name, buf);
         } else if (get_param_value(buf, sizeof(buf), "mcast", p) > 0) {
+            static const char * const mcast_params[] = {
+                "vlan", "name", "mcast", NULL
+            };
+            if (check_params(buf, sizeof(buf), mcast_params, p) < 0) {
+                fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                        buf, p);
+                return -1;
+            }
             ret = net_socket_mcast_init(vlan, device, name, buf);
         } else {
             fprintf(stderr, "Unknown socket options: %s\n", p);
@@ -1778,8 +1852,17 @@
     } else
 #ifdef CONFIG_VDE
     if (!strcmp(device, "vde")) {
+        static const char * const vde_params[] = {
+            "vlan", "name", "sock", "port", "group", "mode", NULL
+        };
         char vde_sock[1024], vde_group[512];
 	int vde_port, vde_mode;
+
+        if (check_params(buf, sizeof(buf), vde_params, p) < 0) {
+            fprintf(stderr, "qemu: invalid parameter '%s' in '%s'\n",
+                    buf, p);
+            return -1;
+        }
         vlan->nb_host_devs++;
         if (get_param_value(vde_sock, sizeof(vde_sock), "sock", p) <= 0) {
 	    vde_sock[0] = '\0';

 ------------------------------------------------------------------------
r7196 | aliguori | 2009-04-21 14:56:11 -0500 (Tue, 21 Apr 2009) | 9 lines
Changed paths:
   M /trunk/net.c

net: Fix -net socket,listen (Jan Kiszka)

In case no symbolic name is provided when requesting a VLAN connection via
a listening TCP socket ('-net socket,listen=...'), qemu crashes. This
fixes the cause.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7195)
+++ net.c	(revision 7196)
@@ -1472,7 +1472,7 @@
     }
     s->vlan = vlan;
     s->model = strdup(model);
-    s->name = strdup(name);
+    s->name = name ? strdup(name) : NULL;
     s->fd = fd;
     qemu_set_fd_handler(fd, net_socket_accept, NULL, s);
     return 0;

 ------------------------------------------------------------------------
r7195 | aliguori | 2009-04-21 14:56:08 -0500 (Tue, 21 Apr 2009) | 7 lines
Changed paths:
   M /trunk/net.c

net: Check device passed to host_net_remove (Jan Kiszka)

Make sure that we do not delete guest NICs via host_net_remove.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7194)
+++ net.c	(revision 7195)
@@ -1858,14 +1858,20 @@
 
     vlan = qemu_find_vlan(vlan_id);
 
-   for(vc = vlan->first_client; vc != NULL; vc = vc->next)
-        if (!strcmp(vc->name, device))
+    for (vc = vlan->first_client; vc != NULL; vc = vc->next) {
+        if (!strcmp(vc->name, device)) {
             break;
+        }
+    }
 
     if (!vc) {
         monitor_printf(mon, "can't find device %s\n", device);
         return;
     }
+    if (!net_host_check_device(vc->model)) {
+        monitor_printf(mon, "invalid host network device %s\n", device);
+        return;
+    }
     qemu_del_vlan_client(vc);
 }
 

 ------------------------------------------------------------------------
r7194 | pbrook | 2009-04-20 20:41:10 -0500 (Mon, 20 Apr 2009) | 15 lines
Changed paths:
   M /trunk/linux-user/main.c
   M /trunk/linux-user/mips/syscall.h
   M /trunk/linux-user/signal.c

MIPS signal handling fixes.

Also fixes a register corruption bug in do_sigreturn. When "returning"
from sigreturn we are actually restoring the virtual cpu state from the
signal frame.  This is actually surprisingly hard to observe in practice.

Typically a thread will be blocked in a FUTEX_WAIT call when the signal arrives,
so the effect is a spurious syscall success and the introduction of a
subtle race condition.

On x86/arm a syscall modifies a single word sized register, so
do_sigreturn can just return that value.  On MIPS a syscall clobbers
multiple registers, so we need additional smarts.  My solution is to
invent a magic errno value that means "don't touch CPU state".

 ------------------------------------------------------------------------

Index: linux-user/mips/syscall.h
===================================================================
--- linux-user/mips/syscall.h	(revision 7193)
+++ linux-user/mips/syscall.h	(revision 7194)
@@ -221,4 +221,7 @@
 
 
 
+/* Nasty hack: define a fake errno value for use by sigreturn.  */
+#define TARGET_QEMU_ESIGRETURN 255
+
 #define UNAME_MACHINE "mips"
Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c	(revision 7193)
+++ linux-user/signal.c	(revision 7194)
@@ -2313,6 +2313,21 @@
     target_sigset_t sf_mask;
 };
 
+struct target_ucontext {
+    target_ulong uc_flags;
+    target_ulong uc_link;
+    target_stack_t uc_stack;
+    struct target_sigcontext uc_mcontext;
+    target_sigset_t uc_sigmask;
+};
+
+struct target_rt_sigframe {
+    uint32_t rs_ass[4];               /* argument save space for o32 */
+    uint32_t rs_code[2];              /* signal trampoline */
+    struct target_siginfo rs_info;
+    struct target_ucontext rs_uc;
+};
+
 /* Install trampoline to jump back from signal handler */
 static inline int install_sigtramp(unsigned int *tramp,   unsigned int syscall)
 {
@@ -2592,7 +2607,7 @@
     /* I am not sure this is right, but it seems to work
     * maybe a problem with nested signals ? */
     regs->CP0_EPC = 0;
-    return 0;
+    return -TARGET_QEMU_ESIGRETURN;
 
 badframe:
     force_sig(TARGET_SIGSEGV/*, current*/);
@@ -2603,13 +2618,95 @@
                            target_siginfo_t *info,
 			   target_sigset_t *set, CPUState *env)
 {
-    fprintf(stderr, "setup_rt_frame: not implemented\n");
+    struct target_rt_sigframe *frame;
+    abi_ulong frame_addr;
+    int i;
+
+    frame_addr = get_sigframe(ka, env, sizeof(*frame));
+    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
+	goto give_sigsegv;
+
+    install_sigtramp(frame->rs_code, TARGET_NR_rt_sigreturn);
+
+    copy_siginfo_to_user(&frame->rs_info, info);
+
+    __put_user(0, &frame->rs_uc.uc_flags);
+    __put_user(0, &frame->rs_uc.uc_link);
+    __put_user(target_sigaltstack_used.ss_sp, &frame->rs_uc.uc_stack.ss_sp);
+    __put_user(target_sigaltstack_used.ss_size, &frame->rs_uc.uc_stack.ss_size);
+    __put_user(sas_ss_flags(get_sp_from_cpustate(env)),
+               &frame->rs_uc.uc_stack.ss_flags);
+
+    setup_sigcontext(env, &frame->rs_uc.uc_mcontext);
+
+    for(i = 0; i < TARGET_NSIG_WORDS; i++) {
+        __put_user(set->sig[i], &frame->rs_uc.uc_sigmask.sig[i]);
+    }
+
+    /*
+    * Arguments to signal handler:
+    *
+    *   a0 = signal number
+    *   a1 = pointer to struct siginfo
+    *   a2 = pointer to struct ucontext
+    *
+    * $25 and PC point to the signal handler, $29 points to the
+    * struct sigframe.
+    */
+    env->active_tc.gpr[ 4] = sig;
+    env->active_tc.gpr[ 5] = frame_addr
+                             + offsetof(struct target_rt_sigframe, rs_info);
+    env->active_tc.gpr[ 6] = frame_addr
+                             + offsetof(struct target_rt_sigframe, rs_uc);
+    env->active_tc.gpr[29] = frame_addr;
+    env->active_tc.gpr[31] = frame_addr
+                             + offsetof(struct target_rt_sigframe, rs_code);
+    /* The original kernel code sets CP0_EPC to the handler
+    * since it returns to userland using eret
+    * we cannot do this here, and we must set PC directly */
+    env->active_tc.PC = env->active_tc.gpr[25] = ka->_sa_handler;
+    unlock_user_struct(frame, frame_addr, 1);
+    return;
+
+give_sigsegv:
+    unlock_user_struct(frame, frame_addr, 1);
+    force_sig(TARGET_SIGSEGV/*, current*/);
+    return;
 }
 
 long do_rt_sigreturn(CPUState *env)
 {
-    fprintf(stderr, "do_rt_sigreturn: not implemented\n");
-    return -TARGET_ENOSYS;
+    struct target_rt_sigframe *frame;
+    abi_ulong frame_addr;
+    sigset_t blocked;
+
+#if defined(DEBUG_SIGNAL)
+    fprintf(stderr, "do_rt_sigreturn\n");
+#endif
+    frame_addr = env->active_tc.gpr[29];
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
+   	goto badframe;
+
+    target_to_host_sigset(&blocked, &frame->rs_uc.uc_sigmask);
+    sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+    if (restore_sigcontext(env, &frame->rs_uc.uc_mcontext))
+        goto badframe;
+
+    if (do_sigaltstack(frame_addr +
+		       offsetof(struct target_rt_sigframe, rs_uc.uc_stack),
+		       0, get_sp_from_cpustate(env)) == -EFAULT)
+        goto badframe;
+
+    env->active_tc.PC = env->CP0_EPC;
+    /* I am not sure this is right, but it seems to work
+    * maybe a problem with nested signals ? */
+    env->CP0_EPC = 0;
+    return -TARGET_QEMU_ESIGRETURN;
+
+badframe:
+    force_sig(TARGET_SIGSEGV/*, current*/);
+    return 0;
 }
 
 #elif defined(TARGET_SH4)
Index: linux-user/main.c
===================================================================
--- linux-user/main.c	(revision 7193)
+++ linux-user/main.c	(revision 7194)
@@ -1858,6 +1858,11 @@
                                  env->active_tc.gpr[7],
                                  arg5, arg6/*, arg7, arg8*/);
             }
+            if (ret == -TARGET_QEMU_ESIGRETURN) {
+                /* Returning from a successful sigreturn syscall.
+                   Avoid clobbering register state.  */
+                break;
+            }
             if ((unsigned int)ret >= (unsigned int)(-1133)) {
                 env->active_tc.gpr[7] = 1; /* error flag */
                 ret = -ret;

 ------------------------------------------------------------------------
r7193 | pbrook | 2009-04-20 20:03:10 -0500 (Mon, 20 Apr 2009) | 4 lines
Changed paths:
   M /trunk/linux-user/main.c

MIPS: Raise SIGSEGV, not SIGILL when an access faults.

Signed-off-by: Paul Brook 

 ------------------------------------------------------------------------

Index: linux-user/main.c
===================================================================
--- linux-user/main.c	(revision 7192)
+++ linux-user/main.c	(revision 7193)
@@ -1868,6 +1868,13 @@
             break;
         case EXCP_TLBL:
         case EXCP_TLBS:
+            info.si_signo = TARGET_SIGSEGV;
+            info.si_errno = 0;
+            /* XXX: check env->error_code */
+            info.si_code = TARGET_SEGV_MAPERR;
+            info._sifields._sigfault._addr = env->CP0_BadVAddr;
+            queue_signal(env, info.si_signo, &info);
+            break;
         case EXCP_CpU:
         case EXCP_RI:
             info.si_signo = TARGET_SIGILL;

 ------------------------------------------------------------------------
r7192 | pbrook | 2009-04-20 19:59:40 -0500 (Mon, 20 Apr 2009) | 4 lines
Changed paths:
   M /trunk/linux-user/syscall_defs.h

Fix target_siginfo ordering for MIPS.

Signed-off-by: Paul Brook 

 ------------------------------------------------------------------------

Index: linux-user/syscall_defs.h
===================================================================
--- linux-user/syscall_defs.h	(revision 7191)
+++ linux-user/syscall_defs.h	(revision 7192)
@@ -504,9 +504,15 @@
 #define TARGET_SI_PAD_SIZE	((TARGET_SI_MAX_SIZE/sizeof(int)) - 3)
 
 typedef struct target_siginfo {
+#ifdef TARGET_MIPS
 	int si_signo;
+	int si_code;
 	int si_errno;
+#else
+	int si_signo;
+	int si_errno;
 	int si_code;
+#endif
 
 	union {
 		int _pad[TARGET_SI_PAD_SIZE];

 ------------------------------------------------------------------------
r7191 | pbrook | 2009-04-20 18:55:57 -0500 (Mon, 20 Apr 2009) | 4 lines
Changed paths:
   M /trunk/target-mips/translate.c

Enable access to SYNCI_Step register in usermode emulation.

Signed-off-by: Paul Brook 

 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7190)
+++ target-mips/translate.c	(revision 7191)
@@ -8568,6 +8568,8 @@
     /* Minimal init */
 #if defined(CONFIG_USER_ONLY)
     env->hflags = MIPS_HFLAG_UM;
+    /* Enable access to the SYNCI_Step register.  */
+    env->CP0_HWREna |= (1 << 1);
 #else
     if (env->hflags & MIPS_HFLAG_BMASK) {
         /* If the exception was raised from a delay slot,

 ------------------------------------------------------------------------
r7190 | blueswir1 | 2009-04-19 05:25:05 -0500 (Sun, 19 Apr 2009) | 4 lines
Changed paths:
   M /trunk/Makefile

Build system: Fix dependency of qemu.1

Signed-off-by: Jan Kiszka 

 ------------------------------------------------------------------------

Index: Makefile
===================================================================
--- Makefile	(revision 7189)
+++ Makefile	(revision 7190)
@@ -310,7 +310,7 @@
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx
 	$(call quiet-command,sh $(SRC_PATH)/hxtool -t < $< > $@,"  GEN   $@")
 
-qemu.1: qemu-doc.texi
+qemu.1: qemu-doc.texi qemu-options.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/texi2pod.pl $< qemu.pod && \
 	  pod2man --section=1 --center=" " --release=" " qemu.pod > $@, \

 ------------------------------------------------------------------------
r7189 | blueswir1 | 2009-04-19 05:18:01 -0500 (Sun, 19 Apr 2009) | 7 lines
Changed paths:
   M /trunk/configure
   M /trunk/cpu-all.h
   M /trunk/cpu-exec.c
   M /trunk/exec-all.h
   M /trunk/exec.c
   M /trunk/hw/pc.c
   M /trunk/kqemu.c
   M /trunk/monitor.c
   M /trunk/osdep.c
   M /trunk/qemu-options.hx
   M /trunk/softmmu_template.h
   M /trunk/sysemu.h
   M /trunk/target-i386/cpu.h
   M /trunk/target-i386/helper.c
   M /trunk/target-i386/op_helper.c
   M /trunk/vl.c

kqemu: merge CONFIG_KQEMU and USE_KQEMU

Basically a recursive ":%s/USE_KQEMU/CONFIG_KQEMU/g".

Signed-off-by: Paul Bolle 


 ------------------------------------------------------------------------

Index: osdep.c
===================================================================
--- osdep.c	(revision 7188)
+++ osdep.c	(revision 7189)
@@ -69,7 +69,7 @@
 
 #else
 
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
 
 #ifdef __OpenBSD__
 #include <sys/param.h>
@@ -197,7 +197,7 @@
 /* alloc shared memory pages */
 void *qemu_vmalloc(size_t size)
 {
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
     if (kqemu_allowed)
         return kqemu_vmalloc(size);
 #endif
@@ -206,7 +206,7 @@
 
 void qemu_vfree(void *ptr)
 {
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
     if (kqemu_allowed)
         kqemu_vfree(ptr);
 #endif
Index: vl.c
===================================================================
--- vl.c	(revision 7188)
+++ vl.c	(revision 7189)
@@ -435,7 +435,7 @@
 {
     LOG_IOPORT("outb: %04x %02x\n", addr, val);
     ioport_write(0, addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -445,7 +445,7 @@
 {
     LOG_IOPORT("outw: %04x %04x\n", addr, val);
     ioport_write(1, addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -455,7 +455,7 @@
 {
     LOG_IOPORT("outl: %04x %08x\n", addr, val);
     ioport_write(2, addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -466,7 +466,7 @@
     int val;
     val = ioport_read(0, addr);
     LOG_IOPORT("inb : %04x %02x\n", addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -478,7 +478,7 @@
     int val;
     val = ioport_read(1, addr);
     LOG_IOPORT("inw : %04x %04x\n", addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -490,7 +490,7 @@
     int val;
     val = ioport_read(2, addr);
     LOG_IOPORT("inl : %04x %08x\n", addr, val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env)
         env->last_io_time = cpu_get_time_fast();
 #endif
@@ -1357,7 +1357,7 @@
         if (env) {
             /* stop the currently executing cpu because a timer occured */
             cpu_exit(env);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
             if (env->kqemu_enabled) {
                 kqemu_cpu_interrupt(env);
             }
@@ -3343,7 +3343,7 @@
     CPUState *env = cpu_single_env;
     if (env) {
         cpu_exit(env);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
         if (env->kqemu_enabled) {
             kqemu_cpu_interrupt(env);
         }
@@ -4634,7 +4634,7 @@
 
                 /* On 32-bit hosts, QEMU is limited by virtual address space */
                 if (value > (2047 << 20)
-#ifndef USE_KQEMU
+#ifndef CONFIG_KQEMU
                     && HOST_LONG_BITS == 32
 #endif
                     ) {
@@ -4809,7 +4809,7 @@
                 }
                 break;
 #endif
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
             case QEMU_OPTION_no_kqemu:
                 kqemu_allowed = 0;
                 break;
@@ -4820,7 +4820,7 @@
 #ifdef CONFIG_KVM
             case QEMU_OPTION_enable_kvm:
                 kvm_allowed = 1;
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
                 kqemu_allowed = 0;
 #endif
                 break;
@@ -4976,7 +4976,7 @@
         }
     }
 
-#if defined(CONFIG_KVM) && defined(USE_KQEMU)
+#if defined(CONFIG_KVM) && defined(CONFIG_KQEMU)
     if (kvm_allowed && kqemu_allowed) {
         fprintf(stderr,
                 "You can not enable both KVM and kqemu at the same time\n");
@@ -5055,7 +5055,7 @@
     }
 #endif
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (smp_cpus > 1)
         kqemu_allowed = 0;
 #endif
@@ -5148,7 +5148,7 @@
     if (ram_size == 0)
         ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     /* FIXME: This is a nasty hack because kqemu can't cope with dynamic
        guest ram allocation.  It needs to go away.  */
     if (kqemu_allowed) {
Index: softmmu_template.h
===================================================================
--- softmmu_template.h	(revision 7188)
+++ softmmu_template.h	(revision 7189)
@@ -76,7 +76,7 @@
     res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
 #endif
 #endif /* SHIFT > 2 */
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     env->last_io_time = cpu_get_time_fast();
 #endif
     return res;
@@ -221,7 +221,7 @@
     io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
 #endif
 #endif /* SHIFT > 2 */
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     env->last_io_time = cpu_get_time_fast();
 #endif
 }
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7188)
+++ qemu-options.hx	(revision 7189)
@@ -1304,7 +1304,7 @@
 Set the filename for the BIOS.
 ETEXI
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 DEF("kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu, \
     "-kernel-kqemu   enable KQEMU full virtualization (default is user mode only)\n")
 #endif
@@ -1313,7 +1313,7 @@
 Enable KQEMU full virtualization (default is user mode only).
 ETEXI
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 DEF("no-kqemu", 0, QEMU_OPTION_no_kqemu, \
     "-no-kqemu       disable KQEMU kernel module usage\n")
 #endif
Index: cpu-exec.c
===================================================================
--- cpu-exec.c	(revision 7188)
+++ cpu-exec.c	(revision 7189)
@@ -314,7 +314,7 @@
                 }
                 env->exception_index = -1;
             }
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
             if (kqemu_is_ok(env) && env->interrupt_request == 0 && env->exit_request == 0) {
                 int ret;
                 env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
@@ -594,7 +594,7 @@
                    jump. */
                 {
                     if (next_tb != 0 &&
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
                         (env->kqemu_enabled != 2) &&
 #endif
                         tb->page_addr[1] == -1) {
@@ -651,7 +651,7 @@
                 }
                 /* reset soft MMU for next block (it can currently
                    only be set by a memory fault) */
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
 #define MIN_CYCLE_BEFORE_SWITCH (100 * 1000)
                 if (kqemu_is_ok(env) &&
                     (cpu_get_time_fast() - env->last_io_time) >= MIN_CYCLE_BEFORE_SWITCH) {
Index: exec.c
===================================================================
--- exec.c	(revision 7188)
+++ exec.c	(revision 7189)
@@ -71,9 +71,9 @@
 #define TARGET_VIRT_ADDR_SPACE_BITS 42
 #elif defined(TARGET_PPC64)
 #define TARGET_PHYS_ADDR_SPACE_BITS 42
-#elif defined(TARGET_X86_64) && !defined(USE_KQEMU)
+#elif defined(TARGET_X86_64) && !defined(CONFIG_KQEMU)
 #define TARGET_PHYS_ADDR_SPACE_BITS 42
-#elif defined(TARGET_I386) && !defined(USE_KQEMU)
+#elif defined(TARGET_I386) && !defined(CONFIG_KQEMU)
 #define TARGET_PHYS_ADDR_SPACE_BITS 36
 #else
 /* Note: for compatibility with kqemu, we use 32 bits for x86_64 */
@@ -1760,7 +1760,7 @@
 
     memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env->kqemu_enabled) {
         kqemu_flush(env, flush_global);
     }
@@ -1809,7 +1809,7 @@
 
     tlb_flush_jmp_cache(env, addr);
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env->kqemu_enabled) {
         kqemu_flush_page(env, addr);
     }
@@ -1861,7 +1861,7 @@
     if (length == 0)
         return;
     len = length >> TARGET_PAGE_BITS;
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     /* XXX: should not depend on cpu context */
     env = first_cpu;
     if (env->kqemu_enabled) {
@@ -2328,7 +2328,7 @@
     ram_addr_t orig_size = size;
     void *subpage;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     /* XXX: should not depend on cpu context */
     env = first_cpu;
     if (env->kqemu_enabled) {
@@ -2429,7 +2429,7 @@
         kvm_uncoalesce_mmio_region(addr, size);
 }
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 /* XXX: better than nothing */
 static ram_addr_t kqemu_ram_alloc(ram_addr_t size)
 {
@@ -2449,7 +2449,7 @@
 {
     RAMBlock *new_block;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_phys_ram_base) {
         return kqemu_ram_alloc(size);
     }
@@ -2494,7 +2494,7 @@
     RAMBlock **prevp;
     RAMBlock *block;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_phys_ram_base) {
         return kqemu_phys_ram_base + addr;
     }
@@ -2532,7 +2532,7 @@
     RAMBlock *block;
     uint8_t *host = ptr;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_phys_ram_base) {
         return host - kqemu_phys_ram_base;
     }
@@ -2642,7 +2642,7 @@
 #endif
     }
     stb_p(qemu_get_ram_ptr(ram_addr), val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (cpu_single_env->kqemu_enabled &&
         (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
         kqemu_modify_page(cpu_single_env, ram_addr);
@@ -2667,7 +2667,7 @@
 #endif
     }
     stw_p(qemu_get_ram_ptr(ram_addr), val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (cpu_single_env->kqemu_enabled &&
         (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
         kqemu_modify_page(cpu_single_env, ram_addr);
@@ -2692,7 +2692,7 @@
 #endif
     }
     stl_p(qemu_get_ram_ptr(ram_addr), val);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (cpu_single_env->kqemu_enabled &&
         (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
         kqemu_modify_page(cpu_single_env, ram_addr);
@@ -2993,7 +2993,7 @@
 
     io_mem_watch = cpu_register_io_memory(0, watch_mem_read,
                                           watch_mem_write, NULL);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_phys_ram_base) {
         /* alloc dirty bits array */
         phys_ram_dirty = qemu_vmalloc(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
Index: monitor.c
===================================================================
--- monitor.c	(revision 7188)
+++ monitor.c	(revision 7189)
@@ -1368,7 +1368,7 @@
 
 static void do_info_kqemu(Monitor *mon)
 {
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     CPUState *env;
     int val;
     val = 0;
@@ -1445,7 +1445,7 @@
     kqemu_ret_int_count = 0;
     kqemu_ret_excp_count = 0;
     kqemu_ret_intr_count = 0;
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     kqemu_record_dump();
 #endif
 }
Index: exec-all.h
===================================================================
--- exec-all.h	(revision 7188)
+++ exec-all.h	(revision 7189)
@@ -352,7 +352,7 @@
 }
 #endif
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 #define KQEMU_MODIFY_PAGE_MASK (0xff & ~(VGA_DIRTY_FLAG | CODE_DIRTY_FLAG))
 
 #define MSR_QPI_COMMBASE 0xfabe0010
Index: configure
===================================================================
--- configure	(revision 7188)
+++ configure	(revision 7189)
@@ -1770,7 +1770,7 @@
     if test $kqemu = "yes" -a "$target_softmmu" = "yes"
     then
       echo "CONFIG_KQEMU=yes" >> $config_mak
-      echo "#define USE_KQEMU 1" >> $config_h
+      echo "#define CONFIG_KQEMU 1" >> $config_h
     fi
     if test "$kvm" = "yes" ; then
       echo "CONFIG_KVM=yes" >> $config_mak
@@ -1786,7 +1786,7 @@
     if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64"
     then
       echo "CONFIG_KQEMU=yes" >> $config_mak
-      echo "#define USE_KQEMU 1" >> $config_h
+      echo "#define CONFIG_KQEMU 1" >> $config_h
     fi
     if test "$kvm" = "yes" ; then
       echo "CONFIG_KVM=yes" >> $config_mak
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7188)
+++ sysemu.h	(revision 7189)
@@ -104,7 +104,7 @@
 extern int semihosting_enabled;
 extern int old_param;
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 extern int kqemu_allowed;
 #endif
 
Index: target-i386/helper.c
===================================================================
--- target-i386/helper.c	(revision 7188)
+++ target-i386/helper.c	(revision 7189)
@@ -930,7 +930,7 @@
 
 /* XXX: This value should match the one returned by CPUID
  * and in exec.c */
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
 #define PHYS_ADDR_MASK 0xfffff000LL
 #else
 # if defined(TARGET_X86_64)
@@ -1630,14 +1630,14 @@
 /* XXX: This value must match the one used in the MMU code. */ 
         if (env->cpuid_ext2_features & CPUID_EXT2_LM) {
             /* 64 bit processor */
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
             *eax = 0x00003020;	/* 48 bits virtual, 32 bits physical */
 #else
 /* XXX: The physical address space is limited to 42 bits in exec.c. */
             *eax = 0x00003028;	/* 48 bits virtual, 40 bits physical */
 #endif
         } else {
-#if defined(USE_KQEMU)
+#if defined(CONFIG_KQEMU)
             *eax = 0x00000020;	/* 32 bits physical */
 #else
             if (env->cpuid_features & CPUID_PSE36)
@@ -1689,7 +1689,7 @@
         return NULL;
     }
     cpu_reset(env);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     kqemu_init(env);
 #endif
     if (kvm_enabled())
Index: target-i386/cpu.h
===================================================================
--- target-i386/cpu.h	(revision 7188)
+++ target-i386/cpu.h	(revision 7189)
@@ -662,7 +662,7 @@
         uint64_t mask;
     } mtrr_var[8];
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     int kqemu_enabled;
     int last_io_time;
 #endif
@@ -820,7 +820,7 @@
 #define X86_DUMP_FPU  0x0001 /* dump FPU state too */
 #define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 static inline int cpu_get_time_fast(void)
 {
     int low, high;
Index: target-i386/op_helper.c
===================================================================
--- target-i386/op_helper.c	(revision 7188)
+++ target-i386/op_helper.c	(revision 7189)
@@ -1119,7 +1119,7 @@
         env->eflags |= IF_MASK;
         cpu_x86_set_cpl(env, 3);
     }
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_is_ok(env)) {
         if (env->hflags & HF_LMA_MASK)
             CC_OP = CC_OP_EFLAGS;
@@ -2478,7 +2478,7 @@
         SET_ESP(sp, sp_mask);
         EIP = offset;
     }
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_is_ok(env)) {
         env->exception_index = -1;
         cpu_loop_exit();
@@ -2764,7 +2764,7 @@
         helper_ret_protected(shift, 1, 0);
     }
     env->hflags2 &= ~HF2_NMI_MASK;
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_is_ok(env)) {
         CC_OP = CC_OP_EFLAGS;
         env->exception_index = -1;
@@ -2776,7 +2776,7 @@
 void helper_lret_protected(int shift, int addend)
 {
     helper_ret_protected(shift, 0, addend);
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_is_ok(env)) {
         env->exception_index = -1;
         cpu_loop_exit();
@@ -2854,7 +2854,7 @@
     }
     ESP = ECX;
     EIP = EDX;
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (kqemu_is_ok(env)) {
         env->exception_index = -1;
         cpu_loop_exit();
@@ -3167,7 +3167,7 @@
         val = env->kernelgsbase;
         break;
 #endif
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     case MSR_QPI_COMMBASE:
         if (env->kqemu_enabled) {
             val = kqemu_comm_base;
Index: hw/pc.c
===================================================================
--- hw/pc.c	(revision 7188)
+++ hw/pc.c	(revision 7189)
@@ -85,7 +85,7 @@
     /* Note: when using kqemu, it is more logical to return the host TSC
        because kqemu does not trap the RDTSC instruction for
        performance reasons */
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
     if (env->kqemu_enabled) {
         return cpu_get_real_ticks();
     } else
Index: cpu-all.h
===================================================================
--- cpu-all.h	(revision 7188)
+++ cpu-all.h	(revision 7189)
@@ -846,7 +846,7 @@
 #endif
 
 /* address in the RAM (different from a physical address) */
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 typedef uint32_t ram_addr_t;
 #else
 typedef unsigned long ram_addr_t;
Index: kqemu.c
===================================================================
--- kqemu.c	(revision 7188)
+++ kqemu.c	(revision 7189)
@@ -41,7 +41,7 @@
 #include "exec-all.h"
 #include "qemu-common.h"
 
-#ifdef USE_KQEMU
+#ifdef CONFIG_KQEMU
 
 #define DEBUG
 //#define PROFILE

 ------------------------------------------------------------------------
r7188 | aurel32 | 2009-04-19 04:15:50 -0500 (Sun, 19 Apr 2009) | 20 lines
Changed paths:
   M /trunk/hw/devices.h
   M /trunk/hw/r2d.c
   M /trunk/hw/sm501.c
   M /trunk/hw/usb-ohci.c

Adds SM501 usb host emulation feature.
It makes usb keyboard available for sh4/r2d system emulation.

The changes for "hw/usb-ohci.c" are as follows.
 - 'localmem_base' is introduced as OHCIState struct member.
   SM501 has a local memory, and it is used to pass and receive data with
   OHCI driver.  OHCI driver accesses it with SH4 physical memory address,
   and SM501 accesses it with SM501 local address.  'localmem_base' holds
   where the SM501 local memory is mapped into SH4 physical address space.
 - Memory access functions modified to adjust address with 'localmem_base'.
   The functions are, ohci_read_*(), ohci_put_*(), and ohci_copy_*().
 - ohci_read_hcca() and ohci_put_hcca() are introduced for more consistent
   implementation.

For the other source files, it:
 - introduces usb_ohci_init_sm501().
 - adds irq argument for SM501 initialization, to emulate USB interrupts.

Signed-off-by: Shin-ichiro KAWASAKI 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: hw/r2d.c
===================================================================
--- hw/r2d.c	(revision 7187)
+++ hw/r2d.c	(revision 7188)
@@ -222,7 +222,7 @@
     irq = r2d_fpga_init(0x04000000, sh7750_irl(s));
     pci = sh_pci_register_bus(r2d_pci_set_irq, r2d_pci_map_irq, irq, 0, 4);
 
-    sm501_init(0x10000000, SM501_VRAM_SIZE, serial_hds[2]);
+    sm501_init(0x10000000, SM501_VRAM_SIZE, irq[SM501], serial_hds[2]);
 
     /* onboard CF (True IDE mode, Master only). */
     if ((i = drive_get_index(IF_IDE, 0, 0)) != -1)
Index: hw/usb-ohci.c
===================================================================
--- hw/usb-ohci.c	(revision 7187)
+++ hw/usb-ohci.c	(revision 7188)
@@ -32,6 +32,7 @@
 #include "usb.h"
 #include "pci.h"
 #include "pxa.h"
+#include "devices.h"
 
 //#define DEBUG_OHCI
 /* Dump packet contents.  */
@@ -60,7 +61,8 @@
 
 enum ohci_type {
     OHCI_TYPE_PCI,
-    OHCI_TYPE_PXA
+    OHCI_TYPE_PXA,
+    OHCI_TYPE_SM501,
 };
 
 typedef struct {
@@ -108,6 +110,9 @@
     uint32_t hreset;
     uint32_t htest;
 
+    /* SM501 local memory offset */
+    target_phys_addr_t localmem_base;
+
     /* Active packets.  */
     uint32_t old_ctl;
     USBPacket usb_packet;
@@ -425,10 +430,13 @@
 }
 
 /* Get an array of dwords from main memory */
-static inline int get_dwords(uint32_t addr, uint32_t *buf, int num)
+static inline int get_dwords(OHCIState *ohci,
+                             uint32_t addr, uint32_t *buf, int num)
 {
     int i;
 
+    addr += ohci->localmem_base;
+
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         cpu_physical_memory_rw(addr, (uint8_t *)buf, sizeof(*buf), 0);
         *buf = le32_to_cpu(*buf);
@@ -438,10 +446,13 @@
 }
 
 /* Put an array of dwords in to main memory */
-static inline int put_dwords(uint32_t addr, uint32_t *buf, int num)
+static inline int put_dwords(OHCIState *ohci,
+                             uint32_t addr, uint32_t *buf, int num)
 {
     int i;
 
+    addr += ohci->localmem_base;
+
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
         cpu_physical_memory_rw(addr, (uint8_t *)&tmp, sizeof(tmp), 1);
@@ -451,10 +462,13 @@
 }
 
 /* Get an array of words from main memory */
-static inline int get_words(uint32_t addr, uint16_t *buf, int num)
+static inline int get_words(OHCIState *ohci,
+                            uint32_t addr, uint16_t *buf, int num)
 {
     int i;
 
+    addr += ohci->localmem_base;
+
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         cpu_physical_memory_rw(addr, (uint8_t *)buf, sizeof(*buf), 0);
         *buf = le16_to_cpu(*buf);
@@ -464,10 +478,13 @@
 }
 
 /* Put an array of words in to main memory */
-static inline int put_words(uint32_t addr, uint16_t *buf, int num)
+static inline int put_words(OHCIState *ohci,
+                            uint32_t addr, uint16_t *buf, int num)
 {
     int i;
 
+    addr += ohci->localmem_base;
+
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint16_t tmp = cpu_to_le16(*buf);
         cpu_physical_memory_rw(addr, (uint8_t *)&tmp, sizeof(tmp), 1);
@@ -476,40 +493,63 @@
     return 1;
 }
 
-static inline int ohci_read_ed(uint32_t addr, struct ohci_ed *ed)
+static inline int ohci_read_ed(OHCIState *ohci,
+                               uint32_t addr, struct ohci_ed *ed)
 {
-    return get_dwords(addr, (uint32_t *)ed, sizeof(*ed) >> 2);
+    return get_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
 }
 
-static inline int ohci_read_td(uint32_t addr, struct ohci_td *td)
+static inline int ohci_read_td(OHCIState *ohci,
+                               uint32_t addr, struct ohci_td *td)
 {
-    return get_dwords(addr, (uint32_t *)td, sizeof(*td) >> 2);
+    return get_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
-static inline int ohci_read_iso_td(uint32_t addr, struct ohci_iso_td *td)
+static inline int ohci_read_iso_td(OHCIState *ohci,
+                                   uint32_t addr, struct ohci_iso_td *td)
 {
-    return (get_dwords(addr, (uint32_t *)td, 4) &&
-            get_words(addr + 16, td->offset, 8));
+    return (get_dwords(ohci, addr, (uint32_t *)td, 4) &&
+            get_words(ohci, addr + 16, td->offset, 8));
 }
 
-static inline int ohci_put_ed(uint32_t addr, struct ohci_ed *ed)
+static inline int ohci_read_hcca(OHCIState *ohci,
+                                 uint32_t addr, struct ohci_hcca *hcca)
 {
-    return put_dwords(addr, (uint32_t *)ed, sizeof(*ed) >> 2);
+    cpu_physical_memory_rw(addr + ohci->localmem_base,
+                           (uint8_t *)hcca, sizeof(*hcca), 0);
+    return 1;
 }
 
-static inline int ohci_put_td(uint32_t addr, struct ohci_td *td)
+static inline int ohci_put_ed(OHCIState *ohci,
+                              uint32_t addr, struct ohci_ed *ed)
 {
-    return put_dwords(addr, (uint32_t *)td, sizeof(*td) >> 2);
+    return put_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
 }
 
-static inline int ohci_put_iso_td(uint32_t addr, struct ohci_iso_td *td)
+static inline int ohci_put_td(OHCIState *ohci,
+                              uint32_t addr, struct ohci_td *td)
 {
-    return (put_dwords(addr, (uint32_t *)td, 4) &&
-            put_words(addr + 16, td->offset, 8));
+    return put_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
+static inline int ohci_put_iso_td(OHCIState *ohci,
+                                  uint32_t addr, struct ohci_iso_td *td)
+{
+    return (put_dwords(ohci, addr, (uint32_t *)td, 4) &&
+            put_words(ohci, addr + 16, td->offset, 8));
+}
+
+static inline int ohci_put_hcca(OHCIState *ohci,
+                                uint32_t addr, struct ohci_hcca *hcca)
+{
+    cpu_physical_memory_rw(addr + ohci->localmem_base,
+                           (uint8_t *)hcca, sizeof(*hcca), 1);
+    return 1;
+}
+
 /* Read/Write the contents of a TD from/to main memory.  */
-static void ohci_copy_td(struct ohci_td *td, uint8_t *buf, int len, int write)
+static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
+                         uint8_t *buf, int len, int write)
 {
     uint32_t ptr;
     uint32_t n;
@@ -518,16 +558,17 @@
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr, buf, n, write);
+    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
     if (n == len)
         return;
     ptr = td->be & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr, buf, len - n, write);
+    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
 }
 
 /* Read/Write the contents of an ISO TD from/to main memory.  */
-static void ohci_copy_iso_td(uint32_t start_addr, uint32_t end_addr,
+static void ohci_copy_iso_td(OHCIState *ohci,
+                             uint32_t start_addr, uint32_t end_addr,
                              uint8_t *buf, int len, int write)
 {
     uint32_t ptr;
@@ -537,12 +578,12 @@
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr, buf, n, write);
+    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
     if (n == len)
         return;
     ptr = end_addr & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr, buf, len - n, write);
+    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
 }
 
 static void ohci_process_lists(OHCIState *ohci, int completion);
@@ -579,7 +620,7 @@
 
     addr = ed->head & OHCI_DPTR_MASK;
 
-    if (!ohci_read_iso_td(addr, &iso_td)) {
+    if (!ohci_read_iso_td(ohci, addr, &iso_td)) {
         printf("usb-ohci: ISO_TD read error at %x\n", addr);
         return 0;
     }
@@ -621,7 +662,7 @@
         i = OHCI_BM(iso_td.flags, TD_DI);
         if (i < ohci->done_count)
             ohci->done_count = i;
-        ohci_put_iso_td(addr, &iso_td);        
+        ohci_put_iso_td(ohci, addr, &iso_td);
         return 0;
     }
 
@@ -696,7 +737,7 @@
     }
 
     if (len && dir != OHCI_TD_DIR_IN) {
-        ohci_copy_iso_td(start_addr, end_addr, ohci->usb_buf, len, 0);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, 0);
     }
 
     if (completion) {
@@ -732,7 +773,7 @@
     /* Writeback */
     if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) {
         /* IN transfer succeeded */
-        ohci_copy_iso_td(start_addr, end_addr, ohci->usb_buf, ret, 1);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, 1);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
                     OHCI_CC_NOERROR);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE, ret);
@@ -788,7 +829,7 @@
         if (i < ohci->done_count)
             ohci->done_count = i;
     }
-    ohci_put_iso_td(addr, &iso_td);
+    ohci_put_iso_td(ohci, addr, &iso_td);
     return 1;
 }
 
@@ -818,7 +859,7 @@
 #endif
         return 1;
     }
-    if (!ohci_read_td(addr, &td)) {
+    if (!ohci_read_td(ohci, addr, &td)) {
         fprintf(stderr, "usb-ohci: TD read error at %x\n", addr);
         return 0;
     }
@@ -859,7 +900,7 @@
         }
 
         if (len && dir != OHCI_TD_DIR_IN && !completion) {
-            ohci_copy_td(&td, ohci->usb_buf, len, 0);
+            ohci_copy_td(ohci, &td, ohci->usb_buf, len, 0);
         }
     }
 
@@ -918,7 +959,7 @@
     }
     if (ret >= 0) {
         if (dir == OHCI_TD_DIR_IN) {
-            ohci_copy_td(&td, ohci->usb_buf, ret, 1);
+            ohci_copy_td(ohci, &td, ohci->usb_buf, ret, 1);
 #ifdef DEBUG_PACKET
             dprintf("  data:");
             for (i = 0; i < ret; i++)
@@ -987,7 +1028,7 @@
     i = OHCI_BM(td.flags, TD_DI);
     if (i < ohci->done_count)
         ohci->done_count = i;
-    ohci_put_td(addr, &td);
+    ohci_put_td(ohci, addr, &td);
     return OHCI_BM(td.flags, TD_CC) != OHCI_CC_NOERROR;
 }
 
@@ -1005,7 +1046,7 @@
         return 0;
 
     for (cur = head; cur; cur = next_ed) {
-        if (!ohci_read_ed(cur, &ed)) {
+        if (!ohci_read_ed(ohci, cur, &ed)) {
             fprintf(stderr, "usb-ohci: ED read error at %x\n", cur);
             return 0;
         }
@@ -1046,7 +1087,7 @@
             }
         }
 
-        ohci_put_ed(cur, &ed);
+        ohci_put_ed(ohci, cur, &ed);
     }
 
     return active;
@@ -1087,7 +1128,7 @@
     OHCIState *ohci = opaque;
     struct ohci_hcca hcca;
 
-    cpu_physical_memory_rw(ohci->hcca, (uint8_t *)&hcca, sizeof(hcca), 0);
+    ohci_read_hcca(ohci, ohci->hcca, &hcca);
 
     /* Process all the lists at the end of the frame */
     if (ohci->ctl & OHCI_CTL_PLE) {
@@ -1131,7 +1172,7 @@
     ohci_sof(ohci);
 
     /* Writeback HCCA */
-    cpu_physical_memory_rw(ohci->hcca, (uint8_t *)&hcca, sizeof(hcca), 1);
+    ohci_put_hcca(ohci, ohci->hcca, &hcca);
 }
 
 /* Start sending SOF tokens across the USB bus, lists are processed in
@@ -1620,7 +1661,8 @@
 };
 
 static void usb_ohci_init(OHCIState *ohci, int num_ports, int devfn,
-            qemu_irq irq, enum ohci_type type, const char *name)
+                          qemu_irq irq, enum ohci_type type,
+                          const char *name, uint32_t localmem_base)
 {
     int i;
 
@@ -1641,6 +1683,7 @@
     }
 
     ohci->mem = cpu_register_io_memory(0, ohci_readfn, ohci_writefn, ohci);
+    ohci->localmem_base = localmem_base;
     ohci->name = name;
 
     ohci->irq = irq;
@@ -1687,7 +1730,7 @@
     ohci->pci_dev.config[0x3d] = 0x01; /* interrupt pin 1 */
 
     usb_ohci_init(&ohci->state, num_ports, devfn, ohci->pci_dev.irq[0],
-                  OHCI_TYPE_PCI, ohci->pci_dev.name);
+                  OHCI_TYPE_PCI, ohci->pci_dev.name, 0);
 
     pci_register_io_region((struct PCIDevice *)ohci, 0, 256,
                            PCI_ADDRESS_SPACE_MEM, ohci_mapfunc);
@@ -1699,7 +1742,19 @@
     OHCIState *ohci = (OHCIState *)qemu_mallocz(sizeof(OHCIState));
 
     usb_ohci_init(ohci, num_ports, devfn, irq,
-                  OHCI_TYPE_PXA, "OHCI USB");
+                  OHCI_TYPE_PXA, "OHCI USB", 0);
 
     cpu_register_physical_memory(base, 0x1000, ohci->mem);
 }
+
+void usb_ohci_init_sm501(uint32_t mmio_base, uint32_t localmem_base,
+                         int num_ports, int devfn, qemu_irq irq)
+{
+    OHCIState *ohci = (OHCIState *)qemu_mallocz(sizeof(OHCIState));
+
+    usb_ohci_init(ohci, num_ports, devfn, irq,
+                  OHCI_TYPE_SM501, "OHCI USB", localmem_base);
+
+    cpu_register_physical_memory(mmio_base, 0x1000, ohci->mem);
+}
+
Index: hw/sm501.c
===================================================================
--- hw/sm501.c	(revision 7187)
+++ hw/sm501.c	(revision 7188)
@@ -1055,7 +1055,8 @@
 	sm501_draw_crt(s);
 }
 
-void sm501_init(uint32_t base, uint32_t local_mem_bytes, CharDriverState *chr)
+void sm501_init(uint32_t base, uint32_t local_mem_bytes, qemu_irq irq,
+                CharDriverState *chr)
 {
     SM501State * s;
     int sm501_system_config_index;
@@ -1089,6 +1090,10 @@
     cpu_register_physical_memory(base + MMIO_BASE_OFFSET + SM501_DC,
                                  0x1000, sm501_disp_ctrl_index);
 
+    /* bridge to usb host emulation module */
+    usb_ohci_init_sm501(base + MMIO_BASE_OFFSET + SM501_USB_HOST, base,
+                        2, -1, irq);
+
     /* bridge to serial emulation module */
     if (chr)
 	serial_mm_init(base + MMIO_BASE_OFFSET + SM501_UART0, 2,
Index: hw/devices.h
===================================================================
--- hw/devices.h	(revision 7187)
+++ hw/devices.h	(revision 7188)
@@ -74,5 +74,10 @@
 qemu_irq tc6393xb_l3v_get(struct tc6393xb_s *s);
 
 /* sm501.c */
-void sm501_init(uint32_t base, uint32_t local_mem_bytes, CharDriverState *chr);
+void sm501_init(uint32_t base, uint32_t local_mem_bytes, qemu_irq irq,
+                CharDriverState *chr);
+
+/* usb-ohci.c */
+void usb_ohci_init_sm501(uint32_t mmio_base, uint32_t localmem_base,
+                         int num_ports, int devfn, qemu_irq irq);
 #endif

 ------------------------------------------------------------------------
r7187 | aurel32 | 2009-04-19 03:52:17 -0500 (Sun, 19 Apr 2009) | 7 lines
Changed paths:
   M /trunk/linux-user/syscall.c

linux-user: Linux kernel's fchmodat and faccessat have three args (no 4th arg)

In the Linux kernel, fchmodat() and faccessat() take three args.
The 4th value is only processed by libc.

Signed-off-by: Takashi YOSHII 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7186)
+++ linux-user/syscall.c	(revision 7187)
@@ -303,15 +303,15 @@
  */
 
 #ifdef TARGET_NR_faccessat
-static int sys_faccessat(int dirfd, const char *pathname, int mode, int flags)
+static int sys_faccessat(int dirfd, const char *pathname, int mode)
 {
-  return (faccessat(dirfd, pathname, mode, flags));
+  return (faccessat(dirfd, pathname, mode, 0));
 }
 #endif
 #ifdef TARGET_NR_fchmodat
-static int sys_fchmodat(int dirfd, const char *pathname, mode_t mode, int flags)
+static int sys_fchmodat(int dirfd, const char *pathname, mode_t mode)
 {
-  return (fchmodat(dirfd, pathname, mode, flags));
+  return (fchmodat(dirfd, pathname, mode, 0));
 }
 #endif
 #if defined(TARGET_NR_fchownat) && defined(USE_UID16)
@@ -425,11 +425,10 @@
  * Try direct syscalls instead
  */
 #if defined(TARGET_NR_faccessat) && defined(__NR_faccessat)
-_syscall4(int,sys_faccessat,int,dirfd,const char *,pathname,int,mode,int,flags)
+_syscall3(int,sys_faccessat,int,dirfd,const char *,pathname,int,mode)
 #endif
 #if defined(TARGET_NR_fchmodat) && defined(__NR_fchmodat)
-_syscall4(int,sys_fchmodat,int,dirfd,const char *,pathname,
-          mode_t,mode,int,flags)
+_syscall3(int,sys_fchmodat,int,dirfd,const char *,pathname, mode_t,mode)
 #endif
 #if defined(TARGET_NR_fchownat) && defined(__NR_fchownat) && defined(USE_UID16)
 _syscall5(int,sys_fchownat,int,dirfd,const char *,pathname,
@@ -4218,7 +4217,7 @@
     case TARGET_NR_faccessat:
         if (!(p = lock_user_string(arg2)))
             goto efault;
-        ret = get_errno(sys_faccessat(arg1, p, arg3, arg4));
+        ret = get_errno(sys_faccessat(arg1, p, arg3));
         unlock_user(p, arg2, 0);
         break;
 #endif
@@ -4944,7 +4943,7 @@
     case TARGET_NR_fchmodat:
         if (!(p = lock_user_string(arg2)))
             goto efault;
-        ret = get_errno(sys_fchmodat(arg1, p, arg3, arg4));
+        ret = get_errno(sys_fchmodat(arg1, p, arg3));
         unlock_user(p, arg2, 0);
         break;
 #endif

 ------------------------------------------------------------------------
r7186 | balrog | 2009-04-18 19:26:31 -0500 (Sat, 18 Apr 2009) | 5 lines
Changed paths:
   M /trunk/hw/twl92230.c

Fix indices in Menelaus save/load.

Version increase won't be helpful here.  Spotted by Sergei Steshenko / Blau
Wirbel.

 ------------------------------------------------------------------------

Index: hw/twl92230.c
===================================================================
--- hw/twl92230.c	(revision 7185)
+++ hw/twl92230.c	(revision 7186)
@@ -777,9 +777,9 @@
     qemu_put_8s(f, &s->vcore[2]);
     qemu_put_8s(f, &s->vcore[3]);
     qemu_put_8s(f, &s->vcore[4]);
-    qemu_put_8s(f, &s->dcdc[3]);
-    qemu_put_8s(f, &s->dcdc[3]);
-    qemu_put_8s(f, &s->dcdc[3]);
+    qemu_put_8s(f, &s->dcdc[0]);
+    qemu_put_8s(f, &s->dcdc[1]);
+    qemu_put_8s(f, &s->dcdc[2]);
     qemu_put_8s(f, &s->ldo[0]);
     qemu_put_8s(f, &s->ldo[1]);
     qemu_put_8s(f, &s->ldo[2]);
@@ -831,9 +831,9 @@
     qemu_get_8s(f, &s->vcore[2]);
     qemu_get_8s(f, &s->vcore[3]);
     qemu_get_8s(f, &s->vcore[4]);
-    qemu_get_8s(f, &s->dcdc[3]);
-    qemu_get_8s(f, &s->dcdc[3]);
-    qemu_get_8s(f, &s->dcdc[3]);
+    qemu_get_8s(f, &s->dcdc[0]);
+    qemu_get_8s(f, &s->dcdc[1]);
+    qemu_get_8s(f, &s->dcdc[2]);
     qemu_get_8s(f, &s->ldo[0]);
     qemu_get_8s(f, &s->ldo[1]);
     qemu_get_8s(f, &s->ldo[2]);

 ------------------------------------------------------------------------
r7185 | blueswir1 | 2009-04-18 14:25:43 -0500 (Sat, 18 Apr 2009) | 9 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/configure

kqemu: only compile kqemu.o if actually needed

kqemu.o is compiled even if kqemu support is disabled. This is useless
(kqemu.o should provide nothing that is actually used in that case) and
slightly confusing. So introduce CONFIG_KQEMU for optionally compiling
kqemu.o.

Signed-off-by: Paul Bolle 

 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7184)
+++ Makefile.target	(revision 7185)
@@ -131,8 +131,11 @@
 
 #########################################################
 # cpu emulator library
-LIBOBJS=exec.o kqemu.o translate-all.o cpu-exec.o\
+LIBOBJS=exec.o translate-all.o cpu-exec.o\
         translate.o host-utils.o
+ifdef CONFIG_KQEMU
+LIBOBJS+= kqemu.o
+endif
 # TCG code generator
 LIBOBJS+= tcg/tcg.o tcg/tcg-runtime.o
 CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH)
Index: configure
===================================================================
--- configure	(revision 7184)
+++ configure	(revision 7185)
@@ -1769,6 +1769,7 @@
     echo "#define TARGET_I386 1" >> $config_h
     if test $kqemu = "yes" -a "$target_softmmu" = "yes"
     then
+      echo "CONFIG_KQEMU=yes" >> $config_mak
       echo "#define USE_KQEMU 1" >> $config_h
     fi
     if test "$kvm" = "yes" ; then
@@ -1784,6 +1785,7 @@
     echo "#define TARGET_X86_64 1" >> $config_h
     if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64"
     then
+      echo "CONFIG_KQEMU=yes" >> $config_mak
       echo "#define USE_KQEMU 1" >> $config_h
     fi
     if test "$kvm" = "yes" ; then

 ------------------------------------------------------------------------
r7184 | aurel32 | 2009-04-18 11:16:12 -0500 (Sat, 18 Apr 2009) | 15 lines
Changed paths:
   M /trunk/linux-user/syscall.c

linux-user: fix IPCOP_sem* and implement sem*

Fix and cleanup IPCOP_sem* ipc calls handling and
implement sem* syscalls.

Riku:

1) Uglify whitespace so that diff gets smaller and easier
to review

2) use __get_user in target_to_host_sembuf

Signed-off-by: Kirill A. Shutemov 
Signed-off-by: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7183)
+++ linux-user/syscall.c	(revision 7184)
@@ -2006,7 +2006,8 @@
 
     if (!lock_user_struct(VERIFY_READ, target_sd, target_addr, 1))
         return -TARGET_EFAULT;
-    target_to_host_ipc_perm(&(host_sd->sem_perm),target_addr);
+    if (target_to_host_ipc_perm(&(host_sd->sem_perm),target_addr))
+        return -TARGET_EFAULT;
     host_sd->sem_nsems = tswapl(target_sd->sem_nsems);
     host_sd->sem_otime = tswapl(target_sd->sem_otime);
     host_sd->sem_ctime = tswapl(target_sd->sem_ctime);
@@ -2021,7 +2022,8 @@
 
     if (!lock_user_struct(VERIFY_WRITE, target_sd, target_addr, 0))
         return -TARGET_EFAULT;
-    host_to_target_ipc_perm(target_addr,&(host_sd->sem_perm));
+    if (host_to_target_ipc_perm(target_addr,&(host_sd->sem_perm)))
+        return -TARGET_EFAULT;;
     target_sd->sem_nsems = tswapl(host_sd->sem_nsems);
     target_sd->sem_otime = tswapl(host_sd->sem_otime);
     target_sd->sem_ctime = tswapl(host_sd->sem_ctime);
@@ -2029,135 +2031,214 @@
     return 0;
 }
 
+struct target_seminfo {
+    int semmap;
+    int semmni;
+    int semmns;
+    int semmnu;
+    int semmsl;
+    int semopm;
+    int semume;
+    int semusz;
+    int semvmx;
+    int semaem;
+};
+
+static inline abi_long host_to_target_seminfo(abi_ulong target_addr,
+                                              struct seminfo *host_seminfo)
+{
+    struct target_seminfo *target_seminfo;
+    if (!lock_user_struct(VERIFY_WRITE, target_seminfo, target_addr, 0))
+        return -TARGET_EFAULT;
+    __put_user(host_seminfo->semmap, &target_seminfo->semmap);
+    __put_user(host_seminfo->semmni, &target_seminfo->semmni);
+    __put_user(host_seminfo->semmns, &target_seminfo->semmns);
+    __put_user(host_seminfo->semmnu, &target_seminfo->semmnu);
+    __put_user(host_seminfo->semmsl, &target_seminfo->semmsl);
+    __put_user(host_seminfo->semopm, &target_seminfo->semopm);
+    __put_user(host_seminfo->semume, &target_seminfo->semume);
+    __put_user(host_seminfo->semusz, &target_seminfo->semusz);
+    __put_user(host_seminfo->semvmx, &target_seminfo->semvmx);
+    __put_user(host_seminfo->semaem, &target_seminfo->semaem);
+    unlock_user_struct(target_seminfo, target_addr, 1);
+    return 0;
+}
+
 union semun {
 	int val;
 	struct semid_ds *buf;
 	unsigned short *array;
+	struct seminfo *__buf;
 };
 
 union target_semun {
 	int val;
-	abi_long buf;
-	unsigned short int *array;
+	abi_ulong buf;
+	abi_ulong array;
+	abi_ulong __buf;
 };
 
-static inline abi_long target_to_host_semun(int cmd,
-                                            union semun *host_su,
-                                            abi_ulong target_addr,
-                                            struct semid_ds *ds)
+static inline abi_long target_to_host_semarray(int semid, unsigned short **host_array,
+                                               abi_ulong target_addr)
 {
-    union target_semun *target_su;
+    int nsems;
+    unsigned short *array;
+    union semun semun;
+    struct semid_ds semid_ds;
+    int i, ret;
 
-    switch( cmd ) {
-	case IPC_STAT:
-	case IPC_SET:
-           if (!lock_user_struct(VERIFY_READ, target_su, target_addr, 1))
-               return -TARGET_EFAULT;
-	   target_to_host_semid_ds(ds,target_su->buf);
-	   host_su->buf = ds;
-           unlock_user_struct(target_su, target_addr, 0);
-	   break;
-	case GETVAL:
-	case SETVAL:
-           if (!lock_user_struct(VERIFY_READ, target_su, target_addr, 1))
-               return -TARGET_EFAULT;
-	   host_su->val = tswapl(target_su->val);
-           unlock_user_struct(target_su, target_addr, 0);
-	   break;
-	case GETALL:
-	case SETALL:
-           if (!lock_user_struct(VERIFY_READ, target_su, target_addr, 1))
-               return -TARGET_EFAULT;
-	   *host_su->array = tswap16(*target_su->array);
-           unlock_user_struct(target_su, target_addr, 0);
-	   break;
-	default:
-           gemu_log("semun operation not fully supported: %d\n", (int)cmd);
+    semun.buf = &semid_ds;
+
+    ret = semctl(semid, 0, IPC_STAT, semun);
+    if (ret == -1)
+        return get_errno(ret);
+
+    nsems = semid_ds.sem_nsems;
+
+    *host_array = malloc(nsems*sizeof(unsigned short));
+    array = lock_user(VERIFY_READ, target_addr,
+                      nsems*sizeof(unsigned short), 1);
+    if (!array)
+        return -TARGET_EFAULT;
+
+    for(i=0; ibuf,ds);
-           unlock_user_struct(target_su, target_addr, 1);
-	   break;
-	case GETVAL:
-	case SETVAL:
-           if (lock_user_struct(VERIFY_WRITE, target_su, target_addr, 0))
-               return -TARGET_EFAULT;
-	   target_su->val = tswapl(host_su->val);
-           unlock_user_struct(target_su, target_addr, 1);
-	   break;
-	case GETALL:
-	case SETALL:
-           if (lock_user_struct(VERIFY_WRITE, target_su, target_addr, 0))
-               return -TARGET_EFAULT;
-	   *target_su->array = tswap16(*host_su->array);
-           unlock_user_struct(target_su, target_addr, 1);
-	   break;
-        default:
-           gemu_log("semun operation not fully supported: %d\n", (int)cmd);
+    semun.buf = &semid_ds;
+
+    ret = semctl(semid, 0, IPC_STAT, semun);
+    if (ret == -1)
+        return get_errno(ret);
+
+    nsems = semid_ds.sem_nsems;
+
+    array = lock_user(VERIFY_WRITE, target_addr,
+                      nsems*sizeof(unsigned short), 0);
+    if (!array)
+        return -TARGET_EFAULT;
+
+    for(i=0; i
 ------------------------------------------------------------------------
r7182 | aliguori | 2009-04-18 10:36:23 -0500 (Sat, 18 Apr 2009) | 8 lines
Changed paths:
   M /trunk/qemu-io.c

qemu-io: Verify read data by patterns (Kevin Wolf)

This patch adds a -P option to read and readv which allows comparing the read
data to a given pattern. This can be used to verify data written by write -P.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: qemu-io.c
===================================================================
--- qemu-io.c	(revision 7181)
+++ qemu-io.c	(revision 7182)
@@ -192,6 +192,7 @@
 " Reads a segment of the currently open file, optionally dumping it to the\n"
 " standard output stream (with -v option) for subsequent inspection.\n"
 " -p, -- use bdrv_pread to read the file\n"
+" -P, -- use a pattern to verify read data\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -v, -- dump buffer to standard output\n"
 " -q, -- quite mode, do not show I/O statistics\n"
@@ -207,8 +208,10 @@
 	char *buf;
 	int64_t offset;
 	int count, total;
+	int pattern = 0;
+	int Pflag = 0;
 
-	while ((c = getopt(argc, argv, "Cpqv")) != EOF) {
+	while ((c = getopt(argc, argv, "CpP:qv")) != EOF) {
 		switch (c) {
 		case 'C':
 			Cflag = 1;
@@ -216,6 +219,10 @@
 		case 'p':
 			pflag = 1;
 			break;
+		case 'P':
+			Pflag = 1;
+			pattern = atoi(optarg);
+			break;
 		case 'q':
 			qflag = 1;
 			break;
@@ -270,6 +277,17 @@
 		return 0;
 	}
 
+	if (Pflag) {
+		void* cmp_buf = malloc(count);
+		memset(cmp_buf, pattern, count);
+		if (memcmp(buf, cmp_buf, count)) {
+			printf("Pattern verification failed at offset %lld, "
+				"%d bytes\n",
+				(long long) offset, count);
+		}
+		free(cmp_buf);
+	}
+
 	if (qflag)
 		return 0;
 
@@ -291,7 +309,7 @@
 	.cfunc		= read_f,
 	.argmin		= 2,
 	.argmax		= -1,
-	.args		= "[-aCpqv] off len",
+	.args		= "[-aCpqv] [-P pattern ] off len",
 	.oneline	= "reads a number of bytes at a specified offset",
 	.help		= read_help,
 };
@@ -312,6 +330,7 @@
 " standard output stream (with -v option) for subsequent inspection.\n"
 " Uses multiple iovec buffers if more than one byte range is specified.\n"
 " -C, -- report statistics in a machine parsable format\n"
+" -P, -- use a pattern to verify read data\n"
 " -v, -- dump buffer to standard output\n"
 " -q, -- quite mode, do not show I/O statistics\n"
 "\n");
@@ -328,12 +347,18 @@
 	int count = 0, total;
 	int nr_iov, i;
 	QEMUIOVector qiov;
+	int pattern = 0;
+	int Pflag = 0;
 
-	while ((c = getopt(argc, argv, "Cqv")) != EOF) {
+	while ((c = getopt(argc, argv, "CP:qv")) != EOF) {
 		switch (c) {
 		case 'C':
 			Cflag = 1;
 			break;
+		case 'P':
+			Pflag = 1;
+			pattern = atoi(optarg);
+			break;
 		case 'q':
 			qflag = 1;
 			break;
@@ -406,6 +431,17 @@
 		return 0;
 	}
 
+	if (Pflag) {
+		void* cmp_buf = malloc(count);
+		memset(cmp_buf, pattern, count);
+		if (memcmp(buf, cmp_buf, count)) {
+			printf("Pattern verification failed at offset %lld, "
+				"%d bytes\n",
+				(long long) offset, count);
+		}
+		free(cmp_buf);
+	}
+
 	if (qflag)
 		return 0;
 
@@ -426,7 +462,7 @@
 	.cfunc		= readv_f,
 	.argmin		= 2,
 	.argmax		= -1,
-	.args		= "[-Cqv] off len [len..]",
+	.args		= "[-Cqv] [-P pattern ] off len [len..]",
 	.oneline	= "reads a number of bytes at a specified offset",
 	.help		= readv_help,
 };

 ------------------------------------------------------------------------
r7181 | aliguori | 2009-04-18 10:36:19 -0500 (Sat, 18 Apr 2009) | 8 lines
Changed paths:
   M /trunk/qemu-io.c

qemu-io: Fix handling of bdrv_is_allocated() return value (Kevin Wolf)

bdrv_is_allocated() returns a boolean which indicates if the offset is
allocated, not 0 on success and everything else is an error.

Signed-off-by: Kevin Wolf 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: qemu-io.c
===================================================================
--- qemu-io.c	(revision 7180)
+++ qemu-io.c	(revision 7181)
@@ -794,6 +794,7 @@
 	char s1[64];
 	int num;
 	int ret;
+	const char *retstr;
 
 	offset = cvtnum(argv[1]);
 	if (offset & 0x1ff) {
@@ -808,18 +809,15 @@
 		nb_sectors = 1;
 
 	ret = bdrv_is_allocated(bs, offset >> 9, nb_sectors, &num);
-	if (ret) {
-		printf("is_allocated: %s", strerror(ret));
-		return 0;
-	}
 
 	cvtstr(offset, s1, sizeof(s1));
 
+	retstr = ret ? "allocated" : "not allocated";
 	if (nb_sectors == 1)
-		printf("sector allocated at offset %s\n", s1);
+		printf("sector %s at offset %s\n", retstr, s1);
 	else
-		printf("%d/%d sectors allocated at offset %s\n",
-			num, nb_sectors, s1);
+		printf("%d/%d sectors %s at offset %s\n",
+			num, nb_sectors, retstr, s1);
 	return 0;
 }
 

 ------------------------------------------------------------------------
r7180 | aliguori | 2009-04-18 10:36:15 -0500 (Sat, 18 Apr 2009) | 9 lines
Changed paths:
   M /trunk/monitor.c

monitor: Update command help (Jan Kiszka)

Align some monitor help texts to the related command parameter
definitions. host_net_add is skipped intentionally, will be slightly
reworked in a separate patch later.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: monitor.c
===================================================================
--- monitor.c	(revision 7179)
+++ monitor.c	(revision 7180)
@@ -1640,7 +1640,7 @@
     { "commit", "s", do_commit,
       "device|all", "commit changes to the disk images (if -snapshot is used) or backing files" },
     { "info", "s?", do_info,
-      "subcommand", "show various information about the system state" },
+      "[subcommand]", "show various information about the system state" },
     { "q|quit", "", do_quit,
       "", "quit the emulator" },
     { "eject", "-fB", do_eject,
@@ -1654,7 +1654,7 @@
     { "log", "s", do_log,
       "item1[,...]", "activate logging of the specified items to '/tmp/qemu.log'" },
     { "savevm", "s?", do_savevm,
-      "tag|id", "save a VM snapshot. If no tag or id are provided, a new snapshot is created" },
+      "[tag|id]", "save a VM snapshot. If no tag or id are provided, a new snapshot is created" },
     { "loadvm", "s", do_loadvm,
       "tag|id", "restore a VM snapshot from its tag or id" },
     { "delvm", "s", do_delvm,
@@ -1667,7 +1667,7 @@
       "", "resume emulation", },
 #ifdef CONFIG_GDBSTUB
     { "gdbserver", "s?", do_gdbserver,
-      "[port]", "start gdbserver session (default port=1234)", },
+      "[device]", "start gdbserver on given device (default 'tcp::1234'), stop with 'none'", },
 #endif
     { "x", "/l", do_memory_dump,
       "/fmt addr", "virtual memory dump starting at 'addr'", },
@@ -1700,7 +1700,7 @@
       "index", "set which mouse device receives events" },
 #ifdef HAS_AUDIO
     { "wavcapture", "si?i?i?", do_wav_capture,
-      "path [frequency bits channels]",
+      "path [frequency [bits [channels]]]",
       "capture audio to a wave file (default frequency=44100 bits=16 channels=2)" },
 #endif
     { "stopcapture", "i", do_stop_capture,
@@ -1738,8 +1738,8 @@
     { "balloon", "i", do_balloon,
       "target", "request VM to change it's memory allocation (in MB)" },
     { "set_link", "ss", do_set_link,
-      "name [up|down]", "change the link status of a network adapter" },
-    { "acl", "sss?i?", do_acl, "  [] []\n",
+      "name up|down", "change the link status of a network adapter" },
+    { "acl", "sss?i?", do_acl, "  [ []]\n",
                                "acl show vnc.username\n"
                                "acl policy vnc.username deny\n"
                                "acl allow vnc.username fred\n"

 ------------------------------------------------------------------------
r7179 | aliguori | 2009-04-18 10:36:11 -0500 (Sat, 18 Apr 2009) | 25 lines
Changed paths:
   M /trunk/target-i386/cpu.h
   M /trunk/target-i386/helper.c

x86: Enhanced dump of segment registers (Jan Kiszka)

Parse the descriptor flags that segment registers refer to and show the
result in a more human-friendly format. The output of info registers eg.
then looks like this:

[...]
ES =007b 00000000 ffffffff 00cff300 DPL=3 DS   [-WA]
CS =0060 00000000 ffffffff 00c09b00 DPL=0 CS32 [-RA]
SS =0068 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
DS =007b 00000000 ffffffff 00cff300 DPL=3 DS   [-WA]
FS =0000 00000000 00000000 00000000
GS =0033 b7dd66c0 ffffffff b7dff3dd DPL=3 DS   [-WA]
LDT=0000 00000000 00000000 00008200 DPL=0 LDT
TR =0080 c06da700 0000206b 00008900 DPL=0 TSS32-avl
[...]

Changes in this version:
 - refactoring so that only a single helper is used for dumping the
   segment descriptor cache
 - tiny typo fixed that broke 64-bit segment type names

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: target-i386/helper.c
===================================================================
--- target-i386/helper.c	(revision 7178)
+++ target-i386/helper.c	(revision 7179)
@@ -570,6 +570,61 @@
     "SARQ",
 };
 
+static void
+cpu_x86_dump_seg_cache(CPUState *env, FILE *f,
+                       int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                       const char *name, struct SegmentCache *sc)
+{
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_CS64_MASK) {
+        cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
+                    sc->selector, sc->base, sc->limit, sc->flags);
+    } else
+#endif
+    {
+        cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector,
+                    (uint32_t)sc->base, sc->limit, sc->flags);
+    }
+
+    if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK))
+        goto done;
+
+    cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT);
+    if (sc->flags & DESC_S_MASK) {
+        if (sc->flags & DESC_CS_MASK) {
+            cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" :
+                           ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16"));
+            cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-',
+                        (sc->flags & DESC_R_MASK) ? 'R' : '-');
+        } else {
+            cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS  " : "DS16");
+            cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-',
+                        (sc->flags & DESC_W_MASK) ? 'W' : '-');
+        }
+        cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-');
+    } else {
+        static const char *sys_type_name[2][16] = {
+            { /* 32 bit mode */
+                "Reserved", "TSS16-avl", "LDT", "TSS16-busy",
+                "CallGate16", "TaskGate", "IntGate16", "TrapGate16",
+                "Reserved", "TSS32-avl", "Reserved", "TSS32-busy",
+                "CallGate32", "Reserved", "IntGate32", "TrapGate32"
+            },
+            { /* 64 bit mode */
+                "", "Reserved", "LDT", "Reserved", "Reserved",
+                "Reserved", "Reserved", "Reserved", "Reserved",
+                "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64",
+                "Reserved", "IntGate64", "TrapGate64"
+            }
+        };
+        cpu_fprintf(f, sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0]
+                                    [(sc->flags & DESC_TYPE_MASK)
+                                     >> DESC_TYPE_SHIFT]);
+    }
+done:
+    cpu_fprintf(f, "\n");
+}
+
 void cpu_dump_state(CPUState *env, FILE *f,
                     int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
                     int flags)
@@ -648,27 +703,15 @@
                     env->halted);
     }
 
+    for(i = 0; i < 6; i++) {
+        cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i],
+                               &env->segs[i]);
+    }
+    cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt);
+    cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr);
+
 #ifdef TARGET_X86_64
     if (env->hflags & HF_LMA_MASK) {
-        for(i = 0; i < 6; i++) {
-            SegmentCache *sc = &env->segs[i];
-            cpu_fprintf(f, "%s =%04x %016" PRIx64 " %08x %08x\n",
-                        seg_name[i],
-                        sc->selector,
-                        sc->base,
-                        sc->limit,
-                        sc->flags);
-        }
-        cpu_fprintf(f, "LDT=%04x %016" PRIx64 " %08x %08x\n",
-                    env->ldt.selector,
-                    env->ldt.base,
-                    env->ldt.limit,
-                    env->ldt.flags);
-        cpu_fprintf(f, "TR =%04x %016" PRIx64 " %08x %08x\n",
-                    env->tr.selector,
-                    env->tr.base,
-                    env->tr.limit,
-                    env->tr.flags);
         cpu_fprintf(f, "GDT=     %016" PRIx64 " %08x\n",
                     env->gdt.base, env->gdt.limit);
         cpu_fprintf(f, "IDT=     %016" PRIx64 " %08x\n",
@@ -685,25 +728,6 @@
     } else
 #endif
     {
-        for(i = 0; i < 6; i++) {
-            SegmentCache *sc = &env->segs[i];
-            cpu_fprintf(f, "%s =%04x %08x %08x %08x\n",
-                        seg_name[i],
-                        sc->selector,
-                        (uint32_t)sc->base,
-                        sc->limit,
-                        sc->flags);
-        }
-        cpu_fprintf(f, "LDT=%04x %08x %08x %08x\n",
-                    env->ldt.selector,
-                    (uint32_t)env->ldt.base,
-                    env->ldt.limit,
-                    env->ldt.flags);
-        cpu_fprintf(f, "TR =%04x %08x %08x %08x\n",
-                    env->tr.selector,
-                    (uint32_t)env->tr.base,
-                    env->tr.limit,
-                    env->tr.flags);
         cpu_fprintf(f, "GDT=     %08x %08x\n",
                     (uint32_t)env->gdt.base, env->gdt.limit);
         cpu_fprintf(f, "IDT=     %08x %08x\n",
Index: target-i386/cpu.h
===================================================================
--- target-i386/cpu.h	(revision 7178)
+++ target-i386/cpu.h	(revision 7179)
@@ -82,9 +82,10 @@
 #define DESC_AVL_MASK   (1 << 20)
 #define DESC_P_MASK     (1 << 15)
 #define DESC_DPL_SHIFT  13
-#define DESC_DPL_MASK   (1 << DESC_DPL_SHIFT)
+#define DESC_DPL_MASK   (3 << DESC_DPL_SHIFT)
 #define DESC_S_MASK     (1 << 12)
 #define DESC_TYPE_SHIFT 8
+#define DESC_TYPE_MASK  (15 << DESC_TYPE_SHIFT)
 #define DESC_A_MASK     (1 << 8)
 
 #define DESC_CS_MASK    (1 << 11) /* 1=code segment 0=data segment */

 ------------------------------------------------------------------------
r7178 | aliguori | 2009-04-18 10:36:06 -0500 (Sat, 18 Apr 2009) | 12 lines
Changed paths:
   M /trunk/qemu-io.c

a couple of qemu-io fixes (Christoph Hellwig)

Fix two stupid bugs that I forgot to push out:


 - qiov->size already is in bytes, no need to shift it
 - actually use the supplied pattern in the writev command


Signed-off-by: Christoph Hellwig 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: qemu-io.c
===================================================================
--- qemu-io.c	(revision 7177)
+++ qemu-io.c	(revision 7178)
@@ -172,7 +172,7 @@
 	while (async_ret == NOT_DONE)
 		qemu_aio_wait();
 
-	*total = qiov->size >> 9;
+	*total = qiov->size;
 	return async_ret < 0 ? async_ret : 1;
 }
 
@@ -634,7 +634,7 @@
 
 	nr_iov = argc - optind;
 	qemu_iovec_init(&qiov, nr_iov);
-	buf = p = qemu_io_alloc(count, 0xab);
+	buf = p = qemu_io_alloc(count, pattern);
 	for (i = 0; i < nr_iov; i++) {
 	        size_t len;
 

 ------------------------------------------------------------------------
r7177 | aliguori | 2009-04-18 10:36:02 -0500 (Sat, 18 Apr 2009) | 7 lines
Changed paths:
   M /trunk/Makefile
   M /trunk/configure

buildsystem: consistently use install (Christoph Egger)

attached patch makes qemu use install consistently.

Signed-off-by: Christoph Egger 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: Makefile
===================================================================
--- Makefile	(revision 7176)
+++ Makefile	(revision 7177)
@@ -255,30 +255,30 @@
 endif
 
 install-doc: $(DOCS)
-	mkdir -p "$(DESTDIR)$(docdir)"
-	$(INSTALL) -m 644 qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
+	$(INSTALL_DIR) "$(DESTDIR)$(docdir)"
+	$(INSTALL_DATA) qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
 ifndef CONFIG_WIN32
-	mkdir -p "$(DESTDIR)$(mandir)/man1"
-	$(INSTALL) -m 644 qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
-	mkdir -p "$(DESTDIR)$(mandir)/man8"
-	$(INSTALL) -m 644 qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
+	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
+	$(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
+	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
+	$(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
 endif
 
 install: all $(if $(BUILD_DOCS),install-doc)
-	mkdir -p "$(DESTDIR)$(bindir)"
+	$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
 ifneq ($(TOOLS),)
-	$(INSTALL) -m 755 $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
+	$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
 endif
 ifneq ($(BLOBS),)
-	mkdir -p "$(DESTDIR)$(datadir)"
+	$(INSTALL_DIR) "$(DESTDIR)$(datadir)"
 	set -e; for x in $(BLOBS); do \
-		$(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
+		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
 	done
 endif
 ifndef CONFIG_WIN32
-	mkdir -p "$(DESTDIR)$(datadir)/keymaps"
+	$(INSTALL_DIR) "$(DESTDIR)$(datadir)/keymaps"
 	set -e; for x in $(KEYMAPS); do \
-		$(INSTALL) -m 644 $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
+		$(INSTALL_DATA) $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
 	done
 endif
 	for d in $(TARGET_DIRS); do \
Index: configure
===================================================================
--- configure	(revision 7176)
+++ configure	(revision 7177)
@@ -1339,6 +1339,9 @@
 echo "#define CONFIG_QEMU_SHAREDIR \"$prefix$datasuffix\"" >> $config_h
 echo "MAKE=$make" >> $config_mak
 echo "INSTALL=$install" >> $config_mak
+echo "INSTALL_DIR=$install -d -m0755 -p" >> $config_mak
+echo "INSTALL_DATA=$install -m0644 -p" >> $config_mak
+echo "INSTALL_PROG=$install -m0755 -p" >> $config_mak
 echo "CC=$cc" >> $config_mak
 echo "HOST_CC=$host_cc" >> $config_mak
 echo "AR=$ar" >> $config_mak

 ------------------------------------------------------------------------
r7176 | blueswir1 | 2009-04-18 02:32:41 -0500 (Sat, 18 Apr 2009) | 1 line
Changed paths:
   M /trunk/slirp/tcp_timer.c
   M /trunk/slirp/udp.c

Use ANSI prototypes to please sparse
 ------------------------------------------------------------------------

Index: slirp/tcp_timer.c
===================================================================
--- slirp/tcp_timer.c	(revision 7175)
+++ slirp/tcp_timer.c	(revision 7176)
@@ -44,7 +44,7 @@
  * Fast timeout routine for processing delayed acks
  */
 void
-tcp_fasttimo()
+tcp_fasttimo(void)
 {
 	register struct socket *so;
 	register struct tcpcb *tp;
@@ -69,7 +69,7 @@
  * causes finite state machine actions if timers expire.
  */
 void
-tcp_slowtimo()
+tcp_slowtimo(void)
 {
 	register struct socket *ip, *ipnxt;
 	register struct tcpcb *tp;
@@ -113,8 +113,7 @@
  * Cancel all timers for TCP tp.
  */
 void
-tcp_canceltimers(tp)
-	struct tcpcb *tp;
+tcp_canceltimers(struct tcpcb *tp)
 {
 	register int i;
 
Index: slirp/udp.c
===================================================================
--- slirp/udp.c	(revision 7175)
+++ slirp/udp.c	(revision 7176)
@@ -63,7 +63,7 @@
 struct	socket *udp_last_so = &udb;
 
 void
-udp_init()
+udp_init(void)
 {
 	udb.so_next = udb.so_prev = &udb;
 }
@@ -72,9 +72,7 @@
  * ip->ip_len length data (IPDU)
  */
 void
-udp_input(m, iphlen)
-	register struct mbuf *m;
-	int iphlen;
+udp_input(register struct mbuf *m, int iphlen)
 {
 	register struct ip *ip;
 	register struct udphdr *uh;
@@ -330,8 +328,7 @@
 }
 
 int
-udp_attach(so)
-     struct socket *so;
+udp_attach(struct socket *so)
 {
   struct sockaddr_in addr;
 
@@ -363,8 +360,7 @@
 }
 
 void
-udp_detach(so)
-	struct socket *so;
+udp_detach(struct socket *so)
 {
 	closesocket(so->s);
 	/* if (so->so_m) m_free(so->so_m);    done by sofree */
@@ -631,11 +627,7 @@
 }
 
 struct socket *
-udp_listen(port, laddr, lport, flags)
-	u_int port;
-	u_int32_t laddr;
-	u_int lport;
-	int flags;
+udp_listen(u_int port, u_int32_t laddr, u_int lport, int flags)
 {
 	struct sockaddr_in addr;
 	struct socket *so;

 ------------------------------------------------------------------------
r7175 | blueswir1 | 2009-04-18 02:29:59 -0500 (Sat, 18 Apr 2009) | 1 line
Changed paths:
   M /trunk/gdbstub.c

Add 'static' to avoid a sparse warning
 ------------------------------------------------------------------------

Index: gdbstub.c
===================================================================
--- gdbstub.c	(revision 7174)
+++ gdbstub.c	(revision 7175)
@@ -334,7 +334,7 @@
 
 static gdb_syscall_complete_cb gdb_current_syscall_cb;
 
-enum {
+static enum {
     GDB_SYS_UNKNOWN,
     GDB_SYS_ENABLED,
     GDB_SYS_DISABLED,

 ------------------------------------------------------------------------
r7174 | blueswir1 | 2009-04-18 02:29:30 -0500 (Sat, 18 Apr 2009) | 1 line
Changed paths:
   M /trunk/slirp/libslirp.h
   M /trunk/sysemu.h

Move bootp_filename to avoid a sparse warning
 ------------------------------------------------------------------------

Index: slirp/libslirp.h
===================================================================
--- slirp/libslirp.h	(revision 7173)
+++ slirp/libslirp.h	(revision 7174)
@@ -25,6 +25,7 @@
 
 extern const char *tftp_prefix;
 extern char slirp_hostname[33];
+extern const char *bootp_filename;
 
 void slirp_stats(void);
 void slirp_socket_recv(int addr_low_byte, int guest_port, const uint8_t *buf,
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7173)
+++ sysemu.h	(revision 7174)
@@ -103,7 +103,6 @@
 extern int no_quit;
 extern int semihosting_enabled;
 extern int old_param;
-extern const char *bootp_filename;
 
 #ifdef USE_KQEMU
 extern int kqemu_allowed;

 ------------------------------------------------------------------------
r7169 | aliguori | 2009-04-17 16:01:11 -0500 (Fri, 17 Apr 2009) | 10 lines
Changed paths:
   A /trunk/pc-bios/bios-pq/0013_fix-non-acpi-timer-interrupt-routing.patch
   M /trunk/pc-bios/bios-pq/series
   M /trunk/pc-bios/bios.bin

Fix non-ACPI Timer Interrupt Routing (Beth Kon)

Replicate ACPI irq0->inti2 override in mp table for non-acpi case.

v1 -> v2 adds comment suggested by Ryan.

Signed-off-by: Beth Kon 
Signed-off-by: Anthony Liguori 


 ------------------------------------------------------------------------

Index: pc-bios/bios.bin
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: pc-bios/bios-pq/0013_fix-non-acpi-timer-interrupt-routing.patch
===================================================================
--- pc-bios/bios-pq/0013_fix-non-acpi-timer-interrupt-routing.patch	(revision 0)
+++ pc-bios/bios-pq/0013_fix-non-acpi-timer-interrupt-routing.patch	(revision 7169)
@@ -0,0 +1,38 @@
+Fix non-ACPI Timer Interrupt Routing (Beth Kon)
+
+Replicate ACPI irq0->inti2 override in mp table for non-acpi case.
+
+v1 -> v2 adds comment suggested by Ryan.
+
+Signed-off-by: Beth Kon 
+Signed-off-by: Anthony Liguori 
+
+diff --git a/bios/rombios32.c b/bios/rombios32.c
+index 7be4216..dc7b5f3 100644
+--- a/bios/rombios32.c
++++ b/bios/rombios32.c
+@@ -1168,6 +1168,12 @@ static void mptable_init(void)
+ 
+     /* irqs */
+     for(i = 0; i < 16; i++) {
++#ifdef BX_QEMU
++        /* One entry per ioapic input. Input 2 is covered by 
++           irq0->inti2 override (i == 0). irq 2 is unused */
++        if (i == 2)
++            continue;
++#endif        
+         putb(&q, 3); /* entry type = I/O interrupt */
+         putb(&q, 0); /* interrupt type = vectored interrupt */
+         putb(&q, 0); /* flags: po=0, el=0 */
+@@ -1175,7 +1181,11 @@ static void mptable_init(void)
+         putb(&q, 0); /* source bus ID = ISA */
+         putb(&q, i); /* source bus IRQ */
+         putb(&q, ioapic_id); /* dest I/O APIC ID */
++#ifdef BX_QEMU
++        putb(&q, i == 0 ? 2 : i); /* dest I/O APIC interrupt in */
++#else
+         putb(&q, i); /* dest I/O APIC interrupt in */
++#endif        
+     }
+     /* patch length */
+     len = q - mp_config_table;
Index: pc-bios/bios-pq/series
===================================================================
--- pc-bios/bios-pq/series	(revision 7168)
+++ pc-bios/bios-pq/series	(revision 7169)
@@ -10,3 +10,4 @@
 0010_bios-mark-the-acpi-sci-interrupt-as-connected-to-irq-9.patch
 0011_read-additional-acpi-tables-from-a-vm.patch
 0012-load-smbios-entries-and-files-from-qemu.patch
+0013_fix-non-acpi-timer-interrupt-routing.patch

 ------------------------------------------------------------------------
r7168 | aliguori | 2009-04-17 15:50:58 -0500 (Fri, 17 Apr 2009) | 27 lines
Changed paths:
   M /trunk/hw/hpet.c

hpet: Fix emulation of HPET_TN_SETVAL (Jan Kiszka)

While Intel's spec is not that clear here, latest changes to Linux' HPET
code (commit c23e253e67c9d8a91a0ffa33c1f571a17f0a2403, "x86: hpet: stop
HPET_COUNTER when programming periodic mode") strongly suggest that
HPET_TN_SETVAL rather means: Set _both_ the comparator value and
register.

With this patch applied, I'm again able to boot 2.6.30-rc kernels as
they no longer panic like this (which was due to the comparator
register remaining 0):

ENABLING IO-APIC IRQs
..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
..MP-BIOS bug: 8254 timer not connected to IO-APIC
...trying to set up timer (IRQ0) through the 8259A ...
..... (found apic 0 pin 2) ...
....... failed.
...trying to set up timer as Virtual Wire IRQ...
..... failed.
...trying to set up timer as ExtINT IRQ...
..... failed :(.
Kernel panic - not syncing: IO-APIC + timer doesn't work! [...]

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/hpet.c
===================================================================
--- hw/hpet.c	(revision 7167)
+++ hw/hpet.c	(revision 7168)
@@ -411,7 +411,7 @@
                            (timer->config & HPET_TN_SETVAL))
                     timer->cmp = (timer->cmp & 0xffffffff00000000ULL)
                                   | new_val;
-                else {
+                if (timer_is_periodic(timer)) {
                     /*
                      * FIXME: Clamp period to reasonable min value?
                      * Clamp period to reasonable max value

 ------------------------------------------------------------------------
r7167 | aliguori | 2009-04-17 15:50:54 -0500 (Fri, 17 Apr 2009) | 12 lines
Changed paths:
   M /trunk/target-i386/kvm.c

kvm: Fix cpuid initialization (Jan Kiszka)

Fix (more or less) spurious guest boot failures due to corrupted cpuid
states. The reason was insufficient initialization of cpuid entries
before passing them to the kernel.

At this chance also fix improper entry pointer progression and simplify
the code a bit.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: target-i386/kvm.c
===================================================================
--- target-i386/kvm.c	(revision 7166)
+++ target-i386/kvm.c	(revision 7167)
@@ -41,12 +41,11 @@
         struct kvm_cpuid_entry2 entries[100];
     } __attribute__((packed)) cpuid_data;
     uint32_t limit, i, j, cpuid_i;
-    uint32_t eax, ebx, ecx, edx;
+    uint32_t unused;
 
     cpuid_i = 0;
 
-    cpu_x86_cpuid(env, 0, 0, &eax, &ebx, &ecx, &edx);
-    limit = eax;
+    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
     for (i = 0; i <= limit; i++) {
         struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
@@ -56,26 +55,17 @@
             /* Keep reading function 2 till all the input is received */
             int times;
 
-            cpu_x86_cpuid(env, i, 0, &eax, &ebx, &ecx, &edx);
-            times = eax & 0xff;
-
             c->function = i;
-            c->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
-            c->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-            c->eax = eax;
-            c->ebx = ebx;
-            c->ecx = ecx;
-            c->edx = edx;
+            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
+                       KVM_CPUID_FLAG_STATE_READ_NEXT;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            times = c->eax & 0xff;
 
             for (j = 1; j < times; ++j) {
-                cpu_x86_cpuid(env, i, 0, &eax, &ebx, &ecx, &edx);
+                c = &cpuid_data.entries[cpuid_i++];
                 c->function = i;
-                c->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
-                c->eax = eax;
-                c->ebx = ebx;
-                c->ecx = ecx;
-                c->edx = edx;
-                c = &cpuid_data.entries[++cpuid_i];
+                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
+                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
             }
             break;
         }
@@ -83,46 +73,36 @@
         case 0xb:
         case 0xd:
             for (j = 0; ; j++) {
-                cpu_x86_cpuid(env, i, j, &eax, &ebx, &ecx, &edx);
                 c->function = i;
                 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                 c->index = j;
-                c->eax = eax;
-                c->ebx = ebx;
-                c->ecx = ecx;
-                c->edx = edx;
-                c = &cpuid_data.entries[++cpuid_i];
+                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
 
-                if (i == 4 && eax == 0)
+                if (i == 4 && c->eax == 0)
                     break;
-                if (i == 0xb && !(ecx & 0xff00))
+                if (i == 0xb && !(c->ecx & 0xff00))
                     break;
-                if (i == 0xd && eax == 0)
+                if (i == 0xd && c->eax == 0)
                     break;
+
+                c = &cpuid_data.entries[cpuid_i++];
             }
             break;
         default:
-            cpu_x86_cpuid(env, i, 0, &eax, &ebx, &ecx, &edx);
             c->function = i;
-            c->eax = eax;
-            c->ebx = ebx;
-            c->ecx = ecx;
-            c->edx = edx;
+            c->flags = 0;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
             break;
         }
     }
-    cpu_x86_cpuid(env, 0x80000000, 0, &eax, &ebx, &ecx, &edx);
-    limit = eax;
+    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
 
     for (i = 0x80000000; i <= limit; i++) {
         struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
 
-        cpu_x86_cpuid(env, i, 0, &eax, &ebx, &ecx, &edx);
         c->function = i;
-        c->eax = eax;
-        c->ebx = ebx;
-        c->ecx = ecx;
-        c->edx = edx;
+        c->flags = 0;
+        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
     }
 
     cpuid_data.cpuid.nent = cpuid_i;

 ------------------------------------------------------------------------
r7165 | aliguori | 2009-04-17 15:44:06 -0500 (Fri, 17 Apr 2009) | 20 lines
Changed paths:
   M /trunk/block-qcow2.c

qcow2 corruption: Fix alloc_cluster_link_l2 (Kevin Wolf)

This patch fixes a qcow2 corruption bug introduced in SVN Rev 5861. L2 tables
are big endian, so entries must be converted before being passed to functions.

This bug is easy to trigger. The following script will create and destroy a
qcow2 image (the header is gone after three loop iterations):

    #!/bin/bash
    qemu-img create -f qcow2 test.qcow 1M
    for i in $(seq 1 10); do
    qemu-system-x86_64 -hda test.qcow -monitor stdio > /dev/null 2>&1 <
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 7164)
+++ block-qcow2.c	(revision 7165)
@@ -1007,7 +1007,7 @@
         goto err;
 
     for (i = 0; i < j; i++)
-        free_any_clusters(bs, old_cluster[i], 1);
+        free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
 
     ret = 0;
 err:

 ------------------------------------------------------------------------
r7164 | blueswir1 | 2009-04-17 15:01:12 -0500 (Fri, 17 Apr 2009) | 1 line
Changed paths:
   M /trunk/Makefile

Make the sed script also work with OpenBSD and OpenSolaris seds
 ------------------------------------------------------------------------

Index: Makefile
===================================================================
--- Makefile	(revision 7163)
+++ Makefile	(revision 7164)
@@ -45,7 +45,7 @@
 config-host.mak: configure
 ifneq ($(wildcard config-host.mak),)
 	@echo $@ is out-of-date, running configure
-	@sed -n "/.*Configured with/{s/[^:]*: //p;q}" $@ | sh
+	@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
 endif
 
 SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))

 ------------------------------------------------------------------------
r7163 | aliguori | 2009-04-17 13:59:56 -0500 (Fri, 17 Apr 2009) | 33 lines
Changed paths:
   M /trunk/Makefile.target
   M /trunk/hw/pc.c
   A /trunk/hw/smbios.c
   A /trunk/hw/smbios.h
   A /trunk/pc-bios/bios-pq/0012-load-smbios-entries-and-files-from-qemu.patch
   M /trunk/pc-bios/bios-pq/series
   M /trunk/pc-bios/bios.bin
   M /trunk/qemu-options.hx
   M /trunk/vl.c

qemu: Add support for SMBIOS command line options (Alex Williamson)

Create a new -smbios option (x86-only) to allow binary SMBIOS entries
to be passed through to the BIOS or modify the default values of
individual fields of type 0 and 1 entries on the command line.

Binary SMBIOS entries can be generated as follows:

dmidecode -t 1 -u | grep $'^\t\t[^"]' | xargs -n1 | \
        perl -lne 'printf "%c", hex($_)' > smbios_type_1.bin

These can then be passed to the BIOS using this switch:

 -smbios file=smbios_type_1.bin

Command line generation supports the following syntax:

 -smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d]
 -smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str]
              [,uuid=$(uuidgen)][,sku=str][,family=str]

For instance, to add a serial number to the type 1 table:

 -smbios type=1,serial=0123456789

Interface is extensible to support more fields/tables as needed.

aliguori: remove texi formatting from help output

Signed-off-by: Alex Williamson 
Signed-off-by: Anthony Liguori 


 ------------------------------------------------------------------------

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 7162)
+++ Makefile.target	(revision 7163)
@@ -576,7 +576,7 @@
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o ioapic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o hpet.o
-OBJS += device-hotplug.o pci-hotplug.o
+OBJS += device-hotplug.o pci-hotplug.o smbios.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
Index: vl.c
===================================================================
--- vl.c	(revision 7162)
+++ vl.c	(revision 7163)
@@ -138,6 +138,7 @@
 #include "hw/isa.h"
 #include "hw/baum.h"
 #include "hw/bt.h"
+#include "hw/smbios.h"
 #include "bt-host.h"
 #include "net.h"
 #include "monitor.h"
@@ -4214,6 +4215,10 @@
     if(ret != 16)
         return -1;
 
+#ifdef TARGET_I386
+    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
+#endif
+
     return 0;
 }
 
@@ -4797,6 +4802,12 @@
                     exit(1);
                 }
                 break;
+            case QEMU_OPTION_smbios:
+                if(smbios_entry_add(optarg) < 0) {
+                    fprintf(stderr, "Wrong smbios provided\n");
+                    exit(1);
+                }
+                break;
 #endif
 #ifdef USE_KQEMU
             case QEMU_OPTION_no_kqemu:
Index: qemu-options.hx
===================================================================
--- qemu-options.hx	(revision 7162)
+++ qemu-options.hx	(revision 7163)
@@ -683,6 +683,27 @@
 ETEXI
 
 #ifdef TARGET_I386
+DEF("smbios", HAS_ARG, QEMU_OPTION_smbios,
+    "-smbios file=binary\n"
+    "                Load SMBIOS entry from binary file\n"
+    "-smbios type=0[,vendor=str][,version=str][,date=str][,release=%%d.%%d]\n"
+    "                Specify SMBIOS type 0 fields\n"
+    "-smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str]\n"
+    "              [,uuid=uuid][,sku=str][,family=str]\n"
+    "                Specify SMBIOS type 1 fields\n")
+#endif
+STEXI
+@item -smbios file=@var{binary}
+Load SMBIOS entry from binary file.
+
+@item -smbios type=0[,vendor=@var{str}][,version=@var{str}][,date=@var{str}][,release=@var{%d.%d}]
+Specify SMBIOS type 0 fields
+
+@item -smbios type=1[,manufacturer=@var{str}][,product=@var{str}][,version=@var{str}][,serial=@var{str}][,uuid=@var{uuid}][,sku=@var{str}][,family=@var{str}]
+Specify SMBIOS type 1 fields
+ETEXI
+
+#ifdef TARGET_I386
 DEFHEADING()
 #endif
 STEXI
Index: hw/smbios.c
===================================================================
--- hw/smbios.c	(revision 0)
+++ hw/smbios.c	(revision 7163)
@@ -0,0 +1,224 @@
+/*
+ * SMBIOS Support
+ *
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors:
+ *  Alex Williamson 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "sysemu.h"
+#include "smbios.h"
+
+/*
+ * Structures shared with the BIOS
+ */
+struct smbios_header {
+    uint16_t length;
+    uint8_t type;
+} __attribute__((__packed__));
+
+struct smbios_field {
+    struct smbios_header header;
+    uint8_t type;
+    uint16_t offset;
+    uint8_t data[];
+} __attribute__((__packed__));
+
+struct smbios_table {
+    struct smbios_header header;
+    uint8_t data[];
+} __attribute__((__packed__));
+
+#define SMBIOS_FIELD_ENTRY 0
+#define SMBIOS_TABLE_ENTRY 1
+
+
+static uint8_t *smbios_entries;
+static size_t smbios_entries_len;
+
+uint8_t *smbios_get_table(size_t *length)
+{
+    *length = smbios_entries_len;
+    return smbios_entries;
+}
+
+/*
+ * To avoid unresolvable overlaps in data, don't allow both
+ * tables and fields for the same smbios type.
+ */
+static void smbios_check_collision(int type, int entry)
+{
+    uint16_t *num_entries = (uint16_t *)smbios_entries;
+    struct smbios_header *header;
+    char *p;
+    int i;
+
+    if (!num_entries)
+        return;
+
+    p = (char *)(num_entries + 1);
+
+    for (i = 0; i < *num_entries; i++) {
+        header = (struct smbios_header *)p;
+        if (entry == SMBIOS_TABLE_ENTRY && header->type == SMBIOS_FIELD_ENTRY) {
+            struct smbios_field *field = (void *)header;
+            if (type == field->type) {
+                fprintf(stderr, "SMBIOS type %d field already defined, "
+                                "cannot add table\n", type);
+                exit(1);
+            }
+        } else if (entry == SMBIOS_FIELD_ENTRY &&
+                   header->type == SMBIOS_TABLE_ENTRY) {
+            struct smbios_structure_header *table = (void *)(header + 1);
+            if (type == table->type) {
+                fprintf(stderr, "SMBIOS type %d table already defined, "
+                                "cannot add field\n", type);
+                exit(1);
+            }
+        }
+        p += le16_to_cpu(header->length);
+    }
+}
+
+void smbios_add_field(int type, int offset, int len, void *data)
+{
+    struct smbios_field *field;
+
+    smbios_check_collision(type, SMBIOS_FIELD_ENTRY);
+
+    if (!smbios_entries) {
+        smbios_entries_len = sizeof(uint16_t);
+        smbios_entries = qemu_mallocz(smbios_entries_len);
+    }
+    smbios_entries = qemu_realloc(smbios_entries, smbios_entries_len +
+                                                  sizeof(*field) + len);
+    field = (struct smbios_field *)(smbios_entries + smbios_entries_len);
+    field->header.type = SMBIOS_FIELD_ENTRY;
+    field->header.length = cpu_to_le16(sizeof(*field) + len);
+
+    field->type = type;
+    field->offset = cpu_to_le16(offset);
+    memcpy(field->data, data, len);
+
+    smbios_entries_len += sizeof(*field) + len;
+    (*(uint16_t *)smbios_entries) =
+            cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
+}
+
+static void smbios_build_type_0_fields(const char *t)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), "vendor", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0, vendor_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "version", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0, bios_version_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "date", t))
+        smbios_add_field(0, offsetof(struct smbios_type_0,
+                                     bios_release_date_str),
+                                     strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "release", t)) {
+        int major, minor;
+        sscanf(buf, "%d.%d", &major, &minor);
+        smbios_add_field(0, offsetof(struct smbios_type_0,
+                                     system_bios_major_release), 1, &major);
+        smbios_add_field(0, offsetof(struct smbios_type_0,
+                                     system_bios_minor_release), 1, &minor);
+    }
+}
+
+static void smbios_build_type_1_fields(const char *t)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), "manufacturer", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, manufacturer_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "product", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, product_name_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "version", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, version_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "serial", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, serial_number_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "uuid", t)) {
+        if (qemu_uuid_parse(buf, qemu_uuid) != 0) {
+            fprintf(stderr, "Invalid SMBIOS UUID string\n");
+            exit(1);
+        }
+    }
+    if (get_param_value(buf, sizeof(buf), "sku", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, sku_number_str),
+                         strlen(buf) + 1, buf);
+    if (get_param_value(buf, sizeof(buf), "family", t))
+        smbios_add_field(1, offsetof(struct smbios_type_1, family_str),
+                         strlen(buf) + 1, buf);
+}
+
+int smbios_entry_add(const char *t)
+{
+    char buf[1024];
+
+    if (get_param_value(buf, sizeof(buf), "file", t)) {
+        struct smbios_structure_header *header;
+        struct smbios_table *table;
+        int size = get_image_size(buf);
+
+        if (size < sizeof(struct smbios_structure_header)) {
+            fprintf(stderr, "Cannot read smbios file %s", buf);
+            exit(1);
+        }
+
+        if (!smbios_entries) {
+            smbios_entries_len = sizeof(uint16_t);
+            smbios_entries = qemu_mallocz(smbios_entries_len);
+        }
+
+        smbios_entries = qemu_realloc(smbios_entries, smbios_entries_len +
+                                                      sizeof(*table) + size);
+        table = (struct smbios_table *)(smbios_entries + smbios_entries_len);
+        table->header.type = SMBIOS_TABLE_ENTRY;
+        table->header.length = cpu_to_le16(sizeof(*table) + size);
+
+        if (load_image(buf, table->data) != size) {
+            fprintf(stderr, "Failed to load smbios file %s", buf);
+            exit(1);
+        }
+
+        header = (struct smbios_structure_header *)(table->data);
+        smbios_check_collision(header->type, SMBIOS_TABLE_ENTRY);
+
+        smbios_entries_len += sizeof(*table) + size;
+        (*(uint16_t *)smbios_entries) =
+                cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
+        return 0;
+    }
+
+    if (get_param_value(buf, sizeof(buf), "type", t)) {
+        unsigned long type = strtoul(buf, NULL, 0);
+        switch (type) {
+        case 0:
+            smbios_build_type_0_fields(t);
+            return 0;
+        case 1:
+            smbios_build_type_1_fields(t);
+            return 0;
+        default:
+            fprintf(stderr, "Don't know how to build fields for SMBIOS type "
+                    "%ld\n", type);
+            exit(1);
+        }
+    }
+
+    fprintf(stderr, "smbios: must specify type= or file=\n");
+    return -1;
+}
Index: hw/smbios.h
===================================================================
--- hw/smbios.h	(revision 0)
+++ hw/smbios.h	(revision 7163)
@@ -0,0 +1,162 @@
+#ifndef QEMU_SMBIOS_H
+#define QEMU_SMBIOS_H
+/*
+ * SMBIOS Support
+ *
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors:
+ *  Alex Williamson 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+int smbios_entry_add(const char *t);
+void smbios_add_field(int type, int offset, int len, void *data);
+uint8_t *smbios_get_table(size_t *length);
+
+/*
+ * SMBIOS spec defined tables
+ */
+
+/* This goes at the beginning of every SMBIOS structure. */
+struct smbios_structure_header {
+    uint8_t type;
+    uint8_t length;
+    uint16_t handle;
+} __attribute__((__packed__));
+
+/* SMBIOS type 0 - BIOS Information */
+struct smbios_type_0 {
+    struct smbios_structure_header header;
+    uint8_t vendor_str;
+    uint8_t bios_version_str;
+    uint16_t bios_starting_address_segment;
+    uint8_t bios_release_date_str;
+    uint8_t bios_rom_size;
+    uint8_t bios_characteristics[8];
+    uint8_t bios_characteristics_extension_bytes[2];
+    uint8_t system_bios_major_release;
+    uint8_t system_bios_minor_release;
+    uint8_t embedded_controller_major_release;
+    uint8_t embedded_controller_minor_release;
+} __attribute__((__packed__));
+
+/* SMBIOS type 1 - System Information */
+struct smbios_type_1 {
+    struct smbios_structure_header header;
+    uint8_t manufacturer_str;
+    uint8_t product_name_str;
+    uint8_t version_str;
+    uint8_t serial_number_str;
+    uint8_t uuid[16];
+    uint8_t wake_up_type;
+    uint8_t sku_number_str;
+    uint8_t family_str;
+} __attribute__((__packed__));
+
+/* SMBIOS type 3 - System Enclosure (v2.3) */
+struct smbios_type_3 {
+    struct smbios_structure_header header;
+    uint8_t manufacturer_str;
+    uint8_t type;
+    uint8_t version_str;
+    uint8_t serial_number_str;
+    uint8_t asset_tag_number_str;
+    uint8_t boot_up_state;
+    uint8_t power_supply_state;
+    uint8_t thermal_state;
+    uint8_t security_status;
+    uint32_t oem_defined;
+    uint8_t height;
+    uint8_t number_of_power_cords;
+    uint8_t contained_element_count;
+    // contained elements follow
+} __attribute__((__packed__));
+
+/* SMBIOS type 4 - Processor Information (v2.0) */
+struct smbios_type_4 {
+    struct smbios_structure_header header;
+    uint8_t socket_designation_str;
+    uint8_t processor_type;
+    uint8_t processor_family;
+    uint8_t processor_manufacturer_str;
+    uint32_t processor_id[2];
+    uint8_t processor_version_str;
+    uint8_t voltage;
+    uint16_t external_clock;
+    uint16_t max_speed;
+    uint16_t current_speed;
+    uint8_t status;
+    uint8_t processor_upgrade;
+    uint16_t l1_cache_handle;
+    uint16_t l2_cache_handle;
+    uint16_t l3_cache_handle;
+} __attribute__((__packed__));
+
+/* SMBIOS type 16 - Physical Memory Array
+ *   Associated with one type 17 (Memory Device).
+ */
+struct smbios_type_16 {
+    struct smbios_structure_header header;
+    uint8_t location;
+    uint8_t use;
+    uint8_t error_correction;
+    uint32_t maximum_capacity;
+    uint16_t memory_error_information_handle;
+    uint16_t number_of_memory_devices;
+} __attribute__((__packed__));
+/* SMBIOS type 17 - Memory Device
+ *   Associated with one type 19
+ */
+struct smbios_type_17 {
+    struct smbios_structure_header header;
+    uint16_t physical_memory_array_handle;
+    uint16_t memory_error_information_handle;
+    uint16_t total_width;
+    uint16_t data_width;
+    uint16_t size;
+    uint8_t form_factor;
+    uint8_t device_set;
+    uint8_t device_locator_str;
+    uint8_t bank_locator_str;
+    uint8_t memory_type;
+    uint16_t type_detail;
+} __attribute__((__packed__));
+
+/* SMBIOS type 19 - Memory Array Mapped Address */
+struct smbios_type_19 {
+    struct smbios_structure_header header;
+    uint32_t starting_address;
+    uint32_t ending_address;
+    uint16_t memory_array_handle;
+    uint8_t partition_width;
+} __attribute__((__packed__));
+
+/* SMBIOS type 20 - Memory Device Mapped Address */
+struct smbios_type_20 {
+    struct smbios_structure_header header;
+    uint32_t starting_address;
+    uint32_t ending_address;
+    uint16_t memory_device_handle;
+    uint16_t memory_array_mapped_address_handle;
+    uint8_t partition_row_position;
+    uint8_t interleave_position;
+    uint8_t interleaved_data_depth;
+} __attribute__((__packed__));
+
+/* SMBIOS type 32 - System Boot Information */
+struct smbios_type_32 {
+    struct smbios_structure_header header;
+    uint8_t reserved[6];
+    uint8_t boot_status;
+} __attribute__((__packed__));
+
+/* SMBIOS type 127 -- End-of-table */
+struct smbios_type_127 {
+    struct smbios_structure_header header;
+} __attribute__((__packed__));
+
+#endif /*QEMU_SMBIOS_H */
Index: hw/pc.c
===================================================================
--- hw/pc.c	(revision 7162)
+++ hw/pc.c	(revision 7163)
@@ -37,6 +37,7 @@
 #include "virtio-balloon.h"
 #include "virtio-console.h"
 #include "hpet_emul.h"
+#include "smbios.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -51,6 +52,7 @@
 #define ACPI_DATA_SIZE       0x10000
 #define BIOS_CFG_IOPORT 0x510
 #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
+#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
 
 #define MAX_IDE_BUS 2
 
@@ -425,6 +427,8 @@
 static void bochs_bios_init(void)
 {
     void *fw_cfg;
+    uint8_t *smbios_table;
+    size_t smbios_len;
 
     register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
@@ -442,6 +446,11 @@
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
                      acpi_tables_len);
+
+    smbios_table = smbios_get_table(&smbios_len);
+    if (smbios_table)
+        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
+                         smbios_table, smbios_len);
 }
 
 /* Generate an initial boot sector which sets state and jump to
Index: pc-bios/bios.bin
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: pc-bios/bios-pq/0012-load-smbios-entries-and-files-from-qemu.patch
===================================================================
--- pc-bios/bios-pq/0012-load-smbios-entries-and-files-from-qemu.patch	(revision 0)
+++ pc-bios/bios-pq/0012-load-smbios-entries-and-files-from-qemu.patch	(revision 7163)
@@ -0,0 +1,470 @@
+qemu:bios: Load SMBIOS entries and files from qemu (Alex Williamson)
+
+Allow SMBIOS fields to be overridden and entries replaced by those
+read from qemu.
+
+Signed-off-by: Alex Williamson 
+Signed-off-by: Anthony Liguori 
+
+diff --git a/bios/rombios32.c b/bios/rombios32.c
+index 7be4216..1a1ed64 100644
+--- a/bios/rombios32.c
++++ b/bios/rombios32.c
+@@ -441,7 +441,6 @@ uint32_t cpuid_features;
+ uint32_t cpuid_ext_features;
+ unsigned long ram_size;
+ uint64_t ram_end;
+-uint8_t bios_uuid[16];
+ #ifdef BX_USE_EBDA_TABLES
+ unsigned long ebda_cur_addr;
+ #endif
+@@ -471,6 +470,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
+ #define QEMU_CFG_UUID       0x02
+ #define QEMU_CFG_ARCH_LOCAL     0x8000
+ #define QEMU_CFG_ACPI_TABLES  (QEMU_CFG_ARCH_LOCAL + 0)
++#define QEMU_CFG_SMBIOS_ENTRIES  (QEMU_CFG_ARCH_LOCAL + 1)
+ 
+ int qemu_cfg_port;
+ 
+@@ -519,19 +519,17 @@ static int acpi_load_table(int i, uint32_t addr, uint16_t *len)
+     qemu_cfg_read((uint8_t*)addr, *len);
+     return 0;
+ }
+-#endif
+ 
+-void uuid_probe(void)
++static uint16_t smbios_entries(void)
+ {
+-#ifdef BX_QEMU
+-    if(qemu_cfg_port) {
+-        qemu_cfg_select(QEMU_CFG_UUID);
+-        qemu_cfg_read(bios_uuid, 16);
+-        return;
+-    }
+-#endif
+-    memset(bios_uuid, 0, 16);
++    uint16_t cnt;
++
++    qemu_cfg_select(QEMU_CFG_SMBIOS_ENTRIES);
++    qemu_cfg_read((uint8_t*)&cnt, sizeof(cnt));
++
++    return cnt;
+ }
++#endif
+ 
+ void cpu_probe(void)
+ {
+@@ -1963,21 +1961,105 @@ smbios_entry_point_init(void *start,
+     ep->intermediate_checksum = -sum;
+     }
+ 
++struct smbios_header {
++    uint16_t length;
++    uint8_t type;
++} __attribute__((__packed__));
++
++struct smbios_field {
++    struct smbios_header header;
++    uint8_t type;
++    uint16_t offset;
++    uint8_t data[];
++} __attribute__((__packed__));
++
++struct smbios_table {
++    struct smbios_header header;
++    uint8_t data[];
++} __attribute__((__packed__));
++
++#define SMBIOS_FIELD_ENTRY 0
++#define SMBIOS_TABLE_ENTRY 1
++
++static size_t
++smbios_load_field(int type, size_t offset, void *addr)
++{
++#ifdef BX_QEMU
++    int i;
++
++    for (i = smbios_entries(); i > 0; i--) {
++        struct smbios_field field;
++
++        qemu_cfg_read((uint8_t *)&field, sizeof(struct smbios_header));
++        field.header.length -= sizeof(struct smbios_header);
++
++        if (field.header.type != SMBIOS_FIELD_ENTRY) {
++            while (field.header.length--)
++                inb(QEMU_CFG_DATA_PORT);
++            continue;
++        }
++
++        qemu_cfg_read((uint8_t *)&field.type,
++                      sizeof(field) - sizeof(struct smbios_header));
++        field.header.length -= sizeof(field) - sizeof(struct smbios_header);
++
++        if (field.type != type || field.offset != offset) {
++            while (field.header.length--)
++                inb(QEMU_CFG_DATA_PORT);
++            continue;
++        }
++
++        qemu_cfg_read(addr, field.header.length);
++        return (size_t)field.header.length;
++    }
++#endif
++    return 0;
++}
++ 
++#define load_str_field_with_default(type, field, def) do {             \
++    size = smbios_load_field(type, offsetof(struct smbios_type_##type, \
++                                            field), end);              \
++    if (size > 0) {                                                    \
++        end += size;                                                   \
++    } else {                                                           \
++        memcpy(end, def, sizeof(def));                                 \
++        end += sizeof(def);                                            \
++    }                                                                  \
++    p->field = ++str_index;                                            \
++} while (0)
++
++#define load_str_field_or_skip(type, field) do {                       \
++    size = smbios_load_field(type, offsetof(struct smbios_type_##type, \
++                                            field), end);              \
++    if (size > 0) {                                                    \
++        end += size;                                                   \
++        p->field = ++str_index;                                        \
++    } else {                                                           \
++        p->field = 0;                                                  \
++    }                                                                  \
++} while (0)
++
+ /* Type 0 -- BIOS Information */
+ #define RELEASE_DATE_STR "01/01/2007"
+ static void *
+-smbios_type_0_init(void *start)
++smbios_init_type_0(void *start)
+ {
+     struct smbios_type_0 *p = (struct smbios_type_0 *)start;
++    char *end = (char *)start + sizeof(struct smbios_type_0);
++    size_t size;
++    int str_index = 0;
+ 
+     p->header.type = 0;
+     p->header.length = sizeof(struct smbios_type_0);
+     p->header.handle = 0;
+ 
+-    p->vendor_str = 1;
+-    p->bios_version_str = 1;
++    load_str_field_with_default(0, vendor_str, BX_APPNAME);
++    load_str_field_with_default(0, bios_version_str, BX_APPNAME);
++
+     p->bios_starting_address_segment = 0xe800;
+-    p->bios_release_date_str = 2;
++
++    load_str_field_with_default(0, bios_release_date_str, RELEASE_DATE_STR);
++
+     p->bios_rom_size = 0; /* FIXME */
+ 
+     memset(p->bios_characteristics, 0, 8);
+@@ -1985,50 +2067,66 @@ smbios_type_0_init(void *start)
+     p->bios_characteristics_extension_bytes[0] = 0;
+     p->bios_characteristics_extension_bytes[1] = 0;
+ 
+-    p->system_bios_major_release = 1;
+-    p->system_bios_minor_release = 0;
++    if (!smbios_load_field(0, offsetof(struct smbios_type_0,
++                                       system_bios_major_release),
++                           &p->system_bios_major_release))
++        p->system_bios_major_release = 1;
++
++    if (!smbios_load_field(0, offsetof(struct smbios_type_0,
++                                       system_bios_minor_release),
++                           &p->system_bios_minor_release))
++        p->system_bios_minor_release = 0;
++
+     p->embedded_controller_major_release = 0xff;
+     p->embedded_controller_minor_release = 0xff;
+ 
+-    start += sizeof(struct smbios_type_0);
+-    memcpy((char *)start, BX_APPNAME, sizeof(BX_APPNAME));
+-    start += sizeof(BX_APPNAME);
+-    memcpy((char *)start, RELEASE_DATE_STR, sizeof(RELEASE_DATE_STR));
+-    start += sizeof(RELEASE_DATE_STR);
+-    *((uint8_t *)start) = 0;
++    *end = 0;
++    end++;
+ 
+-    return start+1;
++    return end;
+ }
+ 
+ /* Type 1 -- System Information */
+ static void *
+-smbios_type_1_init(void *start)
++smbios_init_type_1(void *start)
+ {
+     struct smbios_type_1 *p = (struct smbios_type_1 *)start;
++    char *end = (char *)start + sizeof(struct smbios_type_1);
++    size_t size;
++    int str_index = 0;
++
+     p->header.type = 1;
+     p->header.length = sizeof(struct smbios_type_1);
+     p->header.handle = 0x100;
+ 
+-    p->manufacturer_str = 0;
+-    p->product_name_str = 0;
+-    p->version_str = 0;
+-    p->serial_number_str = 0;
++    load_str_field_or_skip(1, manufacturer_str);
++    load_str_field_or_skip(1, product_name_str);
++    load_str_field_or_skip(1, version_str);
++    load_str_field_or_skip(1, serial_number_str);
+ 
+-    memcpy(p->uuid, bios_uuid, 16);
++    size = smbios_load_field(1, offsetof(struct smbios_type_1,
++                                         uuid), &p->uuid);
++    if (size == 0)
++        memset(p->uuid, 0, 16);
+ 
+     p->wake_up_type = 0x06; /* power switch */
+-    p->sku_number_str = 0;
+-    p->family_str = 0;
+ 
+-    start += sizeof(struct smbios_type_1);
+-    *((uint16_t *)start) = 0;
++    load_str_field_or_skip(1, sku_number_str);
++    load_str_field_or_skip(1, family_str);
+ 
+-    return start+2;
++    *end = 0;
++    end++;
++    if (!str_index) {
++        *end = 0;
++        end++;
++    }
++
++    return end;
+ }
+ 
+ /* Type 3 -- System Enclosure */
+ static void *
+-smbios_type_3_init(void *start)
++smbios_init_type_3(void *start)
+ {
+     struct smbios_type_3 *p = (struct smbios_type_3 *)start;
+ 
+@@ -2058,7 +2156,7 @@ smbios_type_3_init(void *start)
+ 
+ /* Type 4 -- Processor Information */
+ static void *
+-smbios_type_4_init(void *start, unsigned int cpu_number)
++smbios_init_type_4(void *start, unsigned int cpu_number)
+ {
+     struct smbios_type_4 *p = (struct smbios_type_4 *)start;
+ 
+@@ -2098,7 +2196,7 @@ smbios_type_4_init(void *start, unsigned int cpu_number)
+ 
+ /* Type 16 -- Physical Memory Array */
+ static void *
+-smbios_type_16_init(void *start, uint32_t memsize, int nr_mem_devs)
++smbios_init_type_16(void *start, uint32_t memsize, int nr_mem_devs)
+ {
+     struct smbios_type_16 *p = (struct smbios_type_16*)start;
+ 
+@@ -2121,7 +2219,7 @@ smbios_type_16_init(void *start, uint32_t memsize, int nr_mem_devs)
+ 
+ /* Type 17 -- Memory Device */
+ static void *
+-smbios_type_17_init(void *start, uint32_t memory_size_mb, int instance)
++smbios_init_type_17(void *start, uint32_t memory_size_mb, int instance)
+ {
+     struct smbios_type_17 *p = (struct smbios_type_17 *)start;
+ 
+@@ -2151,7 +2249,7 @@ smbios_type_17_init(void *start, uint32_t memory_size_mb, int instance)
+ 
+ /* Type 19 -- Memory Array Mapped Address */
+ static void *
+-smbios_type_19_init(void *start, uint32_t memory_size_mb, int instance)
++smbios_init_type_19(void *start, uint32_t memory_size_mb, int instance)
+ {
+     struct smbios_type_19 *p = (struct smbios_type_19 *)start;
+ 
+@@ -2172,7 +2270,7 @@ smbios_type_19_init(void *start, uint32_t memory_size_mb, int instance)
+ 
+ /* Type 20 -- Memory Device Mapped Address */
+ static void *
+-smbios_type_20_init(void *start, uint32_t memory_size_mb, int instance)
++smbios_init_type_20(void *start, uint32_t memory_size_mb, int instance)
+ {
+     struct smbios_type_20 *p = (struct smbios_type_20 *)start;
+ 
+@@ -2196,7 +2294,7 @@ smbios_type_20_init(void *start, uint32_t memory_size_mb, int instance)
+ 
+ /* Type 32 -- System Boot Information */
+ static void *
+-smbios_type_32_init(void *start)
++smbios_init_type_32(void *start)
+ {
+     struct smbios_type_32 *p = (struct smbios_type_32 *)start;
+ 
+@@ -2214,7 +2312,7 @@ smbios_type_32_init(void *start)
+ 
+ /* Type 127 -- End of Table */
+ static void *
+-smbios_type_127_init(void *start)
++smbios_init_type_127(void *start)
+ {
+     struct smbios_type_127 *p = (struct smbios_type_127 *)start;
+ 
+@@ -2228,6 +2326,78 @@ smbios_type_127_init(void *start)
+     return start + 2;
+ }
+ 
++static int
++smbios_load_external(int type, char **p, unsigned *nr_structs,
++                     unsigned *max_struct_size)
++{
++#ifdef BX_QEMU
++    static uint64_t used_bitmap[4] = { 0 };
++    char *start = *p;
++    int i;
++
++    /* Check if we've already reported these tables */
++    if (used_bitmap[(type >> 6) & 0x3] & (1ULL << (type & 0x3f)))
++        return 1;
++
++    /* Don't introduce spurious end markers */
++    if (type == 127)
++        return 0;
++
++    for (i = smbios_entries(); i > 0; i--) {
++        struct smbios_table table;
++        struct smbios_structure_header *header = (void *)*p;
++        int string;
++
++        qemu_cfg_read((uint8_t *)&table, sizeof(struct smbios_header));
++        table.header.length -= sizeof(struct smbios_header);
++
++        if (table.header.type != SMBIOS_TABLE_ENTRY) {
++            while (table.header.length--)
++                inb(QEMU_CFG_DATA_PORT);
++            continue;
++        }
++
++        qemu_cfg_read((uint8_t *)*p, sizeof(struct smbios_structure_header));
++        table.header.length -= sizeof(struct smbios_structure_header);
++
++        if (header->type != type) {
++            while (table.header.length--)
++                inb(QEMU_CFG_DATA_PORT);
++            continue;
++        }
++
++        *p += sizeof(struct smbios_structure_header);
++
++        /* Entries end with a double NULL char, if there's a string at
++         * the end (length is greater than formatted length), the string
++         * terminator provides the first NULL. */
++        string = header->length < table.header.length +
++                 sizeof(struct smbios_structure_header);
++
++        /* Read the rest and terminate the entry */
++        qemu_cfg_read((uint8_t *)*p, table.header.length);
++        *p += table.header.length;
++        *((uint8_t*)*p) = 0;
++        (*p)++;
++        if (!string) {
++            *((uint8_t*)*p) = 0;
++            (*p)++;
++        }
++
++        (*nr_structs)++;
++        if (*p - (char *)header > *max_struct_size)
++            *max_struct_size = *p - (char *)header;
++    }
++
++    /* Mark that we've reported on this type */
++    used_bitmap[(type >> 6) & 0x3] |= (1ULL << (type & 0x3f));
++
++    return (start != *p);
++#else /* !BX_QEMU */
++    return 0;
++#endif
++}
++
+ void smbios_init(void)
+ {
+     unsigned cpu_num, nr_structs = 0, max_struct_size = 0;
+@@ -2246,34 +2416,39 @@ void smbios_init(void)
+ 
+ 	p = (char *)start + sizeof(struct smbios_entry_point);
+ 
+-#define add_struct(fn) do{ \
+-    q = (fn); \
+-    nr_structs++; \
+-    if ((q - p) > max_struct_size) \
+-        max_struct_size = q - p; \
+-    p = q; \
+-}while (0)
+-
+-    add_struct(smbios_type_0_init(p));
+-    add_struct(smbios_type_1_init(p));
+-    add_struct(smbios_type_3_init(p));
++#define add_struct(type, args...) do {                                    \
++    if (!smbios_load_external(type, &p, &nr_structs, &max_struct_size)) { \
++        q = smbios_init_type_##type(args);                                \
++        nr_structs++;                                                     \
++        if ((q - p) > max_struct_size)                                    \
++            max_struct_size = q - p;                                      \
++        p = q;                                                            \
++    }                                                                     \
++} while (0)
++
++    add_struct(0, p);
++    add_struct(1, p);
++    add_struct(3, p);
+     for (cpu_num = 1; cpu_num <= smp_cpus; cpu_num++)
+-        add_struct(smbios_type_4_init(p, cpu_num));
++        add_struct(4, p, cpu_num);
+ 
+     /* Each 'memory device' covers up to 16GB of address space. */
+     nr_mem_devs = (memsize + 0x3fff) >> 14;
+-    add_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
++    add_struct(16, p, memsize, nr_mem_devs);
+     for ( i = 0; i < nr_mem_devs; i++ )
+     {
+         uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
+                                 ? (((memsize-1) & 0x3fff)+1) : 0x4000);
+-        add_struct(smbios_type_17_init(p, dev_memsize, i));
+-        add_struct(smbios_type_19_init(p, dev_memsize, i));
+-        add_struct(smbios_type_20_init(p, dev_memsize, i));
++        add_struct(17, p, dev_memsize, i);
++        add_struct(19, p, dev_memsize, i);
++        add_struct(20, p, dev_memsize, i);
+     }
+ 
+-    add_struct(smbios_type_32_init(p));
+-    add_struct(smbios_type_127_init(p));
++    add_struct(32, p);
++    /* Add any remaining provided entries before the end marker */
++    for (i = 0; i < 256; i++)
++        smbios_load_external(i, &p, &nr_structs, &max_struct_size);
++    add_struct(127, p);
+ 
+ #undef add_struct
+ 
+@@ -2380,8 +2555,6 @@ void rombios32_init(uint32_t *s3_resume_vector, uint8_t *shutdown_flag)
+ 
+         mptable_init();
+ 
+-        uuid_probe();
+-
+         smbios_init();
+ 
+         if (acpi_enabled)
+
+
+--
+To unsubscribe from this list: send the line "unsubscribe kvm" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html
+
Index: pc-bios/bios-pq/series
===================================================================
--- pc-bios/bios-pq/series	(revision 7162)
+++ pc-bios/bios-pq/series	(revision 7163)
@@ -9,3 +9,4 @@
 0009_qemu-bios-pci-hotplug-support.patch
 0010_bios-mark-the-acpi-sci-interrupt-as-connected-to-irq-9.patch
 0011_read-additional-acpi-tables-from-a-vm.patch
+0012-load-smbios-entries-and-files-from-qemu.patch

 ------------------------------------------------------------------------
r7162 | aliguori | 2009-04-17 13:58:14 -0500 (Fri, 17 Apr 2009) | 7 lines
Changed paths:
   M /trunk/sysemu.h
   M /trunk/vl.c

qemu: Add prototype and make qemu_uuid_parse() non-static (Alex Williamson)

SMBIOS parameters can also provide a UUID outside of vl.c.

Signed-off-by: Alex Williamson 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7161)
+++ vl.c	(revision 7162)
@@ -4200,7 +4200,7 @@
 }
 #endif
 
-static int qemu_uuid_parse(const char *str, uint8_t *uuid)
+int qemu_uuid_parse(const char *str, uint8_t *uuid)
 {
     int ret;
 
Index: sysemu.h
===================================================================
--- sysemu.h	(revision 7161)
+++ sysemu.h	(revision 7162)
@@ -15,6 +15,7 @@
 extern int vm_running;
 extern const char *qemu_name;
 extern uint8_t qemu_uuid[];
+int qemu_uuid_parse(const char *str, uint8_t *uuid);
 #define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
 
 typedef struct vm_change_state_entry VMChangeStateEntry;

 ------------------------------------------------------------------------
r7151 | aliguori | 2009-04-17 12:11:12 -0500 (Fri, 17 Apr 2009) | 11 lines
Changed paths:
   M /trunk/net.c

Free VLANClientState using qemu_free() (Mark McLoughlin)

It's allocated using qemu_mallocz(), so ...

The name and model strings are strdup() allocated, so free()
is still appropriate for them.

Reported-by: Jan Kiszka 
Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7150)
+++ net.c	(revision 7151)
@@ -369,7 +369,7 @@
             }
             free(vc->name);
             free(vc->model);
-            free(vc);
+            qemu_free(vc);
             break;
         } else
             pvc = &(*pvc)->next;

 ------------------------------------------------------------------------
r7150 | aliguori | 2009-04-17 12:11:08 -0500 (Fri, 17 Apr 2009) | 8 lines
Changed paths:
   M /trunk/hw/dp8393x.c
   M /trunk/hw/e1000.c
   M /trunk/hw/eepro100.c
   M /trunk/hw/etraxfs_eth.c
   M /trunk/hw/mcf_fec.c
   M /trunk/hw/mipsnet.c
   M /trunk/hw/musicpal.c
   M /trunk/hw/ne2000.c
   M /trunk/hw/pcnet.c
   M /trunk/hw/rtl8139.c
   M /trunk/hw/smc91c111.c
   M /trunk/hw/stellaris_enet.c
   M /trunk/hw/usb-net.c
   M /trunk/hw/virtio-net.c
   M /trunk/hw/virtio.c
   M /trunk/hw/virtio.h
   M /trunk/net.c
   M /trunk/net.h
   M /trunk/tap-win32.c

Introduce VLANClientState::cleanup() (Mark McLoughlin)

We're currently leaking memory and file descriptors on device
hot-unplug.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: tap-win32.c
===================================================================
--- tap-win32.c	(revision 7149)
+++ tap-win32.c	(revision 7150)
@@ -638,6 +638,18 @@
      tap_win32_overlapped_t *handle;
  } TAPState;
 
+static void tap_cleanup(VLANClientState *vc)
+{
+    TAPState *s = vc->opaque;
+
+    qemu_del_wait_object(s->handle->tap_semaphore, NULL, NULL);
+
+    /* FIXME: need to kill thread and close file handle:
+       tap_win32_close(s);
+    */
+    qemu_free(s);
+}
+
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
 {
     TAPState *s = opaque;
@@ -672,7 +684,8 @@
         return -1;
     }
 
-    s->vc = qemu_new_vlan_client(vlan, model, name, tap_receive, NULL, s);
+    s->vc = qemu_new_vlan_client(vlan, model, name, tap_receive,
+                                 NULL, tap_cleanup, s);
 
     snprintf(s->vc->info_str, sizeof(s->vc->info_str),
              "tap: ifname=%s", ifname);
Index: net.c
===================================================================
--- net.c	(revision 7149)
+++ net.c	(revision 7150)
@@ -333,6 +333,7 @@
                                       const char *name,
                                       IOReadHandler *fd_read,
                                       IOCanRWHandler *fd_can_read,
+                                      NetCleanup *cleanup,
                                       void *opaque)
 {
     VLANClientState *vc, **pvc;
@@ -344,6 +345,7 @@
         vc->name = assign_name(vc, model);
     vc->fd_read = fd_read;
     vc->fd_can_read = fd_can_read;
+    vc->cleanup = cleanup;
     vc->opaque = opaque;
     vc->vlan = vlan;
 
@@ -362,6 +364,9 @@
     while (*pvc != NULL)
         if (*pvc == vc) {
             *pvc = vc->next;
+            if (vc->cleanup) {
+                vc->cleanup(vc);
+            }
             free(vc->name);
             free(vc->model);
             free(vc);
@@ -521,7 +526,7 @@
         slirp_init(slirp_restrict, slirp_ip);
     }
     slirp_vc = qemu_new_vlan_client(vlan, model, name,
-                                    slirp_receive, NULL, NULL);
+                                    slirp_receive, NULL, NULL, NULL);
     slirp_vc->info_str[0] = '\0';
     return 0;
 }
@@ -702,6 +707,8 @@
     char down_script_arg[128];
 } TAPState;
 
+static int launch_script(const char *setup_script, const char *ifname, int fd);
+
 static ssize_t tap_receive_iov(void *opaque, const struct iovec *iov,
                                int iovcnt)
 {
@@ -748,6 +755,18 @@
     }
 }
 
+static void tap_cleanup(VLANClientState *vc)
+{
+    TAPState *s = vc->opaque;
+
+    if (s->down_script[0])
+        launch_script(s->down_script, s->down_script_arg, s->fd);
+
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+    close(s->fd);
+    qemu_free(s);
+}
+
 /* fd support */
 
 static TAPState *net_tap_fd_init(VLANState *vlan,
@@ -759,7 +778,8 @@
 
     s = qemu_mallocz(sizeof(TAPState));
     s->fd = fd;
-    s->vc = qemu_new_vlan_client(vlan, model, name, tap_receive, NULL, s);
+    s->vc = qemu_new_vlan_client(vlan, model, name, tap_receive,
+                                 NULL, tap_cleanup, s);
     s->vc->fd_readv = tap_receive_iov;
     qemu_set_fd_handler(s->fd, tap_send, NULL, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
@@ -1058,6 +1078,14 @@
     }
 }
 
+static void vde_cleanup(VLANClientState *vc)
+{
+    VDEState *s = vc->opaque;
+    qemu_set_fd_handler(vde_datafd(s->vde), NULL, NULL, NULL);
+    vde_close(s->vde);
+    qemu_free(s);
+}
+
 static int net_vde_init(VLANState *vlan, const char *model,
                         const char *name, const char *sock,
                         int port, const char *group, int mode)
@@ -1078,7 +1106,8 @@
         free(s);
         return -1;
     }
-    s->vc = qemu_new_vlan_client(vlan, model, name, vde_from_qemu, NULL, s);
+    s->vc = qemu_new_vlan_client(vlan, model, name, vde_from_qemu,
+                                 NULL, vde_cleanup, s);
     qemu_set_fd_handler(vde_datafd(s->vde), vde_to_qemu, NULL, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "sock=%s,fd=%d",
              sock, vde_datafd(s->vde));
@@ -1263,6 +1292,14 @@
     return -1;
 }
 
+static void net_socket_cleanup(VLANClientState *vc)
+{
+    NetSocketState *s = vc->opaque;
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+    close(s->fd);
+    qemu_free(s);
+}
+
 static NetSocketState *net_socket_fd_init_dgram(VLANState *vlan,
                                                 const char *model,
                                                 const char *name,
@@ -1307,7 +1344,8 @@
     s = qemu_mallocz(sizeof(NetSocketState));
     s->fd = fd;
 
-    s->vc = qemu_new_vlan_client(vlan, model, name, net_socket_receive_dgram, NULL, s);
+    s->vc = qemu_new_vlan_client(vlan, model, name, net_socket_receive_dgram,
+                                 NULL, net_socket_cleanup, s);
     qemu_set_fd_handler(s->fd, net_socket_send_dgram, NULL, s);
 
     /* mcast: save bound address as dst */
@@ -1334,8 +1372,8 @@
     NetSocketState *s;
     s = qemu_mallocz(sizeof(NetSocketState));
     s->fd = fd;
-    s->vc = qemu_new_vlan_client(vlan, model, name,
-                                 net_socket_receive, NULL, s);
+    s->vc = qemu_new_vlan_client(vlan, model, name, net_socket_receive,
+                                 NULL, net_socket_cleanup, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str),
              "socket: fd=%d", fd);
     if (is_connected) {
@@ -1895,29 +1933,20 @@
 
 void net_cleanup(void)
 {
-#if !defined(_WIN32)
     VLANState *vlan;
 
     /* close network clients */
     for(vlan = first_vlan; vlan != NULL; vlan = vlan->next) {
-        VLANClientState *vc;
+        VLANClientState *vc = vlan->first_client;
 
-        for(vc = vlan->first_client; vc != NULL; vc = vc->next) {
-            if (vc->fd_read == tap_receive) {
-                TAPState *s = vc->opaque;
+        while (vc) {
+            VLANClientState *next = vc->next;
 
-                if (s->down_script[0])
-                    launch_script(s->down_script, s->down_script_arg, s->fd);
-            }
-#if defined(CONFIG_VDE)
-            if (vc->fd_read == vde_from_qemu) {
-                VDEState *s = vc->opaque;
-                vde_close(s->vde);
-            }
-#endif
+            qemu_del_vlan_client(vc);
+
+            vc = next;
         }
     }
-#endif
 }
 
 void net_client_check(void)
Index: net.h
===================================================================
--- net.h	(revision 7149)
+++ net.h	(revision 7150)
@@ -9,6 +9,7 @@
 
 typedef struct VLANClientState VLANClientState;
 
+typedef void (NetCleanup) (VLANClientState *);
 typedef void (LinkStatusChanged)(VLANClientState *);
 
 struct VLANClientState {
@@ -17,6 +18,7 @@
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
     IOCanRWHandler *fd_can_read;
+    NetCleanup *cleanup;
     LinkStatusChanged *link_status_changed;
     int link_down;
     void *opaque;
@@ -40,6 +42,7 @@
                                       const char *name,
                                       IOReadHandler *fd_read,
                                       IOCanRWHandler *fd_can_read,
+                                      NetCleanup *cleanup,
                                       void *opaque);
 void qemu_del_vlan_client(VLANClientState *vc);
 VLANClientState *qemu_find_vlan_client(VLANState *vlan, void *opaque);
Index: hw/pcnet.c
===================================================================
--- hw/pcnet.c	(revision 7149)
+++ hw/pcnet.c	(revision 7150)
@@ -75,6 +75,7 @@
     uint8_t buffer[4096];
     int tx_busy;
     qemu_irq irq;
+    qemu_irq *reset_irq;
     void (*phys_mem_read)(void *dma_opaque, target_phys_addr_t addr,
                          uint8_t *buf, int len, int do_bswap);
     void (*phys_mem_write)(void *dma_opaque, target_phys_addr_t addr,
@@ -1929,15 +1930,24 @@
     return 0;
 }
 
-static void pcnet_common_init(PCNetState *d, NICInfo *nd)
+static void pcnet_common_cleanup(PCNetState *d)
 {
+    unregister_savevm("pcnet", d);
+
+    qemu_del_timer(d->poll_timer);
+    qemu_free_timer(d->poll_timer);
+}
+
+static void pcnet_common_init(PCNetState *d, NICInfo *nd, NetCleanup *cleanup)
+{
     d->poll_timer = qemu_new_timer(vm_clock, pcnet_poll_timer, d);
 
     d->nd = nd;
 
     if (nd && nd->vlan) {
         d->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                     pcnet_receive, pcnet_can_receive, d);
+                                     pcnet_receive, pcnet_can_receive,
+                                     cleanup, d);
 
         qemu_format_nic_info_str(d->vc, d->nd->macaddr);
     } else {
@@ -1985,6 +1995,22 @@
     cpu_physical_memory_read(addr, buf, len);
 }
 
+static void pci_pcnet_cleanup(VLANClientState *vc)
+{
+    PCNetState *d = vc->opaque;
+
+    pcnet_common_cleanup(d);
+}
+
+static int pci_pcnet_uninit(PCIDevice *dev)
+{
+    PCNetState *d = (PCNetState *)dev;
+
+    cpu_unregister_io_memory(d->mmio_index);
+
+    return 0;
+}
+
 PCIDevice *pci_pcnet_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
     PCNetState *d;
@@ -1997,7 +2023,7 @@
 
     d = (PCNetState *)pci_register_device(bus, "PCNet", sizeof(PCNetState),
                                           devfn, NULL, NULL);
-
+    d->dev.unregister = pci_pcnet_uninit;
     pci_conf = d->dev.config;
 
     pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_AMD);
@@ -2031,7 +2057,8 @@
     d->phys_mem_write = pci_physical_memory_write;
     d->pci_dev = &d->dev;
 
-    pcnet_common_init(d, nd);
+    pcnet_common_init(d, nd, pci_pcnet_cleanup);
+
     return (PCIDevice *)d;
 }
 
@@ -2081,29 +2108,42 @@
     NULL,
 };
 
+static void lance_cleanup(VLANClientState *vc)
+{
+    PCNetState *d = vc->opaque;
+
+    pcnet_common_cleanup(d);
+
+    qemu_free_irqs(d->reset_irq);
+
+    cpu_unregister_io_memory(d->mmio_index);
+
+    qemu_free(d);
+}
+
 void lance_init(NICInfo *nd, target_phys_addr_t leaddr, void *dma_opaque,
                 qemu_irq irq, qemu_irq *reset)
 {
     PCNetState *d;
-    int lance_io_memory;
 
     qemu_check_nic_model(nd, "lance");
 
     d = qemu_mallocz(sizeof(PCNetState));
 
-    lance_io_memory =
+    d->mmio_index =
         cpu_register_io_memory(0, lance_mem_read, lance_mem_write, d);
 
     d->dma_opaque = dma_opaque;
 
-    *reset = *qemu_allocate_irqs(parent_lance_reset, d, 1);
+    d->reset_irq = qemu_allocate_irqs(parent_lance_reset, d, 1);
+    *reset = *d->reset_irq;
 
-    cpu_register_physical_memory(leaddr, 4, lance_io_memory);
+    cpu_register_physical_memory(leaddr, 4, d->mmio_index);
 
     d->irq = irq;
     d->phys_mem_read = ledma_memory_read;
     d->phys_mem_write = ledma_memory_write;
 
-    pcnet_common_init(d, nd);
+    pcnet_common_init(d, nd, lance_cleanup);
 }
 #endif /* TARGET_SPARC */
Index: hw/usb-net.c
===================================================================
--- hw/usb-net.c	(revision 7149)
+++ hw/usb-net.c	(revision 7150)
@@ -1415,14 +1415,20 @@
     return !s->in_len;
 }
 
+static void usbnet_cleanup(VLANClientState *vc)
+{
+    USBNetState *s = vc->opaque;
+
+    rndis_clear_responsequeue(s);
+    qemu_free(s);
+}
+
 static void usb_net_handle_destroy(USBDevice *dev)
 {
     USBNetState *s = (USBNetState *) dev;
 
     /* TODO: remove the nd_table[] entry */
     qemu_del_vlan_client(s->vc);
-    rndis_clear_responsequeue(s);
-    qemu_free(s);
 }
 
 USBDevice *usb_net_init(NICInfo *nd)
@@ -1452,7 +1458,9 @@
     pstrcpy(s->dev.devname, sizeof(s->dev.devname),
                     "QEMU USB Network Interface");
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                    usbnet_receive, usbnet_can_receive, s);
+                                 usbnet_receive,
+                                 usbnet_can_receive,
+                                 usbnet_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, s->mac);
 
Index: hw/mcf_fec.c
===================================================================
--- hw/mcf_fec.c	(revision 7149)
+++ hw/mcf_fec.c	(revision 7150)
@@ -24,6 +24,7 @@
 
 typedef struct {
     qemu_irq *irq;
+    int mmio_index;
     VLANClientState *vc;
     uint32_t irq_state;
     uint32_t eir;
@@ -441,21 +442,30 @@
    mcf_fec_write
 };
 
+static void mcf_fec_cleanup(VLANClientState *vc)
+{
+    mcf_fec_state *s = vc->opaque;
+
+    cpu_unregister_io_memory(s->mmio_index);
+
+    qemu_free(s);
+}
+
 void mcf_fec_init(NICInfo *nd, target_phys_addr_t base, qemu_irq *irq)
 {
     mcf_fec_state *s;
-    int iomemtype;
 
     qemu_check_nic_model(nd, "mcf_fec");
 
     s = (mcf_fec_state *)qemu_mallocz(sizeof(mcf_fec_state));
     s->irq = irq;
-    iomemtype = cpu_register_io_memory(0, mcf_fec_readfn,
-                                       mcf_fec_writefn, s);
-    cpu_register_physical_memory(base, 0x400, iomemtype);
+    s->mmio_index = cpu_register_io_memory(0, mcf_fec_readfn,
+                                           mcf_fec_writefn, s);
+    cpu_register_physical_memory(base, 0x400, s->mmio_index);
 
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 mcf_fec_receive, mcf_fec_can_receive, s);
+                                 mcf_fec_receive, mcf_fec_can_receive,
+                                 mcf_fec_cleanup, s);
     memcpy(s->macaddr, nd->macaddr, 6);
     qemu_format_nic_info_str(s->vc, s->macaddr);
 }
Index: hw/etraxfs_eth.c
===================================================================
--- hw/etraxfs_eth.c	(revision 7149)
+++ hw/etraxfs_eth.c	(revision 7150)
@@ -554,6 +554,16 @@
 	&eth_writel,
 };
 
+static void eth_cleanup(VLANClientState *vc)
+{
+        struct fs_eth *eth = vc->opaque;
+
+        cpu_unregister_io_memory(eth->ethregs);
+
+        qemu_free(eth->dma_out);
+        qemu_free(eth);
+}
+
 void *etraxfs_eth_init(NICInfo *nd, CPUState *env, 
 		       qemu_irq *irq, target_phys_addr_t base, int phyaddr)
 {
@@ -585,7 +595,8 @@
 	cpu_register_physical_memory (base, 0x5c, eth->ethregs);
 
 	eth->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-				       eth_receive, eth_can_receive, eth);
+				       eth_receive, eth_can_receive,
+				       eth_cleanup, eth);
 	eth->vc->opaque = eth;
 	eth->vc->link_status_changed = eth_set_link;
 
Index: hw/stellaris_enet.c
===================================================================
--- hw/stellaris_enet.c	(revision 7149)
+++ hw/stellaris_enet.c	(revision 7150)
@@ -69,6 +69,7 @@
     VLANClientState *vc;
     qemu_irq irq;
     uint8_t macaddr[6];
+    int mmio_index;
 } stellaris_enet_state;
 
 static void stellaris_enet_update(stellaris_enet_state *s)
@@ -384,23 +385,35 @@
     return 0;
 }
 
+static void stellaris_enet_cleanup(VLANClientState *vc)
+{
+    stellaris_enet_state *s = vc->opaque;
+
+    unregister_savevm("stellaris_enet", s);
+
+    cpu_unregister_io_memory(s->mmio_index);
+
+    qemu_free(s);
+}
+
 void stellaris_enet_init(NICInfo *nd, uint32_t base, qemu_irq irq)
 {
     stellaris_enet_state *s;
-    int iomemtype;
 
     qemu_check_nic_model(nd, "stellaris");
 
     s = (stellaris_enet_state *)qemu_mallocz(sizeof(stellaris_enet_state));
-    iomemtype = cpu_register_io_memory(0, stellaris_enet_readfn,
-                                       stellaris_enet_writefn, s);
-    cpu_register_physical_memory(base, 0x00001000, iomemtype);
+    s->mmio_index = cpu_register_io_memory(0, stellaris_enet_readfn,
+                                           stellaris_enet_writefn, s);
+    cpu_register_physical_memory(base, 0x00001000, s->mmio_index);
     s->irq = irq;
     memcpy(s->macaddr, nd->macaddr, 6);
 
     if (nd->vlan) {
         s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                     stellaris_enet_receive, stellaris_enet_can_receive, s);
+                                     stellaris_enet_receive,
+                                     stellaris_enet_can_receive,
+                                     stellaris_enet_cleanup, s);
         qemu_format_nic_info_str(s->vc, s->macaddr);
     }
 
Index: hw/eepro100.c
===================================================================
--- hw/eepro100.c	(revision 7149)
+++ hw/eepro100.c	(revision 7150)
@@ -1710,6 +1710,25 @@
     qemu_put_buffer(f, s->configuration, sizeof(s->configuration));
 }
 
+static void nic_cleanup(VLANClientState *vc)
+{
+    EEPRO100State *s = vc->opaque;
+
+    unregister_savevm(vc->model, s);
+
+    eeprom93xx_free(s->eeprom);
+}
+
+static int pci_nic_uninit(PCIDevice *dev)
+{
+    PCIEEPRO100State *d = (PCIEEPRO100State *) dev;
+    EEPRO100State *s = &d->eepro100;
+
+    cpu_unregister_io_memory(s->mmio_index);
+
+    return 0;
+}
+
 static PCIDevice *nic_init(PCIBus * bus, NICInfo * nd, uint32_t device)
 {
     PCIEEPRO100State *d;
@@ -1720,6 +1739,7 @@
     d = (PCIEEPRO100State *) pci_register_device(bus, nd->model,
                                                  sizeof(PCIEEPRO100State), -1,
                                                  NULL, NULL);
+    d->dev.unregister = pci_nic_uninit;
 
     s = &d->eepro100;
     s->device = device;
@@ -1750,7 +1770,8 @@
     nic_reset(s);
 
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 nic_receive, nic_can_receive, s);
+                                 nic_receive, nic_can_receive,
+                                 nic_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
Index: hw/ne2000.c
===================================================================
--- hw/ne2000.c	(revision 7149)
+++ hw/ne2000.c	(revision 7150)
@@ -140,6 +140,7 @@
     uint8_t curpag;
     uint8_t mult[8]; /* multicast mask array */
     qemu_irq irq;
+    int isa_io_base;
     PCIDevice *pci_dev;
     VLANClientState *vc;
     uint8_t macaddr[6];
@@ -718,6 +719,19 @@
 	return 0;
 }
 
+static void isa_ne2000_cleanup(VLANClientState *vc)
+{
+    NE2000State *s = vc->opaque;
+
+    unregister_savevm("ne2000", s);
+
+    isa_unassign_ioport(s->isa_io_base, 16);
+    isa_unassign_ioport(s->isa_io_base + 0x10, 2);
+    isa_unassign_ioport(s->isa_io_base + 0x1f, 1);
+
+    qemu_free(s);
+}
+
 void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd)
 {
     NE2000State *s;
@@ -736,13 +750,15 @@
 
     register_ioport_write(base + 0x1f, 1, 1, ne2000_reset_ioport_write, s);
     register_ioport_read(base + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
+    s->isa_io_base = base;
     s->irq = irq;
     memcpy(s->macaddr, nd->macaddr, 6);
 
     ne2000_reset(s);
 
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 ne2000_receive, ne2000_can_receive, s);
+                                 ne2000_receive, ne2000_can_receive,
+                                 isa_ne2000_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
@@ -777,6 +793,13 @@
     register_ioport_read(addr + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
 }
 
+static void ne2000_cleanup(VLANClientState *vc)
+{
+    NE2000State *s = vc->opaque;
+
+    unregister_savevm("ne2000", s);
+}
+
 PCIDevice *pci_ne2000_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
     PCINE2000State *d;
@@ -802,7 +825,8 @@
     memcpy(s->macaddr, nd->macaddr, 6);
     ne2000_reset(s);
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 ne2000_receive, ne2000_can_receive, s);
+                                 ne2000_receive, ne2000_can_receive,
+                                 ne2000_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
Index: hw/virtio-net.c
===================================================================
--- hw/virtio-net.c	(revision 7149)
+++ hw/virtio-net.c	(revision 7150)
@@ -570,6 +570,21 @@
     return 0;
 }
 
+static void virtio_net_cleanup(VLANClientState *vc)
+{
+    VirtIONet *n = vc->opaque;
+
+    unregister_savevm("virtio-net", n);
+
+    qemu_free(n->mac_table.macs);
+    qemu_free(n->vlans);
+
+    qemu_del_timer(n->tx_timer);
+    qemu_free_timer(n->tx_timer);
+
+    virtio_cleanup(&n->vdev);
+}
+
 PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
     VirtIONet *n;
@@ -598,7 +613,9 @@
     memcpy(n->mac, nd->macaddr, ETH_ALEN);
     n->status = VIRTIO_NET_S_LINK_UP;
     n->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 virtio_net_receive, virtio_net_can_receive, n);
+                                 virtio_net_receive,
+                                 virtio_net_can_receive,
+                                 virtio_net_cleanup, n);
     n->vc->link_status_changed = virtio_net_set_link_status;
 
     qemu_format_nic_info_str(n->vc, n->mac);
Index: hw/virtio.c
===================================================================
--- hw/virtio.c	(revision 7149)
+++ hw/virtio.c	(revision 7150)
@@ -750,6 +750,13 @@
     virtio_update_irq(vdev);
 }
 
+void virtio_cleanup(VirtIODevice *vdev)
+{
+    if (vdev->config)
+        qemu_free(vdev->config);
+    qemu_free(vdev->vq);
+}
+
 VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                               uint16_t vendor, uint16_t device,
                               uint16_t subvendor, uint16_t subdevice,
Index: hw/virtio.h
===================================================================
--- hw/virtio.h	(revision 7149)
+++ hw/virtio.h	(revision 7150)
@@ -117,6 +117,8 @@
 
 void virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
+void virtio_cleanup(VirtIODevice *vdev);
+
 void virtio_notify_config(VirtIODevice *vdev);
 
 void virtio_queue_set_notification(VirtQueue *vq, int enable);
Index: hw/e1000.c
===================================================================
--- hw/e1000.c	(revision 7149)
+++ hw/e1000.c	(revision 7150)
@@ -1033,6 +1033,14 @@
                                      excluded_regs[i] - 4);
 }
 
+static void
+e1000_cleanup(VLANClientState *vc)
+{
+    E1000State *d = vc->opaque;
+
+    unregister_savevm("e1000", d);
+}
+
 static int
 pci_e1000_uninit(PCIDevice *dev)
 {
@@ -1094,7 +1102,8 @@
     memset(&d->tx, 0, sizeof d->tx);
 
     d->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 e1000_receive, e1000_can_receive, d);
+                                 e1000_receive, e1000_can_receive,
+                                 e1000_cleanup, d);
     d->vc->link_status_changed = e1000_set_link_status;
 
     qemu_format_nic_info_str(d->vc, nd->macaddr);
Index: hw/rtl8139.c
===================================================================
--- hw/rtl8139.c	(revision 7149)
+++ hw/rtl8139.c	(revision 7150)
@@ -3414,6 +3414,33 @@
 }
 #endif /* RTL8139_ONBOARD_TIMER */
 
+static void rtl8139_cleanup(VLANClientState *vc)
+{
+    RTL8139State *s = vc->opaque;
+
+    if (s->cplus_txbuffer) {
+        qemu_free(s->cplus_txbuffer);
+        s->cplus_txbuffer = NULL;
+    }
+
+#ifdef RTL8139_ONBOARD_TIMER
+    qemu_del_timer(s->timer);
+    qemu_free_timer(s->timer);
+#endif
+
+    unregister_savevm("rtl8139", s);
+}
+
+static int pci_rtl8139_uninit(PCIDevice *dev)
+{
+    PCIRTL8139State *d = (PCIRTL8139State *)dev;
+    RTL8139State *s = &d->rtl8139;
+
+    cpu_unregister_io_memory(s->rtl8139_mmio_io_addr);
+
+    return 0;
+}
+
 PCIDevice *pci_rtl8139_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
     PCIRTL8139State *d;
@@ -3424,6 +3451,7 @@
                                               "RTL8139", sizeof(PCIRTL8139State),
                                               devfn,
                                               NULL, NULL);
+    d->dev.unregister = pci_rtl8139_uninit;
     pci_conf = d->dev.config;
     pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REALTEK);
     pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REALTEK_8139);
@@ -3450,7 +3478,8 @@
     memcpy(s->macaddr, nd->macaddr, 6);
     rtl8139_reset(s);
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 rtl8139_receive, rtl8139_can_receive, s);
+                                 rtl8139_receive, rtl8139_can_receive,
+                                 rtl8139_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
Index: hw/musicpal.c
===================================================================
--- hw/musicpal.c	(revision 7149)
+++ hw/musicpal.c	(revision 7150)
@@ -536,6 +536,7 @@
     uint32_t smir;
     uint32_t icr;
     uint32_t imr;
+    int mmio_index;
     int vlan_header;
     uint32_t tx_queue[2];
     uint32_t rx_queue[4];
@@ -745,20 +746,29 @@
     mv88w8618_eth_write
 };
 
+static void eth_cleanup(VLANClientState *vc)
+{
+    mv88w8618_eth_state *s = vc->opaque;
+
+    cpu_unregister_io_memory(s->mmio_index);
+
+    qemu_free(s);
+}
+
 static void mv88w8618_eth_init(NICInfo *nd, uint32_t base, qemu_irq irq)
 {
     mv88w8618_eth_state *s;
-    int iomemtype;
 
     qemu_check_nic_model(nd, "mv88w8618");
 
     s = qemu_mallocz(sizeof(mv88w8618_eth_state));
     s->irq = irq;
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 eth_receive, eth_can_receive, s);
-    iomemtype = cpu_register_io_memory(0, mv88w8618_eth_readfn,
-                                       mv88w8618_eth_writefn, s);
-    cpu_register_physical_memory(base, MP_ETH_SIZE, iomemtype);
+                                 eth_receive, eth_can_receive,
+                                 eth_cleanup, s);
+    s->mmio_index = cpu_register_io_memory(0, mv88w8618_eth_readfn,
+                                           mv88w8618_eth_writefn, s);
+    cpu_register_physical_memory(base, MP_ETH_SIZE, s->mmio_index);
 }
 
 /* LCD register offsets */
Index: hw/mipsnet.c
===================================================================
--- hw/mipsnet.c	(revision 7149)
+++ hw/mipsnet.c	(revision 7150)
@@ -33,6 +33,7 @@
     uint32_t intctl;
     uint8_t rx_buffer[MAX_ETH_FRAME_SIZE];
     uint8_t tx_buffer[MAX_ETH_FRAME_SIZE];
+    int io_base;
     qemu_irq irq;
     VLANClientState *vc;
 } MIPSnetState;
@@ -231,6 +232,17 @@
     return 0;
 }
 
+static void mipsnet_cleanup(VLANClientState *vc)
+{
+    MIPSnetState *s = vc->opaque;
+
+    unregister_savevm("mipsnet", s);
+
+    isa_unassign_ioport(s->io_base, 36);
+
+    qemu_free(s);
+}
+
 void mipsnet_init (int base, qemu_irq irq, NICInfo *nd)
 {
     MIPSnetState *s;
@@ -246,10 +258,12 @@
     register_ioport_write(base, 36, 4, mipsnet_ioport_write, s);
     register_ioport_read(base, 36, 4, mipsnet_ioport_read, s);
 
+    s->io_base = base;
     s->irq = irq;
     if (nd && nd->vlan) {
         s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                     mipsnet_receive, mipsnet_can_receive, s);
+                                     mipsnet_receive, mipsnet_can_receive,
+                                     mipsnet_cleanup, s);
     } else {
         s->vc = NULL;
     }
Index: hw/smc91c111.c
===================================================================
--- hw/smc91c111.c	(revision 7149)
+++ hw/smc91c111.c	(revision 7150)
@@ -42,6 +42,7 @@
     uint8_t int_level;
     uint8_t int_mask;
     uint8_t macaddr[6];
+    int mmio_index;
 } smc91c111_state;
 
 #define RCR_SOFT_RST  0x8000
@@ -690,24 +691,32 @@
     smc91c111_writel
 };
 
+static void smc91c111_cleanup(VLANClientState *vc)
+{
+    smc91c111_state *s = vc->opaque;
+
+    cpu_unregister_io_memory(s->mmio_index);
+    qemu_free(s);
+}
+
 void smc91c111_init(NICInfo *nd, uint32_t base, qemu_irq irq)
 {
     smc91c111_state *s;
-    int iomemtype;
 
     qemu_check_nic_model(nd, "smc91c111");
 
     s = (smc91c111_state *)qemu_mallocz(sizeof(smc91c111_state));
-    iomemtype = cpu_register_io_memory(0, smc91c111_readfn,
-                                       smc91c111_writefn, s);
-    cpu_register_physical_memory(base, 16, iomemtype);
+    s->mmio_index = cpu_register_io_memory(0, smc91c111_readfn,
+                                           smc91c111_writefn, s);
+    cpu_register_physical_memory(base, 16, s->mmio_index);
     s->irq = irq;
     memcpy(s->macaddr, nd->macaddr, 6);
 
     smc91c111_reset(s);
 
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 smc91c111_receive, smc91c111_can_receive, s);
+                                 smc91c111_receive, smc91c111_can_receive,
+                                 smc91c111_cleanup, s);
     qemu_format_nic_info_str(s->vc, s->macaddr);
     /* ??? Save/restore.  */
 }
Index: hw/dp8393x.c
===================================================================
--- hw/dp8393x.c	(revision 7149)
+++ hw/dp8393x.c	(revision 7150)
@@ -156,6 +156,7 @@
     QEMUTimer *watchdog;
     int64_t wt_last_update;
     VLANClientState *vc;
+    int mmio_index;
 
     /* Registers */
     uint8_t cam[16][6];
@@ -858,12 +859,23 @@
     dp8393x_update_irq(s);
 }
 
+static void nic_cleanup(VLANClientState *vc)
+{
+    dp8393xState *s = vc->opaque;
+
+    cpu_unregister_io_memory(s->mmio_index);
+
+    qemu_del_timer(s->watchdog);
+    qemu_free_timer(s->watchdog);
+
+    qemu_free(s);
+}
+
 void dp83932_init(NICInfo *nd, target_phys_addr_t base, int it_shift,
                   qemu_irq irq, void* mem_opaque,
                   void (*memory_rw)(void *opaque, target_phys_addr_t addr, uint8_t *buf, int len, int is_write))
 {
     dp8393xState *s;
-    int io;
 
     qemu_check_nic_model(nd, "dp83932");
 
@@ -877,12 +889,12 @@
     s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
     s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
-                                 nic_receive, nic_can_receive, s);
+                                 nic_receive, nic_can_receive, nic_cleanup, s);
 
     qemu_format_nic_info_str(s->vc, nd->macaddr);
     qemu_register_reset(nic_reset, s);
     nic_reset(s);
 
-    io = cpu_register_io_memory(0, dp8393x_read, dp8393x_write, s);
-    cpu_register_physical_memory(base, 0x40 << it_shift, io);
+    s->mmio_index = cpu_register_io_memory(0, dp8393x_read, dp8393x_write, s);
+    cpu_register_physical_memory(base, 0x40 << it_shift, s->mmio_index);
 }

 ------------------------------------------------------------------------
r7149 | aliguori | 2009-04-17 12:11:03 -0500 (Fri, 17 Apr 2009) | 11 lines
Changed paths:
   M /trunk/hw/eepro100.c

Use NICInfo::model for eepro100 savevm ID string (Mark McLoughlin)

NICInfo::model will always be identical to the device name strings
we're currently passing to nic_init(). Just re-use NICInfo::model.

This makes it clear why we use vc->model for unregister_savevm()
in a subsequent patch.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/eepro100.c
===================================================================
--- hw/eepro100.c	(revision 7148)
+++ hw/eepro100.c	(revision 7149)
@@ -1710,15 +1710,14 @@
     qemu_put_buffer(f, s->configuration, sizeof(s->configuration));
 }
 
-static PCIDevice *nic_init(PCIBus * bus, NICInfo * nd,
-                     const char *name, uint32_t device)
+static PCIDevice *nic_init(PCIBus * bus, NICInfo * nd, uint32_t device)
 {
     PCIEEPRO100State *d;
     EEPRO100State *s;
 
     logout("\n");
 
-    d = (PCIEEPRO100State *) pci_register_device(bus, name,
+    d = (PCIEEPRO100State *) pci_register_device(bus, nd->model,
                                                  sizeof(PCIEEPRO100State), -1,
                                                  NULL, NULL);
 
@@ -1757,24 +1756,23 @@
 
     qemu_register_reset(nic_reset, s);
 
-    register_savevm(name, -1, 3, nic_save, nic_load, s);
+    register_savevm(s->vc->model, -1, 3, nic_save, nic_load, s);
     return (PCIDevice *)d;
 }
 
 PCIDevice *pci_i82551_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-    return nic_init(bus, nd, "i82551", i82551);
-    //~ uint8_t *pci_conf = d->dev.config;
+    return nic_init(bus, nd, i82551);
 }
 
 PCIDevice *pci_i82557b_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-    return nic_init(bus, nd, "i82557b", i82557B);
+    return nic_init(bus, nd, i82557B);
 }
 
 PCIDevice *pci_i82559er_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-    return nic_init(bus, nd, "i82559er", i82559ER);
+    return nic_init(bus, nd, i82559ER);
 }
 
 /* eof */

 ------------------------------------------------------------------------
r7148 | aliguori | 2009-04-17 12:10:59 -0500 (Fri, 17 Apr 2009) | 9 lines
Changed paths:
   M /trunk/hw/hw.h
   M /trunk/savevm.c

Add unregister_savevm() (Mark McLoughlin)

Currently there's no way to unregister a savevm callback, so
e.g. if a NIC is hot-unplugged and a savevm is issued, we'll
segfault.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: savevm.c
===================================================================
--- savevm.c	(revision 7147)
+++ savevm.c	(revision 7148)
@@ -647,6 +647,22 @@
                                 NULL, save_state, load_state, opaque);
 }
 
+void unregister_savevm(const char *idstr, void *opaque)
+{
+    SaveStateEntry **pse;
+
+    pse = &first_se;
+    while (*pse != NULL) {
+        if (strcmp((*pse)->idstr, idstr) == 0 && (*pse)->opaque == opaque) {
+            SaveStateEntry *next = (*pse)->next;
+            qemu_free(*pse);
+            *pse = next;
+            continue;
+        }
+        pse = &(*pse)->next;
+    }
+}
+
 #define QEMU_VM_FILE_MAGIC           0x5145564d
 #define QEMU_VM_FILE_VERSION_COMPAT  0x00000002
 #define QEMU_VM_FILE_VERSION         0x00000003
Index: hw/hw.h
===================================================================
--- hw/hw.h	(revision 7147)
+++ hw/hw.h	(revision 7148)
@@ -239,6 +239,8 @@
                          LoadStateHandler *load_state,
                          void *opaque);
 
+void unregister_savevm(const char *idstr, void *opaque);
+
 typedef void QEMUResetHandler(void *opaque);
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque);

 ------------------------------------------------------------------------
r7147 | aliguori | 2009-04-17 12:10:56 -0500 (Fri, 17 Apr 2009) | 8 lines
Changed paths:
   M /trunk/hw/e1000.c
   M /trunk/hw/mipsnet.c

Remove NICInfo from e1000 and mipsnet state (Mark McLoughlin)

NICInfo isn't used after initialization, so remove it from the driver
state structures.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/e1000.c
===================================================================
--- hw/e1000.c	(revision 7146)
+++ hw/e1000.c	(revision 7147)
@@ -75,7 +75,6 @@
 typedef struct E1000State_st {
     PCIDevice dev;
     VLANClientState *vc;
-    NICInfo *nd;
     int mmio_index;
 
     uint32_t mac_reg[0x8000];
@@ -1078,7 +1077,6 @@
     pci_register_io_region((PCIDevice *)d, 1, IOPORT_SIZE,
                            PCI_ADDRESS_SPACE_IO, ioport_map);
 
-    d->nd = nd;
     memmove(d->eeprom_data, e1000_eeprom_template,
         sizeof e1000_eeprom_template);
     for (i = 0; i < 3; i++)
@@ -1099,7 +1097,7 @@
                                  e1000_receive, e1000_can_receive, d);
     d->vc->link_status_changed = e1000_set_link_status;
 
-    qemu_format_nic_info_str(d->vc, d->nd->macaddr);
+    qemu_format_nic_info_str(d->vc, nd->macaddr);
 
     register_savevm(info_str, -1, 2, nic_save, nic_load, d);
     d->dev.unregister = pci_e1000_uninit;
Index: hw/mipsnet.c
===================================================================
--- hw/mipsnet.c	(revision 7146)
+++ hw/mipsnet.c	(revision 7147)
@@ -35,7 +35,6 @@
     uint8_t tx_buffer[MAX_ETH_FRAME_SIZE];
     qemu_irq irq;
     VLANClientState *vc;
-    NICInfo *nd;
 } MIPSnetState;
 
 static void mipsnet_reset(MIPSnetState *s)
@@ -248,7 +247,6 @@
     register_ioport_read(base, 36, 4, mipsnet_ioport_read, s);
 
     s->irq = irq;
-    s->nd = nd;
     if (nd && nd->vlan) {
         s->vc = qemu_new_vlan_client(nd->vlan, nd->model, nd->name,
                                      mipsnet_receive, mipsnet_can_receive, s);
@@ -256,7 +254,7 @@
         s->vc = NULL;
     }
 
-    qemu_format_nic_info_str(s->vc, s->nd->macaddr);
+    qemu_format_nic_info_str(s->vc, nd->macaddr);
 
     mipsnet_reset(s);
     register_savevm("mipsnet", 0, 0, mipsnet_save, mipsnet_load, s);

 ------------------------------------------------------------------------
r7146 | aliguori | 2009-04-17 12:10:51 -0500 (Fri, 17 Apr 2009) | 8 lines
Changed paths:
   M /trunk/net.c

Remove some useless malloc() checking (Mark McLoughlin)

Now that we abort() on malloc failure, neither qemu_find_vlan() nor
net_tap_fd_init() can fail.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7145)
+++ net.c	(revision 7146)
@@ -1015,8 +1015,6 @@
 	    return -1;
     }
     s = net_tap_fd_init(vlan, model, name, fd);
-    if (!s)
-        return -1;
     snprintf(s->vc->info_str, sizeof(s->vc->info_str),
              "ifname=%s,script=%s,downscript=%s",
              ifname, setup_script, down_script);
@@ -1596,10 +1594,7 @@
         vlan_id = strtol(buf, NULL, 0);
     }
     vlan = qemu_find_vlan(vlan_id);
-    if (!vlan) {
-        fprintf(stderr, "Could not create vlan %d\n", vlan_id);
-        return -1;
-    }
+
     if (get_param_value(buf, sizeof(buf), "name", p)) {
         name = strdup(buf);
     }
@@ -1707,9 +1702,8 @@
         if (get_param_value(buf, sizeof(buf), "fd", p) > 0) {
             fd = strtol(buf, NULL, 0);
             fcntl(fd, F_SETFL, O_NONBLOCK);
-            ret = -1;
-            if (net_tap_fd_init(vlan, device, name, fd))
-                ret = 0;
+            net_tap_fd_init(vlan, device, name, fd);
+            ret = 0;
         } else {
             if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) {
                 ifname[0] = '\0';
@@ -1825,10 +1819,6 @@
     VLANClientState *vc;
 
     vlan = qemu_find_vlan(vlan_id);
-    if (!vlan) {
-        monitor_printf(mon, "can't find vlan %d\n", vlan_id);
-        return;
-    }
 
    for(vc = vlan->first_client; vc != NULL; vc = vc->next)
         if (!strcmp(vc->name, device))

 ------------------------------------------------------------------------
r7145 | aliguori | 2009-04-17 12:10:47 -0500 (Fri, 17 Apr 2009) | 9 lines
Changed paths:
   M /trunk/hw/pci-hotplug.c

Don't fail PCI hotplug if no NIC model is supplied (Mark McLoughlin)

It's perfectly fine to not supply a NIC model when adding
a new NIC - we supply the default model to pci_nic_init()
and it uses that if one wasn't explicitly supplied.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/pci-hotplug.c
===================================================================
--- hw/pci-hotplug.c	(revision 7144)
+++ hw/pci-hotplug.c	(revision 7145)
@@ -37,10 +37,10 @@
 {
     int ret;
 
-    ret = net_client_init ("nic", opts);
-    if (ret < 0 || !nd_table[ret].model)
+    ret = net_client_init("nic", opts);
+    if (ret < 0)
         return NULL;
-    return pci_nic_init (pci_bus, &nd_table[ret], -1, "rtl8139");
+    return pci_nic_init(pci_bus, &nd_table[ret], -1, "rtl8139");
 }
 
 void drive_hot_add(Monitor *mon, const char *pci_addr, const char *opts)

 ------------------------------------------------------------------------
r7144 | aliguori | 2009-04-17 12:10:43 -0500 (Fri, 17 Apr 2009) | 7 lines
Changed paths:
   M /trunk/net.c

Fix error handling in net_client_init() (Mark McLoughlin)

We weren't freeing the name string everywhere.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7143)
+++ net.c	(revision 7144)
@@ -1610,7 +1610,8 @@
 
         if (idx == -1 || nb_nics >= MAX_NICS) {
             fprintf(stderr, "Too Many NICs\n");
-            return -1;
+            ret = -1;
+            goto out;
         }
         nd = &nd_table[idx];
         macaddr = nd->macaddr;
@@ -1624,7 +1625,8 @@
         if (get_param_value(buf, sizeof(buf), "macaddr", p)) {
             if (parse_macaddr(macaddr, buf) < 0) {
                 fprintf(stderr, "invalid syntax for ethernet address\n");
-                return -1;
+                ret = -1;
+                goto out;
             }
         }
         if (get_param_value(buf, sizeof(buf), "model", p)) {
@@ -1664,8 +1666,9 @@
         port = strtol(p, &devname, 10);
         devname++;
         if (port < 1 || port > 65535) {
-            fprintf(stderr, "vmchannel wrong port number\n"); 
-            return -1;
+            fprintf(stderr, "vmchannel wrong port number\n");
+            ret = -1;
+            goto out;
         }
         vmc = malloc(sizeof(struct VMChannel));
         snprintf(name, 20, "vmchannel%ld", port);
@@ -1673,7 +1676,8 @@
         if (!vmc->hd) {
             fprintf(stderr, "qemu: could not open vmchannel device"
                     "'%s'\n", devname);
-            return -1;
+            ret = -1;
+            goto out;
         }
         vmc->port = port;
         slirp_add_exec(3, vmc->hd, 4, port);
@@ -1687,7 +1691,8 @@
         char ifname[64];
         if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) {
             fprintf(stderr, "tap: no interface name\n");
-            return -1;
+            ret = -1;
+            goto out;
         }
         vlan->nb_host_devs++;
         ret = tap_win32_init(vlan, device, name, ifname);
@@ -1734,7 +1739,8 @@
             ret = net_socket_mcast_init(vlan, device, name, buf);
         } else {
             fprintf(stderr, "Unknown socket options: %s\n", p);
-            return -1;
+            ret = -1;
+            goto out;
         }
         vlan->nb_host_devs++;
     } else
@@ -1764,13 +1770,13 @@
 #endif
     {
         fprintf(stderr, "Unknown network device: %s\n", device);
-        if (name)
-            free(name);
-        return -1;
+        ret = -1;
+        goto out;
     }
     if (ret < 0) {
         fprintf(stderr, "Could not initialize device '%s'\n", device);
     }
+out:
     if (name)
         free(name);
     return ret;

 ------------------------------------------------------------------------
r7143 | aliguori | 2009-04-17 12:10:39 -0500 (Fri, 17 Apr 2009) | 8 lines
Changed paths:
   M /trunk/net.c

struct iovec is now universally available (Mark McLoughlin)

struct iovec is now defined in qemu-common.h if needed, so we don't need
the tap code to handle !defined(HAVE_IOVEC).

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: net.c
===================================================================
--- net.c	(revision 7142)
+++ net.c	(revision 7143)
@@ -702,7 +702,6 @@
     char down_script_arg[128];
 } TAPState;
 
-#ifdef HAVE_IOVEC
 static ssize_t tap_receive_iov(void *opaque, const struct iovec *iov,
                                int iovcnt)
 {
@@ -715,7 +714,6 @@
 
     return len;
 }
-#endif
 
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
 {
@@ -762,9 +760,7 @@
     s = qemu_mallocz(sizeof(TAPState));
     s->fd = fd;
     s->vc = qemu_new_vlan_client(vlan, model, name, tap_receive, NULL, s);
-#ifdef HAVE_IOVEC
     s->vc->fd_readv = tap_receive_iov;
-#endif
     qemu_set_fd_handler(s->fd, tap_send, NULL, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
     return s;

 ------------------------------------------------------------------------
r7142 | aliguori | 2009-04-17 12:10:34 -0500 (Fri, 17 Apr 2009) | 7 lines
Changed paths:
   M /trunk/hw/virtio-net.c

Remove stray GSO code from virtio_net (Mark McLoughlin)

Obviously merged from kvm-userspace accidentally.

Signed-off-by: Mark McLoughlin 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/virtio-net.c
===================================================================
--- hw/virtio-net.c	(revision 7141)
+++ hw/virtio-net.c	(revision 7142)
@@ -338,11 +338,6 @@
     if (n->promisc)
         return 1;
 
-#ifdef TAP_VNET_HDR
-    if (tap_has_vnet_hdr(n->vc->vlan->first_client))
-        ptr += sizeof(struct virtio_net_hdr);
-#endif
-
     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
         int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))

 ------------------------------------------------------------------------
r7141 | aliguori | 2009-04-17 09:26:41 -0500 (Fri, 17 Apr 2009) | 11 lines
Changed paths:
   M /trunk/hw/cirrus_vga.c
   M /trunk/hw/vga.c
   M /trunk/hw/vga_int.h

vga: Cleanup dirty logging (Jan Kiszka)

In theory, there are no more quirks in the KVM slot management that
require dirty log start/stop all over the place. We just have to start
the logging each time the mapping may have changed. This patch drops
vga_dirty_log_stop for both standard and cirrus VGA. It also reverts
#6851 as it was obviously a tribute to the old slot system.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/cirrus_vga.c
===================================================================
--- hw/cirrus_vga.c	(revision 7140)
+++ hw/cirrus_vga.c	(revision 7141)
@@ -2618,8 +2618,6 @@
 
 static void map_linear_vram(CirrusVGAState *s)
 {
-    vga_dirty_log_stop((VGAState *)s);
-
     if (!s->map_addr && s->lfb_addr && s->lfb_end) {
         s->map_addr = s->lfb_addr;
         s->map_end = s->lfb_end;
@@ -2631,16 +2629,11 @@
 
     s->lfb_vram_mapped = 0;
 
-    cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000,
-                                (s->vram_offset + s->cirrus_bank_base[0]) | IO_MEM_UNASSIGNED);
-    cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000,
-                                (s->vram_offset + s->cirrus_bank_base[1]) | IO_MEM_UNASSIGNED);
     if (!(s->cirrus_srcptr != s->cirrus_srcptr_end)
         && !((s->sr[0x07] & 0x01) == 0)
         && !((s->gr[0x0B] & 0x14) == 0x14)
         && !(s->gr[0x0B] & 0x02)) {
 
-        vga_dirty_log_stop((VGAState *)s);
         cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000,
                                     (s->vram_offset + s->cirrus_bank_base[0]) | IO_MEM_RAM);
         cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000,
@@ -2658,15 +2651,11 @@
 
 static void unmap_linear_vram(CirrusVGAState *s)
 {
-    vga_dirty_log_stop((VGAState *)s);
-
     if (s->map_addr && s->lfb_addr && s->lfb_end)
         s->map_addr = s->map_end = 0;
 
     cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000,
                                  s->vga_io_memory);
-
-    vga_dirty_log_start((VGAState *)s);
 }
 
 /* Compute the memory access functions */
@@ -3313,8 +3302,6 @@
 {
     CirrusVGAState *s = &((PCICirrusVGAState *)d)->cirrus_vga;
 
-    vga_dirty_log_stop((VGAState *)s);
-
     /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
 				 s->cirrus_linear_io_addr);
@@ -3346,14 +3333,10 @@
     PCICirrusVGAState *pvs = container_of(d, PCICirrusVGAState, dev);
     CirrusVGAState *s = &pvs->cirrus_vga;
 
-    vga_dirty_log_stop((VGAState *)s);
-
     pci_default_write_config(d, address, val, len);
     if (s->map_addr && pvs->dev.io_regions[0].addr == -1)
         s->map_addr = 0;
     cirrus_update_memory_access(s);
-
-    vga_dirty_log_start((VGAState *)s);
 }
 
 void pci_cirrus_vga_init(PCIBus *bus, int vga_ram_size)
Index: hw/vga_int.h
===================================================================
--- hw/vga_int.h	(revision 7140)
+++ hw/vga_int.h	(revision 7141)
@@ -196,7 +196,6 @@
 void vga_reset(void *s);
 
 void vga_dirty_log_start(VGAState *s);
-void vga_dirty_log_stop(VGAState *s);
 
 uint32_t vga_mem_readb(void *opaque, target_phys_addr_t addr);
 void vga_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val);
Index: hw/vga.c
===================================================================
--- hw/vga.c	(revision 7140)
+++ hw/vga.c	(revision 7141)
@@ -1279,8 +1279,6 @@
     vga_draw_glyph8_func *vga_draw_glyph8;
     vga_draw_glyph9_func *vga_draw_glyph9;
 
-    vga_dirty_log_stop(s);
-
     /* compute font data address (in plane 2) */
     v = s->sr[3];
     offset = (((v >> 4) & 1) | ((v << 1) & 6)) * 8192 * 4 + 2;
@@ -1579,7 +1577,6 @@
         cpu_physical_sync_dirty_bitmap(isa_mem_base + 0xa0000, 0xa8000);
         cpu_physical_sync_dirty_bitmap(isa_mem_base + 0xa8000, 0xb0000);
     }
-    vga_dirty_log_start(s);
 }
 
 /*
@@ -1810,7 +1807,6 @@
         return;
     if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
         return;
-    vga_dirty_log_stop(s);
 
     s->rgb_to_pixel =
         rgb_to_pixel_dup_table[get_depth_index(s->ds)];
@@ -2238,17 +2234,6 @@
     }
 }
 
-void vga_dirty_log_stop(VGAState *s)
-{
-    if (kvm_enabled() && s->map_addr)
-        kvm_log_stop(s->map_addr, s->map_end - s->map_addr);
-
-    if (kvm_enabled() && s->lfb_vram_mapped) {
-        kvm_log_stop(isa_mem_base + 0xa0000, 0x8000);
-        kvm_log_stop(isa_mem_base + 0xa8000, 0x8000);
-    }
-}
-
 static void vga_map(PCIDevice *pci_dev, int region_num,
                     uint32_t addr, uint32_t size, int type)
 {
@@ -2489,11 +2474,9 @@
     PCIVGAState *pvs = container_of(d, PCIVGAState, dev);
     VGAState *s = &pvs->vga_state;
 
-    vga_dirty_log_stop(s);
     pci_default_write_config(d, address, val, len);
     if (s->map_addr && pvs->dev.io_regions[0].addr == -1)
         s->map_addr = 0;
-    vga_dirty_log_start(s);
 }
 
 int pci_vga_init(PCIBus *bus, int vga_ram_size,

 ------------------------------------------------------------------------
r7140 | aliguori | 2009-04-17 09:26:37 -0500 (Fri, 17 Apr 2009) | 9 lines
Changed paths:
   M /trunk/hw/vga.c

vga: Fix inconsistent tracking of map_addr (Jan Kiszka)

Only track video RAM mapping in map_addr and use the correct RAM size.
Furthermore, make sure to reset the address in case unmapping took
place via PCI reconfiguration.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/vga.c
===================================================================
--- hw/vga.c	(revision 7139)
+++ hw/vga.c	(revision 7140)
@@ -2258,12 +2258,10 @@
         cpu_register_physical_memory(addr, s->bios_size, s->bios_offset);
     } else {
         cpu_register_physical_memory(addr, s->vram_size, s->vram_offset);
+        s->map_addr = addr;
+        s->map_end = addr + s->vram_size;
+        vga_dirty_log_start(s);
     }
-
-    s->map_addr = addr;
-    s->map_end = addr + VGA_RAM_SIZE;
-
-    vga_dirty_log_start(s);
 }
 
 void vga_common_init(VGAState *s, int vga_ram_size)
@@ -2493,6 +2491,8 @@
 
     vga_dirty_log_stop(s);
     pci_default_write_config(d, address, val, len);
+    if (s->map_addr && pvs->dev.io_regions[0].addr == -1)
+        s->map_addr = 0;
     vga_dirty_log_start(s);
 }
 

 ------------------------------------------------------------------------
r7139 | aliguori | 2009-04-17 09:26:33 -0500 (Fri, 17 Apr 2009) | 16 lines
Changed paths:
   M /trunk/kvm-all.c

kvm: improve handling of overlapping slots (Jan Kiszka)

This reworks the slot management to handle more patterns of
cpu_register_physical_memory*, finally allowing KVM guests to be reset (so
far address remapping on reset broke the slot management).

We could actually handle all possible ones without failing, but a KVM
kernel bug in older versions would force us to track all previous
fragmentations and maintain them (as that bug prevents registering
larger slots that overlap also deleted ones). To remain backward
compatible but avoid overly complicated workarounds, we apply a simpler
workaround that covers all currently used patterns.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: kvm-all.c
===================================================================
--- kvm-all.c	(revision 7138)
+++ kvm-all.c	(revision 7139)
@@ -98,19 +98,31 @@
     return NULL;
 }
 
-static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
+/*
+ * Find overlapping slot with lowest start address
+ */
+static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
+                                            target_phys_addr_t start_addr,
+                                            target_phys_addr_t end_addr)
 {
+    KVMSlot *found = NULL;
     int i;
 
     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
         KVMSlot *mem = &s->slots[i];
 
-        if (start_addr >= mem->start_addr &&
-            start_addr < (mem->start_addr + mem->memory_size))
-            return mem;
+        if (mem->memory_size == 0 ||
+            (found && found->start_addr < mem->start_addr)) {
+            continue;
+        }
+
+        if (end_addr > mem->start_addr &&
+            start_addr < mem->start_addr + mem->memory_size) {
+            found = mem;
+        }
     }
 
-    return NULL;
+    return found;
 }
 
 static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
@@ -567,7 +579,8 @@
 {
     KVMState *s = kvm_state;
     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
-    KVMSlot *mem;
+    KVMSlot *mem, old;
+    int err;
 
     if (start_addr & ~TARGET_PAGE_MASK) {
         fprintf(stderr, "Only page-aligned memory slots supported\n");
@@ -577,55 +590,100 @@
     /* KVM does not support read-only slots */
     phys_offset &= ~IO_MEM_ROM;
 
-    mem = kvm_lookup_slot(s, start_addr);
-    if (mem) {
-        if (flags >= IO_MEM_UNASSIGNED) {
-            mem->memory_size = 0;
-            mem->start_addr = start_addr;
-            mem->phys_offset = 0;
+    while (1) {
+        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+        if (!mem) {
+            break;
+        }
+
+        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
+            (start_addr + size <= mem->start_addr + mem->memory_size) &&
+            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
+            /* The new slot fits into the existing one and comes with
+             * identical parameters - nothing to be done. */
+            return;
+        }
+
+        old = *mem;
+
+        /* unregister the overlapping slot */
+        mem->memory_size = 0;
+        err = kvm_set_user_memory_region(s, mem);
+        if (err) {
+            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
+                    __func__, strerror(-err));
+            abort();
+        }
+
+        /* Workaround for older KVM versions: we can't join slots, even not by
+         * unregistering the previous ones and then registering the larger
+         * slot. We have to maintain the existing fragmentation. Sigh.
+         *
+         * This workaround assumes that the new slot starts at the same
+         * address as the first existing one. If not or if some overlapping
+         * slot comes around later, we will fail (not seen in practice so far)
+         * - and actually require a recent KVM version. */
+        if (old.start_addr == start_addr && old.memory_size < size &&
+            flags < IO_MEM_UNASSIGNED) {
+            mem = kvm_alloc_slot(s);
+            mem->memory_size = old.memory_size;
+            mem->start_addr = old.start_addr;
+            mem->phys_offset = old.phys_offset;
             mem->flags = 0;
 
-            kvm_set_user_memory_region(s, mem);
-        } else if (start_addr >= mem->start_addr &&
-                   (start_addr + size) <= (mem->start_addr +
-                                           mem->memory_size)) {
-            KVMSlot slot;
-            target_phys_addr_t mem_start;
-            ram_addr_t mem_size, mem_offset;
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
+                        strerror(-err));
+                abort();
+            }
 
-            /* Not splitting */
-            if ((phys_offset - (start_addr - mem->start_addr)) == 
-                mem->phys_offset)
-                return;
+            start_addr += old.memory_size;
+            phys_offset += old.memory_size;
+            size -= old.memory_size;
+            continue;
+        }
 
-            /* unregister whole slot */
-            memcpy(&slot, mem, sizeof(slot));
-            mem->memory_size = 0;
-            kvm_set_user_memory_region(s, mem);
+        /* register prefix slot */
+        if (old.start_addr < start_addr) {
+            mem = kvm_alloc_slot(s);
+            mem->memory_size = start_addr - old.start_addr;
+            mem->start_addr = old.start_addr;
+            mem->phys_offset = old.phys_offset;
+            mem->flags = 0;
 
-            /* register prefix slot */
-            mem_start = slot.start_addr;
-            mem_size = start_addr - slot.start_addr;
-            mem_offset = slot.phys_offset;
-            if (mem_size)
-                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error registering prefix slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
+        }
 
-            /* register new slot */
-            kvm_set_phys_mem(start_addr, size, phys_offset);
+        /* register suffix slot */
+        if (old.start_addr + old.memory_size > start_addr + size) {
+            ram_addr_t size_delta;
 
-            /* register suffix slot */
-            mem_start = start_addr + size;
-            mem_offset += mem_size + size;
-            mem_size = slot.memory_size - mem_size - size;
-            if (mem_size)
-                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
+            mem = kvm_alloc_slot(s);
+            mem->start_addr = start_addr + size;
+            size_delta = mem->start_addr - old.start_addr;
+            mem->memory_size = old.memory_size - size_delta;
+            mem->phys_offset = old.phys_offset + size_delta;
+            mem->flags = 0;
 
-            return;
-        } else {
-            printf("Registering overlapping slot\n");
-            abort();
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error registering suffix slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
         }
     }
+
+    /* in case the KVM bug workaround already "consumed" the new slot */
+    if (!size)
+        return;
+
     /* KVM does not need to know about this memory */
     if (flags >= IO_MEM_UNASSIGNED)
         return;
@@ -636,8 +694,12 @@
     mem->phys_offset = phys_offset;
     mem->flags = 0;
 
-    kvm_set_user_memory_region(s, mem);
-    /* FIXME deal with errors */
+    err = kvm_set_user_memory_region(s, mem);
+    if (err) {
+        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+                strerror(-err));
+        abort();
+    }
 }
 
 int kvm_ioctl(KVMState *s, int type, ...)

 ------------------------------------------------------------------------
r7138 | aliguori | 2009-04-17 09:26:29 -0500 (Fri, 17 Apr 2009) | 14 lines
Changed paths:
   M /trunk/kvm-all.c
   M /trunk/kvm.h

kvm: Add sanity checks to slot management (Jan Kiszka)

Fail loudly if we run out of memory slots.

Make sure that dirty log start/stop works with consistent memory regions
by reporting invalid parameters. This reveals several inconsistencies in
the vga code, patch to fix them follows later in this series.

And, for simplicity reasons, also catch and report unaligned memory
regions passed to kvm_set_phys_mem (KVM works on page basis).

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: kvm-all.c
===================================================================
--- kvm-all.c	(revision 7137)
+++ kvm-all.c	(revision 7138)
@@ -76,6 +76,25 @@
             return &s->slots[i];
     }
 
+    fprintf(stderr, "%s: no free slot available\n", __func__);
+    abort();
+}
+
+static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
+                                         target_phys_addr_t start_addr,
+                                         target_phys_addr_t end_addr)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
+        KVMSlot *mem = &s->slots[i];
+
+        if (start_addr == mem->start_addr &&
+            end_addr == mem->start_addr + mem->memory_size) {
+            return mem;
+        }
+    }
+
     return NULL;
 }
 
@@ -163,14 +182,16 @@
 /*
  * dirty pages logging control
  */
-static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_addr_t end_addr,
-                                      unsigned flags,
+static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
+                                      ram_addr_t size, unsigned flags,
                                       unsigned mask)
 {
     KVMState *s = kvm_state;
-    KVMSlot *mem = kvm_lookup_slot(s, phys_addr);
+    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
     if (mem == NULL)  {
-            dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr);
+            fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
+                    TARGET_FMT_plx "\n", __func__, phys_addr,
+                    phys_addr + size - 1);
             return -EINVAL;
     }
 
@@ -184,16 +205,16 @@
     return kvm_set_user_memory_region(s, mem);
 }
 
-int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
+int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
 {
-        return kvm_dirty_pages_log_change(phys_addr, end_addr,
+        return kvm_dirty_pages_log_change(phys_addr, size,
                                           KVM_MEM_LOG_DIRTY_PAGES,
                                           KVM_MEM_LOG_DIRTY_PAGES);
 }
 
-int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
+int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
 {
-        return kvm_dirty_pages_log_change(phys_addr, end_addr,
+        return kvm_dirty_pages_log_change(phys_addr, size,
                                           0,
                                           KVM_MEM_LOG_DIRTY_PAGES);
 }
@@ -203,21 +224,24 @@
  * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
  * This means all bits are set to dirty.
  *
- * @start_add: start of logged region. This is what we use to search the memslot
+ * @start_add: start of logged region.
  * @end_addr: end of logged region.
  */
-void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
+void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+                                    target_phys_addr_t end_addr)
 {
     KVMState *s = kvm_state;
     KVMDirtyLog d;
-    KVMSlot *mem = kvm_lookup_slot(s, start_addr);
+    KVMSlot *mem = kvm_lookup_matching_slot(s, start_addr, end_addr);
     unsigned long alloc_size;
     ram_addr_t addr;
     target_phys_addr_t phys_addr = start_addr;
 
-    dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset);
+    dprintf("sync addr: " TARGET_FMT_lx " into %lx\n", start_addr,
+            mem->phys_offset);
     if (mem == NULL) {
-            fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
+            fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
+                    TARGET_FMT_plx "\n", __func__, phys_addr, end_addr - 1);
             return;
     }
 
@@ -545,6 +569,11 @@
     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
     KVMSlot *mem;
 
+    if (start_addr & ~TARGET_PAGE_MASK) {
+        fprintf(stderr, "Only page-aligned memory slots supported\n");
+        abort();
+    }
+
     /* KVM does not support read-only slots */
     phys_offset &= ~IO_MEM_ROM;
 
Index: kvm.h
===================================================================
--- kvm.h	(revision 7137)
+++ kvm.h	(revision 7138)
@@ -40,10 +40,11 @@
                       ram_addr_t size,
                       ram_addr_t phys_offset);
 
-void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr);
+void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+                                    target_phys_addr_t end_addr);
 
-int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len);
-int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len);
+int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size);
+int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size);
 
 int kvm_has_sync_mmu(void);
 

 ------------------------------------------------------------------------
r7137 | aliguori | 2009-04-17 09:26:25 -0500 (Fri, 17 Apr 2009) | 10 lines
Changed paths:
   M /trunk/kvm-all.c

kvm: Cleanup unmap condition in kvm_set_phys_mem (Jan Kiszka)

Testing for TLB_MMIO on unmap makes no sense as A) that flag belongs to
CPUTLBEntry and not to io_memory slots or physical addresses and B) we
already use a different condition before mapping. So make this test
consistent.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: kvm-all.c
===================================================================
--- kvm-all.c	(revision 7136)
+++ kvm-all.c	(revision 7137)
@@ -550,7 +550,7 @@
 
     mem = kvm_lookup_slot(s, start_addr);
     if (mem) {
-        if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
+        if (flags >= IO_MEM_UNASSIGNED) {
             mem->memory_size = 0;
             mem->start_addr = start_addr;
             mem->phys_offset = 0;

 ------------------------------------------------------------------------
r7136 | aliguori | 2009-04-17 09:26:21 -0500 (Fri, 17 Apr 2009) | 5 lines
Changed paths:
   M /trunk/hw/acpi.c

kvm: Apply SMM-already-initialized workaround on reset (Jan Kiszka)

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: hw/acpi.c
===================================================================
--- hw/acpi.c	(revision 7135)
+++ hw/acpi.c	(revision 7136)
@@ -483,13 +483,18 @@
 
 static void piix4_reset(void *opaque)
 {
-	PIIX4PMState *s = opaque;
-	uint8_t *pci_conf = s->dev.config;
+    PIIX4PMState *s = opaque;
+    uint8_t *pci_conf = s->dev.config;
 
-	pci_conf[0x58] = 0;
-	pci_conf[0x59] = 0;
-	pci_conf[0x5a] = 0;
-	pci_conf[0x5b] = 0;
+    pci_conf[0x58] = 0;
+    pci_conf[0x59] = 0;
+    pci_conf[0x5a] = 0;
+    pci_conf[0x5b] = 0;
+
+    if (kvm_enabled()) {
+        /* Mark SMM as already inited (until KVM supports SMM). */
+        pci_conf[0x5B] = 0x02;
+    }
 }
 
 i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,

 ------------------------------------------------------------------------
r7135 | aliguori | 2009-04-17 09:26:17 -0500 (Fri, 17 Apr 2009) | 7 lines
Changed paths:
   M /trunk/vl.c

kvm: Sync CPU state on reset (Jan Kiszka)

Make sure KVM gets informed about the reset CPU state.

Signed-off-by: Jan Kiszka 
Signed-off-by: Anthony Liguori 

 ------------------------------------------------------------------------

Index: vl.c
===================================================================
--- vl.c	(revision 7134)
+++ vl.c	(revision 7135)
@@ -3623,6 +3623,8 @@
     for(re = first_reset_entry; re != NULL; re = re->next) {
         re->func(re->opaque);
     }
+    if (kvm_enabled())
+        kvm_sync_vcpus();
 }
 
 void qemu_system_reset_request(void)

 ------------------------------------------------------------------------
r7134 | aurel32 | 2009-04-17 08:50:32 -0500 (Fri, 17 Apr 2009) | 7 lines
Changed paths:
   M /trunk/configure
   M /trunk/linux-user/syscall.c

linux-user: fix inotify syscalls

Configure test was broken, so the breakage of the #ifdef'd
code was not noticed.

Signed-off-by: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7133)
+++ linux-user/syscall.c	(revision 7134)
@@ -486,6 +486,7 @@
 #endif /* CONFIG_ATFILE */
 
 #ifdef CONFIG_INOTIFY
+#include <sys/inotify.h>
 
 #if defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)
 static int sys_inotify_init(void)
@@ -502,7 +503,7 @@
 #if defined(TARGET_NR_inotify_rm_watch) && defined(__NR_inotify_rm_watch)
 static int sys_inotify_rm_watch(int fd, int32_t wd)
 {
-  return (inotify_rm_watch(fd,pathname, wd));
+  return (inotify_rm_watch(fd, wd));
 }
 #endif
 #else
Index: configure
===================================================================
--- configure	(revision 7133)
+++ configure	(revision 7134)
@@ -1195,7 +1195,7 @@
 main(void)
 {
 	/* try to start inotify */
-	return inotify_init(void);
+	return inotify_init();
 }
 EOF
   if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then

 ------------------------------------------------------------------------
r7133 | aurel32 | 2009-04-17 08:17:26 -0500 (Fri, 17 Apr 2009) | 3 lines
Changed paths:
   M /trunk/target-mips/translate.c

Revert "target-mips: fix call to check_*() functions"

This reverts commit r7127, r7132 is a better fix for that.
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7132)
+++ target-mips/translate.c	(revision 7133)
@@ -6378,13 +6378,10 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
-            }
-
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
                 gen_cmpabs_s(func-48, fp0, fp1, cc);
                 opn = condnames_abs[func-48];
             } else {
@@ -6743,17 +6740,16 @@
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
-            if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
-            }
-            check_cp1_registers(ctx, fs | ft);
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
+                check_cp1_registers(ctx, fs | ft);
                 gen_cmpabs_d(func-48, fp0, fp1, cc);
                 opn = condnames_abs[func-48];
             } else {
+                check_cp1_registers(ctx, fs | ft);
                 gen_cmp_d(func-48, fp0, fp1, cc);
                 opn = condnames[func-48];
             }
@@ -7222,22 +7218,6 @@
     int store = 0;
     TCGv t0 = tcg_temp_new();
 
-    switch (opc) {
-    case OPC_LWXC1:
-    case OPC_SWXC1:
-        check_cop1x(ctx);
-        break;
-    case OPC_LDXC1:
-    case OPC_SDXC1:
-        check_cop1x(ctx);
-        check_cp1_registers(ctx, fd);
-        break;
-    case OPC_LUXC1:
-    case OPC_SUXC1:
-        check_cp1_64bitmode(ctx);
-        break;
-    }
-
     if (base == 0) {
         gen_load_gpr(t0, index);
     } else if (index == 0) {
@@ -7251,6 +7231,7 @@
     save_cpu_state(ctx, 0);
     switch (opc) {
     case OPC_LWXC1:
+        check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -7262,6 +7243,8 @@
         opn = "lwxc1";
         break;
     case OPC_LDXC1:
+        check_cop1x(ctx);
+        check_cp1_registers(ctx, fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -7272,6 +7255,7 @@
         opn = "ldxc1";
         break;
     case OPC_LUXC1:
+        check_cp1_64bitmode(ctx);
         tcg_gen_andi_tl(t0, t0, ~0x7);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7283,6 +7267,7 @@
         opn = "luxc1";
         break;
     case OPC_SWXC1:
+        check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv t1 = tcg_temp_new();
@@ -7297,6 +7282,8 @@
         store = 1;
         break;
     case OPC_SDXC1:
+        check_cop1x(ctx);
+        check_cp1_registers(ctx, fs);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -7308,6 +7295,7 @@
         store = 1;
         break;
     case OPC_SUXC1:
+        check_cp1_64bitmode(ctx);
         tcg_gen_andi_tl(t0, t0, ~0x7);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();

 ------------------------------------------------------------------------
r7132 | aurel32 | 2009-04-17 08:11:42 -0500 (Fri, 17 Apr 2009) | 6 lines
Changed paths:
   M /trunk/target-mips/translate.c

target-mips: simplify exception generation

There is no need to exit the tb after a call to helper_raise_exception
as it already calls cpu_loop_exit().

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7131)
+++ target-mips/translate.c	(revision 7132)
@@ -793,8 +793,6 @@
     gen_helper_raise_exception_err(texcp, terr);
     tcg_temp_free_i32(terr);
     tcg_temp_free_i32(texcp);
-    gen_helper_interrupt_restart();
-    tcg_gen_exit_tb(0);
 }
 
 static inline void
@@ -802,8 +800,6 @@
 {
     save_cpu_state(ctx, 1);
     gen_helper_0i(raise_exception, excp);
-    gen_helper_interrupt_restart();
-    tcg_gen_exit_tb(0);
 }
 
 /* Addresses computation */

 ------------------------------------------------------------------------
r7131 | pbrook | 2009-04-16 10:17:02 -0500 (Thu, 16 Apr 2009) | 4 lines
Changed paths:
   M /trunk/linux-user/qemu.h
   M /trunk/linux-user/signal.c
   M /trunk/linux-user/syscall.c

Translate signal values in exit status.

Signed-off-by: Paul Brook 

 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7130)
+++ linux-user/syscall.c	(revision 7131)
@@ -3643,6 +3643,20 @@
 }
 #endif
 
+/* Map host to target signal numbers for the wait family of syscalls.
+   Assume all other status bits are the same.  */
+static int host_to_target_waitstatus(int status)
+{
+    if (WIFSIGNALED(status)) {
+        return host_to_target_signal(WTERMSIG(status)) | (status & ~0x7f);
+    }
+    if (WIFSTOPPED(status)) {
+        return (host_to_target_signal(WSTOPSIG(status)) << 8)
+               | (status & 0xff);
+    }
+    return status;
+}
+
 int get_osversion(void)
 {
     static int osversion;
@@ -3786,7 +3800,7 @@
             int status;
             ret = get_errno(waitpid(arg1, &status, arg3));
             if (!is_error(ret) && arg2
-                && put_user_s32(status, arg2))
+                && put_user_s32(host_to_target_waitstatus(status), arg2))
                 goto efault;
         }
         break;
@@ -5136,6 +5150,7 @@
             ret = get_errno(wait4(arg1, &status, arg3, rusage_ptr));
             if (!is_error(ret)) {
                 if (status_ptr) {
+                    status = host_to_target_waitstatus(status);
                     if (put_user_s32(status, status_ptr))
                         goto efault;
                 }
Index: linux-user/qemu.h
===================================================================
--- linux-user/qemu.h	(revision 7130)
+++ linux-user/qemu.h	(revision 7131)
@@ -201,6 +201,7 @@
 void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info);
 void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo);
 int target_to_host_signal(int sig);
+int host_to_target_signal(int sig);
 long do_sigreturn(CPUState *env);
 long do_rt_sigreturn(CPUState *env);
 abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c	(revision 7130)
+++ linux-user/signal.c	(revision 7131)
@@ -102,7 +102,7 @@
             : on_sig_stack(sp) ? SS_ONSTACK : 0);
 }
 
-static inline int host_to_target_signal(int sig)
+int host_to_target_signal(int sig)
 {
     if (sig > 64)
         return sig;

 ------------------------------------------------------------------------
r7130 | aurel32 | 2009-04-16 09:17:14 -0500 (Thu, 16 Apr 2009) | 11 lines
Changed paths:
   M /trunk/linux-user/syscall.c

linux-user: fix getcwd syscall

The patch called "prefer glibc over direct syscalls" (commit 7118)
replaced the direct getcwd syscall with a call to glibc's getcwd(). With
this change, sys_getcwd returns -1 on error and 0 otherwise.
This is a problem because the getcwd syscall must return the number
of bytes written to the buffer, including the terminating '\0'.

Signed-off-by: Arnaud Patard 
Acked-By: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7129)
+++ linux-user/syscall.c	(revision 7130)
@@ -293,7 +293,7 @@
       /* getcwd() sets errno */
       return (-1);
   }
-  return (0);
+  return strlen(buf)+1;
 }
 
 #ifdef CONFIG_ATFILE

 ------------------------------------------------------------------------
r7129 | aurel32 | 2009-04-16 07:57:58 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/target-ppc/helper.h

target-ppc: mark a few helpers TCG_CALL_CONST and/or TCG_CALL_PURE

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-ppc/helper.h
===================================================================
--- target-ppc/helper.h	(revision 7128)
+++ target-ppc/helper.h	(revision 7129)
@@ -31,23 +31,23 @@
 DEF_HELPER_4(lscbx, tl, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_2(mulhd, i64, i64, i64)
-DEF_HELPER_2(mulhdu, i64, i64, i64)
+DEF_HELPER_FLAGS_2(mulhd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(mulhdu, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
 DEF_HELPER_2(mulldo, i64, i64, i64)
 #endif
 
-DEF_HELPER_1(cntlzw, tl, tl)
-DEF_HELPER_1(popcntb, tl, tl)
+DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
 DEF_HELPER_2(sraw, tl, tl, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_1(cntlzd, tl, tl)
-DEF_HELPER_1(popcntb_64, tl, tl)
+DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(popcntb_64, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
 DEF_HELPER_2(srad, tl, tl, tl)
 #endif
 
-DEF_HELPER_1(cntlsw32, i32, i32)
-DEF_HELPER_1(cntlzw32, i32, i32)
-DEF_HELPER_2(brinc, tl, tl, tl)
+DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(cntlzw32, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_2(brinc, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
 
 DEF_HELPER_0(float_check_status, void)
 #ifdef CONFIG_SOFTFLOAT
@@ -335,22 +335,22 @@
 DEF_HELPER_1(6xx_tlbi, void, tl)
 DEF_HELPER_1(74xx_tlbd, void, tl)
 DEF_HELPER_1(74xx_tlbi, void, tl)
-DEF_HELPER_0(tlbia, void)
-DEF_HELPER_1(tlbie, void, tl)
+DEF_HELPER_FLAGS_0(tlbia, TCG_CALL_CONST, void)
+DEF_HELPER_FLAGS_1(tlbie, TCG_CALL_CONST, void, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_1(load_slb, tl, tl)
-DEF_HELPER_2(store_slb, void, tl, tl)
-DEF_HELPER_0(slbia, void)
-DEF_HELPER_1(slbie, void, tl)
+DEF_HELPER_FLAGS_1(load_slb, TCG_CALL_CONST, tl, tl)
+DEF_HELPER_FLAGS_2(store_slb, TCG_CALL_CONST, void, tl, tl)
+DEF_HELPER_FLAGS_0(slbia, TCG_CALL_CONST, void)
+DEF_HELPER_FLAGS_1(slbie, TCG_CALL_CONST, void, tl)
 #endif
-DEF_HELPER_1(load_sr, tl, tl);
-DEF_HELPER_2(store_sr, void, tl, tl)
+DEF_HELPER_FLAGS_1(load_sr, TCG_CALL_CONST, tl, tl);
+DEF_HELPER_FLAGS_2(store_sr, TCG_CALL_CONST, void, tl, tl)
 
-DEF_HELPER_1(602_mfrom, tl, tl)
+DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
 #endif
 
 DEF_HELPER_3(dlmzb, tl, tl, tl, i32)
-DEF_HELPER_1(clcs, tl, i32)
+DEF_HELPER_FLAGS_1(clcs, TCG_CALL_CONST | TCG_CALL_PURE, tl, i32)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_1(rac, tl, tl)
 #endif

 ------------------------------------------------------------------------
r7128 | aurel32 | 2009-04-16 07:57:50 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/target-mips/translate.c

target-mips: fix revision r7126

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7127)
+++ target-mips/translate.c	(revision 7128)
@@ -7295,7 +7295,7 @@
             tcg_gen_extu_i32_tl(t1, fp0);
             tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
             tcg_temp_free_i32(fp0);
-            tcg_temp_free_i32(t1);
+            tcg_temp_free(t1);
         }
         opn = "swxc1";
         store = 1;

 ------------------------------------------------------------------------
r7127 | aurel32 | 2009-04-16 06:51:19 -0500 (Thu, 16 Apr 2009) | 7 lines
Changed paths:
   M /trunk/target-mips/translate.c

target-mips: fix call to check_*() functions

check_*() functions may ultimately call generate_exception(), which ends
with a call to tcg_gen_exit_tb(). As a consequence, we have to make sure
that no TCG temp variables are live across a call to a check_*() function.

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7126)
+++ target-mips/translate.c	(revision 7127)
@@ -6382,10 +6382,13 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
+            if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
+            }
+
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
                 gen_cmpabs_s(func-48, fp0, fp1, cc);
                 opn = condnames_abs[func-48];
             } else {
@@ -6744,16 +6747,17 @@
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
+            if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
+            }
+            check_cp1_registers(ctx, fs | ft);
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
-                check_cp1_registers(ctx, fs | ft);
                 gen_cmpabs_d(func-48, fp0, fp1, cc);
                 opn = condnames_abs[func-48];
             } else {
-                check_cp1_registers(ctx, fs | ft);
                 gen_cmp_d(func-48, fp0, fp1, cc);
                 opn = condnames[func-48];
             }
@@ -7222,6 +7226,22 @@
     int store = 0;
     TCGv t0 = tcg_temp_new();
 
+    switch (opc) {
+    case OPC_LWXC1:
+    case OPC_SWXC1:
+        check_cop1x(ctx);
+        break;
+    case OPC_LDXC1:
+    case OPC_SDXC1:
+        check_cop1x(ctx);
+        check_cp1_registers(ctx, fd);
+        break;
+    case OPC_LUXC1:
+    case OPC_SUXC1:
+        check_cp1_64bitmode(ctx);
+        break;
+    }
+
     if (base == 0) {
         gen_load_gpr(t0, index);
     } else if (index == 0) {
@@ -7235,7 +7255,6 @@
     save_cpu_state(ctx, 0);
     switch (opc) {
     case OPC_LWXC1:
-        check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -7247,8 +7266,6 @@
         opn = "lwxc1";
         break;
     case OPC_LDXC1:
-        check_cop1x(ctx);
-        check_cp1_registers(ctx, fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -7259,7 +7276,6 @@
         opn = "ldxc1";
         break;
     case OPC_LUXC1:
-        check_cp1_64bitmode(ctx);
         tcg_gen_andi_tl(t0, t0, ~0x7);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7271,7 +7287,6 @@
         opn = "luxc1";
         break;
     case OPC_SWXC1:
-        check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv t1 = tcg_temp_new();
@@ -7286,8 +7301,6 @@
         store = 1;
         break;
     case OPC_SDXC1:
-        check_cop1x(ctx);
-        check_cp1_registers(ctx, fs);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -7299,7 +7312,6 @@
         store = 1;
         break;
     case OPC_SUXC1:
-        check_cp1_64bitmode(ctx);
         tcg_gen_andi_tl(t0, t0, ~0x7);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();

 ------------------------------------------------------------------------
r7126 | aurel32 | 2009-04-16 06:51:11 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/target-mips/translate.c

target-mips: optimize gen_flt3_ldst()

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7125)
+++ target-mips/translate.c	(revision 7126)
@@ -7221,7 +7221,6 @@
     const char *opn = "extended float load/store";
     int store = 0;
     TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
 
     if (base == 0) {
         gen_load_gpr(t0, index);
@@ -7240,8 +7239,8 @@
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
-            tcg_gen_trunc_tl_i32(fp0, t1);
+            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+            tcg_gen_trunc_tl_i32(fp0, t0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7275,11 +7274,13 @@
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
+            TCGv t1 = tcg_temp_new();
 
             gen_load_fpr32(fp0, fs);
             tcg_gen_extu_i32_tl(t1, fp0);
             tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
             tcg_temp_free_i32(fp0);
+            tcg_temp_free_i32(t1);
         }
         opn = "swxc1";
         store = 1;
@@ -7312,7 +7313,6 @@
         break;
     }
     tcg_temp_free(t0);
-    tcg_temp_free(t1);
     MIPS_DEBUG("%s %s, %s(%s)", opn, fregnames[store ? fs : fd],
                regnames[index], regnames[base]);
 }

 ------------------------------------------------------------------------
r7125 | aurel32 | 2009-04-16 06:51:03 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/target-mips/translate.c

target-mips: optimize gen_flt_ldst()

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7124)
+++ target-mips/translate.c	(revision 7125)
@@ -1180,12 +1180,10 @@
     case OPC_LWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            TCGv t1 = tcg_temp_new();
 
-            tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
-            tcg_gen_trunc_tl_i32(fp0, t1);
+            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+            tcg_gen_trunc_tl_i32(fp0, t0);
             gen_store_fpr32(fp0, ft);
-            tcg_temp_free(t1);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwc1";

 ------------------------------------------------------------------------
r7124 | pbrook | 2009-04-16 05:56:43 -0500 (Thu, 16 Apr 2009) | 4 lines
Changed paths:
   M /trunk/target-mips/translate.c

Stop translation after a syscall instruction.

Signed-off-by: Paul Brook 

 ------------------------------------------------------------------------

Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 7123)
+++ target-mips/translate.c	(revision 7124)
@@ -7697,6 +7697,7 @@
             break;
         case OPC_SYSCALL:
             generate_exception(ctx, EXCP_SYSCALL);
+            ctx->bstate = BS_STOP;
             break;
         case OPC_BREAK:
             generate_exception(ctx, EXCP_BREAK);

 ------------------------------------------------------------------------
r7123 | aurel32 | 2009-04-16 04:58:41 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/configure

configure: display debug tcg status in summary

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: configure
===================================================================
--- configure	(revision 7122)
+++ configure	(revision 7123)
@@ -1263,6 +1263,7 @@
 echo "host CPU          $cpu"
 echo "host big endian   $bigendian"
 echo "target list       $target_list"
+echo "tcg debug enabled $debug_tcg"
 echo "gprof enabled     $gprof"
 echo "sparse enabled    $sparse"
 echo "strip binaries    $strip_opt"

 ------------------------------------------------------------------------
r7122 | aurel32 | 2009-04-16 04:58:30 -0500 (Thu, 16 Apr 2009) | 3 lines
Changed paths:
   M /trunk/tcg/tcg.c

tcg: make sure NDEBUG is defined before including <assert.h>

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 7121)
+++ tcg/tcg.c	(revision 7122)
@@ -25,6 +25,13 @@
 /* define it to use liveness analysis (better code) */
 #define USE_LIVENESS_ANALYSIS
 
+#include "config.h"
+
+#ifndef DEBUG_TCG
+/* define it to suppress various consistency checks (faster) */
+#define NDEBUG
+#endif
+
 #include 
 #include 
 #include 
@@ -38,15 +45,9 @@
 #include 
 #endif
 
-#include "config.h"
 #include "qemu-common.h"
 #include "cache-utils.h"
 
-#ifndef DEBUG_TCG
-/* define it to suppress various consistency checks (faster) */
-#define NDEBUG
-#endif
-
 /* Note: the long term plan is to reduce the dependancies on the QEMU
    CPU definitions. Currently they are used for qemu_ld/st
    instructions */

 ------------------------------------------------------------------------
r7121 | aurel32 | 2009-04-15 14:48:17 -0500 (Wed, 15 Apr 2009) | 5 lines
Changed paths:
   M /trunk/configure

fix a typo introduced in r7118

Reported by Stefan Weil

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: configure
===================================================================
--- configure	(revision 7120)
+++ configure	(revision 7121)
@@ -1181,7 +1181,7 @@
   fi
 fi
 
-# Check for initofy functions when we are building linux-user
+# Check for inotify functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
 # have syscall stubs for these implemented.  In that case we
 # don't provide them even if kernel supports them.

 ------------------------------------------------------------------------
r7120 | aurel32 | 2009-04-15 12:12:01 -0500 (Wed, 15 Apr 2009) | 3 lines
Changed paths:
   M /trunk/linux-user/syscall.c

linux-user: fix warnings introduced by r7118

Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7119)
+++ linux-user/syscall.c	(revision 7120)
@@ -314,7 +314,7 @@
   return (fchmodat(dirfd, pathname, mode, flags));
 }
 #endif
-#ifdef TARGET_NR_fchownat
+#if defined(TARGET_NR_fchownat) && defined(USE_UID16)
 static int sys_fchownat(int dirfd, const char *pathname, uid_t owner,
     gid_t group, int flags)
 {

 ------------------------------------------------------------------------
r7119 | aurel32 | 2009-04-15 11:18:38 -0500 (Wed, 15 Apr 2009) | 13 lines
Changed paths:
   M /trunk/linux-user/signal.c

linux-user: proper exit code for uncaught signals

The proper exit code for dieing from an uncaught signal is -<signal>.
The kernel doesn't allow exit() or _exit() to pass a negative value.
To get the proper exit code we need to actually die from an uncaught signal.

A default signal handler is installed, we send ourself a signal
and we wait for it to arrive.

Patch originates from Scratchbox

Signed-off-by: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c	(revision 7118)
+++ linux-user/signal.c	(revision 7119)
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include <assert.h>
 #include 
 
 #include "qemu.h"
@@ -352,22 +353,34 @@
 static void QEMU_NORETURN force_sig(int sig)
 {
     int host_sig;
+    struct sigaction act;
     host_sig = target_to_host_signal(sig);
     fprintf(stderr, "qemu: uncaught target signal %d (%s) - exiting\n",
             sig, strsignal(host_sig));
-#if 1
     gdb_signalled(thread_env, sig);
-    _exit(-host_sig);
-#else
-    {
-        struct sigaction act;
-        sigemptyset(&act.sa_mask);
-        act.sa_flags = SA_SIGINFO;
-        act.sa_sigaction = SIG_DFL;
-        sigaction(SIGABRT, &act, NULL);
-        abort();
-    }
-#endif
+
+    /* The proper exit code for dieing from an uncaught signal is
+     * -<signal>.  The kernel doesn't allow exit() or _exit() to pass
+     * a negative value.  To get the proper exit code we need to
+     * actually die from an uncaught signal.  Here the default signal
+     * handler is installed, we send ourself a signal and we wait for
+     * it to arrive. */
+    sigfillset(&act.sa_mask);
+    act.sa_handler = SIG_DFL;
+    sigaction(host_sig, &act, NULL);
+
+    /* For some reason raise(host_sig) doesn't send the signal when
+     * statically linked on x86-64. */
+    kill(getpid(), host_sig);
+
+    /* Make sure the signal isn't masked (just reuse the mask inside
+    of act) */
+    sigdelset(&act.sa_mask, host_sig);
+    sigsuspend(&act.sa_mask);
+
+    /* unreachable */
+    assert(0);
+
 }
 
 /* queue a signal so that it will be send to the virtual CPU as soon

 ------------------------------------------------------------------------
r7118 | aurel32 | 2009-04-15 11:12:13 -0500 (Wed, 15 Apr 2009) | 12 lines
Changed paths:
   M /trunk/configure
   M /trunk/linux-user/syscall.c

linux-user: prefer glibc over direct syscalls

The openat/*at syscalls are incredibly common with modern coreutils,
calling them directly via syscalls breaks for example fakeroot. Use
glibc stubs whenever directly available and provide old syscall
calling for people still using older libc.

Patch originally from Mika Westerberg, Adapted to
apply to current trunk and cleaned up by Riku Voipio.

Signed-off-by: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7117)
+++ linux-user/syscall.c	(revision 7118)
@@ -53,6 +53,7 @@
 #include 
 #include 
 #include 
+#include <sys/utsname.h>
 //#include 
 #include 
 #include 
@@ -200,7 +201,229 @@
     return -ENOSYS;
 }
 #endif
-_syscall1(int,sys_uname,struct new_utsname *,buf)
+#if TARGET_ABI_BITS == 32
+_syscall3(int, sys_getdents, uint, fd, struct linux_dirent *, dirp, uint, count);
+#endif
+#if defined(TARGET_NR_getdents64) && defined(__NR_getdents64)
+_syscall3(int, sys_getdents64, uint, fd, struct linux_dirent64 *, dirp, uint, count);
+#endif
+_syscall2(int, sys_getpriority, int, which, int, who);
+#if !defined (__x86_64__)
+_syscall5(int, _llseek,  uint,  fd, ulong, hi, ulong, lo,
+          loff_t *, res, uint, wh);
+#endif
+_syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo)
+_syscall3(int,sys_syslog,int,type,char*,bufp,int,len)
+#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
+_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
+#endif
+#if defined(TARGET_NR_tkill) && defined(__NR_tkill)
+_syscall2(int,sys_tkill,int,tid,int,sig)
+#endif
+#ifdef __NR_exit_group
+_syscall1(int,exit_group,int,error_code)
+#endif
+#if defined(TARGET_NR_set_tid_address) && defined(__NR_set_tid_address)
+_syscall1(int,set_tid_address,int *,tidptr)
+#endif
+#if defined(USE_NPTL)
+#if defined(TARGET_NR_futex) && defined(__NR_futex)
+_syscall6(int,sys_futex,int *,uaddr,int,op,int,val,
+          const struct timespec *,timeout,int *,uaddr2,int,val3)
+#endif
+#endif
+
+static bitmask_transtbl fcntl_flags_tbl[] = {
+  { TARGET_O_ACCMODE,   TARGET_O_WRONLY,    O_ACCMODE,   O_WRONLY,    },
+  { TARGET_O_ACCMODE,   TARGET_O_RDWR,      O_ACCMODE,   O_RDWR,      },
+  { TARGET_O_CREAT,     TARGET_O_CREAT,     O_CREAT,     O_CREAT,     },
+  { TARGET_O_EXCL,      TARGET_O_EXCL,      O_EXCL,      O_EXCL,      },
+  { TARGET_O_NOCTTY,    TARGET_O_NOCTTY,    O_NOCTTY,    O_NOCTTY,    },
+  { TARGET_O_TRUNC,     TARGET_O_TRUNC,     O_TRUNC,     O_TRUNC,     },
+  { TARGET_O_APPEND,    TARGET_O_APPEND,    O_APPEND,    O_APPEND,    },
+  { TARGET_O_NONBLOCK,  TARGET_O_NONBLOCK,  O_NONBLOCK,  O_NONBLOCK,  },
+  { TARGET_O_SYNC,      TARGET_O_SYNC,      O_SYNC,      O_SYNC,      },
+  { TARGET_FASYNC,      TARGET_FASYNC,      FASYNC,      FASYNC,      },
+  { TARGET_O_DIRECTORY, TARGET_O_DIRECTORY, O_DIRECTORY, O_DIRECTORY, },
+  { TARGET_O_NOFOLLOW,  TARGET_O_NOFOLLOW,  O_NOFOLLOW,  O_NOFOLLOW,  },
+  { TARGET_O_LARGEFILE, TARGET_O_LARGEFILE, O_LARGEFILE, O_LARGEFILE, },
+#if defined(O_DIRECT)
+  { TARGET_O_DIRECT,    TARGET_O_DIRECT,    O_DIRECT,    O_DIRECT,    },
+#endif
+  { 0, 0, 0, 0 }
+};
+
+#define COPY_UTSNAME_FIELD(dest, src) \
+  do { \
+      /* __NEW_UTS_LEN doesn't include terminating null */ \
+      (void) strncpy((dest), (src), __NEW_UTS_LEN); \
+      (dest)[__NEW_UTS_LEN] = '\0'; \
+  } while (0)
+
+static int sys_uname(struct new_utsname *buf)
+{
+  struct utsname uts_buf;
+
+  if (uname(&uts_buf) < 0)
+      return (-1);
+
+  /*
+   * Just in case these have some differences, we
+   * translate utsname to new_utsname (which is the
+   * struct linux kernel uses).
+   */
+
+  bzero(buf, sizeof (*buf));
+  COPY_UTSNAME_FIELD(buf->sysname, uts_buf.sysname);
+  COPY_UTSNAME_FIELD(buf->nodename, uts_buf.nodename);
+  COPY_UTSNAME_FIELD(buf->release, uts_buf.release);
+  COPY_UTSNAME_FIELD(buf->version, uts_buf.version);
+  COPY_UTSNAME_FIELD(buf->machine, uts_buf.machine);
+#ifdef _GNU_SOURCE
+  COPY_UTSNAME_FIELD(buf->domainname, uts_buf.domainname);
+#endif
+  return (0);
+
+#undef COPY_UTSNAME_FIELD
+}
+
+static int sys_getcwd1(char *buf, size_t size)
+{
+  if (getcwd(buf, size) == NULL) {
+      /* getcwd() sets errno */
+      return (-1);
+  }
+  return (0);
+}
+
+#ifdef CONFIG_ATFILE
+/*
+ * Host system seems to have atfile syscall stubs available.  We
+ * now enable them one by one as specified by target syscall_nr.h.
+ */
+
+#ifdef TARGET_NR_faccessat
+static int sys_faccessat(int dirfd, const char *pathname, int mode, int flags)
+{
+  return (faccessat(dirfd, pathname, mode, flags));
+}
+#endif
+#ifdef TARGET_NR_fchmodat
+static int sys_fchmodat(int dirfd, const char *pathname, mode_t mode, int flags)
+{
+  return (fchmodat(dirfd, pathname, mode, flags));
+}
+#endif
+#ifdef TARGET_NR_fchownat
+static int sys_fchownat(int dirfd, const char *pathname, uid_t owner,
+    gid_t group, int flags)
+{
+  return (fchownat(dirfd, pathname, owner, group, flags));
+}
+#endif
+#ifdef __NR_fstatat64
+static int sys_fstatat64(int dirfd, const char *pathname, struct stat *buf,
+    int flags)
+{
+  return (fstatat(dirfd, pathname, buf, flags));
+}
+#endif
+#ifdef __NR_newfstatat
+static int sys_newfstatat(int dirfd, const char *pathname, struct stat *buf,
+    int flags)
+{
+  return (fstatat(dirfd, pathname, buf, flags));
+}
+#endif
+#ifdef TARGET_NR_futimesat
+static int sys_futimesat(int dirfd, const char *pathname,
+    const struct timeval times[2])
+{
+  return (futimesat(dirfd, pathname, times));
+}
+#endif
+#ifdef TARGET_NR_linkat
+static int sys_linkat(int olddirfd, const char *oldpath,
+    int newdirfd, const char *newpath, int flags)
+{
+  return (linkat(olddirfd, oldpath, newdirfd, newpath, flags));
+}
+#endif
+#ifdef TARGET_NR_mkdirat
+static int sys_mkdirat(int dirfd, const char *pathname, mode_t mode)
+{
+  return (mkdirat(dirfd, pathname, mode));
+}
+#endif
+#ifdef TARGET_NR_mknodat
+static int sys_mknodat(int dirfd, const char *pathname, mode_t mode,
+    dev_t dev)
+{
+  return (mknodat(dirfd, pathname, mode, dev));
+}
+#endif
+#ifdef TARGET_NR_openat
+static int sys_openat(int dirfd, const char *pathname, int flags, ...)
+{
+  /*
+   * open(2) has extra parameter 'mode' when called with
+   * flag O_CREAT.
+   */
+  if ((flags & O_CREAT) != 0) {
+      va_list ap;
+      mode_t mode;
+
+      /*
+       * Get the 'mode' parameter and translate it to
+       * host bits.
+       */
+      va_start(ap, flags);
+      mode = va_arg(ap, mode_t);
+      mode = target_to_host_bitmask(mode, fcntl_flags_tbl);
+      va_end(ap);
+
+      return (openat(dirfd, pathname, flags, mode));
+  }
+  return (openat(dirfd, pathname, flags));
+}
+#endif
+#ifdef TARGET_NR_readlinkat
+static int sys_readlinkat(int dirfd, const char *pathname, char *buf, size_t bufsiz)
+{
+  return (readlinkat(dirfd, pathname, buf, bufsiz));
+}
+#endif
+#ifdef TARGET_NR_renameat
+static int sys_renameat(int olddirfd, const char *oldpath,
+    int newdirfd, const char *newpath)
+{
+  return (renameat(olddirfd, oldpath, newdirfd, newpath));
+}
+#endif
+#ifdef TARGET_NR_symlinkat
+static int sys_symlinkat(const char *oldpath, int newdirfd, const char *newpath)
+{
+  return (symlinkat(oldpath, newdirfd, newpath));
+}
+#endif
+#ifdef TARGET_NR_unlinkat
+static int sys_unlinkat(int dirfd, const char *pathname, int flags)
+{
+  return (unlinkat(dirfd, pathname, flags));
+}
+#endif
+#ifdef TARGET_NR_utimensat
+static int sys_utimensat(int dirfd, const char *pathname,
+    const struct timespec times[2], int flags)
+{
+  return (utimensat(dirfd, pathname, times, flags));
+}
+#endif
+#else /* !CONFIG_ATFILE */
+
+/*
+ * Try direct syscalls instead
+ */
 #if defined(TARGET_NR_faccessat) && defined(__NR_faccessat)
 _syscall4(int,sys_faccessat,int,dirfd,const char *,pathname,int,mode,int,flags)
 #endif
@@ -221,21 +444,14 @@
 _syscall3(int,sys_futimesat,int,dirfd,const char *,pathname,
          const struct timeval *,times)
 #endif
-_syscall2(int,sys_getcwd1,char *,buf,size_t,size)
-#if TARGET_ABI_BITS == 32
-_syscall3(int, sys_getdents, uint, fd, struct linux_dirent *, dirp, uint, count);
+#if (defined(TARGET_NR_newfstatat) || defined(TARGET_NR_fstatat64) ) && \
+        defined(__NR_newfstatat)
+_syscall4(int,sys_newfstatat,int,dirfd,const char *,pathname,
+          struct stat *,buf,int,flags)
 #endif
-#if defined(TARGET_NR_getdents64) && defined(__NR_getdents64)
-_syscall3(int, sys_getdents64, uint, fd, struct linux_dirent64 *, dirp, uint, count);
-#endif
-_syscall2(int, sys_getpriority, int, which, int, who);
-#if !defined (__x86_64__)
-_syscall5(int, _llseek,  uint,  fd, ulong, hi, ulong, lo,
-          loff_t *, res, uint, wh);
-#endif
 #if defined(TARGET_NR_linkat) && defined(__NR_linkat)
 _syscall5(int,sys_linkat,int,olddirfd,const char *,oldpath,
-	  int,newdirfd,const char *,newpath,int,flags)
+      int,newdirfd,const char *,newpath,int,flags)
 #endif
 #if defined(TARGET_NR_mkdirat) && defined(__NR_mkdirat)
 _syscall3(int,sys_mkdirat,int,dirfd,const char *,pathname,mode_t,mode)
@@ -244,11 +460,6 @@
 _syscall4(int,sys_mknodat,int,dirfd,const char *,pathname,
           mode_t,mode,dev_t,dev)
 #endif
-#if (defined(TARGET_NR_newfstatat) || defined(TARGET_NR_fstatat64) ) && \
-        defined(__NR_newfstatat)
-_syscall4(int,sys_newfstatat,int,dirfd,const char *,pathname,
-          struct stat *,buf,int,flags)
-#endif
 #if defined(TARGET_NR_openat) && defined(__NR_openat)
 _syscall4(int,sys_openat,int,dirfd,const char *,pathname,int,flags,mode_t,mode)
 #endif
@@ -260,24 +471,10 @@
 _syscall4(int,sys_renameat,int,olddirfd,const char *,oldpath,
           int,newdirfd,const char *,newpath)
 #endif
-_syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo)
 #if defined(TARGET_NR_symlinkat) && defined(__NR_symlinkat)
 _syscall3(int,sys_symlinkat,const char *,oldpath,
           int,newdirfd,const char *,newpath)
 #endif
-_syscall3(int,sys_syslog,int,type,char*,bufp,int,len)
-#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
-_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
-#endif
-#if defined(TARGET_NR_tkill) && defined(__NR_tkill)
-_syscall2(int,sys_tkill,int,tid,int,sig)
-#endif
-#ifdef __NR_exit_group
-_syscall1(int,exit_group,int,error_code)
-#endif
-#if defined(TARGET_NR_set_tid_address) && defined(__NR_set_tid_address)
-_syscall1(int,set_tid_address,int *,tidptr)
-#endif
 #if defined(TARGET_NR_unlinkat) && defined(__NR_unlinkat)
 _syscall3(int,sys_unlinkat,int,dirfd,const char *,pathname,int,flags)
 #endif
@@ -285,22 +482,37 @@
 _syscall4(int,sys_utimensat,int,dirfd,const char *,pathname,
           const struct timespec *,tsp,int,flags)
 #endif
+
+#endif /* CONFIG_ATFILE */
+
+#ifdef CONFIG_INOTIFY
+
 #if defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)
-_syscall0(int,sys_inotify_init)
+static int sys_inotify_init(void)
+{
+  return (inotify_init());
+}
 #endif
 #if defined(TARGET_NR_inotify_add_watch) && defined(__NR_inotify_add_watch)
-_syscall3(int,sys_inotify_add_watch,int,fd,const char *,pathname,uint32_t,mask)
+static int sys_inotify_add_watch(int fd,const char *pathname, int32_t mask)
+{
+  return (inotify_add_watch(fd, pathname, mask));
+}
 #endif
 #if defined(TARGET_NR_inotify_rm_watch) && defined(__NR_inotify_rm_watch)
-_syscall2(int,sys_inotify_rm_watch,int,fd,uint32_t,wd)
+static int sys_inotify_rm_watch(int fd, int32_t wd)
+{
+  return (inotify_rm_watch(fd,pathname, wd));
+}
 #endif
-#if defined(USE_NPTL)
-#if defined(TARGET_NR_futex) && defined(__NR_futex)
-_syscall6(int,sys_futex,int *,uaddr,int,op,int,val,
-          const struct timespec *,timeout,int *,uaddr2,int,val3)
-#endif
-#endif
+#else
+/* Userspace can usually survive runtime without inotify */
+#undef TARGET_NR_inotify_init
+#undef TARGET_NR_inotify_add_watch
+#undef TARGET_NR_inotify_rm_watch
+#endif /* CONFIG_INOTIFY  */
 
+
 extern int personality(int);
 extern int flock(int, int);
 extern int setfsuid(int);
@@ -2580,26 +2792,6 @@
 	{ 0, 0, 0, 0 }
 };
 
-static bitmask_transtbl fcntl_flags_tbl[] = {
-	{ TARGET_O_ACCMODE,   TARGET_O_WRONLY,    O_ACCMODE,   O_WRONLY,    },
-	{ TARGET_O_ACCMODE,   TARGET_O_RDWR,      O_ACCMODE,   O_RDWR,      },
-	{ TARGET_O_CREAT,     TARGET_O_CREAT,     O_CREAT,     O_CREAT,     },
-	{ TARGET_O_EXCL,      TARGET_O_EXCL,      O_EXCL,      O_EXCL,      },
-	{ TARGET_O_NOCTTY,    TARGET_O_NOCTTY,    O_NOCTTY,    O_NOCTTY,    },
-	{ TARGET_O_TRUNC,     TARGET_O_TRUNC,     O_TRUNC,     O_TRUNC,     },
-	{ TARGET_O_APPEND,    TARGET_O_APPEND,    O_APPEND,    O_APPEND,    },
-	{ TARGET_O_NONBLOCK,  TARGET_O_NONBLOCK,  O_NONBLOCK,  O_NONBLOCK,  },
-	{ TARGET_O_SYNC,      TARGET_O_SYNC,      O_SYNC,      O_SYNC,      },
-	{ TARGET_FASYNC,      TARGET_FASYNC,      FASYNC,      FASYNC,      },
-	{ TARGET_O_DIRECTORY, TARGET_O_DIRECTORY, O_DIRECTORY, O_DIRECTORY, },
-	{ TARGET_O_NOFOLLOW,  TARGET_O_NOFOLLOW,  O_NOFOLLOW,  O_NOFOLLOW,  },
-	{ TARGET_O_LARGEFILE, TARGET_O_LARGEFILE, O_LARGEFILE, O_LARGEFILE, },
-#if defined(O_DIRECT)
-	{ TARGET_O_DIRECT,    TARGET_O_DIRECT,    O_DIRECT,    O_DIRECT,    },
-#endif
-	{ 0, 0, 0, 0 }
-};
-
 #if defined(TARGET_I386)
 
 /* NOTE: there is really one LDT for all the threads */
Index: configure
===================================================================
--- configure	(revision 7117)
+++ configure	(revision 7118)
@@ -1156,6 +1156,53 @@
   fi
 fi
 
+#
+# Check for xxxat() functions when we are building linux-user
+# emulator.  This is done because older glibc versions don't
+# have syscall stubs for these implemented.
+#
+atfile=no
+if [ "$linux_user" = "yes" ] ; then
+  cat > $TMPC << EOF
+#define _ATFILE_SOURCE
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int
+main(void)
+{
+	/* try to unlink nonexisting file */
+	return (unlinkat(AT_FDCWD, "nonexistent_file", 0));
+}
+EOF
+  if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then
+    atfile=yes
+  fi
+fi
+
+# Check for initofy functions when we are building linux-user
+# emulator.  This is done because older glibc versions don't
+# have syscall stubs for these implemented.  In that case we
+# don't provide them even if kernel supports them.
+#
+inotify=no
+if [ "$linux_user" = "yes" ] ; then
+  cat > $TMPC << EOF
+#include <sys/inotify.h>
+
+int
+main(void)
+{
+	/* try to start inotify */
+	return inotify_init(void);
+}
+EOF
+  if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then
+    inotify=yes
+  fi
+fi
+
 # Check if tools are available to build documentation.
 if [ -x "`which texi2html 2>/dev/null`" ] && \
    [ -x "`which pod2man 2>/dev/null`" ]; then
@@ -1544,6 +1591,12 @@
   echo "CONFIG_CURSES=yes" >> $config_mak
   echo "CURSES_LIBS=-lcurses" >> $config_mak
 fi
+if test "$atfile" = "yes" ; then
+  echo "#define CONFIG_ATFILE 1" >> $config_h
+fi
+if test "$inotify" = "yes" ; then
+  echo "#define CONFIG_INOTIFY 1" >> $config_h
+fi
 if test "$brlapi" = "yes" ; then
   echo "CONFIG_BRLAPI=yes" >> $config_mak
   echo "#define CONFIG_BRLAPI 1" >> $config_h

 ------------------------------------------------------------------------
r7117 | aurel32 | 2009-04-15 11:12:06 -0500 (Wed, 15 Apr 2009) | 9 lines
Changed paths:
   M /trunk/linux-user/syscall.c

linux-user: removed unnecessary MAX_SOCK_ADDR checks for socket syscalls

- This check is not needed because kernel will check whether given
  buffer is too small and there is no upper limit for size of the buffer.

From: Mika Westerberg 

Signed-off-by: Riku Voipio 
Signed-off-by: Aurelien Jarno 
 ------------------------------------------------------------------------

Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 7116)
+++ linux-user/syscall.c	(revision 7117)
@@ -1208,16 +1208,13 @@
     return get_errno(socket(domain, type, protocol));
 }
 
-/* MAX_SOCK_ADDR from linux/net/socket.c */
-#define MAX_SOCK_ADDR	128
-
 /* do_bind() Must return target values and target errnos. */
 static abi_long do_bind(int sockfd, abi_ulong target_addr,
                         socklen_t addrlen)
 {
     void *addr;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     addr = alloca(addrlen+1);
@@ -1232,7 +1229,7 @@
 {
     void *addr;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     addr = alloca(addrlen);
@@ -1307,7 +1304,7 @@
     if (get_user_u32(addrlen, target_addrlen_addr))
         return -TARGET_EFAULT;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     addr = alloca(addrlen);
@@ -1332,7 +1329,7 @@
     if (get_user_u32(addrlen, target_addrlen_addr))
         return -TARGET_EFAULT;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     addr = alloca(addrlen);
@@ -1360,7 +1357,7 @@
     if (get_user_u32(addrlen, target_addrlen_addr))
         return -TARGET_EFAULT;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     addr = alloca(addrlen);
@@ -1398,7 +1395,7 @@
     void *host_msg;
     abi_long ret;
 
-    if (addrlen < 0 || addrlen > MAX_SOCK_ADDR)
+    if (addrlen < 0)
         return -TARGET_EINVAL;
 
     host_msg = lock_user(VERIFY_READ, msg, len, 1);
@@ -1433,7 +1430,7 @@
             ret = -TARGET_EFAULT;
             goto fail;
         }
-        if (addrlen < 0 || addrlen > MAX_SOCK_ADDR) {
+        if (addrlen < 0) {
             ret = -TARGET_EINVAL;
             goto fail;
         }