author      Jan Beulich <jbeulich@suse.com>            2013-07-02 11:34:13 +0200
committer   Jan Beulich <jbeulich@suse.com>            2013-07-02 11:34:13 +0200
commit      c5ad3f985e69b5db90c7518b175cb1523f1b7172 (patch)
tree        d1d1ad365fadc92fecc4724ed7243519b29c2a37
parent      f080c74e6a74c7b0875ab304d62d2a07af1557a5 (diff)
- Update Xen patches to 3.10-final. (refs: rpm-3.10.0-2.3.gc5ad3f9, openSUSE-13.1-Milestone3)
-rw-r--r--  patches.xen/ipv6-no-autoconf                                                |    8
-rw-r--r--  patches.xen/pci-guestdev                                                    |   22
-rw-r--r--  patches.xen/pci-reserve                                                     |    8
-rw-r--r--  patches.xen/xen-netback-kernel-threads                                      |   13
-rw-r--r--  patches.xen/xen-x86-EFI                                                     |    9
-rw-r--r--  patches.xen/xen3-auto-xen-arch.diff                                         | 1409
-rw-r--r--  patches.xen/xen3-auto-xen-kconfig.diff                                      |   28
-rw-r--r--  patches.xen/xen3-patch-2.6.19                                               |  197
-rw-r--r--  patches.xen/xen3-patch-2.6.20                                               |   67
-rw-r--r--  patches.xen/xen3-patch-2.6.21                                               |   97
-rw-r--r--  patches.xen/xen3-patch-2.6.22                                               |  452
-rw-r--r--  patches.xen/xen3-patch-2.6.23                                               |  121
-rw-r--r--  patches.xen/xen3-patch-2.6.24                                               |  244
-rw-r--r--  patches.xen/xen3-patch-2.6.25                                               | 2579
-rw-r--r--  patches.xen/xen3-patch-2.6.26                                               | 1462
-rw-r--r--  patches.xen/xen3-patch-2.6.32                                               |   54
-rw-r--r--  patches.xen/xen3-patch-3.10 (renamed from patches.xen/xen3-patch-3.10-rc6) |   82
-rw-r--r--  patches.xen/xen3-patch-3.2                                                  |    5
-rw-r--r--  patches.xen/xen3-patch-3.3                                                  |   17
-rw-r--r--  patches.xen/xen3-patch-3.7                                                  |   47
-rw-r--r--  patches.xen/xen3-patch-3.9                                                  |   82
-rw-r--r--  series.conf                                                                 |    2
22 files changed, 1165 insertions(+), 5840 deletions(-)
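
In an openSUSE kernel-source checkout, series.conf lists the patch files above in application order, so the two-line change to it in this commit plausibly just swaps in the renamed patch. A minimal sketch of that hunk, assuming standard quilt-series syntax (illustrative only, not the verbatim file contents):

     patches.xen/xen3-patch-3.9
    -patches.xen/xen3-patch-3.10-rc6
    +patches.xen/xen3-patch-3.10

The per-patch diffs follow; they are diffs of the patch files themselves, mostly refreshed hunk offsets and timestamps against the 3.10-final tree.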
diff --git a/patches.xen/ipv6-no-autoconf b/patches.xen/ipv6-no-autoconf index b3095067df..1cd217177e 100644 --- a/patches.xen/ipv6-no-autoconf +++ b/patches.xen/ipv6-no-autoconf @@ -15,9 +15,9 @@ This patch makes autoconf (DAD and router discovery) depend on the interface's ability to do multicast. Turning off multicast for an interface before bringing it up will suppress autoconfiguration. ---- head.orig/net/ipv6/addrconf.c 2013-06-20 14:56:43.000000000 +0200 -+++ head/net/ipv6/addrconf.c 2013-06-20 14:58:40.000000000 +0200 -@@ -3186,6 +3186,7 @@ static void addrconf_dad_start(struct in +--- head.orig/net/ipv6/addrconf.c 2013-07-02 09:20:54.000000000 +0200 ++++ head/net/ipv6/addrconf.c 2013-07-02 09:21:28.000000000 +0200 +@@ -3189,6 +3189,7 @@ static void addrconf_dad_start(struct in goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || @@ -25,7 +25,7 @@ interface before bringing it up will suppress autoconfiguration. idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { -@@ -3288,6 +3289,7 @@ static void addrconf_dad_completed(struc +@@ -3291,6 +3292,7 @@ static void addrconf_dad_completed(struc if (ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && diff --git a/patches.xen/pci-guestdev b/patches.xen/pci-guestdev index 0d7f189d26..d055caabde 100644 --- a/patches.xen/pci-guestdev +++ b/patches.xen/pci-guestdev @@ -28,7 +28,7 @@ Acked-by: jbeulich@novell.com include/uapi/xen/public/iomulti.h | 50 ++ 15 files changed, 2522 insertions(+), 1 deletion(-) ---- head.orig/Documentation/kernel-parameters.txt 2013-05-23 08:57:01.000000000 +0200 +--- head.orig/Documentation/kernel-parameters.txt 2013-07-02 09:20:54.000000000 +0200 +++ head/Documentation/kernel-parameters.txt 2013-05-23 15:36:49.000000000 +0200 @@ -978,6 +978,24 @@ bytes respectively. Such letter suffixes Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0. 
@@ -66,7 +66,7 @@ Acked-by: jbeulich@novell.com reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode Format: <reboot_mode>[,<reboot_mode2>[,...]] See arch/*/kernel/reboot.c or arch/*/kernel/process.c ---- head.orig/drivers/acpi/pci_root.c 2013-06-04 11:48:43.000000000 +0200 +--- head.orig/drivers/acpi/pci_root.c 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/acpi/pci_root.c 2013-06-04 13:47:47.000000000 +0200 @@ -374,6 +374,41 @@ out: } @@ -156,7 +156,7 @@ Acked-by: jbeulich@novell.com + return FALSE; +} +#endif ---- head.orig/drivers/pci/Kconfig 2012-12-11 04:30:57.000000000 +0100 +--- head.orig/drivers/pci/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/pci/Kconfig 2012-10-22 16:26:32.000000000 +0200 @@ -43,6 +43,20 @@ config PCI_REALLOC_ENABLE_AUTO @@ -179,7 +179,7 @@ Acked-by: jbeulich@novell.com config PCI_STUB tristate "PCI Stub driver" depends on PCI ---- head.orig/drivers/pci/Makefile 2013-02-19 00:58:34.000000000 +0100 +--- head.orig/drivers/pci/Makefile 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/pci/Makefile 2011-01-31 14:31:28.000000000 +0100 @@ -7,6 +7,10 @@ obj-y += access.o bus.o probe.o host-br irq.o vpd.o setup-bus.o @@ -2104,7 +2104,7 @@ Acked-by: jbeulich@novell.com +int pci_iomul_switch_io_allocated(const struct pci_iomul_switch *); +void pci_iomul_get_lock_switch(struct pci_dev *, struct pci_iomul_switch **, + struct pci_iomul_slot **); ---- head.orig/drivers/pci/pci.c 2013-05-23 08:56:16.000000000 +0200 +--- head.orig/drivers/pci/pci.c 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/pci/pci.c 2013-05-23 15:37:06.000000000 +0200 @@ -3760,7 +3760,7 @@ void pci_reassigndev_resource_alignment( @@ -2115,9 +2115,9 @@ Acked-by: jbeulich@novell.com return; if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && ---- head.orig/drivers/pci/pci.h 2013-05-23 08:56:16.000000000 +0200 -+++ head/drivers/pci/pci.h 2013-05-23 15:37:09.000000000 +0200 -@@ -312,4 +312,12 @@ static inline int pci_dev_specific_reset +--- head.orig/drivers/pci/pci.h 2013-07-02 09:20:54.000000000 +0200 ++++ head/drivers/pci/pci.h 2013-07-02 09:21:41.000000000 +0200 +@@ -317,4 +317,12 @@ static inline int pci_dev_specific_reset } #endif @@ -2573,7 +2573,7 @@ Acked-by: jbeulich@novell.com +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Isaku Yamahata <yamahata@valinux.co.jp>"); +MODULE_DESCRIPTION("PCI IO space multiplexing driver"); ---- head.orig/include/linux/acpi.h 2013-05-23 08:57:01.000000000 +0200 +--- head.orig/include/linux/acpi.h 2013-07-02 09:20:54.000000000 +0200 +++ head/include/linux/acpi.h 2013-05-23 15:37:27.000000000 +0200 @@ -275,6 +275,8 @@ int acpi_check_region(resource_size_t st @@ -2584,7 +2584,7 @@ Acked-by: jbeulich@novell.com #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif ---- head.orig/include/linux/pci.h 2013-05-23 08:56:44.000000000 +0200 +--- head.orig/include/linux/pci.h 2013-07-02 09:20:54.000000000 +0200 +++ head/include/linux/pci.h 2013-05-23 15:37:32.000000000 +0200 @@ -1884,4 +1884,10 @@ static inline struct eeh_dev *pci_dev_to */ @@ -2597,7 +2597,7 @@ Acked-by: jbeulich@novell.com +#endif + #endif /* LINUX_PCI_H */ ---- head.orig/include/uapi/xen/Kbuild 2012-12-11 04:30:57.000000000 +0100 +--- head.orig/include/uapi/xen/Kbuild 2013-07-02 09:20:54.000000000 +0200 +++ head/include/uapi/xen/Kbuild 2012-10-22 16:25:23.000000000 +0200 @@ -1,3 +1,4 @@ # UAPI Header export list diff --git a/patches.xen/pci-reserve b/patches.xen/pci-reserve index 61e68404cc..c1056489d0 100644 --- a/patches.xen/pci-reserve +++ 
b/patches.xen/pci-reserve @@ -50,9 +50,9 @@ Acked-by: jbeulich@novell.com obj-$(CONFIG_PCI_QUIRKS) += quirks.o ---- head.orig/drivers/pci/pci.h 2013-05-23 15:37:09.000000000 +0200 -+++ head/drivers/pci/pci.h 2013-05-23 15:37:37.000000000 +0200 -@@ -320,4 +320,19 @@ extern int pci_is_iomuldev(struct pci_de +--- head.orig/drivers/pci/pci.h 2013-07-02 09:21:41.000000000 +0200 ++++ head/drivers/pci/pci.h 2013-07-02 09:21:45.000000000 +0200 +@@ -325,4 +325,19 @@ extern int pci_is_iomuldev(struct pci_de #define pci_is_iomuldev(dev) 0 #endif @@ -212,7 +212,7 @@ Acked-by: jbeulich@novell.com + return 1; +} +__setup("pci_reserve=", pci_reserve_setup); ---- head.orig/drivers/pci/setup-bus.c 2013-05-23 08:56:16.000000000 +0200 +--- head.orig/drivers/pci/setup-bus.c 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/pci/setup-bus.c 2012-10-22 16:33:41.000000000 +0200 @@ -747,7 +747,7 @@ static void pbus_size_io(struct pci_bus { diff --git a/patches.xen/xen-netback-kernel-threads b/patches.xen/xen-netback-kernel-threads index 84f8cb87ce..d71eca177b 100644 --- a/patches.xen/xen-netback-kernel-threads +++ b/patches.xen/xen-netback-kernel-threads @@ -67,7 +67,7 @@ Acked-by: jbeulich@novell.com netif_tx_request_t slots[XEN_NETIF_NR_SLOTS_MIN]; } tx; -+ wait_queue_head_t netbk_action_wq; ++ wait_queue_head_t action_wq; + struct task_struct *task; + struct xen_netbk_rx { @@ -105,15 +105,14 @@ Acked-by: jbeulich@novell.com int netbk_copy_skb_mode; -@@ -223,6 +230,23 @@ static void flush_notify_list(netif_t *l +@@ -223,6 +230,22 @@ static void flush_notify_list(netif_t *l BUG(); } +static void netbk_rx_schedule(struct xen_netbk_rx *netbk) +{ + if (use_kthreads) -+ wake_up(&container_of(netbk, struct xen_netbk, -+ rx)->netbk_action_wq); ++ wake_up(&container_of(netbk, struct xen_netbk, rx)->action_wq); + else + tasklet_schedule(&netbk->tasklet); +} @@ -121,7 +120,7 @@ Acked-by: jbeulich@novell.com +static void netbk_tx_schedule(struct xen_netbk *netbk) +{ + if (use_kthreads) -+ wake_up(&netbk->netbk_action_wq); ++ wake_up(&netbk->action_wq); + else + tasklet_schedule(&netbk->tx.tasklet); +} @@ -223,7 +222,7 @@ Acked-by: jbeulich@novell.com + struct xen_netbk *netbk = &xen_netbk[group]; + + while (!kthread_should_stop()) { -+ wait_event_interruptible(netbk->netbk_action_wq, ++ wait_event_interruptible(netbk->action_wq, + rx_work_todo(netbk) || + tx_work_todo(netbk) || + kthread_should_stop()); @@ -259,7 +258,7 @@ Acked-by: jbeulich@novell.com } + + if (use_kthreads) { -+ init_waitqueue_head(&netbk->netbk_action_wq); ++ init_waitqueue_head(&netbk->action_wq); + netbk->task = kthread_create(netbk_action_thread, + (void *)(long)group, + "netback/%u", group); diff --git a/patches.xen/xen-x86-EFI b/patches.xen/xen-x86-EFI index b6709a7806..5eb54c0f72 100644 --- a/patches.xen/xen-x86-EFI +++ b/patches.xen/xen-x86-EFI @@ -77,8 +77,8 @@ References: fate#311376, fate#311529, bnc#578927, bnc#628554 +ccflags-$(CONFIG_XEN) += -fshort-wchar +disabled-obj-$(CONFIG_XEN) := efi_%$(BITS).o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ head/arch/x86/platform/efi/efi-xen.c 2013-06-20 15:58:25.000000000 +0200 -@@ -0,0 +1,661 @@ ++++ head/arch/x86/platform/efi/efi-xen.c 2013-06-27 12:13:42.000000000 +0200 +@@ -0,0 +1,664 @@ +/* + * Common EFI (Extensible Firmware Interface) support functions + * Based on Extensible Firmware Interface Specification version 1.0 @@ -699,7 +699,10 @@ References: fate#311376, fate#311529, bnc#578927, bnc#628554 + * that by attempting to use more space than is available. 
+ */ + unsigned long dummy_size = remaining_size + 1024; -+ void *dummy = kmalloc(dummy_size, GFP_ATOMIC|__GFP_ZERO); ++ void *dummy = kzalloc(dummy_size, GFP_ATOMIC); ++ ++ if (!dummy) ++ return EFI_OUT_OF_RESOURCES; + + status = xen_efi_set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | diff --git a/patches.xen/xen3-auto-xen-arch.diff b/patches.xen/xen3-auto-xen-arch.diff index a64a544645..888ebf1f33 100644 --- a/patches.xen/xen3-auto-xen-arch.diff +++ b/patches.xen/xen3-auto-xen-arch.diff @@ -20,13 +20,11 @@ places): +++ linux/arch/x86/kernel/pci-nommu-xen.c +++ linux/arch/x86/kernel/process-xen.c +++ linux/arch/x86/kernel/setup-xen.c -+++ linux/arch/x86/kernel/smp-xen.c +++ linux/arch/x86/kernel/syscall_32-xen.c +++ linux/arch/x86/kernel/traps-xen.c +++ linux/arch/x86/kernel/x86_init-xen.c +++ linux/arch/x86/lib/cache-smp-xen.c +++ linux/arch/x86/mm/dump_pagetables-xen.c -+++ linux/arch/x86/mm/fault-xen.c +++ linux/arch/x86/mm/init-xen.c +++ linux/arch/x86/mm/iomap_32-xen.c +++ linux/arch/x86/mm/pat-xen.c @@ -7730,614 +7728,6 @@ pick them up (for reference, prefixed with the version the removal occured): + * End: + */ --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ head/arch/x86/kernel/smp_32-xen.c 2007-12-10 08:47:31.000000000 +0100 -@@ -0,0 +1,605 @@ -+/* -+ * Intel SMP support routines. -+ * -+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> -+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> -+ * -+ * This code is released under the GNU General Public License version 2 or -+ * later. -+ */ -+ -+#include <linux/init.h> -+ -+#include <linux/mm.h> -+#include <linux/delay.h> -+#include <linux/spinlock.h> -+#include <linux/smp_lock.h> -+#include <linux/kernel_stat.h> -+#include <linux/mc146818rtc.h> -+#include <linux/cache.h> -+#include <linux/interrupt.h> -+#include <linux/cpu.h> -+#include <linux/module.h> -+ -+#include <asm/mtrr.h> -+#include <asm/tlbflush.h> -+#if 0 -+#include <mach_apic.h> -+#endif -+#include <xen/evtchn.h> -+ -+/* -+ * Some notes on x86 processor bugs affecting SMP operation: -+ * -+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. -+ * The Linux implications for SMP are handled as follows: -+ * -+ * Pentium III / [Xeon] -+ * None of the E1AP-E3AP errata are visible to the user. -+ * -+ * E1AP. see PII A1AP -+ * E2AP. see PII A2AP -+ * E3AP. see PII A3AP -+ * -+ * Pentium II / [Xeon] -+ * None of the A1AP-A3AP errata are visible to the user. -+ * -+ * A1AP. see PPro 1AP -+ * A2AP. see PPro 2AP -+ * A3AP. see PPro 7AP -+ * -+ * Pentium Pro -+ * None of 1AP-9AP errata are visible to the normal user, -+ * except occasional delivery of 'spurious interrupt' as trap #15. -+ * This is very rare and a non-problem. -+ * -+ * 1AP. Linux maps APIC as non-cacheable -+ * 2AP. worked around in hardware -+ * 3AP. fixed in C0 and above steppings microcode update. -+ * Linux does not use excessive STARTUP_IPIs. -+ * 4AP. worked around in hardware -+ * 5AP. symmetric IO mode (normal Linux operation) not affected. -+ * 'noapic' mode has vector 0xf filled out properly. -+ * 6AP. 'noapic' mode might be affected - fixed in later steppings -+ * 7AP. We do not assume writes to the LVT deassering IRQs -+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup -+ * 9AP. We do not use mixed mode -+ * -+ * Pentium -+ * There is a marginal case where REP MOVS on 100MHz SMP -+ * machines with B stepping processors can fail. XXX should provide -+ * an L1cache=Writethrough or L1cache=off option. -+ * -+ * B stepping CPUs may hang. 
There are hardware work arounds -+ * for this. We warn about it in case your board doesn't have the work -+ * arounds. Basically thats so I can tell anyone with a B stepping -+ * CPU and SMP problems "tough". -+ * -+ * Specific items [From Pentium Processor Specification Update] -+ * -+ * 1AP. Linux doesn't use remote read -+ * 2AP. Linux doesn't trust APIC errors -+ * 3AP. We work around this -+ * 4AP. Linux never generated 3 interrupts of the same priority -+ * to cause a lost local interrupt. -+ * 5AP. Remote read is never used -+ * 6AP. not affected - worked around in hardware -+ * 7AP. not affected - worked around in hardware -+ * 8AP. worked around in hardware - we get explicit CS errors if not -+ * 9AP. only 'noapic' mode affected. Might generate spurious -+ * interrupts, we log only the first one and count the -+ * rest silently. -+ * 10AP. not affected - worked around in hardware -+ * 11AP. Linux reads the APIC between writes to avoid this, as per -+ * the documentation. Make sure you preserve this as it affects -+ * the C stepping chips too. -+ * 12AP. not affected - worked around in hardware -+ * 13AP. not affected - worked around in hardware -+ * 14AP. we always deassert INIT during bootup -+ * 15AP. not affected - worked around in hardware -+ * 16AP. not affected - worked around in hardware -+ * 17AP. not affected - worked around in hardware -+ * 18AP. not affected - worked around in hardware -+ * 19AP. not affected - worked around in BIOS -+ * -+ * If this sounds worrying believe me these bugs are either ___RARE___, -+ * or are signal timing bugs worked around in hardware and there's -+ * about nothing of note with C stepping upwards. -+ */ -+ -+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; -+ -+/* -+ * the following functions deal with sending IPIs between CPUs. -+ * -+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. -+ */ -+ -+static inline int __prepare_ICR (unsigned int shortcut, int vector) -+{ -+ unsigned int icr = shortcut | APIC_DEST_LOGICAL; -+ -+ switch (vector) { -+ default: -+ icr |= APIC_DM_FIXED | vector; -+ break; -+ case NMI_VECTOR: -+ icr |= APIC_DM_NMI; -+ break; -+ } -+ return icr; -+} -+ -+static inline int __prepare_ICR2 (unsigned int mask) -+{ -+ return SET_APIC_DEST_FIELD(mask); -+} -+ -+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); -+ -+static inline void __send_IPI_one(unsigned int cpu, int vector) -+{ -+ int irq = per_cpu(ipi_to_irq, cpu)[vector]; -+ BUG_ON(irq < 0); -+ notify_remote_via_irq(irq); -+} -+ -+void __send_IPI_shortcut(unsigned int shortcut, int vector) -+{ -+ int cpu; -+ -+ switch (shortcut) { -+ case APIC_DEST_SELF: -+ __send_IPI_one(smp_processor_id(), vector); -+ break; -+ case APIC_DEST_ALLBUT: -+ for (cpu = 0; cpu < NR_CPUS; ++cpu) { -+ if (cpu == smp_processor_id()) -+ continue; -+ if (cpu_isset(cpu, cpu_online_map)) { -+ __send_IPI_one(cpu, vector); -+ } -+ } -+ break; -+ default: -+ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, -+ vector); -+ break; -+ } -+} -+ -+void fastcall send_IPI_self(int vector) -+{ -+ __send_IPI_shortcut(APIC_DEST_SELF, vector); -+} -+ -+/* -+ * This is only used on smaller machines. 
-+ */ -+void send_IPI_mask_bitmask(cpumask_t mask, int vector) -+{ -+ unsigned long flags; -+ unsigned int cpu; -+ -+ local_irq_save(flags); -+ WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]); -+ -+ for (cpu = 0; cpu < NR_CPUS; ++cpu) { -+ if (cpu_isset(cpu, mask)) { -+ __send_IPI_one(cpu, vector); -+ } -+ } -+ -+ local_irq_restore(flags); -+} -+ -+void send_IPI_mask_sequence(cpumask_t mask, int vector) -+{ -+ -+ send_IPI_mask_bitmask(mask, vector); -+} -+ -+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */ -+ -+#if 0 /* XEN */ -+/* -+ * Smarter SMP flushing macros. -+ * c/o Linus Torvalds. -+ * -+ * These mean you can really definitely utterly forget about -+ * writing to user space from interrupts. (Its not allowed anyway). -+ * -+ * Optimizations Manfred Spraul <manfred@colorfullife.com> -+ */ -+ -+static cpumask_t flush_cpumask; -+static struct mm_struct * flush_mm; -+static unsigned long flush_va; -+static DEFINE_SPINLOCK(tlbstate_lock); -+#define FLUSH_ALL 0xffffffff -+ -+/* -+ * We cannot call mmdrop() because we are in interrupt context, -+ * instead update mm->cpu_vm_mask. -+ * -+ * We need to reload %cr3 since the page tables may be going -+ * away from under us.. -+ */ -+static inline void leave_mm (unsigned long cpu) -+{ -+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) -+ BUG(); -+ cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); -+ load_cr3(swapper_pg_dir); -+} -+ -+/* -+ * -+ * The flush IPI assumes that a thread switch happens in this order: -+ * [cpu0: the cpu that switches] -+ * 1) switch_mm() either 1a) or 1b) -+ * 1a) thread switch to a different mm -+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); -+ * Stop ipi delivery for the old mm. This is not synchronized with -+ * the other cpus, but smp_invalidate_interrupt ignore flush ipis -+ * for the wrong mm, and in the worst case we perform a superflous -+ * tlb flush. -+ * 1a2) set cpu_tlbstate to TLBSTATE_OK -+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 -+ * was in lazy tlb mode. -+ * 1a3) update cpu_tlbstate[].active_mm -+ * Now cpu0 accepts tlb flushes for the new mm. -+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); -+ * Now the other cpus will send tlb flush ipis. -+ * 1a4) change cr3. -+ * 1b) thread switch without mm change -+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles -+ * flush ipis. -+ * 1b1) set cpu_tlbstate to TLBSTATE_OK -+ * 1b2) test_and_set the cpu bit in cpu_vm_mask. -+ * Atomically set the bit [other cpus will start sending flush ipis], -+ * and test the bit. -+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb. -+ * 2) switch %%esp, ie current -+ * -+ * The interrupt must handle 2 special cases: -+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. -+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only -+ * runs in kernel space, the cpu could load tlb entries for user space -+ * pages. -+ * -+ * The good news is that cpu_tlbstate is local to each cpu, no -+ * write/read ordering problems. -+ */ -+ -+/* -+ * TLB flush IPI: -+ * -+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. -+ * 2) Leave the mm if we are in the lazy tlb mode. 
-+ */ -+ -+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, -+ struct pt_regs *regs) -+{ -+ unsigned long cpu; -+ -+ cpu = get_cpu(); -+ -+ if (!cpu_isset(cpu, flush_cpumask)) -+ goto out; -+ /* -+ * This was a BUG() but until someone can quote me the -+ * line from the intel manual that guarantees an IPI to -+ * multiple CPUs is retried _only_ on the erroring CPUs -+ * its staying as a return -+ * -+ * BUG(); -+ */ -+ -+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { -+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { -+ if (flush_va == FLUSH_ALL) -+ local_flush_tlb(); -+ else -+ __flush_tlb_one(flush_va); -+ } else -+ leave_mm(cpu); -+ } -+ smp_mb__before_clear_bit(); -+ cpu_clear(cpu, flush_cpumask); -+ smp_mb__after_clear_bit(); -+out: -+ put_cpu_no_resched(); -+ -+ return IRQ_HANDLED; -+} -+ -+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, -+ unsigned long va) -+{ -+ /* -+ * A couple of (to be removed) sanity checks: -+ * -+ * - current CPU must not be in mask -+ * - mask must exist :) -+ */ -+ BUG_ON(cpus_empty(cpumask)); -+ BUG_ON(cpu_isset(smp_processor_id(), cpumask)); -+ BUG_ON(!mm); -+ -+ /* If a CPU which we ran on has gone down, OK. */ -+ cpus_and(cpumask, cpumask, cpu_online_map); -+ if (cpus_empty(cpumask)) -+ return; -+ -+ /* -+ * i'm not happy about this global shared spinlock in the -+ * MM hot path, but we'll see how contended it is. -+ * Temporarily this turns IRQs off, so that lockups are -+ * detected by the NMI watchdog. -+ */ -+ spin_lock(&tlbstate_lock); -+ -+ flush_mm = mm; -+ flush_va = va; -+#if NR_CPUS <= BITS_PER_LONG -+ atomic_set_mask(cpumask, &flush_cpumask); -+#else -+ { -+ int k; -+ unsigned long *flush_mask = (unsigned long *)&flush_cpumask; -+ unsigned long *cpu_mask = (unsigned long *)&cpumask; -+ for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) -+ atomic_set_mask(cpu_mask[k], &flush_mask[k]); -+ } -+#endif -+ /* -+ * We have to send the IPI only to -+ * CPUs affected. -+ */ -+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); -+ -+ while (!cpus_empty(flush_cpumask)) -+ /* nothing. 
lockup detection does not belong here */ -+ mb(); -+ -+ flush_mm = NULL; -+ flush_va = 0; -+ spin_unlock(&tlbstate_lock); -+} -+ -+void flush_tlb_current_task(void) -+{ -+ struct mm_struct *mm = current->mm; -+ cpumask_t cpu_mask; -+ -+ preempt_disable(); -+ cpu_mask = mm->cpu_vm_mask; -+ cpu_clear(smp_processor_id(), cpu_mask); -+ -+ local_flush_tlb(); -+ if (!cpus_empty(cpu_mask)) -+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL); -+ preempt_enable(); -+} -+ -+void flush_tlb_mm (struct mm_struct * mm) -+{ -+ cpumask_t cpu_mask; -+ -+ preempt_disable(); -+ cpu_mask = mm->cpu_vm_mask; -+ cpu_clear(smp_processor_id(), cpu_mask); -+ -+ if (current->active_mm == mm) { -+ if (current->mm) -+ local_flush_tlb(); -+ else -+ leave_mm(smp_processor_id()); -+ } -+ if (!cpus_empty(cpu_mask)) -+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL); -+ -+ preempt_enable(); -+} -+ -+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ cpumask_t cpu_mask; -+ -+ preempt_disable(); -+ cpu_mask = mm->cpu_vm_mask; -+ cpu_clear(smp_processor_id(), cpu_mask); -+ -+ if (current->active_mm == mm) { -+ if(current->mm) -+ __flush_tlb_one(va); -+ else -+ leave_mm(smp_processor_id()); -+ } -+ -+ if (!cpus_empty(cpu_mask)) -+ flush_tlb_others(cpu_mask, mm, va); -+ -+ preempt_enable(); -+} -+EXPORT_SYMBOL(flush_tlb_page); -+ -+static void do_flush_tlb_all(void* info) -+{ -+ unsigned long cpu = smp_processor_id(); -+ -+ __flush_tlb_all(); -+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) -+ leave_mm(cpu); -+} -+ -+void flush_tlb_all(void) -+{ -+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1); -+} -+ -+#endif /* XEN */ -+ -+/* -+ * this function sends a 'reschedule' IPI to another CPU. -+ * it goes straight through and wastes no time serializing -+ * anything. Worst case is that we lose a reschedule ... -+ */ -+void smp_send_reschedule(int cpu) -+{ -+ WARN_ON(cpu_is_offline(cpu)); -+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -+} -+ -+/* -+ * Structure and data for smp_call_function(). This is designed to minimise -+ * static memory requirements. It also looks cleaner. -+ */ -+static DEFINE_SPINLOCK(call_lock); -+ -+struct call_data_struct { -+ void (*func) (void *info); -+ void *info; -+ atomic_t started; -+ atomic_t finished; -+ int wait; -+}; -+ -+void lock_ipi_call_lock(void) -+{ -+ spin_lock_irq(&call_lock); -+} -+ -+void unlock_ipi_call_lock(void) -+{ -+ spin_unlock_irq(&call_lock); -+} -+ -+static struct call_data_struct *call_data; -+ -+/** -+ * smp_call_function(): Run a function on all other CPUs. -+ * @func: The function to run. This must be fast and non-blocking. -+ * @info: An arbitrary pointer to pass to the function. -+ * @nonatomic: currently unused. -+ * @wait: If true, wait (atomically) until function has completed on other CPUs. -+ * -+ * Returns 0 on success, else a negative status code. Does not return until -+ * remote CPUs are nearly ready to execute <<func>> or are or have executed. -+ * -+ * You must not call this function with disabled interrupts or from a -+ * hardware interrupt handler or from a bottom half handler. -+ */ -+int smp_call_function (void (*func) (void *info), void *info, int nonatomic, -+ int wait) -+{ -+ struct call_data_struct data; -+ int cpus; -+ -+ /* Holding any lock stops cpus from going down. 
*/ -+ spin_lock(&call_lock); -+ cpus = num_online_cpus() - 1; -+ if (!cpus) { -+ spin_unlock(&call_lock); -+ return 0; -+ } -+ -+ /* Can deadlock when called with interrupts disabled */ -+ WARN_ON(irqs_disabled()); -+ -+ data.func = func; -+ data.info = info; -+ atomic_set(&data.started, 0); -+ data.wait = wait; -+ if (wait) -+ atomic_set(&data.finished, 0); -+ -+ call_data = &data; -+ mb(); -+ -+ /* Send a message to all other CPUs and wait for them to respond */ -+ send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ -+ /* Wait for response */ -+ while (atomic_read(&data.started) != cpus) -+ cpu_relax(); -+ -+ if (wait) -+ while (atomic_read(&data.finished) != cpus) -+ cpu_relax(); -+ spin_unlock(&call_lock); -+ -+ return 0; -+} -+EXPORT_SYMBOL(smp_call_function); -+ -+static void stop_this_cpu (void * dummy) -+{ -+ /* -+ * Remove this CPU: -+ */ -+ cpu_clear(smp_processor_id(), cpu_online_map); -+ local_irq_disable(); -+ disable_all_local_evtchn(); -+ if (cpu_data[smp_processor_id()].hlt_works_ok) -+ for(;;) halt(); -+ for (;;); -+} -+ -+/* -+ * this function calls the 'stop' function on all other CPUs in the system. -+ */ -+ -+void smp_send_stop(void) -+{ -+ smp_call_function(stop_this_cpu, NULL, 1, 0); -+ -+ local_irq_disable(); -+ disable_all_local_evtchn(); -+ local_irq_enable(); -+} -+ -+/* -+ * Reschedule call back. Nothing to do, -+ * all the work is done automatically when -+ * we return from the interrupt. -+ */ -+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, -+ struct pt_regs *regs) -+{ -+ -+ return IRQ_HANDLED; -+} -+ -+#include <linux/kallsyms.h> -+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, -+ struct pt_regs *regs) -+{ -+ void (*func) (void *info) = call_data->func; -+ void *info = call_data->info; -+ int wait = call_data->wait; -+ -+ /* -+ * Notify initiating CPU that I've grabbed the data and am -+ * about to execute the function -+ */ -+ mb(); -+ atomic_inc(&call_data->started); -+ /* -+ * At this point the info structure may be out of scope unless wait==1 -+ */ -+ irq_enter(); -+ (*func)(info); -+ irq_exit(); -+ -+ if (wait) { -+ mb(); -+ atomic_inc(&call_data->finished); -+ } -+ -+ return IRQ_HANDLED; -+} -+ ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head/arch/x86/kernel/time-xen.c 2010-08-31 09:24:21.000000000 +0200 @@ -0,0 +1,1242 @@ +/* @@ -10970,801 +10360,6 @@ pick them up (for reference, prefixed with the version the removal occured): + clear_page(v); +} --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ head/arch/x86/mm/fault_32-xen.c 2010-09-23 15:39:04.000000000 +0200 -@@ -0,0 +1,792 @@ -+/* -+ * linux/arch/i386/mm/fault.c -+ * -+ * Copyright (C) 1995 Linus Torvalds -+ */ -+ -+#include <linux/signal.h> -+#include <linux/sched.h> -+#include <linux/kernel.h> -+#include <linux/errno.h> -+#include <linux/string.h> -+#include <linux/types.h> -+#include <linux/ptrace.h> -+#include <linux/mman.h> -+#include <linux/mm.h> -+#include <linux/smp.h> -+#include <linux/smp_lock.h> -+#include <linux/interrupt.h> -+#include <linux/init.h> -+#include <linux/tty.h> -+#include <linux/vt_kern.h> /* For unblank_screen() */ -+#include <linux/highmem.h> -+#include <linux/module.h> -+#include <linux/kprobes.h> -+ -+#include <asm/system.h> -+#include <asm/uaccess.h> -+#include <asm/desc.h> -+#include <asm/kdebug.h> -+ -+extern void die(const char *,struct pt_regs *,long); -+ -+#ifdef CONFIG_KPROBES -+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); -+int register_page_fault_notifier(struct notifier_block *nb) -+{ -+ vmalloc_sync_all(); -+ return 
atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -+} -+ -+int unregister_page_fault_notifier(struct notifier_block *nb) -+{ -+ return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -+} -+ -+static inline int notify_page_fault(enum die_val val, const char *str, -+ struct pt_regs *regs, long err, int trap, int sig) -+{ -+ struct die_args args = { -+ .regs = regs, -+ .str = str, -+ .err = err, -+ .trapnr = trap, -+ .signr = sig -+ }; -+ return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); -+} -+#else -+static inline int notify_page_fault(enum die_val val, const char *str, -+ struct pt_regs *regs, long err, int trap, int sig) -+{ -+ return NOTIFY_DONE; -+} -+#endif -+ -+ -+/* -+ * Unlock any spinlocks which will prevent us from getting the -+ * message out -+ */ -+void bust_spinlocks(int yes) -+{ -+ int loglevel_save = console_loglevel; -+ -+ if (yes) { -+ oops_in_progress = 1; -+ return; -+ } -+#ifdef CONFIG_VT -+ unblank_screen(); -+#endif -+ oops_in_progress = 0; -+ /* -+ * OK, the message is on the console. Now we call printk() -+ * without oops_in_progress set so that printk will give klogd -+ * a poke. Hold onto your hats... -+ */ -+ console_loglevel = 15; /* NMI oopser may have shut the console up */ -+ printk(" "); -+ console_loglevel = loglevel_save; -+} -+ -+/* -+ * Return EIP plus the CS segment base. The segment limit is also -+ * adjusted, clamped to the kernel/user address space (whichever is -+ * appropriate), and returned in *eip_limit. -+ * -+ * The segment is checked, because it might have been changed by another -+ * task between the original faulting instruction and here. -+ * -+ * If CS is no longer a valid code segment, or if EIP is beyond the -+ * limit, or if it is a kernel address when CS is not a kernel segment, -+ * then the returned value will be greater than *eip_limit. -+ * -+ * This is slow, but is very rarely executed. -+ */ -+static inline unsigned long get_segment_eip(struct pt_regs *regs, -+ unsigned long *eip_limit) -+{ -+ unsigned long eip = regs->eip; -+ unsigned seg = regs->xcs & 0xffff; -+ u32 seg_ar, seg_limit, base, *desc; -+ -+ /* Unlikely, but must come before segment checks. */ -+ if (unlikely(regs->eflags & VM_MASK)) { -+ base = seg << 4; -+ *eip_limit = base + 0xffff; -+ return base + (eip & 0xffff); -+ } -+ -+ /* The standard kernel/user address space limit. */ -+ *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg; -+ -+ /* By far the most common cases. */ -+ if (likely(seg == __USER_CS || seg == GET_KERNEL_CS())) -+ return eip; -+ -+ /* Check the segment exists, is within the current LDT/GDT size, -+ that kernel/user (ring 0..3) has the appropriate privilege, -+ that it's a code segment, and get the limit. */ -+ __asm__ ("larl %3,%0; lsll %3,%1" -+ : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); -+ if ((~seg_ar & 0x9800) || eip > seg_limit) { -+ *eip_limit = 0; -+ return 1; /* So that returned eip > *eip_limit. */ -+ } -+ -+ /* Get the GDT/LDT descriptor base. -+ When you look for races in this code remember that -+ LDT and other horrors are only used in user space. */ -+ if (seg & (1<<2)) { -+ /* Must lock the LDT while reading it. */ -+ down(¤t->mm->context.sem); -+ desc = current->mm->context.ldt; -+ desc = (void *)desc + (seg & ~7); -+ } else { -+ /* Must disable preemption while reading the GDT. 
*/ -+ desc = (u32 *)get_cpu_gdt_table(get_cpu()); -+ desc = (void *)desc + (seg & ~7); -+ } -+ -+ /* Decode the code segment base from the descriptor */ -+ base = get_desc_base((unsigned long *)desc); -+ -+ if (seg & (1<<2)) { -+ up(¤t->mm->context.sem); -+ } else -+ put_cpu(); -+ -+ /* Adjust EIP and segment limit, and clamp at the kernel limit. -+ It's legitimate for segments to wrap at 0xffffffff. */ -+ seg_limit += base; -+ if (seg_limit < *eip_limit && seg_limit >= base) -+ *eip_limit = seg_limit; -+ return eip + base; -+} -+ -+/* -+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. -+ * Check that here and ignore it. -+ */ -+static int __is_prefetch(struct pt_regs *regs, unsigned long addr) -+{ -+ unsigned long limit; -+ unsigned long instr = get_segment_eip (regs, &limit); -+ int scan_more = 1; -+ int prefetch = 0; -+ int i; -+ -+ for (i = 0; scan_more && i < 15; i++) { -+ unsigned char opcode; -+ unsigned char instr_hi; -+ unsigned char instr_lo; -+ -+ if (instr > limit) -+ break; -+ if (__get_user(opcode, (unsigned char __user *) instr)) -+ break; -+ -+ instr_hi = opcode & 0xf0; -+ instr_lo = opcode & 0x0f; -+ instr++; -+ -+ switch (instr_hi) { -+ case 0x20: -+ case 0x30: -+ /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ -+ scan_more = ((instr_lo & 7) == 0x6); -+ break; -+ -+ case 0x60: -+ /* 0x64 thru 0x67 are valid prefixes in all modes. */ -+ scan_more = (instr_lo & 0xC) == 0x4; -+ break; -+ case 0xF0: -+ /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ -+ scan_more = !instr_lo || (instr_lo>>1) == 1; -+ break; -+ case 0x00: -+ /* Prefetch instruction is 0x0F0D or 0x0F18 */ -+ scan_more = 0; -+ if (instr > limit) -+ break; -+ if (__get_user(opcode, (unsigned char __user *) instr)) -+ break; -+ prefetch = (instr_lo == 0xF) && -+ (opcode == 0x0D || opcode == 0x18); -+ break; -+ default: -+ scan_more = 0; -+ break; -+ } -+ } -+ return prefetch; -+} -+ -+static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, -+ unsigned long error_code) -+{ -+ if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && -+ boot_cpu_data.x86 >= 6)) { -+ /* Catch an obscure case of prefetch inside an NX page. 
*/ -+ if (nx_enabled && (error_code & 16)) -+ return 0; -+ return __is_prefetch(regs, addr); -+ } -+ return 0; -+} -+ -+static noinline void force_sig_info_fault(int si_signo, int si_code, -+ unsigned long address, struct task_struct *tsk) -+{ -+ siginfo_t info; -+ -+ info.si_signo = si_signo; -+ info.si_errno = 0; -+ info.si_code = si_code; -+ info.si_addr = (void __user *)address; -+ force_sig_info(si_signo, &info, tsk); -+} -+ -+fastcall void do_invalid_op(struct pt_regs *, unsigned long); -+ -+#ifdef CONFIG_X86_PAE -+static void dump_fault_path(unsigned long address) -+{ -+ unsigned long *p, page; -+ unsigned long mfn; -+ -+ page = read_cr3(); -+ p = (unsigned long *)__va(page); -+ p += (address >> 30) * 2; -+ printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]); -+ if (p[0] & _PAGE_PRESENT) { -+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); -+ page = mfn_to_pfn(mfn) << PAGE_SHIFT; -+ p = (unsigned long *)__va(page); -+ address &= 0x3fffffff; -+ p += (address >> 21) * 2; -+ printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n", -+ page, p[1], p[0]); -+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); -+#ifdef CONFIG_HIGHPTE -+ if (mfn_to_pfn(mfn) >= highstart_pfn) -+ return; -+#endif -+ if (p[0] & _PAGE_PRESENT) { -+ page = mfn_to_pfn(mfn) << PAGE_SHIFT; -+ p = (unsigned long *) __va(page); -+ address &= 0x001fffff; -+ p += (address >> 12) * 2; -+ printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n", -+ page, p[1], p[0]); -+ } -+ } -+} -+#else -+static void dump_fault_path(unsigned long address) -+{ -+ unsigned long page; -+ -+ page = read_cr3(); -+ page = ((unsigned long *) __va(page))[address >> 22]; -+ if (oops_may_print()) -+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, -+ machine_to_phys(page)); -+ /* -+ * We must not directly access the pte in the highpte -+ * case if the page table is located in highmem. -+ * And lets rather not kmap-atomic the pte, just in case -+ * it's allocated already. -+ */ -+#ifdef CONFIG_HIGHPTE -+ if ((page >> PAGE_SHIFT) >= highstart_pfn) -+ return; -+#endif -+ if ((page & 1) && oops_may_print()) { -+ page &= PAGE_MASK; -+ address &= 0x003ff000; -+ page = machine_to_phys(page); -+ page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; -+ printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, -+ machine_to_phys(page)); -+ } -+} -+#endif -+ -+static int spurious_fault(struct pt_regs *regs, -+ unsigned long address, -+ unsigned long error_code) -+{ -+ pgd_t *pgd; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ /* Reserved-bit violation or user access to kernel space? */ -+ if (error_code & 0x0c) -+ return 0; -+ -+ pgd = init_mm.pgd + pgd_index(address); -+ if (!pgd_present(*pgd)) -+ return 0; -+ -+ pud = pud_offset(pgd, address); -+ if (!pud_present(*pud)) -+ return 0; -+ -+ pmd = pmd_offset(pud, address); -+ if (!pmd_present(*pmd)) -+ return 0; -+ -+ pte = pte_offset_kernel(pmd, address); -+ if (!pte_present(*pte)) -+ return 0; -+ if ((error_code & 0x02) && !pte_write(*pte)) -+ return 0; -+#ifdef CONFIG_X86_PAE -+ if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX)) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -+{ -+ unsigned index = pgd_index(address); -+ pgd_t *pgd_k; -+ pud_t *pud, *pud_k; -+ pmd_t *pmd, *pmd_k; -+ -+ pgd += index; -+ pgd_k = init_mm.pgd + index; -+ -+ if (!pgd_present(*pgd_k)) -+ return NULL; -+ -+ /* -+ * set_pgd(pgd, *pgd_k); here would be useless on PAE -+ * and redundant with the set_pmd() on non-PAE. As would -+ * set_pud. 
-+ */ -+ -+ pud = pud_offset(pgd, address); -+ pud_k = pud_offset(pgd_k, address); -+ if (!pud_present(*pud_k)) -+ return NULL; -+ -+ pmd = pmd_offset(pud, address); -+ pmd_k = pmd_offset(pud_k, address); -+ if (!pmd_present(*pmd_k)) -+ return NULL; -+ if (!pmd_present(*pmd)) -+#if CONFIG_XEN_COMPAT > 0x030002 -+ set_pmd(pmd, *pmd_k); -+#else -+ /* -+ * When running on older Xen we must launder *pmd_k through -+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set. -+ */ -+ set_pmd(pmd, __pmd(pmd_val(*pmd_k))); -+#endif -+ else -+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); -+ return pmd_k; -+} -+ -+/* -+ * Handle a fault on the vmalloc or module mapping area -+ * -+ * This assumes no large pages in there. -+ */ -+static inline int vmalloc_fault(unsigned long address) -+{ -+ unsigned long pgd_paddr; -+ pmd_t *pmd_k; -+ pte_t *pte_k; -+ /* -+ * Synchronize this task's top level page-table -+ * with the 'reference' page table. -+ * -+ * Do _not_ use "current" here. We might be inside -+ * an interrupt in the middle of a task switch.. -+ */ -+ pgd_paddr = read_cr3(); -+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); -+ if (!pmd_k) -+ return -1; -+ pte_k = pte_offset_kernel(pmd_k, address); -+ if (!pte_present(*pte_k)) -+ return -1; -+ return 0; -+} -+ -+/* -+ * This routine handles page faults. It determines the address, -+ * and the problem, and then passes it off to one of the appropriate -+ * routines. -+ * -+ * error_code: -+ * bit 0 == 0 means no page found, 1 means protection fault -+ * bit 1 == 0 means read, 1 means write -+ * bit 2 == 0 means kernel, 1 means user-mode -+ * bit 3 == 1 means use of reserved bit detected -+ * bit 4 == 1 means fault was an instruction fetch -+ */ -+fastcall void __kprobes do_page_fault(struct pt_regs *regs, -+ unsigned long error_code) -+{ -+ struct task_struct *tsk; -+ struct mm_struct *mm; -+ struct vm_area_struct * vma; -+ unsigned long address; -+ int write, si_code; -+ -+ /* get the address */ -+ address = read_cr2(); -+ -+ /* Set the "privileged fault" bit to something sane. */ -+ error_code &= ~4; -+ error_code |= (regs->xcs & 2) << 1; -+ if (regs->eflags & X86_EFLAGS_VM) -+ error_code |= 4; -+ -+ tsk = current; -+ -+ si_code = SEGV_MAPERR; -+ -+ /* -+ * We fault-in kernel-space virtual memory on-demand. The -+ * 'reference' page table is init_mm.pgd. -+ * -+ * NOTE! We MUST NOT take any locks for this case. We may -+ * be in an interrupt or a critical region, and should -+ * only copy the information from the master page table, -+ * nothing more. -+ * -+ * This verifies that the fault happens in kernel space -+ * (error_code & 4) == 0, and that the fault was not a -+ * protection error (error_code & 9) == 0. -+ */ -+ if (unlikely(address >= TASK_SIZE)) { -+#ifdef CONFIG_XEN -+ /* Faults in hypervisor area can never be patched up. */ -+ if (address >= hypervisor_virt_start) -+ goto bad_area_nosemaphore; -+#endif -+ if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) -+ return; -+ /* Can take a spurious fault if mapping changes R/O -> R/W. */ -+ if (spurious_fault(regs, address, error_code)) -+ return; -+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, -+ SIGSEGV) == NOTIFY_STOP) -+ return; -+ /* -+ * Don't take the mm semaphore here. If we fixup a prefetch -+ * fault we could otherwise deadlock. 
-+ */ -+ goto bad_area_nosemaphore; -+ } -+ -+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, -+ SIGSEGV) == NOTIFY_STOP) -+ return; -+ -+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc -+ fault has been handled. */ -+ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) -+ local_irq_enable(); -+ -+ mm = tsk->mm; -+ -+ /* -+ * If we're in an interrupt, have no user context or are running in an -+ * atomic region then we must not take the fault.. -+ */ -+ if (in_atomic() || !mm) -+ goto bad_area_nosemaphore; -+ -+ /* When running in the kernel we expect faults to occur only to -+ * addresses in user space. All other faults represent errors in the -+ * kernel and should generate an OOPS. Unfortunatly, in the case of an -+ * erroneous fault occurring in a code path which already holds mmap_sem -+ * we will deadlock attempting to validate the fault against the -+ * address space. Luckily the kernel only validly references user -+ * space from well defined areas of code, which are listed in the -+ * exceptions table. -+ * -+ * As the vast majority of faults will be valid we will only perform -+ * the source reference check when there is a possibilty of a deadlock. -+ * Attempt to lock the address space, if we cannot we then validate the -+ * source. If this is invalid we can skip the address space check, -+ * thus avoiding the deadlock. -+ */ -+ if (!down_read_trylock(&mm->mmap_sem)) { -+ if ((error_code & 4) == 0 && -+ !search_exception_tables(regs->eip)) -+ goto bad_area_nosemaphore; -+ down_read(&mm->mmap_sem); -+ } -+ -+ vma = find_vma(mm, address); -+ if (!vma) -+ goto bad_area; -+ if (vma->vm_start <= address) -+ goto good_area; -+ if (!(vma->vm_flags & VM_GROWSDOWN)) -+ goto bad_area; -+ if (error_code & 4) { -+ /* -+ * Accessing the stack below %esp is always a bug. -+ * The large cushion allows instructions like enter -+ * and pusha to work. ("enter $65535,$31" pushes -+ * 32 pointers and then decrements %esp by 65535.) -+ */ -+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp) -+ goto bad_area; -+ } -+ if (expand_stack(vma, address)) -+ goto bad_area; -+/* -+ * Ok, we have a good vm_area for this memory access, so -+ * we can handle it.. -+ */ -+good_area: -+ si_code = SEGV_ACCERR; -+ write = 0; -+ switch (error_code & 3) { -+ default: /* 3: write, present */ -+#ifdef TEST_VERIFY_AREA -+ if (regs->cs == GET_KERNEL_CS()) -+ printk("WP fault at %08lx\n", regs->eip); -+#endif -+ /* fall through */ -+ case 2: /* write, not present */ -+ if (!(vma->vm_flags & VM_WRITE)) -+ goto bad_area; -+ write++; -+ break; -+ case 1: /* read, present */ -+ goto bad_area; -+ case 0: /* read, not present */ -+ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) -+ goto bad_area; -+ } -+ -+ survive: -+ /* -+ * If for any reason at all we couldn't handle the fault, -+ * make sure we exit gracefully rather than endlessly redo -+ * the fault. -+ */ -+ switch (handle_mm_fault(mm, vma, address, write)) { -+ case VM_FAULT_MINOR: -+ tsk->min_flt++; -+ break; -+ case VM_FAULT_MAJOR: -+ tsk->maj_flt++; -+ break; -+ case VM_FAULT_SIGBUS: -+ goto do_sigbus; -+ case VM_FAULT_OOM: -+ goto out_of_memory; -+ default: -+ BUG(); -+ } -+ -+ /* -+ * Did it hit the DOS screen memory VA from vm86 mode? -+ */ -+ if (regs->eflags & VM_MASK) { -+ unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; -+ if (bit < 32) -+ tsk->thread.screen_bitmap |= 1 << bit; -+ } -+ up_read(&mm->mmap_sem); -+ return; -+ -+/* -+ * Something tried to access memory that isn't in our memory map.. 
-+ * Fix it, but check if it's kernel or user first.. -+ */ -+bad_area: -+ up_read(&mm->mmap_sem); -+ -+bad_area_nosemaphore: -+ /* User mode accesses just cause a SIGSEGV */ -+ if (error_code & 4) { -+ /* -+ * Valid to do another page fault here because this one came -+ * from user space. -+ */ -+ if (is_prefetch(regs, address, error_code)) -+ return; -+ -+ tsk->thread.cr2 = address; -+ /* Kernel addresses are always protection faults */ -+ tsk->thread.error_code = error_code | (address >= TASK_SIZE); -+ tsk->thread.trap_no = 14; -+ force_sig_info_fault(SIGSEGV, si_code, address, tsk); -+ return; -+ } -+ -+#ifdef CONFIG_X86_F00F_BUG -+ /* -+ * Pentium F0 0F C7 C8 bug workaround. -+ */ -+ if (boot_cpu_data.f00f_bug) { -+ unsigned long nr; -+ -+ nr = (address - idt_descr.address) >> 3; -+ -+ if (nr == 6) { -+ do_invalid_op(regs, 0); -+ return; -+ } -+ } -+#endif -+ -+no_context: -+ /* Are we prepared to handle this kernel fault? */ -+ if (fixup_exception(regs)) -+ return; -+ -+ /* -+ * Valid to do another page fault here, because if this fault -+ * had been triggered by is_prefetch fixup_exception would have -+ * handled it. -+ */ -+ if (is_prefetch(regs, address, error_code)) -+ return; -+ -+/* -+ * Oops. The kernel tried to access some bad page. We'll have to -+ * terminate things with extreme prejudice. -+ */ -+ -+ bust_spinlocks(1); -+ -+ if (oops_may_print()) { -+ #ifdef CONFIG_X86_PAE -+ if (error_code & 16) { -+ pte_t *pte = lookup_address(address); -+ -+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) -+ printk(KERN_CRIT "kernel tried to execute " -+ "NX-protected page - exploit attempt? " -+ "(uid: %d)\n", current->uid); -+ } -+ #endif -+ if (address < PAGE_SIZE) -+ printk(KERN_ALERT "BUG: unable to handle kernel NULL " -+ "pointer dereference"); -+ else -+ printk(KERN_ALERT "BUG: unable to handle kernel paging" -+ " request"); -+ printk(" at virtual address %08lx\n",address); -+ printk(KERN_ALERT " printing eip:\n"); -+ printk("%08lx\n", regs->eip); -+ } -+ dump_fault_path(address); -+ tsk->thread.cr2 = address; -+ tsk->thread.trap_no = 14; -+ tsk->thread.error_code = error_code; -+ die("Oops", regs, error_code); -+ bust_spinlocks(0); -+ do_exit(SIGKILL); -+ -+/* -+ * We ran out of memory, or some other thing happened to us that made -+ * us unable to handle the page fault gracefully. -+ */ -+out_of_memory: -+ up_read(&mm->mmap_sem); -+ if (tsk->pid == 1) { -+ yield(); -+ down_read(&mm->mmap_sem); -+ goto survive; -+ } -+ printk("VM: killing process %s\n", tsk->comm); -+ if (error_code & 4) -+ do_exit(SIGKILL); -+ goto no_context; -+ -+do_sigbus: -+ up_read(&mm->mmap_sem); -+ -+ /* Kernel mode? Handle exceptions or die */ -+ if (!(error_code & 4)) -+ goto no_context; -+ -+ /* User space => ok to do another page fault */ -+ if (is_prefetch(regs, address, error_code)) -+ return; -+ -+ tsk->thread.cr2 = address; -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = 14; -+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); -+} -+ -+#if !HAVE_SHARED_KERNEL_PMD -+void vmalloc_sync_all(void) -+{ -+ /* -+ * Note that races in the updates of insync and start aren't -+ * problematic: insync can only get set bits added, and updates to -+ * start are only improving performance (without affecting correctness -+ * if undone). -+ * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs. -+ * This change works just fine with 2-level paging too. 
-+ */ -+#define sync_index(a) ((a) >> PMD_SHIFT) -+ static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD); -+ static unsigned long start = TASK_SIZE; -+ unsigned long address; -+ -+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); -+ for (address = start; -+ address >= TASK_SIZE && address < hypervisor_virt_start; -+ address += 1UL << PMD_SHIFT) { -+ if (!test_bit(sync_index(address), insync)) { -+ unsigned long flags; -+ struct page *page; -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ /* XEN: failure path assumes non-empty pgd_list. */ -+ if (unlikely(!pgd_list)) { -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ return; -+ } -+ for (page = pgd_list; page; page = -+ (struct page *)page->index) { -+ spinlock_t *lock = page->mapping -+ ? &((struct mm_struct *)page->mapping) -+ ->page_table_lock -+ : NULL; -+ pmd_t *pmd; -+ -+ if (lock) -+ spin_lock(lock); -+ pmd = vmalloc_sync_one(page_address(page), -+ address); -+ if (lock) -+ spin_unlock(lock); -+ -+ if (!pmd) { -+ BUG_ON(page != pgd_list); -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ if (!page) -+ set_bit(sync_index(address), insync); -+ } -+ if (address == start && test_bit(sync_index(address), insync)) -+ start = address + (1UL << PMD_SHIFT); -+ } -+} -+#endif ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head/arch/x86/mm/highmem_32-xen.c 2008-10-29 09:55:56.000000000 +0100 @@ -0,0 +1,183 @@ +#include <linux/highmem.h> @@ -23549,7 +22144,7 @@ pick them up (for reference, prefixed with the version the removal occured): +device_initcall(add_pcspkr); +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ head/arch/x86/kernel/smp_64-xen.c 2008-04-02 12:34:02.000000000 +0200 ++++ head/arch/x86/kernel/smp-xen.c 2008-04-02 12:34:02.000000000 +0200 @@ -0,0 +1,575 @@ +/* + * Intel SMP support routines. @@ -25534,7 +24129,7 @@ pick them up (for reference, prefixed with the version the removal occured): + +__initcall(vsyscall_init); --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ head/arch/x86/mm/fault_64-xen.c 2011-10-07 10:44:29.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-10-07 10:44:29.000000000 +0200 @@ -0,0 +1,731 @@ +/* + * linux/arch/x86-64/mm/fault.c diff --git a/patches.xen/xen3-auto-xen-kconfig.diff b/patches.xen/xen3-auto-xen-kconfig.diff index 5cced633e7..890caf9772 100644 --- a/patches.xen/xen3-auto-xen-kconfig.diff +++ b/patches.xen/xen3-auto-xen-kconfig.diff @@ -4,8 +4,8 @@ From: xen-devel@lists.xen.org Patch-mainline: n/a Acked-by: jbeulich@suse.com ---- head.orig/arch/x86/Kconfig 2013-05-23 08:57:02.000000000 +0200 -+++ head/arch/x86/Kconfig 2013-05-23 15:38:09.000000000 +0200 +--- head.orig/arch/x86/Kconfig 2013-07-02 09:20:54.000000000 +0200 ++++ head/arch/x86/Kconfig 2013-07-02 09:34:53.000000000 +0200 @@ -235,7 +235,17 @@ config X86_64_SMP config X86_HT @@ -320,7 +320,7 @@ Acked-by: jbeulich@suse.com ---help--- Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. 
the way the CPU talks to the other stuff -@@ -2332,7 +2405,9 @@ source "net/Kconfig" +@@ -2333,7 +2406,9 @@ source "net/Kconfig" source "drivers/Kconfig" @@ -330,14 +330,14 @@ Acked-by: jbeulich@suse.com source "fs/Kconfig" -@@ -2344,4 +2419,6 @@ source "crypto/Kconfig" +@@ -2345,4 +2420,6 @@ source "crypto/Kconfig" source "arch/x86/kvm/Kconfig" +source "drivers/xen/Kconfig" + source "lib/Kconfig" ---- head.orig/arch/x86/Kconfig.cpu 2013-02-19 00:58:34.000000000 +0100 +--- head.orig/arch/x86/Kconfig.cpu 2013-07-02 09:20:54.000000000 +0200 +++ head/arch/x86/Kconfig.cpu 2013-01-08 09:10:04.000000000 +0100 @@ -319,7 +319,7 @@ config X86_PPRO_FENCE @@ -356,7 +356,7 @@ Acked-by: jbeulich@suse.com config X86_CMPXCHG64 def_bool y ---- head.orig/arch/x86/Kconfig.debug 2013-05-23 08:55:27.000000000 +0200 +--- head.orig/arch/x86/Kconfig.debug 2013-07-02 09:20:54.000000000 +0200 +++ head/arch/x86/Kconfig.debug 2012-02-08 10:28:21.000000000 +0100 @@ -122,7 +122,7 @@ config DEBUG_NX_TEST config DOUBLEFAULT @@ -367,7 +367,7 @@ Acked-by: jbeulich@suse.com ---help--- This option allows trapping of rare doublefault exceptions that would otherwise cause a system to silently reboot. Disabling this ---- head.orig/drivers/acpi/Kconfig 2013-05-23 08:55:33.000000000 +0200 +--- head.orig/drivers/acpi/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/acpi/Kconfig 2013-05-23 15:38:21.000000000 +0200 @@ -311,6 +311,7 @@ config ACPI_PCI_SLOT config X86_PM_TIMER @@ -391,7 +391,7 @@ Acked-by: jbeulich@suse.com + depends on (X86 || IA64) && XEN + default y endif # ACPI ---- head.orig/drivers/char/Kconfig 2013-05-23 08:57:02.000000000 +0200 +--- head.orig/drivers/char/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/char/Kconfig 2013-03-21 11:52:40.000000000 +0100 @@ -520,7 +520,7 @@ config MAX_RAW_DEVS config HPET @@ -402,7 +402,7 @@ Acked-by: jbeulich@suse.com help If you say Y here, you will have a miscdevice named "/dev/hpet/". Each open selects one of the timers supported by the HPET. The timers are ---- head.orig/drivers/char/tpm/Kconfig 2013-04-29 02:36:01.000000000 +0200 +--- head.orig/drivers/char/tpm/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/char/tpm/Kconfig 2013-03-21 11:52:29.000000000 +0100 @@ -91,4 +91,13 @@ config TCG_ST33_I2C To compile this driver as a module, choose M here; the module will be @@ -418,7 +418,7 @@ Acked-by: jbeulich@suse.com + will be called tpm_xenu. + endif # TCG_TPM ---- head.orig/drivers/cpufreq/Kconfig 2013-05-23 08:55:37.000000000 +0200 +--- head.orig/drivers/cpufreq/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/cpufreq/Kconfig 2011-06-30 15:36:01.000000000 +0200 @@ -2,6 +2,7 @@ menu "CPU Frequency scaling" @@ -428,7 +428,7 @@ Acked-by: jbeulich@suse.com help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. 
This is a nice method to save power, because ---- head.orig/drivers/tty/serial/8250/Kconfig 2013-04-29 02:36:01.000000000 +0200 +--- head.orig/drivers/tty/serial/8250/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/tty/serial/8250/Kconfig 2012-02-08 10:28:04.000000000 +0100 @@ -5,6 +5,7 @@ @@ -438,7 +438,7 @@ Acked-by: jbeulich@suse.com select SERIAL_CORE ---help--- This selects whether you want to include the driver for the standard ---- head.orig/drivers/xen/Kconfig 2013-05-23 08:56:33.000000000 +0200 +--- head.orig/drivers/xen/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/drivers/xen/Kconfig 2013-05-23 15:38:14.000000000 +0200 @@ -1,9 +1,381 @@ +# @@ -848,7 +848,7 @@ Acked-by: jbeulich@suse.com If in doubt, say yes. config XEN_DEV_EVTCHN ---- head.orig/fs/Kconfig 2013-05-23 08:57:01.000000000 +0200 +--- head.orig/fs/Kconfig 2013-07-02 09:20:54.000000000 +0200 +++ head/fs/Kconfig 2013-01-30 10:32:38.000000000 +0100 @@ -160,6 +160,7 @@ config HUGETLBFS bool "HugeTLB file system support" @@ -858,7 +858,7 @@ Acked-by: jbeulich@suse.com help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read ---- head.orig/kernel/Kconfig.preempt 2013-05-23 08:57:01.000000000 +0200 +--- head.orig/kernel/Kconfig.preempt 2013-07-02 09:20:54.000000000 +0200 +++ head/kernel/Kconfig.preempt 2012-04-10 15:16:13.000000000 +0200 @@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY diff --git a/patches.xen/xen3-patch-2.6.19 b/patches.xen/xen3-patch-2.6.19 index 6f2aa53d4b..753551315f 100644 --- a/patches.xen/xen3-patch-2.6.19 +++ b/patches.xen/xen3-patch-2.6.19 @@ -1634,107 +1634,6 @@ Acked-by: jbeulich@novell.com #ifdef CONFIG_ACPI acpi_boot_init(); ---- head.orig/arch/x86/kernel/smp_32-xen.c 2007-12-10 08:47:31.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:29:16.000000000 +0100 -@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo - * 2) Leave the mm if we are in the lazy tlb mode. - */ - --irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, -- struct pt_regs *regs) -+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id) - { - unsigned long cpu; - -@@ -567,16 +566,14 @@ void smp_send_stop(void) - * all the work is done automatically when - * we return from the interrupt. - */ --irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, -- struct pt_regs *regs) -+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) - { - - return IRQ_HANDLED; - } - - #include <linux/kallsyms.h> --irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, -- struct pt_regs *regs) -+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) - { - void (*func) (void *info) = call_data->func; - void *info = call_data->info; -@@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt( - return IRQ_HANDLED; - } - -+/* -+ * this function sends a 'generic call function' IPI to one other CPU -+ * in the system. -+ * -+ * cpu is a standard Linux logical CPU number. 
-+ */
-+static void
-+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ struct call_data_struct data;
-+ int cpus = 1;
-+
-+ data.func = func;
-+ data.info = info;
-+ atomic_set(&data.started, 0);
-+ data.wait = wait;
-+ if (wait)
-+ atomic_set(&data.finished, 0);
-+
-+ call_data = &data;
-+ wmb();
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-+
-+ /* Wait for response */
-+ while (atomic_read(&data.started) != cpus)
-+ cpu_relax();
-+
-+ if (!wait)
-+ return;
-+
-+ while (atomic_read(&data.finished) != cpus)
-+ cpu_relax();
-+}
-+
-+/*
-+ * smp_call_function_single - Run a function on another CPU
-+ * @func: The function to run. This must be fast and non-blocking.
-+ * @info: An arbitrary pointer to pass to the function.
-+ * @nonatomic: Currently unused.
-+ * @wait: If true, wait until function has completed on other CPUs.
-+ *
-+ * Retrurns 0 on success, else a negative status code.
-+ *
-+ * Does not return until the remote CPU is nearly ready to execute <func>
-+ * or is or has executed.
-+ */
-+
-+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-+ int nonatomic, int wait)
-+{
-+ /* prevent preemption and reschedule on another processor */
-+ int me = get_cpu();
-+ if (cpu == me) {
-+ WARN_ON(1);
-+ put_cpu();
-+ return -EBUSY;
-+ }
-+ spin_lock_bh(&call_lock);
-+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
-+ spin_unlock_bh(&call_lock);
-+ put_cpu();
-+ return 0;
-+}
-+EXPORT_SYMBOL(smp_call_function_single);
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:09:48.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:10:26.000000000 +0200
@@ -88,7 +88,6 @@ int pit_latch_buggy; /* ext
@@ -2383,94 +2282,6 @@ Acked-by: jbeulich@novell.com
 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
---- head.orig/arch/x86/mm/fault_32-xen.c 2010-09-23 15:39:04.000000000 +0200
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:29:16.000000000 +0100
-@@ -27,21 +27,24 @@
- #include <asm/uaccess.h>
- #include <asm/desc.h>
- #include <asm/kdebug.h>
-+#include <asm/segment.h>
- 
- extern void die(const char *,struct pt_regs *,long);
- 
--#ifdef CONFIG_KPROBES
--ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-+
- int register_page_fault_notifier(struct notifier_block *nb)
- {
- vmalloc_sync_all();
- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
- }
-+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
- 
- int unregister_page_fault_notifier(struct notifier_block *nb)
- {
- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
- }
-+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
- 
- static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
- };
- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
- }
--#else
--static inline int notify_page_fault(enum die_val val, const char *str,
-- struct pt_regs *regs, long err, int trap, int sig)
--{
-- return NOTIFY_DONE;
--}
--#endif
--
- 
- /*
- * Unlock any spinlocks which will prevent us from getting the
-@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
- }
- 
- /* The standard kernel/user address space limit. */
-- *eip_limit = (seg & 2) ?
USER_DS.seg : KERNEL_DS.seg; -+ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; - - /* By far the most common cases. */ -- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS())) -+ if (likely(SEGMENT_IS_FLAT_CODE(seg))) - return eip; - - /* Check the segment exists, is within the current LDT/GDT size, -@@ -559,11 +554,7 @@ good_area: - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ --#ifdef TEST_VERIFY_AREA -- if (regs->cs == GET_KERNEL_CS()) -- printk("WP fault at %08lx\n", regs->eip); --#endif -- /* fall through */ -+ /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; -@@ -572,7 +563,7 @@ good_area: - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ -- if (!(vma->vm_flags & (VM_READ | VM_EXEC))) -+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } - -@@ -704,7 +695,7 @@ no_context: - */ - out_of_memory: - up_read(&mm->mmap_sem); -- if (tsk->pid == 1) { -+ if (is_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; --- head.orig/arch/x86/mm/highmem_32-xen.c 2008-10-29 09:55:56.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:29:16.000000000 +0100 @@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page * @@ -6484,8 +6295,8 @@ Acked-by: jbeulich@novell.com NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ ---- head.orig/arch/x86/kernel/smp_64-xen.c 2008-04-02 12:34:02.000000000 +0200 -+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:29:16.000000000 +0100 +--- head.orig/arch/x86/kernel/smp-xen.c 2008-04-02 12:34:02.000000000 +0200 ++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:29:16.000000000 +0100 @@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v /* prevent preemption and reschedule on another processor */ int me = get_cpu(); @@ -7199,8 +7010,8 @@ Acked-by: jbeulich@novell.com return 0; } ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-10-07 10:44:29.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:24:01.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-10-07 10:44:29.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:24:01.000000000 +0200 @@ -40,8 +40,7 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) diff --git a/patches.xen/xen3-patch-2.6.20 b/patches.xen/xen3-patch-2.6.20 index 5e0b514c4c..aeaa15a02f 100644 --- a/patches.xen/xen3-patch-2.6.20 +++ b/patches.xen/xen3-patch-2.6.20 @@ -2688,19 +2688,6 @@ Acked-by: jbeulich@novell.com if (is_initial_xendomain()) { #ifdef CONFIG_VT ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:29:16.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:16.000000000 +0100 -@@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo - put_cpu(); - return -EBUSY; - } -+ -+ /* Can deadlock when called with interrupts disabled */ -+ WARN_ON(irqs_disabled()); -+ - spin_lock_bh(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock_bh(&call_lock); --- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:10:26.000000000 +0200 +++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:10:37.000000000 +0200 @@ -60,6 +60,7 @@ @@ -3119,52 +3106,6 @@ Acked-by: jbeulich@novell.com default y ---help--- Say Y here to get to see options for using your Linux host to run other ---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:29:16.000000000 +0100 -+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:16.000000000 +0100 -@@ -22,9 +22,9 @@ - 
#include <linux/highmem.h> - #include <linux/module.h> - #include <linux/kprobes.h> -+#include <linux/uaccess.h> - - #include <asm/system.h> --#include <asm/uaccess.h> - #include <asm/desc.h> - #include <asm/kdebug.h> - #include <asm/segment.h> -@@ -167,7 +167,7 @@ static inline unsigned long get_segment_ - static int __is_prefetch(struct pt_regs *regs, unsigned long addr) - { - unsigned long limit; -- unsigned long instr = get_segment_eip (regs, &limit); -+ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); - int scan_more = 1; - int prefetch = 0; - int i; -@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs - unsigned char instr_hi; - unsigned char instr_lo; - -- if (instr > limit) -+ if (instr > (unsigned char *)limit) - break; -- if (__get_user(opcode, (unsigned char __user *) instr)) -+ if (probe_kernel_address(instr, opcode)) - break; - - instr_hi = opcode & 0xf0; -@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - scan_more = 0; -- if (instr > limit) -+ if (instr > (unsigned char *)limit) - break; -- if (__get_user(opcode, (unsigned char __user *) instr)) -+ if (probe_kernel_address(instr, opcode)) - break; - prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); --- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:29:16.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:16.000000000 +0100 @@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page * @@ -3769,8 +3710,8 @@ Acked-by: jbeulich@novell.com c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); ---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:29:16.000000000 +0100 -+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:16.000000000 +0100 +--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:29:16.000000000 +0100 ++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:16.000000000 +0100 @@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v put_cpu(); return 0; @@ -4059,8 +4000,8 @@ Acked-by: jbeulich@novell.com __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:24:01.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:24.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:24:01.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:24.000000000 +0200 @@ -23,9 +23,9 @@ #include <linux/compiler.h> #include <linux/module.h> diff --git a/patches.xen/xen3-patch-2.6.21 b/patches.xen/xen3-patch-2.6.21 index ff4c326d97..80fd967588 100644 --- a/patches.xen/xen3-patch-2.6.21 +++ b/patches.xen/xen3-patch-2.6.21 @@ -847,27 +847,6 @@ Acked-by: jbeulich@novell.com - * c-basic-offset:8 - * End: - */ ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:16.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:22.000000000 +0100 -@@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. -- * Temporarily this turns IRQs off, so that lockups are -- * detected by the NMI watchdog. -+ * AK: x86-64 has a faster method that could be ported. - */ - spin_lock(&tlbstate_lock); - -@@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c - - while (!cpus_empty(flush_cpumask)) - /* nothing. 
lockup detection does not belong here */
-- mb();
-+ cpu_relax();
- 
- flush_mm = NULL;
- flush_va = 0;
--- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:10:37.000000000 +0200
+++ head/arch/x86/kernel/time-xen.c 2011-07-12 11:13:30.000000000 +0200
@@ -50,6 +50,7 @@
@@ -1480,78 +1459,6 @@ Acked-by: jbeulich@novell.com
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:16.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:22.000000000 +0100
-@@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc
- }
- EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
- 
--static inline int notify_page_fault(enum die_val val, const char *str,
-- struct pt_regs *regs, long err, int trap, int sig)
-+static inline int notify_page_fault(struct pt_regs *regs, long err)
- {
- struct die_args args = {
- .regs = regs,
-- .str = str,
-+ .str = "page fault",
- .err = err,
-- .trapnr = trap,
-- .signr = sig
-+ .trapnr = 14,
-+ .signr = SIGSEGV
- };
-- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
--}
--
--/*
-- * Unlock any spinlocks which will prevent us from getting the
-- * message out
-- */
--void bust_spinlocks(int yes)
--{
-- int loglevel_save = console_loglevel;
--
-- if (yes) {
-- oops_in_progress = 1;
-- return;
-- }
--#ifdef CONFIG_VT
-- unblank_screen();
--#endif
-- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk will give klogd
-- * a poke. Hold onto your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
-+ return atomic_notifier_call_chain(&notify_page_fault_chain,
-+ DIE_PAGE_FAULT, &args);
- }
- 
- /*
-@@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st
- /* Can take a spurious fault if mapping changes R/O -> R/W. */
- if (spurious_fault(regs, address, error_code))
- return;
-- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-- SIGSEGV) == NOTIFY_STOP)
-+ if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
- return;
- /*
- * Don't take the mm semaphore here.
If we fixup a prefetch -@@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st - goto bad_area_nosemaphore; - } - -- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, -- SIGSEGV) == NOTIFY_STOP) -+ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) - return; - - /* It's safe to allow irq's after cr2 has been saved and the vmalloc --- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:16.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:22.000000000 +0100 @@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page * @@ -3146,8 +3053,8 @@ Acked-by: jbeulich@novell.com #endif on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); hotcpu_notifier(cpu_vsyscall_notifier, 0); ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:24.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:34.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:24.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:34.000000000 +0200 @@ -56,38 +56,17 @@ int unregister_page_fault_notifier(struc } EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); diff --git a/patches.xen/xen3-patch-2.6.22 b/patches.xen/xen3-patch-2.6.22 index 031b2a5b70..540716204d 100644 --- a/patches.xen/xen3-patch-2.6.22 +++ b/patches.xen/xen3-patch-2.6.22 @@ -796,328 +796,6 @@ Acked-by: jbeulich@novell.com return prev_p; } ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:22.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:29.000000000 +0100 -@@ -13,7 +13,6 @@ - #include <linux/mm.h> - #include <linux/delay.h> - #include <linux/spinlock.h> --#include <linux/smp_lock.h> - #include <linux/kernel_stat.h> - #include <linux/mc146818rtc.h> - #include <linux/cache.h> -@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask; - static struct mm_struct * flush_mm; - static unsigned long flush_va; - static DEFINE_SPINLOCK(tlbstate_lock); --#define FLUSH_ALL 0xffffffff - - /* - * We cannot call mmdrop() because we are in interrupt context, -@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { -- if (flush_va == FLUSH_ALL) -+ if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); -@@ -314,9 +312,11 @@ out: - return IRQ_HANDLED; - } - --static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, -- unsigned long va) -+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, -+ unsigned long va) - { -+ cpumask_t cpumask = *cpumaskp; -+ - /* - * A couple of (to be removed) sanity checks: - * -@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - -+#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. 
*/ - cpus_and(cpumask, cpumask, cpu_online_map); -- if (cpus_empty(cpumask)) -+ if (unlikely(cpus_empty(cpumask))) - return; -+#endif - - /* - * i'm not happy about this global shared spinlock in the -@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c - - flush_mm = mm; - flush_va = va; --#if NR_CPUS <= BITS_PER_LONG -- atomic_set_mask(cpumask, &flush_cpumask); --#else -- { -- int k; -- unsigned long *flush_mask = (unsigned long *)&flush_cpumask; -- unsigned long *cpu_mask = (unsigned long *)&cpumask; -- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) -- atomic_set_mask(cpu_mask[k], &flush_mask[k]); -- } --#endif -+ cpus_or(flush_cpumask, cpumask, flush_cpumask); - /* - * We have to send the IPI only to - * CPUs affected. -@@ -378,7 +370,7 @@ void flush_tlb_current_task(void) - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, FLUSH_ALL); -+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); - } - -@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm - leave_mm(smp_processor_id()); - } - if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, FLUSH_ALL); -+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); - } -@@ -446,7 +438,7 @@ void flush_tlb_all(void) - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ --void smp_send_reschedule(int cpu) -+void xen_smp_send_reschedule(int cpu) - { - WARN_ON(cpu_is_offline(cpu)); - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -@@ -478,36 +470,79 @@ void unlock_ipi_call_lock(void) - - static struct call_data_struct *call_data; - -+static void __smp_call_function(void (*func) (void *info), void *info, -+ int nonatomic, int wait) -+{ -+ struct call_data_struct data; -+ int cpus = num_online_cpus() - 1; -+ -+ if (!cpus) -+ return; -+ -+ data.func = func; -+ data.info = info; -+ atomic_set(&data.started, 0); -+ data.wait = wait; -+ if (wait) -+ atomic_set(&data.finished, 0); -+ -+ call_data = &data; -+ mb(); -+ -+ /* Send a message to all other CPUs and wait for them to respond */ -+ send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ -+ /* Wait for response */ -+ while (atomic_read(&data.started) != cpus) -+ cpu_relax(); -+ -+ if (wait) -+ while (atomic_read(&data.finished) != cpus) -+ cpu_relax(); -+} -+ -+ - /** -- * smp_call_function(): Run a function on all other CPUs. -+ * smp_call_function_mask(): Run a function on a set of other CPUs. -+ * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. -- * @nonatomic: currently unused. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * -- * Returns 0 on success, else a negative status code. Does not return until -- * remote CPUs are nearly ready to execute <<func>> or are or have executed. -+ * Returns 0 on success, else a negative status code. -+ * -+ * If @wait is true, then returns once @func has returned; otherwise -+ * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. 
- */ --int smp_call_function (void (*func) (void *info), void *info, int nonatomic, -- int wait) -+int -+xen_smp_call_function_mask(cpumask_t mask, -+ void (*func)(void *), void *info, -+ int wait) - { - struct call_data_struct data; -+ cpumask_t allbutself; - int cpus; - -+ /* Can deadlock when called with interrupts disabled */ -+ WARN_ON(irqs_disabled()); -+ - /* Holding any lock stops cpus from going down. */ - spin_lock(&call_lock); -- cpus = num_online_cpus() - 1; -+ -+ allbutself = cpu_online_map; -+ cpu_clear(smp_processor_id(), allbutself); -+ -+ cpus_and(mask, mask, allbutself); -+ cpus = cpus_weight(mask); -+ - if (!cpus) { - spin_unlock(&call_lock); - return 0; - } - -- /* Can deadlock when called with interrupts disabled */ -- WARN_ON(irqs_disabled()); -- - data.func = func; - data.info = info; - atomic_set(&data.started, 0); -@@ -517,9 +552,12 @@ int smp_call_function (void (*func) (voi - - call_data = &data; - mb(); -- -- /* Send a message to all other CPUs and wait for them to respond */ -- send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ -+ /* Send a message to other CPUs */ -+ if (cpus_equal(mask, allbutself)) -+ send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ else -+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) -@@ -532,15 +570,14 @@ int smp_call_function (void (*func) (voi - - return 0; - } --EXPORT_SYMBOL(smp_call_function); - - static void stop_this_cpu (void * dummy) - { -+ local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); -- local_irq_disable(); - disable_all_local_evtchn(); - if (cpu_data[smp_processor_id()].hlt_works_ok) - for(;;) halt(); -@@ -551,13 +588,18 @@ static void stop_this_cpu (void * dummy) - * this function calls the 'stop' function on all other CPUs in the system. - */ - --void smp_send_stop(void) -+void xen_smp_send_stop(void) - { -- smp_call_function(stop_this_cpu, NULL, 1, 0); -+ /* Don't deadlock on the call lock in panic */ -+ int nolock = !spin_trylock(&call_lock); -+ unsigned long flags; - -- local_irq_disable(); -+ local_irq_save(flags); -+ __smp_call_function(stop_this_cpu, NULL, 0, 0); -+ if (!nolock) -+ spin_unlock(&call_lock); - disable_all_local_evtchn(); -- local_irq_enable(); -+ local_irq_restore(flags); - } - - /* -@@ -598,74 +640,3 @@ irqreturn_t smp_call_function_interrupt( - - return IRQ_HANDLED; - } -- --/* -- * this function sends a 'generic call function' IPI to one other CPU -- * in the system. -- * -- * cpu is a standard Linux logical CPU number. -- */ --static void --__smp_call_function_single(int cpu, void (*func) (void *info), void *info, -- int nonatomic, int wait) --{ -- struct call_data_struct data; -- int cpus = 1; -- -- data.func = func; -- data.info = info; -- atomic_set(&data.started, 0); -- data.wait = wait; -- if (wait) -- atomic_set(&data.finished, 0); -- -- call_data = &data; -- wmb(); -- /* Send a message to all other CPUs and wait for them to respond */ -- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); -- -- /* Wait for response */ -- while (atomic_read(&data.started) != cpus) -- cpu_relax(); -- -- if (!wait) -- return; -- -- while (atomic_read(&data.finished) != cpus) -- cpu_relax(); --} -- --/* -- * smp_call_function_single - Run a function on another CPU -- * @func: The function to run. This must be fast and non-blocking. -- * @info: An arbitrary pointer to pass to the function. -- * @nonatomic: Currently unused. -- * @wait: If true, wait until function has completed on other CPUs. 
-- * -- * Retrurns 0 on success, else a negative status code. -- * -- * Does not return until the remote CPU is nearly ready to execute <func> -- * or is or has executed. -- */ -- --int smp_call_function_single(int cpu, void (*func) (void *info), void *info, -- int nonatomic, int wait) --{ -- /* prevent preemption and reschedule on another processor */ -- int me = get_cpu(); -- if (cpu == me) { -- WARN_ON(1); -- put_cpu(); -- return -EBUSY; -- } -- -- /* Can deadlock when called with interrupts disabled */ -- WARN_ON(irqs_disabled()); -- -- spin_lock_bh(&call_lock); -- __smp_call_function_single(cpu, func, info, nonatomic, wait); -- spin_unlock_bh(&call_lock); -- put_cpu(); -- return 0; --} --EXPORT_SYMBOL(smp_call_function_single); --- head.orig/arch/x86/kernel/time-xen.c 2011-07-12 11:13:30.000000000 +0200 +++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:26:34.000000000 +0100 @@ -42,7 +42,6 @@ @@ -1529,128 +1207,6 @@ Acked-by: jbeulich@novell.com unsigned long base = (kesp - uesp) & -THREAD_SIZE; unsigned long new_kesp = kesp - base; unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; ---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:22.000000000 +0100 -+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:29.000000000 +0100 -@@ -14,19 +14,20 @@ - #include <linux/mman.h> - #include <linux/mm.h> - #include <linux/smp.h> --#include <linux/smp_lock.h> - #include <linux/interrupt.h> - #include <linux/init.h> - #include <linux/tty.h> - #include <linux/vt_kern.h> /* For unblank_screen() */ - #include <linux/highmem.h> -+#include <linux/bootmem.h> /* for max_low_pfn */ -+#include <linux/vmalloc.h> - #include <linux/module.h> - #include <linux/kprobes.h> - #include <linux/uaccess.h> -+#include <linux/kdebug.h> - - #include <asm/system.h> - #include <asm/desc.h> --#include <asm/kdebug.h> - #include <asm/segment.h> - - extern void die(const char *,struct pt_regs *,long); -@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon - unsigned long page; - - page = read_cr3(); -- page = ((unsigned long *) __va(page))[address >> 22]; -- if (oops_may_print()) -- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, -- machine_to_phys(page)); -+ page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT]; -+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, -+ machine_to_phys(page)); - /* - * We must not directly access the pte in the highpte - * case if the page table is located in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ --#ifdef CONFIG_HIGHPTE -- if ((page >> PAGE_SHIFT) >= highstart_pfn) -- return; --#endif -- if ((page & 1) && oops_may_print()) { -- page &= PAGE_MASK; -- address &= 0x003ff000; -- page = machine_to_phys(page); -- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; -+ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn -+ && (page & _PAGE_PRESENT)) { -+ page = machine_to_phys(page & PAGE_MASK); -+ page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) -+ & (PTRS_PER_PTE - 1)]; - printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, - machine_to_phys(page)); - } -@@ -581,6 +577,11 @@ bad_area: - bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { -+ /* -+ * It's possible to have interrupts off here. -+ */ -+ local_irq_enable(); -+ - /* - * Valid to do another page fault here because this one came - * from user space. 
-@@ -633,7 +634,7 @@ no_context: - bust_spinlocks(1); - - if (oops_may_print()) { -- #ifdef CONFIG_X86_PAE -+#ifdef CONFIG_X86_PAE - if (error_code & 16) { - pte_t *pte = lookup_address(address); - -@@ -642,7 +643,7 @@ no_context: - "NX-protected page - exploit attempt? " - "(uid: %d)\n", current->uid); - } -- #endif -+#endif - if (address < PAGE_SIZE) - printk(KERN_ALERT "BUG: unable to handle kernel NULL " - "pointer dereference"); -@@ -652,8 +653,8 @@ no_context: - printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); -+ dump_fault_path(address); - } -- dump_fault_path(address); - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; -@@ -694,7 +695,6 @@ do_sigbus: - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); - } - --#if !HAVE_SHARED_KERNEL_PMD - void vmalloc_sync_all(void) - { - /* -@@ -710,6 +710,9 @@ void vmalloc_sync_all(void) - static unsigned long start = TASK_SIZE; - unsigned long address; - -+ if (SHARED_KERNEL_PMD) -+ return; -+ - BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); - for (address = start; - address >= TASK_SIZE && address < hypervisor_virt_start; -@@ -752,4 +755,3 @@ void vmalloc_sync_all(void) - start = address + (1UL << PMD_SHIFT); - } - } --#endif --- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:22.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:29.000000000 +0100 @@ -26,7 +26,7 @@ void kunmap(struct page *page) @@ -2999,8 +2555,8 @@ Acked-by: jbeulich@novell.com }; ---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:16.000000000 +0100 -+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:29.000000000 +0100 +--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:16.000000000 +0100 ++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:29.000000000 +0100 @@ -14,7 +14,6 @@ #include <linux/mm.h> #include <linux/delay.h> @@ -3339,8 +2895,8 @@ Acked-by: jbeulich@novell.com smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:34.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:41.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:34.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:27:41.000000000 +0200 @@ -15,22 +15,22 @@ #include <linux/mman.h> #include <linux/mm.h> diff --git a/patches.xen/xen3-patch-2.6.23 b/patches.xen/xen3-patch-2.6.23 index ec01a8f1e1..bbb858011b 100644 --- a/patches.xen/xen3-patch-2.6.23 +++ b/patches.xen/xen3-patch-2.6.23 @@ -405,32 +405,6 @@ Acked-by: jbeulich@novell.com if (is_initial_xendomain()) { #ifdef CONFIG_VT ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:29.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:49:31.000000000 +0100 -@@ -22,6 +22,7 @@ - - #include <asm/mtrr.h> - #include <asm/tlbflush.h> -+#include <asm/mmu_context.h> - #if 0 - #include <mach_apic.h> - #endif -@@ -217,13 +218,13 @@ static unsigned long flush_va; - static DEFINE_SPINLOCK(tlbstate_lock); - - /* -- * We cannot call mmdrop() because we are in interrupt context, -+ * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. 
- */ --static inline void leave_mm (unsigned long cpu) -+void leave_mm(unsigned long cpu) - { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); --- head.orig/arch/x86/kernel/time-xen.c 2012-02-10 13:26:34.000000000 +0100 +++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:26:46.000000000 +0100 @@ -74,11 +74,12 @@ @@ -745,93 +719,6 @@ Acked-by: jbeulich@novell.com + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE)); + } } ---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:29.000000000 +0100 -+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:49:31.000000000 +0100 -@@ -346,7 +346,10 @@ static inline pmd_t *vmalloc_sync_one(pg - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; -- if (!pmd_present(*pmd)) -+ if (!pmd_present(*pmd)) { -+ bool lazy = x86_read_percpu(xen_lazy_mmu); -+ -+ x86_write_percpu(xen_lazy_mmu, false); - #if CONFIG_XEN_COMPAT > 0x030002 - set_pmd(pmd, *pmd_k); - #else -@@ -356,7 +359,8 @@ static inline pmd_t *vmalloc_sync_one(pg - */ - set_pmd(pmd, __pmd(pmd_val(*pmd_k))); - #endif -- else -+ x86_write_percpu(xen_lazy_mmu, lazy); -+ } else - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - return pmd_k; - } -@@ -388,6 +392,8 @@ static inline int vmalloc_fault(unsigned - return 0; - } - -+int show_unhandled_signals = 1; -+ - /* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate -@@ -408,6 +414,7 @@ fastcall void __kprobes do_page_fault(st - struct vm_area_struct * vma; - unsigned long address; - int write, si_code; -+ int fault; - - /* get the address */ - address = read_cr2(); -@@ -541,20 +548,18 @@ good_area: - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ -- switch (handle_mm_fault(mm, vma, address, write)) { -- case VM_FAULT_MINOR: -- tsk->min_flt++; -- break; -- case VM_FAULT_MAJOR: -- tsk->maj_flt++; -- break; -- case VM_FAULT_SIGBUS: -- goto do_sigbus; -- case VM_FAULT_OOM: -+ fault = handle_mm_fault(mm, vma, address, write); -+ if (unlikely(fault & VM_FAULT_ERROR)) { -+ if (fault & VM_FAULT_OOM) - goto out_of_memory; -- default: -- BUG(); -+ else if (fault & VM_FAULT_SIGBUS) -+ goto do_sigbus; -+ BUG(); - } -+ if (fault & VM_FAULT_MAJOR) -+ tsk->maj_flt++; -+ else -+ tsk->min_flt++; - - /* - * Did it hit the DOS screen memory VA from vm86 mode? -@@ -589,6 +594,14 @@ bad_area_nosemaphore: - if (is_prefetch(regs, address, error_code)) - return; - -+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && -+ printk_ratelimit()) { -+ printk("%s%s[%d]: segfault at %08lx eip %08lx " -+ "esp %08lx error %lx\n", -+ tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, -+ tsk->comm, tsk->pid, address, regs->eip, -+ regs->esp, error_code); -+ } - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); --- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:49:31.000000000 +0100 @@ -34,17 +34,16 @@ void *kmap_atomic_prot(struct page *page @@ -1703,8 +1590,8 @@ Acked-by: jbeulich@novell.com }; static char *x86_power_flags[] = { "ts", /* temperature sensor */ ---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:29.000000000 +0100 -+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:32:29.000000000 +0100 ++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:49:31.000000000 +0100 @@ -362,7 +362,7 @@ __smp_call_function_single(int cpu, void } @@ -1860,8 +1747,8 @@ Acked-by: jbeulich@novell.com write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:27:41.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-08-15 10:44:41.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:27:41.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-08-15 10:44:41.000000000 +0200 @@ -160,7 +160,9 @@ void dump_pagetable(unsigned long addres pmd_t *pmd; pte_t *pte; diff --git a/patches.xen/xen3-patch-2.6.24 b/patches.xen/xen3-patch-2.6.24 index 6f98841f08..1b3ddf306f 100644 --- a/patches.xen/xen3-patch-2.6.24 +++ b/patches.xen/xen3-patch-2.6.24 @@ -2071,61 +2071,8 @@ Acked-by: jbeulich@novell.com return c_start(m, pos); } ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:49:31.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:56:27.000000000 +0100 -@@ -72,7 +72,7 @@ - * - * B stepping CPUs may hang. There are hardware work arounds - * for this. We warn about it in case your board doesn't have the work -- * arounds. Basically thats so I can tell anyone with a B stepping -+ * arounds. Basically that's so I can tell anyone with a B stepping - * CPU and SMP problems "tough". - * - * Specific items [From Pentium Processor Specification Update] -@@ -241,7 +241,7 @@ void leave_mm(unsigned long cpu) - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis -- * for the wrong mm, and in the worst case we perform a superflous -+ * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. 
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-@@ -309,6 +309,7 @@ irqreturn_t smp_invalidate_interrupt(int
- smp_mb__after_clear_bit();
- out:
- put_cpu_no_resched();
-+ __get_cpu_var(irq_stat).irq_tlb_count++;
- 
- return IRQ_HANDLED;
- }
-@@ -580,7 +581,7 @@ static void stop_this_cpu (void * dummy)
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_all_local_evtchn();
-- if (cpu_data[smp_processor_id()].hlt_works_ok)
-+ if (cpu_data(smp_processor_id()).hlt_works_ok)
- for(;;) halt();
- for (;;);
- }
-@@ -610,6 +611,7 @@ void xen_smp_send_stop(void)
- */
- irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
- {
-+ __get_cpu_var(irq_stat).irq_resched_count++;
- 
- return IRQ_HANDLED;
- }
-@@ -632,6 +634,7 @@ irqreturn_t smp_call_function_interrupt(
- */
- irq_enter();
- (*func)(info);
-+ __get_cpu_var(irq_stat).irq_call_count++;
- irq_exit();
- 
- if (wait) {
--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:49:31.000000000 +0100
+++ head/arch/x86/kernel/smp-xen.c 2011-01-31 17:56:27.000000000 +0100
@@ -167,6 +167,7 @@ asmlinkage void smp_invalidate_interrupt
 out:
 ack_APIC_irq();
@@ -2930,191 +2877,8 @@ Acked-by: jbeulich@novell.com
 write_rdtscp_aux((node << 12) | cpu);
 
 /* Store cpu number in limit so that it can be loaded quickly
---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:49:31.000000000 +0100
-+++ head/arch/x86/mm/fault_32-xen.c 2011-01-31 17:56:27.000000000 +0100
-@@ -25,6 +25,7 @@
- #include <linux/kprobes.h>
- #include <linux/uaccess.h>
- #include <linux/kdebug.h>
-+#include <linux/kprobes.h>
- 
- #include <asm/system.h>
- #include <asm/desc.h>
-@@ -32,33 +33,27 @@
- 
- extern void die(const char *,struct pt_regs *,long);
- 
---static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
---
--int register_page_fault_notifier(struct notifier_block *nb)
-+#ifdef CONFIG_KPROBES
-+static inline int notify_page_fault(struct pt_regs *regs)
- {
-- vmalloc_sync_all();
-- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
--}
--EXPORT_SYMBOL_GPL(register_page_fault_notifier);
-+ int ret = 0;
- 
--int unregister_page_fault_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
--}
--EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-+ /* kprobe_running() needs smp_processor_id() */
-+ if (!user_mode_vm(regs)) {
-+ preempt_disable();
-+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
-+ ret = 1;
-+ preempt_enable();
-+ }
- 
--static inline int notify_page_fault(struct pt_regs *regs, long err)
-+ return ret;
-+}
-+#else
-+static inline int notify_page_fault(struct pt_regs *regs)
- {
-- struct die_args args = {
-- .regs = regs,
-- .str = "page fault",
-- .err = err,
-- .trapnr = 14,
-- .signr = SIGSEGV
-- };
-- return atomic_notifier_call_chain(&notify_page_fault_chain,
-- DIE_PAGE_FAULT, &args);
-+ return 0;
- }
-+#endif
- 
- /*
- * Return EIP plus the CS segment base. The segment limit is also
-@@ -110,7 +105,7 @@ static inline unsigned long get_segment_
- LDT and other horrors are only used in user space. */
- if (seg & (1<<2)) {
- /* Must lock the LDT while reading it. 
*/
-- down(&current->mm->context.sem);
-+ mutex_lock(&current->mm->context.lock);
- desc = current->mm->context.ldt;
- desc = (void *)desc + (seg & ~7);
- } else {
-@@ -123,7 +118,7 @@ static inline unsigned long get_segment_
- base = get_desc_base((unsigned long *)desc);
- 
- if (seg & (1<<2)) {
-- up(&current->mm->context.sem);
-+ mutex_unlock(&current->mm->context.lock);
- } else
- put_cpu();
- 
-@@ -244,7 +239,7 @@ static void dump_fault_path(unsigned lon
- if (mfn_to_pfn(mfn) >= highstart_pfn)
- return;
- #endif
-- if (p[0] & _PAGE_PRESENT) {
-+ if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) {
- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
- p = (unsigned long *) __va(page);
- address &= 0x001fffff;
-@@ -270,7 +265,8 @@ static void dump_fault_path(unsigned lon
- * it's allocated already.
- */
- if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
-- && (page & _PAGE_PRESENT)) {
-+ && (page & _PAGE_PRESENT)
-+ && !(page & _PAGE_PSE)) {
- page = machine_to_phys(page & PAGE_MASK);
- page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
-@@ -416,6 +412,11 @@ fastcall void __kprobes do_page_fault(st
- int write, si_code;
- int fault;
- 
-+ /*
-+ * We can fault from pretty much anywhere, with unknown IRQ state.
-+ */
-+ trace_hardirqs_fixup();
-+
- /* get the address */
- address = read_cr2();
- 
-@@ -453,7 +454,7 @@ fastcall void __kprobes do_page_fault(st
- /* Can take a spurious fault if mapping changes R/O -> R/W. */
- if (spurious_fault(regs, address, error_code))
- return;
-- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
-+ if (notify_page_fault(regs))
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
-@@ -462,7 +463,7 @@ fastcall void __kprobes do_page_fault(st
- goto bad_area_nosemaphore;
- }
- 
-- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
-+ if (notify_page_fault(regs))
- return;
- 
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
-@@ -481,7 +482,7 @@ fastcall void __kprobes do_page_fault(st
- 
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
-- * kernel and should generate an OOPS. Unfortunatly, in the case of an
-+ * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
-@@ -489,7 +490,7 @@ fastcall void __kprobes do_page_fault(st
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
-- * the source reference check when there is a possibilty of a deadlock.
-+ * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
-@@ -598,8 +599,8 @@ bad_area_nosemaphore:
- printk_ratelimit()) {
- printk("%s%s[%d]: segfault at %08lx eip %08lx "
- "esp %08lx error %lx\n",
-- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-- tsk->comm, tsk->pid, address, regs->eip,
-+ task_pid_nr(tsk) > 1 ?
KERN_INFO : KERN_EMERG, -+ tsk->comm, task_pid_nr(tsk), address, regs->eip, - regs->esp, error_code); - } - tsk->thread.cr2 = address; -@@ -664,8 +665,7 @@ no_context: - printk(KERN_ALERT "BUG: unable to handle kernel paging" - " request"); - printk(" at virtual address %08lx\n",address); -- printk(KERN_ALERT " printing eip:\n"); -- printk("%08lx\n", regs->eip); -+ printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); - dump_fault_path(address); - } - tsk->thread.cr2 = address; -@@ -681,14 +681,14 @@ no_context: - */ - out_of_memory: - up_read(&mm->mmap_sem); -- if (is_init(tsk)) { -+ if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) -- do_exit(SIGKILL); -+ do_group_exit(SIGKILL); - goto no_context; - - do_sigbus: ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-08-15 10:44:41.000000000 +0200 -+++ head/arch/x86/mm/fault_64-xen.c 2011-07-26 09:28:01.000000000 +0200 +--- head.orig/arch/x86/mm/fault-xen.c 2011-08-15 10:44:41.000000000 +0200 ++++ head/arch/x86/mm/fault-xen.c 2011-07-26 09:28:01.000000000 +0200 @@ -25,6 +25,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> diff --git a/patches.xen/xen3-patch-2.6.25 b/patches.xen/xen3-patch-2.6.25 index 2481e0bc11..20a712332b 100644 --- a/patches.xen/xen3-patch-2.6.25 +++ b/patches.xen/xen3-patch-2.6.25 @@ -4403,7 +4403,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} ---- head.orig/arch/x86/kernel/rtc.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/kernel/rtc.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/kernel/rtc.c 2013-05-23 17:11:10.000000000 +0200 @@ -31,6 +31,7 @@ EXPORT_SYMBOL(cmos_lock); DEFINE_SPINLOCK(rtc_lock); @@ -6158,35 +6158,8 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches .next = c_next, .stop = c_stop, .show = show_cpuinfo, ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:56:27.000000000 +0100 -+++ head/arch/x86/kernel/smp_32-xen.c 2011-01-31 18:01:51.000000000 +0100 -@@ -168,7 +168,7 @@ void __send_IPI_shortcut(unsigned int sh - } - } - --void fastcall send_IPI_self(int vector) -+void send_IPI_self(int vector) - { - __send_IPI_shortcut(APIC_DEST_SELF, vector); - } -@@ -224,13 +224,14 @@ static DEFINE_SPINLOCK(tlbstate_lock); - * We need to reload %cr3 since the page tables may be going - * away from under us.. 
- */ --void leave_mm(unsigned long cpu) -+void leave_mm(int cpu) - { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); - } -+EXPORT_SYMBOL_GPL(leave_mm); - - /* - * ---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:56:27.000000000 +0100 -+++ head/arch/x86/kernel/smp_64-xen.c 2011-01-31 18:01:51.000000000 +0100 +--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 17:56:27.000000000 +0100 ++++ head/arch/x86/kernel/smp-xen.c 2011-01-31 18:01:51.000000000 +0100 @@ -33,7 +33,7 @@ #ifndef CONFIG_XEN @@ -8273,46 +8246,39 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE)); } } ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 +--- head.orig/arch/x86/mm/fault-xen.c 2011-07-26 09:28:01.000000000 +0200 +++ head/arch/x86/mm/fault-xen.c 2011-08-15 10:46:15.000000000 +0200 -@@ -0,0 +1,1037 @@ -+/* -+ * Copyright (C) 1995 Linus Torvalds -+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. -+ */ -+ -+#include <linux/signal.h> -+#include <linux/sched.h> -+#include <linux/kernel.h> -+#include <linux/errno.h> -+#include <linux/string.h> -+#include <linux/types.h> -+#include <linux/ptrace.h> -+#include <linux/mman.h> -+#include <linux/mm.h> -+#include <linux/smp.h> -+#include <linux/interrupt.h> -+#include <linux/init.h> -+#include <linux/tty.h> -+#include <linux/vt_kern.h> /* For unblank_screen() */ -+#include <linux/compiler.h> +@@ -1,6 +1,4 @@ + /* +- * linux/arch/x86-64/mm/fault.c +- * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. + */ +@@ -20,34 +18,48 @@ + #include <linux/tty.h> + #include <linux/vt_kern.h> /* For unblank_screen() */ + #include <linux/compiler.h> +#include <linux/highmem.h> +#include <linux/bootmem.h> /* for max_low_pfn */ -+#include <linux/vmalloc.h> -+#include <linux/module.h> -+#include <linux/kprobes.h> -+#include <linux/uaccess.h> -+#include <linux/kdebug.h> -+ -+#include <asm/system.h> + #include <linux/vmalloc.h> + #include <linux/module.h> + #include <linux/kprobes.h> + #include <linux/uaccess.h> + #include <linux/kdebug.h> +-#include <linux/kprobes.h> + + #include <asm/system.h> +#include <asm/desc.h> +#include <asm/segment.h> -+#include <asm/pgalloc.h> -+#include <asm/smp.h> -+#include <asm/tlbflush.h> -+#include <asm/proto.h> -+#include <asm-generic/sections.h> -+ + #include <asm/pgalloc.h> + #include <asm/smp.h> + #include <asm/tlbflush.h> + #include <asm/proto.h> + #include <asm-generic/sections.h> + +-/* Page fault error code bits */ +-#define PF_PROT (1<<0) /* or no page found */ +/* + * Page fault error code bits + * bit 0 == 0 means no page found, 1 means protection fault @@ -8322,34 +8288,47 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + * bit 4 == 1 means fault was an instruction fetch + */ +#define PF_PROT (1<<0) -+#define PF_WRITE (1<<1) + #define PF_WRITE (1<<1) +-#define PF_USER (1<<2) +-#define PF_RSVD (1<<3) +#define PF_USER (1<<2) +#define PF_RSVD (1<<3) -+#define PF_INSTR (1<<4) -+ -+static inline int notify_page_fault(struct pt_regs *regs) -+{ + #define PF_INSTR (1<<4) + +-#ifdef CONFIG_KPROBES + static inline int notify_page_fault(struct pt_regs *regs) + { +#ifdef CONFIG_KPROBES -+ int ret = 0; -+ -+ /* kprobe_running() needs smp_processor_id() */ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ +#ifdef CONFIG_X86_32 + if (!user_mode_vm(regs)) { 
+#else -+ if (!user_mode(regs)) { -+#endif -+ preempt_disable(); -+ if (kprobe_running() && kprobe_fault_handler(regs, 14)) -+ ret = 1; -+ preempt_enable(); -+ } -+ -+ return ret; -+#else -+ return 0; + if (!user_mode(regs)) { +#endif + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; +@@ -55,100 +67,164 @@ static inline int notify_page_fault(stru + } + + return ret; +-} + #else +-static inline int notify_page_fault(struct pt_regs *regs) +-{ + return 0; +-} + #endif +} -+ + +-/* Sometimes the CPU reports invalid exceptions on prefetch. +- Check that here and ignore. +- Opcode checker based on code by Richard Brunner */ +-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, +- unsigned long error_code) +-{ +/* + * X86_32 + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. @@ -8364,49 +8343,69 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches +static int is_prefetch(struct pt_regs *regs, unsigned long addr, + unsigned long error_code) +{ -+ unsigned char *instr; -+ int scan_more = 1; + unsigned char *instr; + int scan_more = 1; +- int prefetch = 0; + int prefetch = 0; -+ unsigned char *max_instr; -+ + unsigned char *max_instr; + +- /* If it was a exec fault ignore */ + /* + * If it was a exec (instruction fetch) fault on NX page, then + * do not ignore the fault: + */ -+ if (error_code & PF_INSTR) -+ return 0; + if (error_code & PF_INSTR) + return 0; +- +- instr = (unsigned char __user *)convert_rip_to_linear(current, regs); + + instr = (unsigned char *)convert_ip_to_linear(current, regs); -+ max_instr = instr + 15; -+ -+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) -+ return 0; -+ + max_instr = instr + 15; + + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + return 0; + +- while (scan_more && instr < max_instr) { + while (scan_more && instr < max_instr) { -+ unsigned char opcode; -+ unsigned char instr_hi; -+ unsigned char instr_lo; -+ -+ if (probe_kernel_address(instr, opcode)) + unsigned char opcode; + unsigned char instr_hi; + unsigned char instr_lo; + + if (probe_kernel_address(instr, opcode)) +- break; + break; -+ + +- instr_hi = opcode & 0xf0; +- instr_lo = opcode & 0x0f; + instr_hi = opcode & 0xf0; + instr_lo = opcode & 0x0f; -+ instr++; -+ + instr++; + +- switch (instr_hi) { + switch (instr_hi) { -+ case 0x20: -+ case 0x30: + case 0x20: + case 0x30: +- /* Values 0x26,0x2E,0x36,0x3E are valid x86 +- prefixes. In long mode, the CPU will signal +- invalid opcode if some of these prefixes are +- present so we will never get here anyway */ + /* + * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. + * In X86_64 long mode, the CPU will signal invalid + * opcode if some of these prefixes are present so + * X86_64 will never get here anyway + */ -+ scan_more = ((instr_lo & 7) == 0x6); -+ break; + scan_more = ((instr_lo & 7) == 0x6); + break; +- +#ifdef CONFIG_X86_64 -+ case 0x40: + case 0x40: +- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes +- Need to figure out under what instruction mode the +- instruction was issued ... */ +- /* Could check the LDT for lm, but for now it's good +- enough to assume that long mode only uses well known +- segments or kernel. 
*/ + /* + * In AMD64 long mode 0x40..0x4F are valid REX prefixes + * Need to figure out under what instruction mode the @@ -8414,35 +8413,43 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + * but for now it's good enough to assume that long + * mode only uses well known segments or kernel. + */ -+ scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS) -+ || (regs->cs == FLAT_USER_CS64); -+ break; + scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS) + || (regs->cs == FLAT_USER_CS64); + break; +- +#endif -+ case 0x60: -+ /* 0x64 thru 0x67 are valid prefixes in all modes. */ -+ scan_more = (instr_lo & 0xC) == 0x4; + case 0x60: + /* 0x64 thru 0x67 are valid prefixes in all modes. */ + scan_more = (instr_lo & 0xC) == 0x4; +- break; + break; -+ case 0xF0: + case 0xF0: +- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ + /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ -+ scan_more = !instr_lo || (instr_lo>>1) == 1; + scan_more = !instr_lo || (instr_lo>>1) == 1; +- break; + break; -+ case 0x00: -+ /* Prefetch instruction is 0x0F0D or 0x0F18 */ -+ scan_more = 0; + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + scan_more = 0; + -+ if (probe_kernel_address(instr, opcode)) -+ break; -+ prefetch = (instr_lo == 0xF) && -+ (opcode == 0x0D || opcode == 0x18); -+ break; -+ default: -+ scan_more = 0; + if (probe_kernel_address(instr, opcode)) + break; + prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); +- break; + break; + default: + scan_more = 0; + break; +- } + } -+ } -+ return prefetch; -+} -+ + } + return prefetch; + } + +-static int bad_address(void *p) +-{ +static void force_sig_info_fault(int si_signo, int si_code, + unsigned long address, struct task_struct *tsk) +{ @@ -8458,13 +8465,15 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches +#ifdef CONFIG_X86_64 +static int bad_address(void *p) +{ -+ unsigned long dummy; -+ return probe_kernel_address((unsigned long *)p, dummy); + unsigned long dummy; + return probe_kernel_address((unsigned long *)p, dummy); +-} +} +#endif -+ + +-void dump_pagetable(unsigned long address) +static void dump_pagetable(unsigned long address) -+{ + { +#ifdef CONFIG_X86_32 + __typeof__(pte_val(__pte(0))) page; + @@ -8503,41 +8512,44 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + + printk(KERN_CONT "\n"); +#else /* CONFIG_X86_64 */ -+ pgd_t *pgd; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = (pgd_t *)read_cr3(); -+ -+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); -+ pgd += pgd_index(address); -+ if (bad_address(pgd)) goto bad; -+ printk("PGD %lx ", pgd_val(*pgd)); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; +@@ -160,62 +236,191 @@ void dump_pagetable(unsigned long addres + pgd += pgd_index(address); + if (bad_address(pgd)) goto bad; + printk("PGD %lx ", pgd_val(*pgd)); +- if (!pgd_present(*pgd)) goto ret; + if (!pgd_present(*pgd)) goto ret; -+ -+ pud = pud_offset(pgd, address); -+ if (bad_address(pud)) goto bad; + + pud = pud_offset(pgd, address); + if (bad_address(pud)) goto bad; +- printk("PUD %lx ", pud_val(*pud)); +- if (!pud_present(*pud)) goto ret; + printk(KERN_CONT "PUD %lx ", pud_val(*pud)); + if (!pud_present(*pud) || pud_large(*pud)) + goto ret; -+ -+ pmd = pmd_offset(pud, address); -+ if (bad_address(pmd)) goto bad; + + pmd = pmd_offset(pud, address); + if (bad_address(pmd)) goto bad; +- printk("PMD %lx ", pmd_val(*pmd)); + printk(KERN_CONT "PMD %lx ", pmd_val(*pmd)); -+ if 
(!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; -+ -+ pte = pte_offset_kernel(pmd, address); -+ if (bad_address(pte)) goto bad; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; + + pte = pte_offset_kernel(pmd, address); + if (bad_address(pte)) goto bad; +- printk("PTE %lx", pte_val(*pte)); + printk(KERN_CONT "PTE %lx", pte_val(*pte)); -+ret: + ret: +- printk("\n"); + printk(KERN_CONT "\n"); -+ return; -+bad: -+ printk("BAD\n"); + return; + bad: + printk("BAD\n"); +#endif -+} -+ + } + +-static const char errata93_warning[] = +#ifdef CONFIG_X86_32 +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ @@ -8589,36 +8601,47 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + +#ifdef CONFIG_X86_64 +static const char errata93_warning[] = -+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -+KERN_ERR "******* Please consider a BIOS update.\n" -+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; + KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" + KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" + KERN_ERR "******* Please consider a BIOS update.\n" + KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +#endif -+ -+/* Workaround for K8 erratum #93 & buggy BIOS. -+ BIOS SMM functions are required to use a specific workaround + + /* Workaround for K8 erratum #93 & buggy BIOS. + BIOS SMM functions are required to use a specific workaround +- to avoid corruption of the 64bit RIP register on C stepping K8. +- A lot of BIOS that didn't get tested properly miss this. + to avoid corruption of the 64bit RIP register on C stepping K8. + A lot of BIOS that didn't get tested properly miss this. -+ The OS sees this as a page fault with the upper 32bits of RIP cleared. -+ Try to work around it here. + The OS sees this as a page fault with the upper 32bits of RIP cleared. + Try to work around it here. +- Note we only handle faults in kernel here. */ +- +-static int is_errata93(struct pt_regs *regs, unsigned long address) + Note we only handle faults in kernel here. 
+ Does nothing for X86_32 + */ +static int is_errata93(struct pt_regs *regs, unsigned long address) -+{ + { +#ifdef CONFIG_X86_64 -+ static int warned; + static int warned; +- if (address != regs->rip) + if (address != regs->ip) -+ return 0; + return 0; +- if ((address >> 32) != 0) + if ((address >> 32) != 0) -+ return 0; -+ address |= 0xffffffffUL << 32; + return 0; + address |= 0xffffffffUL << 32; +- if ((address >= (u64)_stext && address <= (u64)_etext) || +- (address >= MODULES_VADDR && address <= MODULES_END)) { + if ((address >= (u64)_stext && address <= (u64)_etext) || + (address >= MODULES_VADDR && address <= MODULES_END)) { -+ if (!warned) { + if (!warned) { +- printk(errata93_warning); + printk(errata93_warning); -+ warned = 1; -+ } + warned = 1; + } +- regs->rip = address; + regs->ip = address; + return 1; + } @@ -8637,7 +8660,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches +#ifdef CONFIG_X86_64 + if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 || + (regs->cs & (1<<2))) && (address >> 32)) -+ return 1; + return 1; +#endif + return 0; +} @@ -8658,9 +8681,10 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + do_invalid_op(regs, 0); + return 1; + } -+ } + } +#endif -+ return 0; + return 0; +-} +} + +static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, @@ -8670,7 +8694,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + if (!oops_may_print()) + return; +#endif -+ + +#ifdef CONFIG_X86_PAE + if (error_code & PF_INSTR) { + unsigned int level; @@ -8699,19 +8723,16 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches +} + +#ifdef CONFIG_X86_64 -+static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, -+ unsigned long error_code) -+{ -+ unsigned long flags = oops_begin(); -+ struct task_struct *tsk; -+ -+ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", -+ current->comm, address); -+ dump_pagetable(address); -+ tsk = current; -+ tsk->thread.cr2 = address; -+ tsk->thread.trap_no = 14; -+ tsk->thread.error_code = error_code; + static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, + unsigned long error_code) + { +@@ -229,23 +434,108 @@ static noinline void pgtable_bad(unsigne + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; +- __die("Bad pagetable", regs, error_code); +- oops_end(flags); +- do_exit(SIGKILL); + if (__die("Bad pagetable", regs, error_code)) + regs = NULL; + oops_end(flags, regs, SIGKILL); @@ -8726,9 +8747,9 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + return 0; + + return 1; -+} -+ -+/* + } + + /* + * Handle a spurious fault caused by a stale TLB entry. This allows + * us to lazily refresh the TLB when increasing the permissions of a + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very @@ -8779,12 +8800,12 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + * Handle a fault on the vmalloc or module mapping area + * + * X86_64 -+ * Handle a fault on the vmalloc area -+ * -+ * This assumes no large pages in there. -+ */ -+static int vmalloc_fault(unsigned long address) -+{ + * Handle a fault on the vmalloc area + * + * This assumes no large pages in there. 
+ */ + static int vmalloc_fault(unsigned long address) + { +#ifdef CONFIG_X86_32 + unsigned long pgd_paddr; + pmd_t *pmd_k; @@ -8805,121 +8826,149 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + return -1; + return 0; +#else -+ pgd_t *pgd, *pgd_ref; -+ pud_t *pud, *pud_ref; -+ pmd_t *pmd, *pmd_ref; -+ pte_t *pte, *pte_ref; -+ + pgd_t *pgd, *pgd_ref; + pud_t *pud, *pud_ref; + pmd_t *pmd, *pmd_ref; + pte_t *pte, *pte_ref; + + /* Make sure we are in vmalloc area */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + -+ /* Copy kernel mappings over when needed. This can also -+ happen within a race in page table update. In the later -+ case just flush. */ -+ -+ /* On Xen the line below does not always work. Needs investigating! */ -+ /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ -+ pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); -+ pgd += pgd_index(address); -+ pgd_ref = pgd_offset_k(address); -+ if (pgd_none(*pgd_ref)) -+ return -1; -+ if (pgd_none(*pgd)) -+ set_pgd(pgd, *pgd_ref); -+ else -+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); -+ -+ /* Below here mismatches are bugs because these lower tables -+ are shared */ -+ -+ pud = pud_offset(pgd, address); -+ pud_ref = pud_offset(pgd_ref, address); -+ if (pud_none(*pud_ref)) -+ return -1; -+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) -+ BUG(); -+ pmd = pmd_offset(pud, address); -+ pmd_ref = pmd_offset(pud_ref, address); -+ if (pmd_none(*pmd_ref)) -+ return -1; -+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) -+ BUG(); -+ pte_ref = pte_offset_kernel(pmd_ref, address); -+ if (!pte_present(*pte_ref)) -+ return -1; -+ pte = pte_offset_kernel(pmd, address); -+ /* Don't use pte_page here, because the mappings can point -+ outside mem_map, and the NUMA hash lookup cannot handle -+ that. */ -+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) -+ BUG(); -+ return 0; + /* Copy kernel mappings over when needed. This can also + happen within a race in page table update. In the later + case just flush. */ +@@ -287,89 +577,42 @@ static int vmalloc_fault(unsigned long a + if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) + BUG(); + return 0; +#endif -+} -+ -+int show_unhandled_signals = 1; -+ -+/* -+ * This routine handles page faults. It determines the address, -+ * and the problem, and then passes it off to one of the appropriate -+ * routines. -+ */ + } + + int show_unhandled_signals = 1; + +- +-#define MEM_VERBOSE 1 +- +-#ifdef MEM_VERBOSE +-#define MEM_LOG(_f, _a...) \ +- printk("fault.c:[%d]-> " _f "\n", \ +- __LINE__ , ## _a ) +-#else +-#define MEM_LOG(_f, _a...) ((void)0) +-#endif +- +-static int spurious_fault(struct pt_regs *regs, +- unsigned long address, +- unsigned long error_code) +-{ +- pgd_t *pgd; +- pud_t *pud; +- pmd_t *pmd; +- pte_t *pte; +- +-#ifdef CONFIG_XEN +- /* Faults in hypervisor area are never spurious. */ +- if ((address >= HYPERVISOR_VIRT_START) && +- (address < HYPERVISOR_VIRT_END)) +- return 0; +-#endif +- +- /* Reserved-bit violation or user access to kernel space? 
*/ +- if (error_code & (PF_RSVD|PF_USER)) +- return 0; +- +- pgd = init_mm.pgd + pgd_index(address); +- if (!pgd_present(*pgd)) +- return 0; +- +- pud = pud_offset(pgd, address); +- if (!pud_present(*pud)) +- return 0; +- +- pmd = pmd_offset(pud, address); +- if (!pmd_present(*pmd)) +- return 0; +- +- pte = pte_offset_kernel(pmd, address); +- if (!pte_present(*pte)) +- return 0; +- if ((error_code & PF_WRITE) && !pte_write(*pte)) +- return 0; +- if ((error_code & PF_INSTR) && (__pte_val(*pte) & _PAGE_NX)) +- return 0; +- +- return 1; +-} +- + /* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, +- unsigned long error_code) +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) -+{ -+ struct task_struct *tsk; -+ struct mm_struct *mm; + { + struct task_struct *tsk; + struct mm_struct *mm; +- struct vm_area_struct * vma; + struct vm_area_struct *vma; -+ unsigned long address; + unsigned long address; +- const struct exception_table_entry *fixup; +- int write, fault; + int write, si_code; + int fault; +#ifdef CONFIG_X86_64 -+ unsigned long flags; + unsigned long flags; +- siginfo_t info; +- +- if (!user_mode(regs)) +- error_code &= ~PF_USER; /* means kernel */ +#endif -+ -+ /* -+ * We can fault from pretty much anywhere, with unknown IRQ state. -+ */ -+ trace_hardirqs_fixup(); -+ + + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + + /* Set the "privileged fault" bit to something sane. */ + if (user_mode_vm(regs)) + error_code |= PF_USER; + else + error_code &= ~PF_USER; + -+ tsk = current; -+ mm = tsk->mm; -+ prefetchw(&mm->mmap_sem); -+ -+ /* get the address */ -+ address = read_cr2(); -+ + tsk = current; + mm = tsk->mm; + prefetchw(&mm->mmap_sem); +@@ -377,8 +620,10 @@ asmlinkage void __kprobes do_page_fault( + /* get the address */ + address = read_cr2(); + +- info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; -+ + + if (notify_page_fault(regs)) + return; -+ -+ /* -+ * We fault-in kernel-space virtual memory on-demand. The -+ * 'reference' page table is init_mm.pgd. -+ * -+ * NOTE! We MUST NOT take any locks for this case. We may -+ * be in an interrupt or a critical region, and should -+ * only copy the information from the master page table, -+ * nothing more. -+ * -+ * This verifies that the fault happens in kernel space -+ * (error_code & 4) == 0, and that the fault was not a -+ * protection error (error_code & 9) == 0. -+ */ + + /* + * We fault-in kernel-space virtual memory on-demand. The +@@ -393,22 +638,28 @@ asmlinkage void __kprobes do_page_fault( + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 9) == 0. + */ +#ifdef CONFIG_X86_32 + if (unlikely(address >= TASK_SIZE)) { +#else -+ if (unlikely(address >= TASK_SIZE64)) { + if (unlikely(address >= TASK_SIZE64)) { +- /* +- * Don't check for the module range here: its PML4 +- * is always initialized because it's shared with the main +- * kernel text. Only vmalloc may need PML4 syncups. +- */ +#endif + /* Faults in hypervisor area can never be patched up. 
*/ +#if defined(CONFIG_X86_XEN) @@ -8930,22 +8979,32 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + && address < HYPERVISOR_VIRT_END) + goto bad_area_nosemaphore; +#endif -+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && +- ((address >= VMALLOC_START && address < VMALLOC_END))) { +- if (vmalloc_fault(address) >= 0) +- return; +- } +- /* Can take a spurious fault if mapping changes R/O -> R/W. */ +- if (spurious_fault(regs, address, error_code)) + vmalloc_fault(address) >= 0) -+ return; + return; +- if (notify_page_fault(regs)) + + /* Can handle a stale RO->RW TLB */ + if (spurious_fault(address, error_code)) -+ return; -+ -+ /* -+ * Don't take the mm semaphore here. If we fixup a prefetch -+ * fault we could otherwise deadlock. -+ */ -+ goto bad_area_nosemaphore; -+ } -+ + return; + + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. +@@ -416,18 +667,29 @@ asmlinkage void __kprobes do_page_fault( + goto bad_area_nosemaphore; + } + +- if (notify_page_fault(regs)) +- return; + +- if (likely(regs->eflags & X86_EFLAGS_IF)) +#ifdef CONFIG_X86_32 + /* It's safe to allow irq's after cr2 has been saved and the vmalloc + fault has been handled. */ @@ -8960,75 +9019,79 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + goto bad_area_nosemaphore; +#else /* CONFIG_X86_64 */ + if (likely(regs->flags & X86_EFLAGS_IF)) -+ local_irq_enable(); -+ -+ if (unlikely(error_code & PF_RSVD)) -+ pgtable_bad(address, regs, error_code); -+ -+ /* + local_irq_enable(); + + if (unlikely(error_code & PF_RSVD)) + pgtable_bad(address, regs, error_code); + + /* +- * If we're in an interrupt or have no user +- * context, we must not take the fault.. + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault. -+ */ -+ if (unlikely(in_atomic() || !mm)) -+ goto bad_area_nosemaphore; -+ -+ /* -+ * User-mode registers count as a user access even for any -+ * potential system fault or CPU buglet. -+ */ -+ if (user_mode_vm(regs)) -+ error_code |= PF_USER; + */ + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; +@@ -438,8 +700,8 @@ asmlinkage void __kprobes do_page_fault( + */ + if (user_mode_vm(regs)) + error_code |= PF_USER; +- +- again: +again: +#endif -+ /* When running in the kernel we expect faults to occur only to -+ * addresses in user space. All other faults represent errors in the -+ * kernel and should generate an OOPS. Unfortunately, in the case of an -+ * erroneous fault occurring in a code path which already holds mmap_sem -+ * we will deadlock attempting to validate the fault against the -+ * address space. Luckily the kernel only validly references user -+ * space from well defined areas of code, which are listed in the -+ * exceptions table. -+ * -+ * As the vast majority of faults will be valid we will only perform -+ * the source reference check when there is a possibility of a deadlock. -+ * Attempt to lock the address space, if we cannot we then validate the -+ * source. If this is invalid we can skip the address space check, -+ * thus avoiding the deadlock. -+ */ -+ if (!down_read_trylock(&mm->mmap_sem)) { -+ if ((error_code & PF_USER) == 0 && + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. 
Unfortunately, in the case of an +@@ -457,7 +719,7 @@ asmlinkage void __kprobes do_page_fault( + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if ((error_code & PF_USER) == 0 && +- !search_exception_tables(regs->rip)) + !search_exception_tables(regs->ip)) -+ goto bad_area_nosemaphore; -+ down_read(&mm->mmap_sem); -+ } -+ -+ vma = find_vma(mm, address); -+ if (!vma) -+ goto bad_area; + goto bad_area_nosemaphore; + down_read(&mm->mmap_sem); + } +@@ -465,15 +727,18 @@ asmlinkage void __kprobes do_page_fault( + vma = find_vma(mm, address); + if (!vma) + goto bad_area; +- if (likely(vma->vm_start <= address)) + if (vma->vm_start <= address) -+ goto good_area; -+ if (!(vma->vm_flags & VM_GROWSDOWN)) -+ goto bad_area; + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (error_code & 4) { +- /* Allow userspace just enough access below the stack pointer +- * to let the 'enter' instruction work. + if (error_code & PF_USER) { + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter + * and pusha to work. ("enter $65535,$31" pushes + * 32 pointers and then decrements %sp by 65535.) -+ */ + */ +- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp) + if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) -+ goto bad_area; -+ } -+ if (expand_stack(vma, address)) -+ goto bad_area; -+/* -+ * Ok, we have a good vm_area for this memory access, so -+ * we can handle it.. -+ */ -+good_area: + goto bad_area; + } + if (expand_stack(vma, address)) +@@ -483,23 +748,26 @@ asmlinkage void __kprobes do_page_fault( + * we can handle it.. + */ + good_area: +- info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; -+ write = 0; -+ switch (error_code & (PF_PROT|PF_WRITE)) { + write = 0; + switch (error_code & (PF_PROT|PF_WRITE)) { +- default: /* 3: write, present */ +- /* fall through */ +- case PF_WRITE: /* write, not present */ +- if (!(vma->vm_flags & VM_WRITE)) +- goto bad_area; +- write++; +- break; +- case PF_PROT: /* read, present */ + default: /* 3: write, present */ + /* fall through */ + case PF_WRITE: /* write, not present */ @@ -9040,29 +9103,22 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) -+ goto bad_area; -+ } -+ + goto bad_area; +- case 0: /* read, not present */ +- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) +- goto bad_area; + } + +#ifdef CONFIG_X86_32 +survive: +#endif -+ /* -+ * If for any reason at all we couldn't handle the fault, -+ * make sure we exit gracefully rather than endlessly redo -+ * the fault. -+ */ -+ fault = handle_mm_fault(mm, vma, address, write); -+ if (unlikely(fault & VM_FAULT_ERROR)) { -+ if (fault & VM_FAULT_OOM) -+ goto out_of_memory; -+ else if (fault & VM_FAULT_SIGBUS) -+ goto do_sigbus; -+ BUG(); -+ } -+ if (fault & VM_FAULT_MAJOR) -+ tsk->maj_flt++; -+ else -+ tsk->min_flt++; + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -517,6 +785,17 @@ good_area: + tsk->maj_flt++; + else + tsk->min_flt++; + +#ifdef CONFIG_X86_32 + /* @@ -9074,37 +9130,44 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + tsk->thread.screen_bitmap |= 1 << bit; + } +#endif -+ up_read(&mm->mmap_sem); -+ return; -+ -+/* -+ * Something tried to access memory that isn't in our memory map.. -+ * Fix it, but check if it's kernel or user first.. 
-+ */ -+bad_area: -+ up_read(&mm->mmap_sem); -+ -+bad_area_nosemaphore: -+ /* User mode accesses just cause a SIGSEGV */ -+ if (error_code & PF_USER) { -+ /* -+ * It's possible to have interrupts off here. -+ */ -+ local_irq_enable(); -+ + up_read(&mm->mmap_sem); + return; + +@@ -530,87 +809,94 @@ bad_area: + bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { +- + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ -+ if (is_prefetch(regs, address, error_code)) -+ return; -+ + if (is_prefetch(regs, address, error_code)) + return; + +- /* Work around K8 erratum #100 K8 in compat mode +- occasionally jumps to illegal addresses >4GB. We +- catch this here in the page fault handler because +- these addresses are not reachable. Just detect this +- case and return. Any code segment in LDT is +- compatibility mode. */ +- if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 || +- (regs->cs & (1<<2))) && (address >> 32)) + if (is_errata100(regs, address)) -+ return; -+ -+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && -+ printk_ratelimit()) { -+ printk( + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk( +- "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", +- tsk->pid > 1 ? KERN_INFO : KERN_EMERG, +- tsk->comm, tsk->pid, address, regs->rip, +- regs->rsp, error_code); +#ifdef CONFIG_X86_32 + "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", +#else @@ -9115,24 +9178,36 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); -+ } + } +- + -+ tsk->thread.cr2 = address; -+ /* Kernel addresses are always protection faults */ -+ tsk->thread.error_code = error_code | (address >= TASK_SIZE); -+ tsk->thread.trap_no = 14; + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; +- info.si_signo = SIGSEGV; +- info.si_errno = 0; +- /* info.si_code has been set above */ +- info.si_addr = (void __user *)address; +- force_sig_info(SIGSEGV, &info, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk); -+ return; -+ } -+ + return; + } + + if (is_f00f_bug(regs, address)) + return; + -+no_context: -+ /* Are we prepared to handle this kernel fault? */ + no_context: +- + /* Are we prepared to handle this kernel fault? */ +- fixup = search_exception_tables(regs->rip); +- if (fixup) { +- regs->rip = fixup->fixup; + if (fixup_exception(regs)) -+ return; -+ + return; +- } + +- /* + /* + * X86_32 + * Valid to do another page fault here, because if this fault @@ -9140,29 +9215,42 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + * handled it. + * + * X86_64 -+ * Hall of shame of CPU/BIOS bugs. -+ */ + * Hall of shame of CPU/BIOS bugs. + */ +- +- if (is_prefetch(regs, address, error_code)) +- return; + if (is_prefetch(regs, address, error_code)) + return; -+ -+ if (is_errata93(regs, address)) + + if (is_errata93(regs, address)) +- return; + return; -+ -+/* -+ * Oops. The kernel tried to access some bad page. We'll have to -+ * terminate things with extreme prejudice. -+ */ + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. 
+ */ +- +#ifdef CONFIG_X86_32 + bust_spinlocks(1); +#else -+ flags = oops_begin(); + flags = oops_begin(); +#endif + + show_fault_oops(regs, error_code, address); -+ -+ tsk->thread.cr2 = address; -+ tsk->thread.trap_no = 14; -+ tsk->thread.error_code = error_code; + +- if (address < PAGE_SIZE) +- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); +- else +- printk(KERN_ALERT "Unable to handle kernel paging request"); +- printk(" at %016lx RIP: \n" KERN_ALERT,address); +- printk_address(regs->rip); +- dump_pagetable(address); + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; +- __die("Oops", regs, error_code); + +#ifdef CONFIG_X86_32 + die("Oops", regs, error_code); @@ -9171,57 +9259,67 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches +#else + if (__die("Oops", regs, error_code)) + regs = NULL; -+ /* Executive summary in case the body of the oops scrolled away */ -+ printk(KERN_EMERG "CR2: %016lx\n", address); + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); +- oops_end(flags); +- do_exit(SIGKILL); + oops_end(flags, regs, SIGKILL); +#endif -+ -+/* -+ * We ran out of memory, or some other thing happened to us that made -+ * us unable to handle the page fault gracefully. -+ */ -+out_of_memory: -+ up_read(&mm->mmap_sem); + + /* + * We ran out of memory, or some other thing happened to us that made +@@ -618,12 +904,18 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (is_global_init(current)) { + if (is_global_init(tsk)) { -+ yield(); + yield(); +#ifdef CONFIG_X86_32 + down_read(&mm->mmap_sem); + goto survive; +#else -+ goto again; + goto again; +#endif -+ } + } + -+ printk("VM: killing process %s\n", tsk->comm); + printk("VM: killing process %s\n", tsk->comm); +- if (error_code & 4) + if (error_code & PF_USER) -+ do_group_exit(SIGKILL); -+ goto no_context; -+ -+do_sigbus: -+ up_read(&mm->mmap_sem); -+ -+ /* Kernel mode? Handle exceptions or die */ -+ if (!(error_code & PF_USER)) -+ goto no_context; + do_group_exit(SIGKILL); + goto no_context; + +@@ -633,16 +925,15 @@ do_sigbus: + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & PF_USER)) + goto no_context; +- +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, address, error_code)) + return; +#endif -+ tsk->thread.cr2 = address; -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = 14; + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; +- info.si_signo = SIGBUS; +- info.si_errno = 0; +- info.si_code = BUS_ADRERR; +- info.si_addr = (void __user *)address; +- force_sig_info(SIGBUS, &info, tsk); +- return; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); -+} -+ -+DEFINE_SPINLOCK(pgd_lock); -+LIST_HEAD(pgd_list); -+ -+#define pgd_page_table(what, pg) \ -+ spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock) -+ -+void vmalloc_sync_all(void) -+{ + } + + DEFINE_SPINLOCK(pgd_lock); +@@ -653,10 +944,62 @@ LIST_HEAD(pgd_list); + + void vmalloc_sync_all(void) + { +- /* Note that races in the updates of insync and start aren't +- problematic: +- insync can only get set bits added, and updates to start are only +- improving performance (without affecting correctness if undone). 
*/ +#ifdef CONFIG_X86_32 + /* + * Note that races in the updates of insync and start aren't @@ -9278,1510 +9376,45 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + * start are only improving performance (without affecting correctness + * if undone). + */ -+ static DECLARE_BITMAP(insync, PTRS_PER_PGD); -+ static unsigned long start = VMALLOC_START & PGDIR_MASK; -+ unsigned long address; -+ -+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { -+ if (!test_bit(pgd_index(address), insync)) { -+ const pgd_t *pgd_ref = pgd_offset_k(address); + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = VMALLOC_START & PGDIR_MASK; + unsigned long address; +@@ -664,15 +1007,15 @@ void vmalloc_sync_all(void) + for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; -+ struct page *page; -+ -+ if (pgd_none(*pgd_ref)) -+ continue; + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; +- spin_lock(&pgd_lock); + spin_lock_irqsave(&pgd_lock, flags); -+ list_for_each_entry(page, &pgd_list, lru) { -+ pgd_t *pgd; -+ pgd = (pgd_t *)page_address(page) + pgd_index(address); -+ pgd_page_table(lock, page); -+ if (pgd_none(*pgd)) -+ set_pgd(pgd, *pgd_ref); -+ else -+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); -+ pgd_page_table(unlock, page); -+ } + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); +- + pgd_page_table(lock, page); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); +@@ -680,7 +1023,7 @@ void vmalloc_sync_all(void) + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + pgd_page_table(unlock, page); + } +- spin_unlock(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); -+ set_bit(pgd_index(address), insync); -+ } -+ if (address == start) -+ start = address + PGDIR_SIZE; -+ } -+ /* Check that there is no need to do the same for the modules area. */ -+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); + set_bit(pgd_index(address), insync); + } + if (address == start) +@@ -688,6 +1031,7 @@ void vmalloc_sync_all(void) + } + /* Check that there is no need to do the same for the modules area. 
*/ + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); +- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == -+ (__START_KERNEL & PGDIR_MASK))); + (__START_KERNEL & PGDIR_MASK))); +#endif -+} ---- head.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:56:27.000000000 +0100 -+++ /dev/null 1970-01-01 00:00:00.000000000 +0000 -@@ -1,770 +0,0 @@ --/* -- * linux/arch/i386/mm/fault.c -- * -- * Copyright (C) 1995 Linus Torvalds -- */ -- --#include <linux/signal.h> --#include <linux/sched.h> --#include <linux/kernel.h> --#include <linux/errno.h> --#include <linux/string.h> --#include <linux/types.h> --#include <linux/ptrace.h> --#include <linux/mman.h> --#include <linux/mm.h> --#include <linux/smp.h> --#include <linux/interrupt.h> --#include <linux/init.h> --#include <linux/tty.h> --#include <linux/vt_kern.h> /* For unblank_screen() */ --#include <linux/highmem.h> --#include <linux/bootmem.h> /* for max_low_pfn */ --#include <linux/vmalloc.h> --#include <linux/module.h> --#include <linux/kprobes.h> --#include <linux/uaccess.h> --#include <linux/kdebug.h> --#include <linux/kprobes.h> -- --#include <asm/system.h> --#include <asm/desc.h> --#include <asm/segment.h> -- --extern void die(const char *,struct pt_regs *,long); -- --#ifdef CONFIG_KPROBES --static inline int notify_page_fault(struct pt_regs *regs) --{ -- int ret = 0; -- -- /* kprobe_running() needs smp_processor_id() */ -- if (!user_mode_vm(regs)) { -- preempt_disable(); -- if (kprobe_running() && kprobe_fault_handler(regs, 14)) -- ret = 1; -- preempt_enable(); -- } -- -- return ret; --} --#else --static inline int notify_page_fault(struct pt_regs *regs) --{ -- return 0; --} --#endif -- --/* -- * Return EIP plus the CS segment base. The segment limit is also -- * adjusted, clamped to the kernel/user address space (whichever is -- * appropriate), and returned in *eip_limit. -- * -- * The segment is checked, because it might have been changed by another -- * task between the original faulting instruction and here. -- * -- * If CS is no longer a valid code segment, or if EIP is beyond the -- * limit, or if it is a kernel address when CS is not a kernel segment, -- * then the returned value will be greater than *eip_limit. -- * -- * This is slow, but is very rarely executed. -- */ --static inline unsigned long get_segment_eip(struct pt_regs *regs, -- unsigned long *eip_limit) --{ -- unsigned long eip = regs->eip; -- unsigned seg = regs->xcs & 0xffff; -- u32 seg_ar, seg_limit, base, *desc; -- -- /* Unlikely, but must come before segment checks. */ -- if (unlikely(regs->eflags & VM_MASK)) { -- base = seg << 4; -- *eip_limit = base + 0xffff; -- return base + (eip & 0xffff); -- } -- -- /* The standard kernel/user address space limit. */ -- *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; -- -- /* By far the most common cases. */ -- if (likely(SEGMENT_IS_FLAT_CODE(seg))) -- return eip; -- -- /* Check the segment exists, is within the current LDT/GDT size, -- that kernel/user (ring 0..3) has the appropriate privilege, -- that it's a code segment, and get the limit. */ -- __asm__ ("larl %3,%0; lsll %3,%1" -- : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); -- if ((~seg_ar & 0x9800) || eip > seg_limit) { -- *eip_limit = 0; -- return 1; /* So that returned eip > *eip_limit. */ -- } -- -- /* Get the GDT/LDT descriptor base. -- When you look for races in this code remember that -- LDT and other horrors are only used in user space. 
*/ -- if (seg & (1<<2)) { -- /* Must lock the LDT while reading it. */ -- mutex_lock(¤t->mm->context.lock); -- desc = current->mm->context.ldt; -- desc = (void *)desc + (seg & ~7); -- } else { -- /* Must disable preemption while reading the GDT. */ -- desc = (u32 *)get_cpu_gdt_table(get_cpu()); -- desc = (void *)desc + (seg & ~7); -- } -- -- /* Decode the code segment base from the descriptor */ -- base = get_desc_base((unsigned long *)desc); -- -- if (seg & (1<<2)) { -- mutex_unlock(¤t->mm->context.lock); -- } else -- put_cpu(); -- -- /* Adjust EIP and segment limit, and clamp at the kernel limit. -- It's legitimate for segments to wrap at 0xffffffff. */ -- seg_limit += base; -- if (seg_limit < *eip_limit && seg_limit >= base) -- *eip_limit = seg_limit; -- return eip + base; --} -- --/* -- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. -- * Check that here and ignore it. -- */ --static int __is_prefetch(struct pt_regs *regs, unsigned long addr) --{ -- unsigned long limit; -- unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); -- int scan_more = 1; -- int prefetch = 0; -- int i; -- -- for (i = 0; scan_more && i < 15; i++) { -- unsigned char opcode; -- unsigned char instr_hi; -- unsigned char instr_lo; -- -- if (instr > (unsigned char *)limit) -- break; -- if (probe_kernel_address(instr, opcode)) -- break; -- -- instr_hi = opcode & 0xf0; -- instr_lo = opcode & 0x0f; -- instr++; -- -- switch (instr_hi) { -- case 0x20: -- case 0x30: -- /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ -- scan_more = ((instr_lo & 7) == 0x6); -- break; -- -- case 0x60: -- /* 0x64 thru 0x67 are valid prefixes in all modes. */ -- scan_more = (instr_lo & 0xC) == 0x4; -- break; -- case 0xF0: -- /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ -- scan_more = !instr_lo || (instr_lo>>1) == 1; -- break; -- case 0x00: -- /* Prefetch instruction is 0x0F0D or 0x0F18 */ -- scan_more = 0; -- if (instr > (unsigned char *)limit) -- break; -- if (probe_kernel_address(instr, opcode)) -- break; -- prefetch = (instr_lo == 0xF) && -- (opcode == 0x0D || opcode == 0x18); -- break; -- default: -- scan_more = 0; -- break; -- } -- } -- return prefetch; --} -- --static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, -- unsigned long error_code) --{ -- if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && -- boot_cpu_data.x86 >= 6)) { -- /* Catch an obscure case of prefetch inside an NX page. 
*/ -- if (nx_enabled && (error_code & 16)) -- return 0; -- return __is_prefetch(regs, addr); -- } -- return 0; --} -- --static noinline void force_sig_info_fault(int si_signo, int si_code, -- unsigned long address, struct task_struct *tsk) --{ -- siginfo_t info; -- -- info.si_signo = si_signo; -- info.si_errno = 0; -- info.si_code = si_code; -- info.si_addr = (void __user *)address; -- force_sig_info(si_signo, &info, tsk); --} -- --fastcall void do_invalid_op(struct pt_regs *, unsigned long); -- --#ifdef CONFIG_X86_PAE --static void dump_fault_path(unsigned long address) --{ -- unsigned long *p, page; -- unsigned long mfn; -- -- page = read_cr3(); -- p = (unsigned long *)__va(page); -- p += (address >> 30) * 2; -- printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]); -- if (p[0] & _PAGE_PRESENT) { -- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); -- page = mfn_to_pfn(mfn) << PAGE_SHIFT; -- p = (unsigned long *)__va(page); -- address &= 0x3fffffff; -- p += (address >> 21) * 2; -- printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n", -- page, p[1], p[0]); -- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); --#ifdef CONFIG_HIGHPTE -- if (mfn_to_pfn(mfn) >= highstart_pfn) -- return; --#endif -- if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) { -- page = mfn_to_pfn(mfn) << PAGE_SHIFT; -- p = (unsigned long *) __va(page); -- address &= 0x001fffff; -- p += (address >> 12) * 2; -- printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n", -- page, p[1], p[0]); -- } -- } --} --#else --static void dump_fault_path(unsigned long address) --{ -- unsigned long page; -- -- page = read_cr3(); -- page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT]; -- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, -- machine_to_phys(page)); -- /* -- * We must not directly access the pte in the highpte -- * case if the page table is located in highmem. -- * And lets rather not kmap-atomic the pte, just in case -- * it's allocated already. -- */ -- if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn -- && (page & _PAGE_PRESENT) -- && !(page & _PAGE_PSE)) { -- page = machine_to_phys(page & PAGE_MASK); -- page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) -- & (PTRS_PER_PTE - 1)]; -- printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, -- machine_to_phys(page)); -- } --} --#endif -- --static int spurious_fault(struct pt_regs *regs, -- unsigned long address, -- unsigned long error_code) --{ -- pgd_t *pgd; -- pud_t *pud; -- pmd_t *pmd; -- pte_t *pte; -- -- /* Reserved-bit violation or user access to kernel space? */ -- if (error_code & 0x0c) -- return 0; -- -- pgd = init_mm.pgd + pgd_index(address); -- if (!pgd_present(*pgd)) -- return 0; -- -- pud = pud_offset(pgd, address); -- if (!pud_present(*pud)) -- return 0; -- -- pmd = pmd_offset(pud, address); -- if (!pmd_present(*pmd)) -- return 0; -- -- pte = pte_offset_kernel(pmd, address); -- if (!pte_present(*pte)) -- return 0; -- if ((error_code & 0x02) && !pte_write(*pte)) -- return 0; --#ifdef CONFIG_X86_PAE -- if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX)) -- return 0; --#endif -- -- return 1; --} -- --static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) --{ -- unsigned index = pgd_index(address); -- pgd_t *pgd_k; -- pud_t *pud, *pud_k; -- pmd_t *pmd, *pmd_k; -- -- pgd += index; -- pgd_k = init_mm.pgd + index; -- -- if (!pgd_present(*pgd_k)) -- return NULL; -- -- /* -- * set_pgd(pgd, *pgd_k); here would be useless on PAE -- * and redundant with the set_pmd() on non-PAE. As would -- * set_pud. 
-- */ -- -- pud = pud_offset(pgd, address); -- pud_k = pud_offset(pgd_k, address); -- if (!pud_present(*pud_k)) -- return NULL; -- -- pmd = pmd_offset(pud, address); -- pmd_k = pmd_offset(pud_k, address); -- if (!pmd_present(*pmd_k)) -- return NULL; -- if (!pmd_present(*pmd)) { -- bool lazy = x86_read_percpu(xen_lazy_mmu); -- -- x86_write_percpu(xen_lazy_mmu, false); --#if CONFIG_XEN_COMPAT > 0x030002 -- set_pmd(pmd, *pmd_k); --#else -- /* -- * When running on older Xen we must launder *pmd_k through -- * pmd_val() to ensure that _PAGE_PRESENT is correctly set. -- */ -- set_pmd(pmd, __pmd(pmd_val(*pmd_k))); --#endif -- x86_write_percpu(xen_lazy_mmu, lazy); -- } else -- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); -- return pmd_k; --} -- --/* -- * Handle a fault on the vmalloc or module mapping area -- * -- * This assumes no large pages in there. -- */ --static inline int vmalloc_fault(unsigned long address) --{ -- unsigned long pgd_paddr; -- pmd_t *pmd_k; -- pte_t *pte_k; -- /* -- * Synchronize this task's top level page-table -- * with the 'reference' page table. -- * -- * Do _not_ use "current" here. We might be inside -- * an interrupt in the middle of a task switch.. -- */ -- pgd_paddr = read_cr3(); -- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); -- if (!pmd_k) -- return -1; -- pte_k = pte_offset_kernel(pmd_k, address); -- if (!pte_present(*pte_k)) -- return -1; -- return 0; --} -- --int show_unhandled_signals = 1; -- --/* -- * This routine handles page faults. It determines the address, -- * and the problem, and then passes it off to one of the appropriate -- * routines. -- * -- * error_code: -- * bit 0 == 0 means no page found, 1 means protection fault -- * bit 1 == 0 means read, 1 means write -- * bit 2 == 0 means kernel, 1 means user-mode -- * bit 3 == 1 means use of reserved bit detected -- * bit 4 == 1 means fault was an instruction fetch -- */ --fastcall void __kprobes do_page_fault(struct pt_regs *regs, -- unsigned long error_code) --{ -- struct task_struct *tsk; -- struct mm_struct *mm; -- struct vm_area_struct * vma; -- unsigned long address; -- int write, si_code; -- int fault; -- -- /* -- * We can fault from pretty much anywhere, with unknown IRQ state. -- */ -- trace_hardirqs_fixup(); -- -- /* get the address */ -- address = read_cr2(); -- -- /* Set the "privileged fault" bit to something sane. */ -- error_code &= ~4; -- error_code |= (regs->xcs & 2) << 1; -- if (regs->eflags & X86_EFLAGS_VM) -- error_code |= 4; -- -- tsk = current; -- -- si_code = SEGV_MAPERR; -- -- /* -- * We fault-in kernel-space virtual memory on-demand. The -- * 'reference' page table is init_mm.pgd. -- * -- * NOTE! We MUST NOT take any locks for this case. We may -- * be in an interrupt or a critical region, and should -- * only copy the information from the master page table, -- * nothing more. -- * -- * This verifies that the fault happens in kernel space -- * (error_code & 4) == 0, and that the fault was not a -- * protection error (error_code & 9) == 0. -- */ -- if (unlikely(address >= TASK_SIZE)) { --#ifdef CONFIG_XEN -- /* Faults in hypervisor area can never be patched up. */ -- if (address >= hypervisor_virt_start) -- goto bad_area_nosemaphore; --#endif -- if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) -- return; -- /* Can take a spurious fault if mapping changes R/O -> R/W. */ -- if (spurious_fault(regs, address, error_code)) -- return; -- if (notify_page_fault(regs)) -- return; -- /* -- * Don't take the mm semaphore here. 
If we fixup a prefetch -- * fault we could otherwise deadlock. -- */ -- goto bad_area_nosemaphore; -- } -- -- if (notify_page_fault(regs)) -- return; -- -- /* It's safe to allow irq's after cr2 has been saved and the vmalloc -- fault has been handled. */ -- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) -- local_irq_enable(); -- -- mm = tsk->mm; -- -- /* -- * If we're in an interrupt, have no user context or are running in an -- * atomic region then we must not take the fault.. -- */ -- if (in_atomic() || !mm) -- goto bad_area_nosemaphore; -- -- /* When running in the kernel we expect faults to occur only to -- * addresses in user space. All other faults represent errors in the -- * kernel and should generate an OOPS. Unfortunately, in the case of an -- * erroneous fault occurring in a code path which already holds mmap_sem -- * we will deadlock attempting to validate the fault against the -- * address space. Luckily the kernel only validly references user -- * space from well defined areas of code, which are listed in the -- * exceptions table. -- * -- * As the vast majority of faults will be valid we will only perform -- * the source reference check when there is a possibility of a deadlock. -- * Attempt to lock the address space, if we cannot we then validate the -- * source. If this is invalid we can skip the address space check, -- * thus avoiding the deadlock. -- */ -- if (!down_read_trylock(&mm->mmap_sem)) { -- if ((error_code & 4) == 0 && -- !search_exception_tables(regs->eip)) -- goto bad_area_nosemaphore; -- down_read(&mm->mmap_sem); -- } -- -- vma = find_vma(mm, address); -- if (!vma) -- goto bad_area; -- if (vma->vm_start <= address) -- goto good_area; -- if (!(vma->vm_flags & VM_GROWSDOWN)) -- goto bad_area; -- if (error_code & 4) { -- /* -- * Accessing the stack below %esp is always a bug. -- * The large cushion allows instructions like enter -- * and pusha to work. ("enter $65535,$31" pushes -- * 32 pointers and then decrements %esp by 65535.) -- */ -- if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp) -- goto bad_area; -- } -- if (expand_stack(vma, address)) -- goto bad_area; --/* -- * Ok, we have a good vm_area for this memory access, so -- * we can handle it.. -- */ --good_area: -- si_code = SEGV_ACCERR; -- write = 0; -- switch (error_code & 3) { -- default: /* 3: write, present */ -- /* fall through */ -- case 2: /* write, not present */ -- if (!(vma->vm_flags & VM_WRITE)) -- goto bad_area; -- write++; -- break; -- case 1: /* read, present */ -- goto bad_area; -- case 0: /* read, not present */ -- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) -- goto bad_area; -- } -- -- survive: -- /* -- * If for any reason at all we couldn't handle the fault, -- * make sure we exit gracefully rather than endlessly redo -- * the fault. -- */ -- fault = handle_mm_fault(mm, vma, address, write); -- if (unlikely(fault & VM_FAULT_ERROR)) { -- if (fault & VM_FAULT_OOM) -- goto out_of_memory; -- else if (fault & VM_FAULT_SIGBUS) -- goto do_sigbus; -- BUG(); -- } -- if (fault & VM_FAULT_MAJOR) -- tsk->maj_flt++; -- else -- tsk->min_flt++; -- -- /* -- * Did it hit the DOS screen memory VA from vm86 mode? -- */ -- if (regs->eflags & VM_MASK) { -- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; -- if (bit < 32) -- tsk->thread.screen_bitmap |= 1 << bit; -- } -- up_read(&mm->mmap_sem); -- return; -- --/* -- * Something tried to access memory that isn't in our memory map.. -- * Fix it, but check if it's kernel or user first.. 
-- */ --bad_area: -- up_read(&mm->mmap_sem); -- --bad_area_nosemaphore: -- /* User mode accesses just cause a SIGSEGV */ -- if (error_code & 4) { -- /* -- * It's possible to have interrupts off here. -- */ -- local_irq_enable(); -- -- /* -- * Valid to do another page fault here because this one came -- * from user space. -- */ -- if (is_prefetch(regs, address, error_code)) -- return; -- -- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && -- printk_ratelimit()) { -- printk("%s%s[%d]: segfault at %08lx eip %08lx " -- "esp %08lx error %lx\n", -- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, -- tsk->comm, task_pid_nr(tsk), address, regs->eip, -- regs->esp, error_code); -- } -- tsk->thread.cr2 = address; -- /* Kernel addresses are always protection faults */ -- tsk->thread.error_code = error_code | (address >= TASK_SIZE); -- tsk->thread.trap_no = 14; -- force_sig_info_fault(SIGSEGV, si_code, address, tsk); -- return; -- } -- --#ifdef CONFIG_X86_F00F_BUG -- /* -- * Pentium F0 0F C7 C8 bug workaround. -- */ -- if (boot_cpu_data.f00f_bug) { -- unsigned long nr; -- -- nr = (address - idt_descr.address) >> 3; -- -- if (nr == 6) { -- do_invalid_op(regs, 0); -- return; -- } -- } --#endif -- --no_context: -- /* Are we prepared to handle this kernel fault? */ -- if (fixup_exception(regs)) -- return; -- -- /* -- * Valid to do another page fault here, because if this fault -- * had been triggered by is_prefetch fixup_exception would have -- * handled it. -- */ -- if (is_prefetch(regs, address, error_code)) -- return; -- --/* -- * Oops. The kernel tried to access some bad page. We'll have to -- * terminate things with extreme prejudice. -- */ -- -- bust_spinlocks(1); -- -- if (oops_may_print()) { --#ifdef CONFIG_X86_PAE -- if (error_code & 16) { -- pte_t *pte = lookup_address(address); -- -- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) -- printk(KERN_CRIT "kernel tried to execute " -- "NX-protected page - exploit attempt? " -- "(uid: %d)\n", current->uid); -- } --#endif -- if (address < PAGE_SIZE) -- printk(KERN_ALERT "BUG: unable to handle kernel NULL " -- "pointer dereference"); -- else -- printk(KERN_ALERT "BUG: unable to handle kernel paging" -- " request"); -- printk(" at virtual address %08lx\n",address); -- printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); -- dump_fault_path(address); -- } -- tsk->thread.cr2 = address; -- tsk->thread.trap_no = 14; -- tsk->thread.error_code = error_code; -- die("Oops", regs, error_code); -- bust_spinlocks(0); -- do_exit(SIGKILL); -- --/* -- * We ran out of memory, or some other thing happened to us that made -- * us unable to handle the page fault gracefully. -- */ --out_of_memory: -- up_read(&mm->mmap_sem); -- if (is_global_init(tsk)) { -- yield(); -- down_read(&mm->mmap_sem); -- goto survive; -- } -- printk("VM: killing process %s\n", tsk->comm); -- if (error_code & 4) -- do_group_exit(SIGKILL); -- goto no_context; -- --do_sigbus: -- up_read(&mm->mmap_sem); -- -- /* Kernel mode? 
Handle exceptions or die */ -- if (!(error_code & 4)) -- goto no_context; -- -- /* User space => ok to do another page fault */ -- if (is_prefetch(regs, address, error_code)) -- return; -- -- tsk->thread.cr2 = address; -- tsk->thread.error_code = error_code; -- tsk->thread.trap_no = 14; -- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); --} -- --void vmalloc_sync_all(void) --{ -- /* -- * Note that races in the updates of insync and start aren't -- * problematic: insync can only get set bits added, and updates to -- * start are only improving performance (without affecting correctness -- * if undone). -- * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs. -- * This change works just fine with 2-level paging too. -- */ --#define sync_index(a) ((a) >> PMD_SHIFT) -- static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD); -- static unsigned long start = TASK_SIZE; -- unsigned long address; -- -- if (SHARED_KERNEL_PMD) -- return; -- -- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); -- for (address = start; -- address >= TASK_SIZE && address < hypervisor_virt_start; -- address += 1UL << PMD_SHIFT) { -- if (!test_bit(sync_index(address), insync)) { -- unsigned long flags; -- struct page *page; -- -- spin_lock_irqsave(&pgd_lock, flags); -- /* XEN: failure path assumes non-empty pgd_list. */ -- if (unlikely(!pgd_list)) { -- spin_unlock_irqrestore(&pgd_lock, flags); -- return; -- } -- for (page = pgd_list; page; page = -- (struct page *)page->index) { -- spinlock_t *lock = page->mapping -- ? &((struct mm_struct *)page->mapping) -- ->page_table_lock -- : NULL; -- pmd_t *pmd; -- -- if (lock) -- spin_lock(lock); -- pmd = vmalloc_sync_one(page_address(page), -- address); -- if (lock) -- spin_unlock(lock); -- -- if (!pmd) { -- BUG_ON(page != pgd_list); -- break; -- } -- } -- spin_unlock_irqrestore(&pgd_lock, flags); -- if (!page) -- set_bit(sync_index(address), insync); -- } -- if (address == start && test_bit(sync_index(address), insync)) -- start = address + (1UL << PMD_SHIFT); -- } --} ---- head.orig/arch/x86/mm/fault_64-xen.c 2011-07-26 09:28:01.000000000 +0200 -+++ /dev/null 1970-01-01 00:00:00.000000000 +0000 -@@ -1,693 +0,0 @@ --/* -- * linux/arch/x86-64/mm/fault.c -- * -- * Copyright (C) 1995 Linus Torvalds -- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. 
-- */ -- --#include <linux/signal.h> --#include <linux/sched.h> --#include <linux/kernel.h> --#include <linux/errno.h> --#include <linux/string.h> --#include <linux/types.h> --#include <linux/ptrace.h> --#include <linux/mman.h> --#include <linux/mm.h> --#include <linux/smp.h> --#include <linux/interrupt.h> --#include <linux/init.h> --#include <linux/tty.h> --#include <linux/vt_kern.h> /* For unblank_screen() */ --#include <linux/compiler.h> --#include <linux/vmalloc.h> --#include <linux/module.h> --#include <linux/kprobes.h> --#include <linux/uaccess.h> --#include <linux/kdebug.h> --#include <linux/kprobes.h> -- --#include <asm/system.h> --#include <asm/pgalloc.h> --#include <asm/smp.h> --#include <asm/tlbflush.h> --#include <asm/proto.h> --#include <asm-generic/sections.h> -- --/* Page fault error code bits */ --#define PF_PROT (1<<0) /* or no page found */ --#define PF_WRITE (1<<1) --#define PF_USER (1<<2) --#define PF_RSVD (1<<3) --#define PF_INSTR (1<<4) -- --#ifdef CONFIG_KPROBES --static inline int notify_page_fault(struct pt_regs *regs) --{ -- int ret = 0; -- -- /* kprobe_running() needs smp_processor_id() */ -- if (!user_mode(regs)) { -- preempt_disable(); -- if (kprobe_running() && kprobe_fault_handler(regs, 14)) -- ret = 1; -- preempt_enable(); -- } -- -- return ret; --} --#else --static inline int notify_page_fault(struct pt_regs *regs) --{ -- return 0; --} --#endif -- --/* Sometimes the CPU reports invalid exceptions on prefetch. -- Check that here and ignore. -- Opcode checker based on code by Richard Brunner */ --static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, -- unsigned long error_code) --{ -- unsigned char *instr; -- int scan_more = 1; -- int prefetch = 0; -- unsigned char *max_instr; -- -- /* If it was a exec fault ignore */ -- if (error_code & PF_INSTR) -- return 0; -- -- instr = (unsigned char __user *)convert_rip_to_linear(current, regs); -- max_instr = instr + 15; -- -- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) -- return 0; -- -- while (scan_more && instr < max_instr) { -- unsigned char opcode; -- unsigned char instr_hi; -- unsigned char instr_lo; -- -- if (probe_kernel_address(instr, opcode)) -- break; -- -- instr_hi = opcode & 0xf0; -- instr_lo = opcode & 0x0f; -- instr++; -- -- switch (instr_hi) { -- case 0x20: -- case 0x30: -- /* Values 0x26,0x2E,0x36,0x3E are valid x86 -- prefixes. In long mode, the CPU will signal -- invalid opcode if some of these prefixes are -- present so we will never get here anyway */ -- scan_more = ((instr_lo & 7) == 0x6); -- break; -- -- case 0x40: -- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes -- Need to figure out under what instruction mode the -- instruction was issued ... */ -- /* Could check the LDT for lm, but for now it's good -- enough to assume that long mode only uses well known -- segments or kernel. */ -- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS) -- || (regs->cs == FLAT_USER_CS64); -- break; -- -- case 0x60: -- /* 0x64 thru 0x67 are valid prefixes in all modes. */ -- scan_more = (instr_lo & 0xC) == 0x4; -- break; -- case 0xF0: -- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. 
*/ -- scan_more = !instr_lo || (instr_lo>>1) == 1; -- break; -- case 0x00: -- /* Prefetch instruction is 0x0F0D or 0x0F18 */ -- scan_more = 0; -- if (probe_kernel_address(instr, opcode)) -- break; -- prefetch = (instr_lo == 0xF) && -- (opcode == 0x0D || opcode == 0x18); -- break; -- default: -- scan_more = 0; -- break; -- } -- } -- return prefetch; --} -- --static int bad_address(void *p) --{ -- unsigned long dummy; -- return probe_kernel_address((unsigned long *)p, dummy); --} -- --void dump_pagetable(unsigned long address) --{ -- pgd_t *pgd; -- pud_t *pud; -- pmd_t *pmd; -- pte_t *pte; -- -- pgd = (pgd_t *)read_cr3(); -- -- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); -- pgd += pgd_index(address); -- if (bad_address(pgd)) goto bad; -- printk("PGD %lx ", pgd_val(*pgd)); -- if (!pgd_present(*pgd)) goto ret; -- -- pud = pud_offset(pgd, address); -- if (bad_address(pud)) goto bad; -- printk("PUD %lx ", pud_val(*pud)); -- if (!pud_present(*pud)) goto ret; -- -- pmd = pmd_offset(pud, address); -- if (bad_address(pmd)) goto bad; -- printk("PMD %lx ", pmd_val(*pmd)); -- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; -- -- pte = pte_offset_kernel(pmd, address); -- if (bad_address(pte)) goto bad; -- printk("PTE %lx", pte_val(*pte)); --ret: -- printk("\n"); -- return; --bad: -- printk("BAD\n"); --} -- --static const char errata93_warning[] = --KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" --KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" --KERN_ERR "******* Please consider a BIOS update.\n" --KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; -- --/* Workaround for K8 erratum #93 & buggy BIOS. -- BIOS SMM functions are required to use a specific workaround -- to avoid corruption of the 64bit RIP register on C stepping K8. -- A lot of BIOS that didn't get tested properly miss this. -- The OS sees this as a page fault with the upper 32bits of RIP cleared. -- Try to work around it here. -- Note we only handle faults in kernel here. */ -- --static int is_errata93(struct pt_regs *regs, unsigned long address) --{ -- static int warned; -- if (address != regs->rip) -- return 0; -- if ((address >> 32) != 0) -- return 0; -- address |= 0xffffffffUL << 32; -- if ((address >= (u64)_stext && address <= (u64)_etext) || -- (address >= MODULES_VADDR && address <= MODULES_END)) { -- if (!warned) { -- printk(errata93_warning); -- warned = 1; -- } -- regs->rip = address; -- return 1; -- } -- return 0; --} -- --static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, -- unsigned long error_code) --{ -- unsigned long flags = oops_begin(); -- struct task_struct *tsk; -- -- printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", -- current->comm, address); -- dump_pagetable(address); -- tsk = current; -- tsk->thread.cr2 = address; -- tsk->thread.trap_no = 14; -- tsk->thread.error_code = error_code; -- __die("Bad pagetable", regs, error_code); -- oops_end(flags); -- do_exit(SIGKILL); --} -- --/* -- * Handle a fault on the vmalloc area -- * -- * This assumes no large pages in there. -- */ --static int vmalloc_fault(unsigned long address) --{ -- pgd_t *pgd, *pgd_ref; -- pud_t *pud, *pud_ref; -- pmd_t *pmd, *pmd_ref; -- pte_t *pte, *pte_ref; -- -- /* Copy kernel mappings over when needed. This can also -- happen within a race in page table update. In the later -- case just flush. */ -- -- /* On Xen the line below does not always work. Needs investigating! 
*/ -- /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ -- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); -- pgd += pgd_index(address); -- pgd_ref = pgd_offset_k(address); -- if (pgd_none(*pgd_ref)) -- return -1; -- if (pgd_none(*pgd)) -- set_pgd(pgd, *pgd_ref); -- else -- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); -- -- /* Below here mismatches are bugs because these lower tables -- are shared */ -- -- pud = pud_offset(pgd, address); -- pud_ref = pud_offset(pgd_ref, address); -- if (pud_none(*pud_ref)) -- return -1; -- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) -- BUG(); -- pmd = pmd_offset(pud, address); -- pmd_ref = pmd_offset(pud_ref, address); -- if (pmd_none(*pmd_ref)) -- return -1; -- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) -- BUG(); -- pte_ref = pte_offset_kernel(pmd_ref, address); -- if (!pte_present(*pte_ref)) -- return -1; -- pte = pte_offset_kernel(pmd, address); -- /* Don't use pte_page here, because the mappings can point -- outside mem_map, and the NUMA hash lookup cannot handle -- that. */ -- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) -- BUG(); -- return 0; --} -- --int show_unhandled_signals = 1; -- -- --#define MEM_VERBOSE 1 -- --#ifdef MEM_VERBOSE --#define MEM_LOG(_f, _a...) \ -- printk("fault.c:[%d]-> " _f "\n", \ -- __LINE__ , ## _a ) --#else --#define MEM_LOG(_f, _a...) ((void)0) --#endif -- --static int spurious_fault(struct pt_regs *regs, -- unsigned long address, -- unsigned long error_code) --{ -- pgd_t *pgd; -- pud_t *pud; -- pmd_t *pmd; -- pte_t *pte; -- --#ifdef CONFIG_XEN -- /* Faults in hypervisor area are never spurious. */ -- if ((address >= HYPERVISOR_VIRT_START) && -- (address < HYPERVISOR_VIRT_END)) -- return 0; --#endif -- -- /* Reserved-bit violation or user access to kernel space? */ -- if (error_code & (PF_RSVD|PF_USER)) -- return 0; -- -- pgd = init_mm.pgd + pgd_index(address); -- if (!pgd_present(*pgd)) -- return 0; -- -- pud = pud_offset(pgd, address); -- if (!pud_present(*pud)) -- return 0; -- -- pmd = pmd_offset(pud, address); -- if (!pmd_present(*pmd)) -- return 0; -- -- pte = pte_offset_kernel(pmd, address); -- if (!pte_present(*pte)) -- return 0; -- if ((error_code & PF_WRITE) && !pte_write(*pte)) -- return 0; -- if ((error_code & PF_INSTR) && (__pte_val(*pte) & _PAGE_NX)) -- return 0; -- -- return 1; --} -- --/* -- * This routine handles page faults. It determines the address, -- * and the problem, and then passes it off to one of the appropriate -- * routines. -- */ --asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, -- unsigned long error_code) --{ -- struct task_struct *tsk; -- struct mm_struct *mm; -- struct vm_area_struct * vma; -- unsigned long address; -- const struct exception_table_entry *fixup; -- int write, fault; -- unsigned long flags; -- siginfo_t info; -- -- if (!user_mode(regs)) -- error_code &= ~PF_USER; /* means kernel */ -- -- /* -- * We can fault from pretty much anywhere, with unknown IRQ state. -- */ -- trace_hardirqs_fixup(); -- -- tsk = current; -- mm = tsk->mm; -- prefetchw(&mm->mmap_sem); -- -- /* get the address */ -- address = read_cr2(); -- -- info.si_code = SEGV_MAPERR; -- -- -- /* -- * We fault-in kernel-space virtual memory on-demand. The -- * 'reference' page table is init_mm.pgd. -- * -- * NOTE! We MUST NOT take any locks for this case. We may -- * be in an interrupt or a critical region, and should -- * only copy the information from the master page table, -- * nothing more. 
-- * -- * This verifies that the fault happens in kernel space -- * (error_code & 4) == 0, and that the fault was not a -- * protection error (error_code & 9) == 0. -- */ -- if (unlikely(address >= TASK_SIZE64)) { -- /* -- * Don't check for the module range here: its PML4 -- * is always initialized because it's shared with the main -- * kernel text. Only vmalloc may need PML4 syncups. -- */ -- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && -- ((address >= VMALLOC_START && address < VMALLOC_END))) { -- if (vmalloc_fault(address) >= 0) -- return; -- } -- /* Can take a spurious fault if mapping changes R/O -> R/W. */ -- if (spurious_fault(regs, address, error_code)) -- return; -- if (notify_page_fault(regs)) -- return; -- /* -- * Don't take the mm semaphore here. If we fixup a prefetch -- * fault we could otherwise deadlock. -- */ -- goto bad_area_nosemaphore; -- } -- -- if (notify_page_fault(regs)) -- return; -- -- if (likely(regs->eflags & X86_EFLAGS_IF)) -- local_irq_enable(); -- -- if (unlikely(error_code & PF_RSVD)) -- pgtable_bad(address, regs, error_code); -- -- /* -- * If we're in an interrupt or have no user -- * context, we must not take the fault.. -- */ -- if (unlikely(in_atomic() || !mm)) -- goto bad_area_nosemaphore; -- -- /* -- * User-mode registers count as a user access even for any -- * potential system fault or CPU buglet. -- */ -- if (user_mode_vm(regs)) -- error_code |= PF_USER; -- -- again: -- /* When running in the kernel we expect faults to occur only to -- * addresses in user space. All other faults represent errors in the -- * kernel and should generate an OOPS. Unfortunately, in the case of an -- * erroneous fault occurring in a code path which already holds mmap_sem -- * we will deadlock attempting to validate the fault against the -- * address space. Luckily the kernel only validly references user -- * space from well defined areas of code, which are listed in the -- * exceptions table. -- * -- * As the vast majority of faults will be valid we will only perform -- * the source reference check when there is a possibility of a deadlock. -- * Attempt to lock the address space, if we cannot we then validate the -- * source. If this is invalid we can skip the address space check, -- * thus avoiding the deadlock. -- */ -- if (!down_read_trylock(&mm->mmap_sem)) { -- if ((error_code & PF_USER) == 0 && -- !search_exception_tables(regs->rip)) -- goto bad_area_nosemaphore; -- down_read(&mm->mmap_sem); -- } -- -- vma = find_vma(mm, address); -- if (!vma) -- goto bad_area; -- if (likely(vma->vm_start <= address)) -- goto good_area; -- if (!(vma->vm_flags & VM_GROWSDOWN)) -- goto bad_area; -- if (error_code & 4) { -- /* Allow userspace just enough access below the stack pointer -- * to let the 'enter' instruction work. -- */ -- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp) -- goto bad_area; -- } -- if (expand_stack(vma, address)) -- goto bad_area; --/* -- * Ok, we have a good vm_area for this memory access, so -- * we can handle it.. 
-- */ --good_area: -- info.si_code = SEGV_ACCERR; -- write = 0; -- switch (error_code & (PF_PROT|PF_WRITE)) { -- default: /* 3: write, present */ -- /* fall through */ -- case PF_WRITE: /* write, not present */ -- if (!(vma->vm_flags & VM_WRITE)) -- goto bad_area; -- write++; -- break; -- case PF_PROT: /* read, present */ -- goto bad_area; -- case 0: /* read, not present */ -- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) -- goto bad_area; -- } -- -- /* -- * If for any reason at all we couldn't handle the fault, -- * make sure we exit gracefully rather than endlessly redo -- * the fault. -- */ -- fault = handle_mm_fault(mm, vma, address, write); -- if (unlikely(fault & VM_FAULT_ERROR)) { -- if (fault & VM_FAULT_OOM) -- goto out_of_memory; -- else if (fault & VM_FAULT_SIGBUS) -- goto do_sigbus; -- BUG(); -- } -- if (fault & VM_FAULT_MAJOR) -- tsk->maj_flt++; -- else -- tsk->min_flt++; -- up_read(&mm->mmap_sem); -- return; -- --/* -- * Something tried to access memory that isn't in our memory map.. -- * Fix it, but check if it's kernel or user first.. -- */ --bad_area: -- up_read(&mm->mmap_sem); -- --bad_area_nosemaphore: -- /* User mode accesses just cause a SIGSEGV */ -- if (error_code & PF_USER) { -- -- /* -- * It's possible to have interrupts off here. -- */ -- local_irq_enable(); -- -- if (is_prefetch(regs, address, error_code)) -- return; -- -- /* Work around K8 erratum #100 K8 in compat mode -- occasionally jumps to illegal addresses >4GB. We -- catch this here in the page fault handler because -- these addresses are not reachable. Just detect this -- case and return. Any code segment in LDT is -- compatibility mode. */ -- if ((regs->cs == __USER32_CS || regs->cs == FLAT_USER_CS32 || -- (regs->cs & (1<<2))) && (address >> 32)) -- return; -- -- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && -- printk_ratelimit()) { -- printk( -- "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", -- tsk->pid > 1 ? KERN_INFO : KERN_EMERG, -- tsk->comm, tsk->pid, address, regs->rip, -- regs->rsp, error_code); -- } -- -- tsk->thread.cr2 = address; -- /* Kernel addresses are always protection faults */ -- tsk->thread.error_code = error_code | (address >= TASK_SIZE); -- tsk->thread.trap_no = 14; -- info.si_signo = SIGSEGV; -- info.si_errno = 0; -- /* info.si_code has been set above */ -- info.si_addr = (void __user *)address; -- force_sig_info(SIGSEGV, &info, tsk); -- return; -- } -- --no_context: -- -- /* Are we prepared to handle this kernel fault? */ -- fixup = search_exception_tables(regs->rip); -- if (fixup) { -- regs->rip = fixup->fixup; -- return; -- } -- -- /* -- * Hall of shame of CPU/BIOS bugs. -- */ -- -- if (is_prefetch(regs, address, error_code)) -- return; -- -- if (is_errata93(regs, address)) -- return; -- --/* -- * Oops. The kernel tried to access some bad page. We'll have to -- * terminate things with extreme prejudice. 
-- */ -- -- flags = oops_begin(); -- -- if (address < PAGE_SIZE) -- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); -- else -- printk(KERN_ALERT "Unable to handle kernel paging request"); -- printk(" at %016lx RIP: \n" KERN_ALERT,address); -- printk_address(regs->rip); -- dump_pagetable(address); -- tsk->thread.cr2 = address; -- tsk->thread.trap_no = 14; -- tsk->thread.error_code = error_code; -- __die("Oops", regs, error_code); -- /* Executive summary in case the body of the oops scrolled away */ -- printk(KERN_EMERG "CR2: %016lx\n", address); -- oops_end(flags); -- do_exit(SIGKILL); -- --/* -- * We ran out of memory, or some other thing happened to us that made -- * us unable to handle the page fault gracefully. -- */ --out_of_memory: -- up_read(&mm->mmap_sem); -- if (is_global_init(current)) { -- yield(); -- goto again; -- } -- printk("VM: killing process %s\n", tsk->comm); -- if (error_code & 4) -- do_group_exit(SIGKILL); -- goto no_context; -- --do_sigbus: -- up_read(&mm->mmap_sem); -- -- /* Kernel mode? Handle exceptions or die */ -- if (!(error_code & PF_USER)) -- goto no_context; -- -- tsk->thread.cr2 = address; -- tsk->thread.error_code = error_code; -- tsk->thread.trap_no = 14; -- info.si_signo = SIGBUS; -- info.si_errno = 0; -- info.si_code = BUS_ADRERR; -- info.si_addr = (void __user *)address; -- force_sig_info(SIGBUS, &info, tsk); -- return; --} -- --DEFINE_SPINLOCK(pgd_lock); --LIST_HEAD(pgd_list); -- --#define pgd_page_table(what, pg) \ -- spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock) -- --void vmalloc_sync_all(void) --{ -- /* Note that races in the updates of insync and start aren't -- problematic: -- insync can only get set bits added, and updates to start are only -- improving performance (without affecting correctness if undone). */ -- static DECLARE_BITMAP(insync, PTRS_PER_PGD); -- static unsigned long start = VMALLOC_START & PGDIR_MASK; -- unsigned long address; -- -- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { -- if (!test_bit(pgd_index(address), insync)) { -- const pgd_t *pgd_ref = pgd_offset_k(address); -- struct page *page; -- -- if (pgd_none(*pgd_ref)) -- continue; -- spin_lock(&pgd_lock); -- list_for_each_entry(page, &pgd_list, lru) { -- pgd_t *pgd; -- pgd = (pgd_t *)page_address(page) + pgd_index(address); -- -- pgd_page_table(lock, page); -- if (pgd_none(*pgd)) -- set_pgd(pgd, *pgd_ref); -- else -- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); -- pgd_page_table(unlock, page); -- } -- spin_unlock(&pgd_lock); -- set_bit(pgd_index(address), insync); -- } -- if (address == start) -- start = address + PGDIR_SIZE; -- } -- /* Check that there is no need to do the same for the modules area. 
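/*
 * A condensed sketch (assumed helper, no locking) of the lazy kernel
 * page-table synchronisation that the removed vmalloc_fault() and
 * vmalloc_sync_all() above implement: every process page directory must
 * mirror init_mm's entries for the vmalloc range, and a missing entry
 * is copied in either on first fault or eagerly by the sync loop.
 */
static int sync_one_pgd_entry(pgd_t *pgd, unsigned long address)
{
	const pgd_t *pgd_ref = pgd_offset_k(address);	/* init_mm's entry */

	if (pgd_none(*pgd_ref))
		return -1;		/* nothing mapped there at all */
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);	/* adopt the shared kernel mapping */
	return 0;
}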
*/ -- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); -- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == -- (__START_KERNEL & PGDIR_MASK))); --} + } --- head.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head/arch/x86/mm/highmem_32-xen.c 2011-01-31 18:01:51.000000000 +0100 @@ -18,6 +18,49 @@ void kunmap(struct page *page) @@ -15810,7 +14443,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y) vdso32-images = $(vdso32.so-y:%=vdso32-%.so) ---- head.orig/arch/x86/vdso/vdso32/syscall.S 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/vdso/vdso32/syscall.S 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/vdso/vdso32/syscall.S 2011-01-31 18:01:51.000000000 +0100 @@ -19,8 +19,10 @@ __kernel_vsyscall: .Lpush_ebp: @@ -15823,7 +14456,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches movl %ebp, %ecx popl %ebp .Lpop_ebp: ---- head.orig/arch/x86/vdso/vdso32.S 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/vdso/vdso32.S 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/vdso/vdso32.S 2011-01-31 18:01:51.000000000 +0100 @@ -19,4 +19,16 @@ vdso32_sysenter_start: .incbin "arch/x86/vdso/vdso32-sysenter.so" @@ -17513,8 +16146,8 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches + spinning->ticket == token) { +#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING + token = spinning->irq_count -+ < per_cpu(_irq_count, cpu) -+ ? ticket_drop(spinning, token, cpu) : -2; ++ < per_cpu(_irq_count, cpu) ++ ? ticket_drop(spinning, token, cpu) : -2; +#endif + break; + } @@ -17714,7 +16347,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches .resume = xenoprof_resume, .suspend = xenoprof_suspend }; ---- head.orig/arch/x86/include/uapi/asm/e820.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/include/uapi/asm/e820.h 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/include/uapi/asm/e820.h 2013-01-08 11:47:19.000000000 +0100 @@ -60,7 +60,11 @@ struct e820map { struct e820entry map[E820_X_MAX]; @@ -17728,7 +16361,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches #define ISA_END_ADDRESS 0x100000 #define BIOS_BEGIN 0x000a0000 ---- head.orig/arch/x86/include/asm/hardirq.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/include/asm/hardirq.h 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/include/asm/hardirq.h 2013-05-23 17:11:22.000000000 +0200 @@ -21,11 +21,15 @@ typedef struct { #ifdef CONFIG_SMP @@ -25184,7 +23817,7 @@ Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches void leave_mm(int cpu); #else static inline void leave_mm(int cpu) ---- head.orig/arch/x86/include/asm/ptrace.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/include/asm/ptrace.h 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/include/asm/ptrace.h 2013-01-08 11:47:39.000000000 +0100 @@ -224,7 +224,9 @@ static inline unsigned long regs_get_ker } diff --git a/patches.xen/xen3-patch-2.6.26 b/patches.xen/xen3-patch-2.6.26 index 1658d1844d..e0453e121a 100644 --- a/patches.xen/xen3-patch-2.6.26 +++ b/patches.xen/xen3-patch-2.6.26 @@ -9,7 +9,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches 3.1/init/Kconfig (done differently) --- head.orig/arch/x86/Kconfig 2013-05-23 17:11:13.000000000 +0200 -+++ head/arch/x86/Kconfig 2013-05-23 17:17:20.000000000 +0200 ++++ head/arch/x86/Kconfig 
2013-07-02 09:36:56.000000000 +0200 @@ -53,7 +53,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select SYSCTL_EXCEPTION_TRACE @@ -36,7 +36,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches ---help--- Allow the kernel linear mapping to use 1GB pages on CPUs that support it. This can improve the kernel's performance a tiny bit by -@@ -2422,6 +2423,4 @@ source "crypto/Kconfig" +@@ -2423,6 +2424,4 @@ source "crypto/Kconfig" source "arch/x86/kvm/Kconfig" @@ -689,7 +689,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches } void mtrr_ap_init(void) ---- head.orig/arch/x86/kernel/cpu/bugs.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/kernel/cpu/bugs.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/kernel/cpu/bugs.c 2013-05-23 17:17:14.000000000 +0200 @@ -75,10 +75,12 @@ static void __init check_fpu(void) @@ -704,7 +704,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches } void __init check_bugs(void) ---- head.orig/arch/x86/kernel/cpu/proc.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/kernel/cpu/proc.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/kernel/cpu/proc.c 2013-05-23 17:16:30.000000000 +0200 @@ -10,7 +10,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, @@ -1797,7 +1797,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches return error; } ---- head.orig/arch/x86/kernel/mmconf-fam10h_64.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/kernel/mmconf-fam10h_64.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/kernel/mmconf-fam10h_64.c 2011-01-31 18:07:35.000000000 +0100 @@ -205,12 +205,20 @@ void __cpuinit fam10h_check_enable_mmcfg return; @@ -5955,1020 +5955,39 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches - .stop = c_stop, - .show = show_cpuinfo, -}; ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 +--- head.orig/arch/x86/kernel/smp-xen.c 2011-01-31 18:01:51.000000000 +0100 +++ head/arch/x86/kernel/smp-xen.c 2011-01-31 18:07:35.000000000 +0100 -@@ -0,0 +1,327 @@ -+/* -+ * Intel SMP support routines. -+ * -+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> -+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> -+ * (c) 2002,2003 Andi Kleen, SuSE Labs. -+ * +@@ -5,6 +5,8 @@ + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * + * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> + * -+ * This code is released under the GNU General Public License version 2 or -+ * later. -+ */ -+ -+#include <linux/init.h> -+ -+#include <linux/mm.h> -+#include <linux/delay.h> -+#include <linux/spinlock.h> -+#include <linux/kernel_stat.h> -+#include <linux/mc146818rtc.h> + * This code is released under the GNU General Public License version 2 or + * later. + */ +@@ -14,289 +16,108 @@ + #include <linux/mm.h> + #include <linux/delay.h> + #include <linux/spinlock.h> +-#include <linux/smp.h> + #include <linux/kernel_stat.h> + #include <linux/mc146818rtc.h> +#include <linux/cache.h> -+#include <linux/interrupt.h> + #include <linux/interrupt.h> +#include <linux/cpu.h> -+ -+#include <asm/mtrr.h> -+#include <asm/tlbflush.h> -+#include <asm/mmu_context.h> -+#include <asm/proto.h> -+#include <mach_ipi.h> -+#include <xen/evtchn.h> -+/* -+ * Some notes on x86 processor bugs affecting SMP operation: -+ * -+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. 
-+ * The Linux implications for SMP are handled as follows: -+ * -+ * Pentium III / [Xeon] -+ * None of the E1AP-E3AP errata are visible to the user. -+ * -+ * E1AP. see PII A1AP -+ * E2AP. see PII A2AP -+ * E3AP. see PII A3AP -+ * -+ * Pentium II / [Xeon] -+ * None of the A1AP-A3AP errata are visible to the user. -+ * -+ * A1AP. see PPro 1AP -+ * A2AP. see PPro 2AP -+ * A3AP. see PPro 7AP -+ * -+ * Pentium Pro -+ * None of 1AP-9AP errata are visible to the normal user, -+ * except occasional delivery of 'spurious interrupt' as trap #15. -+ * This is very rare and a non-problem. -+ * -+ * 1AP. Linux maps APIC as non-cacheable -+ * 2AP. worked around in hardware -+ * 3AP. fixed in C0 and above steppings microcode update. -+ * Linux does not use excessive STARTUP_IPIs. -+ * 4AP. worked around in hardware -+ * 5AP. symmetric IO mode (normal Linux operation) not affected. -+ * 'noapic' mode has vector 0xf filled out properly. -+ * 6AP. 'noapic' mode might be affected - fixed in later steppings -+ * 7AP. We do not assume writes to the LVT deassering IRQs -+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup -+ * 9AP. We do not use mixed mode -+ * -+ * Pentium -+ * There is a marginal case where REP MOVS on 100MHz SMP -+ * machines with B stepping processors can fail. XXX should provide -+ * an L1cache=Writethrough or L1cache=off option. -+ * -+ * B stepping CPUs may hang. There are hardware work arounds -+ * for this. We warn about it in case your board doesn't have the work -+ * arounds. Basically that's so I can tell anyone with a B stepping -+ * CPU and SMP problems "tough". -+ * -+ * Specific items [From Pentium Processor Specification Update] -+ * -+ * 1AP. Linux doesn't use remote read -+ * 2AP. Linux doesn't trust APIC errors -+ * 3AP. We work around this -+ * 4AP. Linux never generated 3 interrupts of the same priority -+ * to cause a lost local interrupt. -+ * 5AP. Remote read is never used -+ * 6AP. not affected - worked around in hardware -+ * 7AP. not affected - worked around in hardware -+ * 8AP. worked around in hardware - we get explicit CS errors if not -+ * 9AP. only 'noapic' mode affected. Might generate spurious -+ * interrupts, we log only the first one and count the -+ * rest silently. -+ * 10AP. not affected - worked around in hardware -+ * 11AP. Linux reads the APIC between writes to avoid this, as per -+ * the documentation. Make sure you preserve this as it affects -+ * the C stepping chips too. -+ * 12AP. not affected - worked around in hardware -+ * 13AP. not affected - worked around in hardware -+ * 14AP. we always deassert INIT during bootup -+ * 15AP. not affected - worked around in hardware -+ * 16AP. not affected - worked around in hardware -+ * 17AP. not affected - worked around in hardware -+ * 18AP. not affected - worked around in hardware -+ * 19AP. not affected - worked around in BIOS -+ * -+ * If this sounds worrying believe me these bugs are either ___RARE___, -+ * or are signal timing bugs worked around in hardware and there's -+ * about nothing of note with C stepping upwards. -+ */ -+ -+/* -+ * this function sends a 'reschedule' IPI to another CPU. -+ * it goes straight through and wastes no time serializing -+ * anything. Worst case is that we lose a reschedule ... -+ */ -+void xen_smp_send_reschedule(int cpu) -+{ -+ if (unlikely(cpu_is_offline(cpu))) { -+ WARN_ON(1); -+ return; -+ } -+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -+} -+ -+/* -+ * Structure and data for smp_call_function(). 
This is designed to minimise -+ * static memory requirements. It also looks cleaner. -+ */ -+static DEFINE_SPINLOCK(call_lock); -+ -+struct call_data_struct { -+ void (*func) (void *info); -+ void *info; -+ atomic_t started; -+ atomic_t finished; -+ int wait; -+}; -+ -+void lock_ipi_call_lock(void) -+{ -+ spin_lock_irq(&call_lock); -+} -+ -+void unlock_ipi_call_lock(void) -+{ -+ spin_unlock_irq(&call_lock); -+} -+ -+static struct call_data_struct *call_data; -+ -+static void __smp_call_function(void (*func) (void *info), void *info, -+ int nonatomic, int wait) -+{ -+ struct call_data_struct data; -+ int cpus = num_online_cpus() - 1; -+ -+ if (!cpus) -+ return; -+ -+ data.func = func; -+ data.info = info; -+ atomic_set(&data.started, 0); -+ data.wait = wait; -+ if (wait) -+ atomic_set(&data.finished, 0); -+ -+ call_data = &data; -+ mb(); -+ -+ /* Send a message to all other CPUs and wait for them to respond */ -+ send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ -+ /* Wait for response */ -+ while (atomic_read(&data.started) != cpus) -+ cpu_relax(); -+ -+ if (wait) -+ while (atomic_read(&data.finished) != cpus) -+ cpu_relax(); -+} -+ -+ -+/** -+ * smp_call_function_mask(): Run a function on a set of other CPUs. -+ * @mask: The set of cpus to run on. Must not include the current cpu. -+ * @func: The function to run. This must be fast and non-blocking. -+ * @info: An arbitrary pointer to pass to the function. -+ * @wait: If true, wait (atomically) until function has completed on other CPUs. -+ * -+ * Returns 0 on success, else a negative status code. -+ * -+ * If @wait is true, then returns once @func has returned; otherwise -+ * it returns just before the target cpu calls @func. -+ * -+ * You must not call this function with disabled interrupts or from a -+ * hardware interrupt handler or from a bottom half handler. -+ */ -+int -+xen_smp_call_function_mask(cpumask_t mask, -+ void (*func)(void *), void *info, -+ int wait) -+{ -+ struct call_data_struct data; -+ cpumask_t allbutself; -+ int cpus; -+ -+ /* Can deadlock when called with interrupts disabled */ -+ WARN_ON(irqs_disabled()); -+ -+ /* Holding any lock stops cpus from going down. */ -+ spin_lock(&call_lock); -+ -+ allbutself = cpu_online_map; -+ cpu_clear(smp_processor_id(), allbutself); -+ -+ cpus_and(mask, mask, allbutself); -+ cpus = cpus_weight(mask); -+ -+ if (!cpus) { -+ spin_unlock(&call_lock); -+ return 0; -+ } -+ -+ data.func = func; -+ data.info = info; -+ atomic_set(&data.started, 0); -+ data.wait = wait; -+ if (wait) -+ atomic_set(&data.finished, 0); -+ -+ call_data = &data; -+ wmb(); -+ -+ /* Send a message to other CPUs */ -+ if (cpus_equal(mask, allbutself) && -+ cpus_equal(cpu_online_map, cpu_callout_map)) -+ send_IPI_allbutself(CALL_FUNCTION_VECTOR); -+ else -+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR); -+ -+ /* Wait for response */ -+ while (atomic_read(&data.started) != cpus) -+ cpu_relax(); -+ -+ if (wait) -+ while (atomic_read(&data.finished) != cpus) -+ cpu_relax(); -+ spin_unlock(&call_lock); -+ -+ return 0; -+} -+ -+static void stop_this_cpu(void *dummy) -+{ -+ local_irq_disable(); -+ /* -+ * Remove this CPU: -+ */ -+ cpu_clear(smp_processor_id(), cpu_online_map); -+ disable_all_local_evtchn(); -+ if (hlt_works(smp_processor_id())) -+ for (;;) halt(); -+ for (;;); -+} -+ -+/* -+ * this function calls the 'stop' function on all other CPUs in the system. 
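/*
 * Hedged usage sketch for the xen_smp_call_function_mask() added above;
 * the callback and counter here are made up for illustration. It runs a
 * fast, non-blocking function on every other online CPU and waits.
 */
static void bump_counter(void *info)
{
	atomic_inc(info);		/* runs in IPI context on each target */
}

static void run_on_others(void)
{
	atomic_t hits = ATOMIC_INIT(0);
	cpumask_t mask = cpu_online_map;

	cpu_clear(smp_processor_id(), mask);	/* must not include self */
	/* interrupts must be enabled here, or this can deadlock */
	xen_smp_call_function_mask(mask, bump_counter, &hits, 1);
}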
-+ */ -+ -+void xen_smp_send_stop(void) -+{ -+ int nolock; -+ unsigned long flags; -+ -+ /* Don't deadlock on the call lock in panic */ -+ nolock = !spin_trylock(&call_lock); -+ local_irq_save(flags); -+ __smp_call_function(stop_this_cpu, NULL, 0, 0); -+ if (!nolock) -+ spin_unlock(&call_lock); -+ disable_all_local_evtchn(); -+ local_irq_restore(flags); -+} -+ -+/* -+ * Reschedule call back. Nothing to do, -+ * all the work is done automatically when -+ * we return from the interrupt. -+ */ -+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) -+{ -+#ifdef CONFIG_X86_32 -+ __get_cpu_var(irq_stat).irq_resched_count++; -+#else -+ add_pda(irq_resched_count, 1); -+#endif -+ return IRQ_HANDLED; -+} -+ -+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) -+{ -+ void (*func) (void *info) = call_data->func; -+ void *info = call_data->info; -+ int wait = call_data->wait; -+ -+ /* -+ * Notify initiating CPU that I've grabbed the data and am -+ * about to execute the function -+ */ -+ mb(); -+ atomic_inc(&call_data->started); -+ /* -+ * At this point the info structure may be out of scope unless wait==1 -+ */ -+ (*func)(info); -+#ifdef CONFIG_X86_32 -+ __get_cpu_var(irq_stat).irq_call_count++; -+#else -+ add_pda(irq_call_count, 1); -+#endif -+ -+ if (wait) { -+ mb(); -+ atomic_inc(&call_data->finished); -+ } -+ -+ return IRQ_HANDLED; -+} ---- head.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 18:01:51.000000000 +0100 -+++ /dev/null 1970-01-01 00:00:00.000000000 +0000 -@@ -1,647 +0,0 @@ --/* -- * Intel SMP support routines. -- * -- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> -- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> -- * -- * This code is released under the GNU General Public License version 2 or -- * later. -- */ -- --#include <linux/init.h> -- --#include <linux/mm.h> --#include <linux/delay.h> --#include <linux/spinlock.h> --#include <linux/kernel_stat.h> --#include <linux/mc146818rtc.h> --#include <linux/cache.h> --#include <linux/interrupt.h> --#include <linux/cpu.h> --#include <linux/module.h> -- --#include <asm/mtrr.h> --#include <asm/tlbflush.h> --#include <asm/mmu_context.h> --#if 0 --#include <mach_apic.h> --#endif --#include <xen/evtchn.h> -- --/* -- * Some notes on x86 processor bugs affecting SMP operation: -- * -- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. -- * The Linux implications for SMP are handled as follows: -- * -- * Pentium III / [Xeon] -- * None of the E1AP-E3AP errata are visible to the user. -- * -- * E1AP. see PII A1AP -- * E2AP. see PII A2AP -- * E3AP. see PII A3AP -- * -- * Pentium II / [Xeon] -- * None of the A1AP-A3AP errata are visible to the user. -- * -- * A1AP. see PPro 1AP -- * A2AP. see PPro 2AP -- * A3AP. see PPro 7AP -- * -- * Pentium Pro -- * None of 1AP-9AP errata are visible to the normal user, -- * except occasional delivery of 'spurious interrupt' as trap #15. -- * This is very rare and a non-problem. -- * -- * 1AP. Linux maps APIC as non-cacheable -- * 2AP. worked around in hardware -- * 3AP. fixed in C0 and above steppings microcode update. -- * Linux does not use excessive STARTUP_IPIs. -- * 4AP. worked around in hardware -- * 5AP. symmetric IO mode (normal Linux operation) not affected. -- * 'noapic' mode has vector 0xf filled out properly. -- * 6AP. 'noapic' mode might be affected - fixed in later steppings -- * 7AP. We do not assume writes to the LVT deassering IRQs -- * 8AP. We do not enable low power mode (deep sleep) during MP bootup -- * 9AP. 
We do not use mixed mode -- * -- * Pentium -- * There is a marginal case where REP MOVS on 100MHz SMP -- * machines with B stepping processors can fail. XXX should provide -- * an L1cache=Writethrough or L1cache=off option. -- * -- * B stepping CPUs may hang. There are hardware work arounds -- * for this. We warn about it in case your board doesn't have the work -- * arounds. Basically that's so I can tell anyone with a B stepping -- * CPU and SMP problems "tough". -- * -- * Specific items [From Pentium Processor Specification Update] -- * -- * 1AP. Linux doesn't use remote read -- * 2AP. Linux doesn't trust APIC errors -- * 3AP. We work around this -- * 4AP. Linux never generated 3 interrupts of the same priority -- * to cause a lost local interrupt. -- * 5AP. Remote read is never used -- * 6AP. not affected - worked around in hardware -- * 7AP. not affected - worked around in hardware -- * 8AP. worked around in hardware - we get explicit CS errors if not -- * 9AP. only 'noapic' mode affected. Might generate spurious -- * interrupts, we log only the first one and count the -- * rest silently. -- * 10AP. not affected - worked around in hardware -- * 11AP. Linux reads the APIC between writes to avoid this, as per -- * the documentation. Make sure you preserve this as it affects -- * the C stepping chips too. -- * 12AP. not affected - worked around in hardware -- * 13AP. not affected - worked around in hardware -- * 14AP. we always deassert INIT during bootup -- * 15AP. not affected - worked around in hardware -- * 16AP. not affected - worked around in hardware -- * 17AP. not affected - worked around in hardware -- * 18AP. not affected - worked around in hardware -- * 19AP. not affected - worked around in BIOS -- * -- * If this sounds worrying believe me these bugs are either ___RARE___, -- * or are signal timing bugs worked around in hardware and there's -- * about nothing of note with C stepping upwards. -- */ -- --DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; -- --/* -- * the following functions deal with sending IPIs between CPUs. -- * -- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. -- */ -- --static inline int __prepare_ICR (unsigned int shortcut, int vector) --{ -- unsigned int icr = shortcut | APIC_DEST_LOGICAL; -- -- switch (vector) { -- default: -- icr |= APIC_DM_FIXED | vector; -- break; -- case NMI_VECTOR: -- icr |= APIC_DM_NMI; -- break; -- } -- return icr; --} -- --static inline int __prepare_ICR2 (unsigned int mask) --{ -- return SET_APIC_DEST_FIELD(mask); --} -- --DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); -- --static inline void __send_IPI_one(unsigned int cpu, int vector) --{ -- int irq = per_cpu(ipi_to_irq, cpu)[vector]; -- BUG_ON(irq < 0); -- notify_remote_via_irq(irq); --} -- --void __send_IPI_shortcut(unsigned int shortcut, int vector) --{ -- int cpu; -- -- switch (shortcut) { -- case APIC_DEST_SELF: -- __send_IPI_one(smp_processor_id(), vector); -- break; -- case APIC_DEST_ALLBUT: -- for (cpu = 0; cpu < NR_CPUS; ++cpu) { -- if (cpu == smp_processor_id()) -- continue; -- if (cpu_isset(cpu, cpu_online_map)) { -- __send_IPI_one(cpu, vector); -- } -- } -- break; -- default: -- printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, -- vector); -- break; -- } --} -- --void send_IPI_self(int vector) --{ -- __send_IPI_shortcut(APIC_DEST_SELF, vector); --} -- --/* -- * This is only used on smaller machines. 
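/*
 * What the removed send_IPI_*() helpers in this hunk boil down to on
 * Xen (a sketch, simplified from the code above): there is no local
 * APIC to program, so each (cpu, vector) pair is bound to an event
 * channel at CPU bringup and "sending an IPI" is just a notification
 * on that channel.
 */
static void sketch_send_ipi(unsigned int cpu, int vector)
{
	int irq = per_cpu(ipi_to_irq, cpu)[vector];	/* bound at bringup */

	BUG_ON(irq < 0);
	notify_remote_via_irq(irq);	/* event-channel signal to the vCPU */
}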
-- */ --void send_IPI_mask_bitmask(cpumask_t mask, int vector) --{ -- unsigned long flags; -- unsigned int cpu; -- -- local_irq_save(flags); -- WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]); -- -- for (cpu = 0; cpu < NR_CPUS; ++cpu) { -- if (cpu_isset(cpu, mask)) { -- __send_IPI_one(cpu, vector); -- } -- } -- -- local_irq_restore(flags); --} -- --void send_IPI_mask_sequence(cpumask_t mask, int vector) --{ -- -- send_IPI_mask_bitmask(mask, vector); --} -- --#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */ -- --#if 0 /* XEN */ --/* -- * Smarter SMP flushing macros. -- * c/o Linus Torvalds. -- * -- * These mean you can really definitely utterly forget about -- * writing to user space from interrupts. (Its not allowed anyway). -- * -- * Optimizations Manfred Spraul <manfred@colorfullife.com> -- */ -- --static cpumask_t flush_cpumask; --static struct mm_struct * flush_mm; --static unsigned long flush_va; --static DEFINE_SPINLOCK(tlbstate_lock); -- --/* -- * We cannot call mmdrop() because we are in interrupt context, -- * instead update mm->cpu_vm_mask. -- * -- * We need to reload %cr3 since the page tables may be going -- * away from under us.. -- */ --void leave_mm(int cpu) --{ -- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) -- BUG(); -- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); -- load_cr3(swapper_pg_dir); --} --EXPORT_SYMBOL_GPL(leave_mm); -- --/* -- * -- * The flush IPI assumes that a thread switch happens in this order: -- * [cpu0: the cpu that switches] -- * 1) switch_mm() either 1a) or 1b) -- * 1a) thread switch to a different mm -- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); -- * Stop ipi delivery for the old mm. This is not synchronized with -- * the other cpus, but smp_invalidate_interrupt ignore flush ipis -- * for the wrong mm, and in the worst case we perform a superfluous -- * tlb flush. -- * 1a2) set cpu_tlbstate to TLBSTATE_OK -- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 -- * was in lazy tlb mode. -- * 1a3) update cpu_tlbstate[].active_mm -- * Now cpu0 accepts tlb flushes for the new mm. -- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); -- * Now the other cpus will send tlb flush ipis. -- * 1a4) change cr3. -- * 1b) thread switch without mm change -- * cpu_tlbstate[].active_mm is correct, cpu0 already handles -- * flush ipis. -- * 1b1) set cpu_tlbstate to TLBSTATE_OK -- * 1b2) test_and_set the cpu bit in cpu_vm_mask. -- * Atomically set the bit [other cpus will start sending flush ipis], -- * and test the bit. -- * 1b3) if the bit was 0: leave_mm was called, flush the tlb. -- * 2) switch %%esp, ie current -- * -- * The interrupt must handle 2 special cases: -- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. -- * - the cpu performs speculative tlb reads, i.e. even if the cpu only -- * runs in kernel space, the cpu could load tlb entries for user space -- * pages. -- * -- * The good news is that cpu_tlbstate is local to each cpu, no -- * write/read ordering problems. -- */ -- --/* -- * TLB flush IPI: -- * -- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. -- * 2) Leave the mm if we are in the lazy tlb mode. 
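/*
 * Sketch of the "lazy tlb" decision the ordering comment above leads up
 * to (simplified from smp_invalidate_interrupt() below; single-address
 * flushes omitted): a CPU still running on a dead mm's page tables
 * opts out of further flush IPIs by leaving the mm instead of flushing.
 */
static void flush_ipi_sketch(int cpu, struct mm_struct *flushed_mm)
{
	if (flushed_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
			local_flush_tlb();	/* mm is in active use  */
		else
			leave_mm(cpu);		/* lazy: opt out of IPIs */
	}
}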
-- */ -- --irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id) --{ -- unsigned long cpu; -- -- cpu = get_cpu(); -- -- if (!cpu_isset(cpu, flush_cpumask)) -- goto out; -- /* -- * This was a BUG() but until someone can quote me the -- * line from the intel manual that guarantees an IPI to -- * multiple CPUs is retried _only_ on the erroring CPUs -- * its staying as a return -- * -- * BUG(); -- */ -- -- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { -- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { -- if (flush_va == TLB_FLUSH_ALL) -- local_flush_tlb(); -- else -- __flush_tlb_one(flush_va); -- } else -- leave_mm(cpu); -- } -- smp_mb__before_clear_bit(); -- cpu_clear(cpu, flush_cpumask); -- smp_mb__after_clear_bit(); --out: -- put_cpu_no_resched(); -- __get_cpu_var(irq_stat).irq_tlb_count++; -- -- return IRQ_HANDLED; --} -- --void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, -- unsigned long va) --{ -- cpumask_t cpumask = *cpumaskp; -- -- /* -- * A couple of (to be removed) sanity checks: -- * -- * - current CPU must not be in mask -- * - mask must exist :) -- */ -- BUG_ON(cpus_empty(cpumask)); -- BUG_ON(cpu_isset(smp_processor_id(), cpumask)); -- BUG_ON(!mm); -- --#ifdef CONFIG_HOTPLUG_CPU -- /* If a CPU which we ran on has gone down, OK. */ -- cpus_and(cpumask, cpumask, cpu_online_map); -- if (unlikely(cpus_empty(cpumask))) -- return; --#endif -- -- /* -- * i'm not happy about this global shared spinlock in the -- * MM hot path, but we'll see how contended it is. -- * AK: x86-64 has a faster method that could be ported. -- */ -- spin_lock(&tlbstate_lock); -- -- flush_mm = mm; -- flush_va = va; -- cpus_or(flush_cpumask, cpumask, flush_cpumask); -- /* -- * We have to send the IPI only to -- * CPUs affected. -- */ -- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); -- -- while (!cpus_empty(flush_cpumask)) -- /* nothing. 
lockup detection does not belong here */ -- cpu_relax(); -- -- flush_mm = NULL; -- flush_va = 0; -- spin_unlock(&tlbstate_lock); --} -- --void flush_tlb_current_task(void) --{ -- struct mm_struct *mm = current->mm; -- cpumask_t cpu_mask; -- -- preempt_disable(); -- cpu_mask = mm->cpu_vm_mask; -- cpu_clear(smp_processor_id(), cpu_mask); -- -- local_flush_tlb(); -- if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); -- preempt_enable(); --} -- --void flush_tlb_mm (struct mm_struct * mm) --{ -- cpumask_t cpu_mask; -- -- preempt_disable(); -- cpu_mask = mm->cpu_vm_mask; -- cpu_clear(smp_processor_id(), cpu_mask); -- -- if (current->active_mm == mm) { -- if (current->mm) -- local_flush_tlb(); -- else -- leave_mm(smp_processor_id()); -- } -- if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); -- -- preempt_enable(); --} -- --void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) --{ -- struct mm_struct *mm = vma->vm_mm; -- cpumask_t cpu_mask; -- -- preempt_disable(); -- cpu_mask = mm->cpu_vm_mask; -- cpu_clear(smp_processor_id(), cpu_mask); -- -- if (current->active_mm == mm) { -- if(current->mm) -- __flush_tlb_one(va); -- else -- leave_mm(smp_processor_id()); -- } -- -- if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, va); -- -- preempt_enable(); --} --EXPORT_SYMBOL(flush_tlb_page); -- --static void do_flush_tlb_all(void* info) --{ -- unsigned long cpu = smp_processor_id(); -- -- __flush_tlb_all(); -- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) -- leave_mm(cpu); --} -- --void flush_tlb_all(void) --{ -- on_each_cpu(do_flush_tlb_all, NULL, 1, 1); --} -- --#endif /* XEN */ -- --/* -- * this function sends a 'reschedule' IPI to another CPU. -- * it goes straight through and wastes no time serializing -- * anything. Worst case is that we lose a reschedule ... -- */ --void xen_smp_send_reschedule(int cpu) --{ -- WARN_ON(cpu_is_offline(cpu)); -- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); --} -- --/* -- * Structure and data for smp_call_function(). This is designed to minimise -- * static memory requirements. It also looks cleaner. -- */ --static DEFINE_SPINLOCK(call_lock); -- --struct call_data_struct { -- void (*func) (void *info); -- void *info; -- atomic_t started; -- atomic_t finished; -- int wait; --}; -- --void lock_ipi_call_lock(void) --{ -- spin_lock_irq(&call_lock); --} -- --void unlock_ipi_call_lock(void) --{ -- spin_unlock_irq(&call_lock); --} -- --static struct call_data_struct *call_data; -- --static void __smp_call_function(void (*func) (void *info), void *info, -- int nonatomic, int wait) --{ -- struct call_data_struct data; -- int cpus = num_online_cpus() - 1; -- -- if (!cpus) -- return; -- -- data.func = func; -- data.info = info; -- atomic_set(&data.started, 0); -- data.wait = wait; -- if (wait) -- atomic_set(&data.finished, 0); -- -- call_data = &data; -- mb(); -- -- /* Send a message to all other CPUs and wait for them to respond */ -- send_IPI_allbutself(CALL_FUNCTION_VECTOR); -- -- /* Wait for response */ -- while (atomic_read(&data.started) != cpus) -- cpu_relax(); -- -- if (wait) -- while (atomic_read(&data.finished) != cpus) -- cpu_relax(); --} -- -- --/** -- * smp_call_function_mask(): Run a function on a set of other CPUs. -- * @mask: The set of cpus to run on. Must not include the current cpu. -- * @func: The function to run. This must be fast and non-blocking. -- * @info: An arbitrary pointer to pass to the function. 
-- * @wait: If true, wait (atomically) until function has completed on other CPUs. -- * -- * Returns 0 on success, else a negative status code. -- * -- * If @wait is true, then returns once @func has returned; otherwise -- * it returns just before the target cpu calls @func. -- * -- * You must not call this function with disabled interrupts or from a -- * hardware interrupt handler or from a bottom half handler. -- */ --int --xen_smp_call_function_mask(cpumask_t mask, -- void (*func)(void *), void *info, -- int wait) --{ -- struct call_data_struct data; -- cpumask_t allbutself; -- int cpus; -- -- /* Can deadlock when called with interrupts disabled */ -- WARN_ON(irqs_disabled()); -- -- /* Holding any lock stops cpus from going down. */ -- spin_lock(&call_lock); -- -- allbutself = cpu_online_map; -- cpu_clear(smp_processor_id(), allbutself); -- -- cpus_and(mask, mask, allbutself); -- cpus = cpus_weight(mask); -- -- if (!cpus) { -- spin_unlock(&call_lock); -- return 0; -- } -- -- data.func = func; -- data.info = info; -- atomic_set(&data.started, 0); -- data.wait = wait; -- if (wait) -- atomic_set(&data.finished, 0); -- -- call_data = &data; -- mb(); -- -- /* Send a message to other CPUs */ -- if (cpus_equal(mask, allbutself)) -- send_IPI_allbutself(CALL_FUNCTION_VECTOR); -- else -- send_IPI_mask(mask, CALL_FUNCTION_VECTOR); -- -- /* Wait for response */ -- while (atomic_read(&data.started) != cpus) -- cpu_relax(); -- -- if (wait) -- while (atomic_read(&data.finished) != cpus) -- cpu_relax(); -- spin_unlock(&call_lock); -- -- return 0; --} -- --static void stop_this_cpu (void * dummy) --{ -- local_irq_disable(); -- /* -- * Remove this CPU: -- */ -- cpu_clear(smp_processor_id(), cpu_online_map); -- disable_all_local_evtchn(); -- if (cpu_data(smp_processor_id()).hlt_works_ok) -- for(;;) halt(); -- for (;;); --} -- --/* -- * this function calls the 'stop' function on all other CPUs in the system. -- */ -- --void xen_smp_send_stop(void) --{ -- /* Don't deadlock on the call lock in panic */ -- int nolock = !spin_trylock(&call_lock); -- unsigned long flags; -- -- local_irq_save(flags); -- __smp_call_function(stop_this_cpu, NULL, 0, 0); -- if (!nolock) -- spin_unlock(&call_lock); -- disable_all_local_evtchn(); -- local_irq_restore(flags); --} -- --/* -- * Reschedule call back. Nothing to do, -- * all the work is done automatically when -- * we return from the interrupt. -- */ --irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) --{ -- __get_cpu_var(irq_stat).irq_resched_count++; -- -- return IRQ_HANDLED; --} -- --#include <linux/kallsyms.h> --irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) --{ -- void (*func) (void *info) = call_data->func; -- void *info = call_data->info; -- int wait = call_data->wait; -- -- /* -- * Notify initiating CPU that I've grabbed the data and am -- * about to execute the function -- */ -- mb(); -- atomic_inc(&call_data->started); -- /* -- * At this point the info structure may be out of scope unless wait==1 -- */ -- irq_enter(); -- (*func)(info); -- __get_cpu_var(irq_stat).irq_call_count++; -- irq_exit(); -- -- if (wait) { -- mb(); -- atomic_inc(&call_data->finished); -- } -- -- return IRQ_HANDLED; --} ---- head.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 18:01:51.000000000 +0100 -+++ /dev/null 1970-01-01 00:00:00.000000000 +0000 -@@ -1,554 +0,0 @@ --/* -- * Intel SMP support routines. -- * -- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> -- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> -- * (c) 2002,2003 Andi Kleen, SuSE Labs. 
-- * -- * This code is released under the GNU General Public License version 2 or -- * later. -- */ -- --#include <linux/init.h> -- --#include <linux/mm.h> --#include <linux/delay.h> --#include <linux/spinlock.h> --#include <linux/smp.h> --#include <linux/kernel_stat.h> --#include <linux/mc146818rtc.h> --#include <linux/interrupt.h> -- --#include <asm/mtrr.h> + + #include <asm/mtrr.h> -#include <asm/pgalloc.h> --#include <asm/tlbflush.h> + #include <asm/tlbflush.h> -#include <asm/mach_apic.h> --#include <asm/mmu_context.h> --#include <asm/proto.h> + #include <asm/mmu_context.h> + #include <asm/proto.h> -#include <asm/apicdef.h> -#include <asm/idle.h> -#ifdef CONFIG_XEN --#include <xen/evtchn.h> ++#include <mach_ipi.h> + #include <xen/evtchn.h> -#endif - -#ifndef CONFIG_XEN @@ -7022,8 +6041,9 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches -} -EXPORT_SYMBOL_GPL(leave_mm); - --/* -- * + /* ++ * Some notes on x86 processor bugs affecting SMP operation: + * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) @@ -7056,20 +6076,92 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. -- * ++ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. ++ * The Linux implications for SMP are handled as follows: + * - * The good news is that cpu mmu_state is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: -- * ++ * Pentium III / [Xeon] ++ * None of the E1AP-E3AP errata are visible to the user. + * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. -- */ -- ++ * E1AP. see PII A1AP ++ * E2AP. see PII A2AP ++ * E3AP. see PII A3AP ++ * ++ * Pentium II / [Xeon] ++ * None of the A1AP-A3AP errata are visible to the user. ++ * ++ * A1AP. see PPro 1AP ++ * A2AP. see PPro 2AP ++ * A3AP. see PPro 7AP ++ * ++ * Pentium Pro ++ * None of 1AP-9AP errata are visible to the normal user, ++ * except occasional delivery of 'spurious interrupt' as trap #15. ++ * This is very rare and a non-problem. ++ * ++ * 1AP. Linux maps APIC as non-cacheable ++ * 2AP. worked around in hardware ++ * 3AP. fixed in C0 and above steppings microcode update. ++ * Linux does not use excessive STARTUP_IPIs. ++ * 4AP. worked around in hardware ++ * 5AP. symmetric IO mode (normal Linux operation) not affected. ++ * 'noapic' mode has vector 0xf filled out properly. ++ * 6AP. 'noapic' mode might be affected - fixed in later steppings ++ * 7AP. We do not assume writes to the LVT deassering IRQs ++ * 8AP. We do not enable low power mode (deep sleep) during MP bootup ++ * 9AP. We do not use mixed mode ++ * ++ * Pentium ++ * There is a marginal case where REP MOVS on 100MHz SMP ++ * machines with B stepping processors can fail. XXX should provide ++ * an L1cache=Writethrough or L1cache=off option. ++ * ++ * B stepping CPUs may hang. There are hardware work arounds ++ * for this. We warn about it in case your board doesn't have the work ++ * arounds. Basically that's so I can tell anyone with a B stepping ++ * CPU and SMP problems "tough". ++ * ++ * Specific items [From Pentium Processor Specification Update] ++ * ++ * 1AP. Linux doesn't use remote read ++ * 2AP. 
Linux doesn't trust APIC errors ++ * 3AP. We work around this ++ * 4AP. Linux never generated 3 interrupts of the same priority ++ * to cause a lost local interrupt. ++ * 5AP. Remote read is never used ++ * 6AP. not affected - worked around in hardware ++ * 7AP. not affected - worked around in hardware ++ * 8AP. worked around in hardware - we get explicit CS errors if not ++ * 9AP. only 'noapic' mode affected. Might generate spurious ++ * interrupts, we log only the first one and count the ++ * rest silently. ++ * 10AP. not affected - worked around in hardware ++ * 11AP. Linux reads the APIC between writes to avoid this, as per ++ * the documentation. Make sure you preserve this as it affects ++ * the C stepping chips too. ++ * 12AP. not affected - worked around in hardware ++ * 13AP. not affected - worked around in hardware ++ * 14AP. we always deassert INIT during bootup ++ * 15AP. not affected - worked around in hardware ++ * 16AP. not affected - worked around in hardware ++ * 17AP. not affected - worked around in hardware ++ * 18AP. not affected - worked around in hardware ++ * 19AP. not affected - worked around in BIOS ++ * ++ * If this sounds worrying believe me these bugs are either ___RARE___, ++ * or are signal timing bugs worked around in hardware and there's ++ * about nothing of note with C stepping upwards. + */ + -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) -{ - int cpu; @@ -7229,43 +6321,35 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches -} -#endif /* Xen */ - --/* -- * this function sends a 'reschedule' IPI to another CPU. -- * it goes straight through and wastes no time serializing -- * anything. Worst case is that we lose a reschedule ... -- */ + /* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ - -void smp_send_reschedule(int cpu) --{ -- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); --} -- --/* -- * Structure and data for smp_call_function(). This is designed to minimise -- * static memory requirements. It also looks cleaner. -- */ --static DEFINE_SPINLOCK(call_lock); -- --struct call_data_struct { -- void (*func) (void *info); -- void *info; -- atomic_t started; -- atomic_t finished; -- int wait; --}; -- ++void xen_smp_send_reschedule(int cpu) + { ++ if (unlikely(cpu_is_offline(cpu))) { ++ WARN_ON(1); ++ return; ++ } + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); + } + +@@ -314,8 +135,6 @@ struct call_data_struct { + int wait; + }; + -static struct call_data_struct * call_data; - --void lock_ipi_call_lock(void) --{ -- spin_lock_irq(&call_lock); --} -- --void unlock_ipi_call_lock(void) --{ -- spin_unlock_irq(&call_lock); --} -- + void lock_ipi_call_lock(void) + { + spin_lock_irq(&call_lock); +@@ -326,26 +145,16 @@ void unlock_ipi_call_lock(void) + spin_unlock_irq(&call_lock); + } + -/* - * this function sends a 'generic call function' IPI to all other CPU - * of the system defined in the mask. 
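/*
 * The call_data handshake used throughout these hunks, reduced to its
 * core as a sketch (field and function names assumed, modelled on the
 * call_data_struct shown here): the initiator publishes func/info,
 * sends the IPI, then spins on two counters that each target bumps
 * before and after running the function.
 */
struct call_rendezvous {
	void (*func)(void *info);
	void *info;
	atomic_t started;	/* target copied func/info, about to call  */
	atomic_t finished;	/* target returned from func (wait==1 only) */
	int wait;
};

/* target side, in interrupt context */
static void rendezvous_target(struct call_rendezvous *d)
{
	void (*func)(void *) = d->func;	/* copy out before acking...        */
	void *info = d->info;
	int wait = d->wait;

	mb();
	atomic_inc(&d->started);	/* ...because after this ack the    */
	func(info);			/* initiator may free d (wait == 0) */
	if (wait) {
		mb();
		atomic_inc(&d->finished);
	}
}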
@@ -7273,8 +6357,12 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches -static int __smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) --{ -- struct call_data_struct data; ++static struct call_data_struct *call_data; ++ ++static void __smp_call_function(void (*func) (void *info), void *info, ++ int nonatomic, int wait) + { + struct call_data_struct data; - cpumask_t allbutself; - int cpus; - @@ -7283,69 +6371,85 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); -- -- if (!cpus) ++ int cpus = num_online_cpus() - 1; + + if (!cpus) - return 0; -- -- data.func = func; -- data.info = info; -- atomic_set(&data.started, 0); -- data.wait = wait; -- if (wait) -- atomic_set(&data.finished, 0); -- -- call_data = &data; ++ return; + + data.func = func; + data.info = info; +@@ -355,26 +164,21 @@ static int __smp_call_function_mask(cpum + atomic_set(&data.finished, 0); + + call_data = &data; - wmb(); -- ++ mb(); + - /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); -- -- /* Wait for response */ -- while (atomic_read(&data.started) != cpus) -- cpu_relax(); -- ++ /* Send a message to all other CPUs and wait for them to respond */ ++ send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + - if (!wait) - return 0; -- ++ if (wait) ++ while (atomic_read(&data.finished) != cpus) ++ cpu_relax(); ++} + - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -- + - return 0; -} --/** -- * smp_call_function_mask(): Run a function on a set of other CPUs. -- * @mask: The set of cpus to run on. Must not include the current cpu. -- * @func: The function to run. This must be fast and non-blocking. -- * @info: An arbitrary pointer to pass to the function. -- * @wait: If true, wait (atomically) until function has completed on other CPUs. -- * + /** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. +@@ -382,7 +186,7 @@ static int __smp_call_function_mask(cpum + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * - * Returns 0 on success, else a negative status code. -- * -- * If @wait is true, then returns once @func has returned; otherwise -- * it returns just before the target cpu calls @func. -- * -- * You must not call this function with disabled interrupts or from a -- * hardware interrupt handler or from a bottom half handler. -- */ ++ * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. +@@ -390,80 +194,60 @@ static int __smp_call_function_mask(cpum + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
+ */ -int smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) --{ ++int ++xen_smp_call_function_mask(cpumask_t mask, ++ void (*func)(void *), void *info, ++ int wait) + { - int ret; -- -- /* Can deadlock when called with interrupts disabled */ -- WARN_ON(irqs_disabled()); -- -- spin_lock(&call_lock); ++ struct call_data_struct data; ++ cpumask_t allbutself; ++ int cpus; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + ++ /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); - ret = __smp_call_function_mask(mask, func, info, wait); - spin_unlock(&call_lock); - return ret; -} -EXPORT_SYMBOL(smp_call_function_mask); -- + -/* - * smp_call_function_single - Run a function on a specific CPU - * @func: The function to run. This must be fast and non-blocking. @@ -7364,25 +6468,39 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches -{ - /* prevent preemption and reschedule on another processor */ - int ret, me = get_cpu(); -- ++ allbutself = cpu_online_map; ++ cpu_clear(smp_processor_id(), allbutself); + - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); -- ++ cpus_and(mask, mask, allbutself); ++ cpus = cpus_weight(mask); + - if (cpu == me) { - local_irq_disable(); - func(info); - local_irq_enable(); - put_cpu(); -- return 0; -- } -- ++ if (!cpus) { ++ spin_unlock(&call_lock); + return 0; + } + - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); -- ++ data.func = func; ++ data.info = info; ++ atomic_set(&data.started, 0); ++ data.wait = wait; ++ if (wait) ++ atomic_set(&data.finished, 0); + - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); -- ++ call_data = &data; ++ wmb(); + -/* - * smp_call_function - run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. @@ -7402,96 +6520,126 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); --} ++ /* Send a message to other CPUs */ ++ if (cpus_equal(mask, allbutself) && ++ cpus_equal(cpu_online_map, cpu_callout_map)) ++ send_IPI_allbutself(CALL_FUNCTION_VECTOR); ++ else ++ send_IPI_mask(mask, CALL_FUNCTION_VECTOR); ++ ++ /* Wait for response */ ++ while (atomic_read(&data.started) != cpus) ++ cpu_relax(); ++ ++ if (wait) ++ while (atomic_read(&data.finished) != cpus) ++ cpu_relax(); ++ spin_unlock(&call_lock); ++ ++ return 0; + } -EXPORT_SYMBOL(smp_call_function); -- --static void stop_this_cpu(void *dummy) --{ -- local_irq_disable(); -- /* -- * Remove this CPU: -- */ -- cpu_clear(smp_processor_id(), cpu_online_map); -- disable_all_local_evtchn(); + + static void stop_this_cpu(void *dummy) + { +@@ -473,24 +257,24 @@ static void stop_this_cpu(void *dummy) + */ + cpu_clear(smp_processor_id(), cpu_online_map); + disable_all_local_evtchn(); - for (;;) - halt(); --} -- ++ if (hlt_works(smp_processor_id())) ++ for (;;) halt(); ++ for (;;); + } + -void smp_send_stop(void) --{ -- int nolock; -- unsigned long flags; -- ++/* ++ * this function calls the 'stop' function on all other CPUs in the system. 
++ */ ++ ++void xen_smp_send_stop(void) + { + int nolock; + unsigned long flags; + -#ifndef CONFIG_XEN - if (reboot_force) - return; -#endif - -- /* Don't deadlock on the call lock in panic */ -- nolock = !spin_trylock(&call_lock); -- local_irq_save(flags); + /* Don't deadlock on the call lock in panic */ + nolock = !spin_trylock(&call_lock); + local_irq_save(flags); - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); -- if (!nolock) -- spin_unlock(&call_lock); -- disable_all_local_evtchn(); -- local_irq_restore(flags); --} -- --/* -- * Reschedule call back. Nothing to do, -- * all the work is done automatically when -- * we return from the interrupt. -- */ ++ __smp_call_function(stop_this_cpu, NULL, 0, 0); + if (!nolock) + spin_unlock(&call_lock); + disable_all_local_evtchn(); +@@ -502,34 +286,22 @@ void smp_send_stop(void) + * all the work is done automatically when + * we return from the interrupt. + */ -#ifndef CONFIG_XEN -asmlinkage void smp_reschedule_interrupt(void) -#else -asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx) -#endif --{ ++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) + { -#ifndef CONFIG_XEN - ack_APIC_irq(); -#endif -- add_pda(irq_resched_count, 1); ++#ifdef CONFIG_X86_32 ++ __get_cpu_var(irq_stat).irq_resched_count++; ++#else + add_pda(irq_resched_count, 1); -#ifdef CONFIG_XEN - return IRQ_HANDLED; --#endif --} -- + #endif ++ return IRQ_HANDLED; + } + -#ifndef CONFIG_XEN -asmlinkage void smp_call_function_interrupt(void) -#else -asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx) -#endif --{ -- void (*func) (void *info) = call_data->func; -- void *info = call_data->info; -- int wait = call_data->wait; -- ++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) + { + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + -#ifndef CONFIG_XEN - ack_APIC_irq(); -#endif -- /* -- * Notify initiating CPU that I've grabbed the data and am -- * about to execute the function -- */ -- mb(); -- atomic_inc(&call_data->started); -- /* -- * At this point the info structure may be out of scope unless wait==1 -- */ + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function +@@ -539,16 +311,17 @@ asmlinkage irqreturn_t smp_call_function + /* + * At this point the info structure may be out of scope unless wait==1 + */ - exit_idle(); - irq_enter(); -- (*func)(info); -- add_pda(irq_call_count, 1); + (*func)(info); ++#ifdef CONFIG_X86_32 ++ __get_cpu_var(irq_stat).irq_call_count++; ++#else + add_pda(irq_call_count, 1); - irq_exit(); -- if (wait) { -- mb(); -- atomic_inc(&call_data->finished); -- } ++#endif ++ + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } -#ifdef CONFIG_XEN -- return IRQ_HANDLED; ++ + return IRQ_HANDLED; -#endif --} + } --- head.orig/arch/x86/kernel/time-xen.c 2012-02-10 13:27:37.000000000 +0100 +++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:27:59.000000000 +0100 @@ -709,8 +709,6 @@ static void init_missing_ticks_accountin @@ -12556,7 +11704,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches arch_fix_phys_package_id(pr->id, object.integer.value); return 0; ---- head.orig/drivers/firmware/Kconfig 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/firmware/Kconfig 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/firmware/Kconfig 2013-06-05 13:51:23.000000000 +0200 @@ -19,7 +19,7 @@ config EDD @@ -12620,7 +11768,7 @@ Automatically created from 
"patches.kernel.org/patch-2.6.26" by xen-port-patches EXPORT_SYMBOL(pci_disable_msix); /** ---- head.orig/drivers/video/Kconfig 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/video/Kconfig 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/video/Kconfig 2013-05-23 17:16:58.000000000 +0200 @@ -2262,7 +2262,7 @@ config FB_VIRTUAL @@ -13668,7 +12816,7 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches }; static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma) ---- head.orig/drivers/xen/features.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/xen/features.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/xen/features.c 2011-01-31 18:07:35.000000000 +0100 @@ -9,14 +9,21 @@ #include <linux/cache.h> diff --git a/patches.xen/xen3-patch-2.6.32 b/patches.xen/xen3-patch-2.6.32 index 8dfc6e0046..561098abd1 100644 --- a/patches.xen/xen3-patch-2.6.32 +++ b/patches.xen/xen3-patch-2.6.32 @@ -211,9 +211,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches - .quad sys_perf_counter_open + .quad sys_perf_event_open ia32_syscall_end: ---- head.orig/arch/x86/include/asm/irq.h 2013-06-20 14:56:42.000000000 +0200 -+++ head/arch/x86/include/asm/irq.h 2011-04-13 13:55:46.000000000 +0200 -@@ -17,7 +17,8 @@ struct ctl_table; +--- head.orig/arch/x86/include/asm/irq.h 2013-07-02 09:20:53.000000000 +0200 ++++ head/arch/x86/include/asm/irq.h 2013-07-02 09:40:47.000000000 +0200 +@@ -41,7 +41,8 @@ extern int vector_used_by_percpu_irq(uns extern void init_ISA_irqs(void); @@ -223,7 +223,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif ---- head.orig/arch/x86/include/asm/uv/uv_hub.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/include/asm/uv/uv_hub.h 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/include/asm/uv/uv_hub.h 2011-02-01 14:54:13.000000000 +0100 @@ -11,7 +11,7 @@ #ifndef _ASM_X86_UV_UV_HUB_H @@ -725,9 +725,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches endif disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o i8237.o i8253.o \ ---- head.orig/arch/x86/kernel/apic/hw_nmi.c 2013-06-20 14:56:42.000000000 +0200 -+++ head/arch/x86/kernel/apic/hw_nmi.c 2011-04-13 13:55:59.000000000 +0200 -@@ -26,6 +26,10 @@ u64 hw_nmi_get_sample_period(int watchdo +--- head.orig/arch/x86/kernel/apic/hw_nmi.c 2013-07-02 09:20:53.000000000 +0200 ++++ head/arch/x86/kernel/apic/hw_nmi.c 2013-07-02 09:41:16.000000000 +0200 +@@ -27,6 +27,10 @@ u64 hw_nmi_get_sample_period(int watchdo #endif #ifdef arch_trigger_all_cpu_backtrace @@ -738,7 +738,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches /* For reliability, we're prepared to waste bits here. 
*/ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -@@ -46,7 +50,11 @@ void arch_trigger_all_cpu_backtrace(void +@@ -47,7 +51,11 @@ void arch_trigger_all_cpu_backtrace(void cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); @@ -1775,7 +1775,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #endif /* Make sure %fs and %gs are initialized properly in idle threads */ ---- head.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/kernel/cpu/mcheck/mce-inject.c 2012-10-23 15:27:54.000000000 +0200 @@ -152,7 +152,7 @@ static void raise_mce(struct mce *m) if (context == MCJ_CTX_RANDOM) @@ -4030,7 +4030,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #endif /* CONFIG_X86_32 */ #ifdef CONFIG_XEN ---- head.orig/arch/x86/platform/sfi/sfi.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/platform/sfi/sfi.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/platform/sfi/sfi.c 2011-02-02 08:45:00.000000000 +0100 @@ -32,6 +32,7 @@ #include <asm/apic.h> @@ -5768,7 +5768,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches BUG(); } else { xen_l1_entry_update(ptep, entry); ---- head.orig/arch/x86/mm/physaddr.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/arch/x86/mm/physaddr.c 2013-07-02 09:20:53.000000000 +0200 +++ head/arch/x86/mm/physaddr.c 2013-03-21 15:12:19.000000000 +0100 @@ -9,6 +9,10 @@ @@ -6030,7 +6030,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches pr->cdev = thermal_cooling_device_register("Processor", device, &processor_cooling_ops); ---- head.orig/drivers/char/agp/agp.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/agp.h 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/agp.h 2011-02-01 14:54:13.000000000 +0100 @@ -31,6 +31,10 @@ @@ -6043,7 +6043,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #define PFX "agpgart: " //#define AGP_DEBUG 1 ---- head.orig/drivers/char/agp/amd-k7-agp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/amd-k7-agp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/amd-k7-agp.c 2011-02-17 10:18:42.000000000 +0100 @@ -142,7 +142,7 @@ static int amd_create_gatt_table(struct @@ -6063,7 +6063,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches page_dir.remapped+GET_PAGE_DIR_OFF(addr)); readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. 
*/ } ---- head.orig/drivers/char/agp/amd64-agp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/amd64-agp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/amd64-agp.c 2013-01-14 13:52:00.000000000 +0100 @@ -178,7 +178,7 @@ static const struct aper_size_info_32 am @@ -6083,7 +6083,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches amd64_aperture_sizes[bridge->aperture_size_idx].size); agp_remove_bridge(bridge); agp_put_bridge(bridge); ---- head.orig/drivers/char/agp/ati-agp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/ati-agp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/ati-agp.c 2011-02-01 14:54:13.000000000 +0100 @@ -361,7 +361,7 @@ static int ati_create_gatt_table(struct @@ -6103,7 +6103,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches page_dir.remapped+GET_PAGE_DIR_OFF(addr)); readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */ } ---- head.orig/drivers/char/agp/efficeon-agp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/efficeon-agp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/efficeon-agp.c 2011-02-01 14:54:13.000000000 +0100 @@ -227,7 +227,7 @@ static int efficeon_create_gatt_table(st @@ -6145,7 +6145,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #define USE_PCI_DMA_API 1 #else #define USE_PCI_DMA_API 0 ---- head.orig/drivers/char/agp/sworks-agp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/char/agp/sworks-agp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/char/agp/sworks-agp.c 2011-02-01 14:54:13.000000000 +0100 @@ -155,7 +155,7 @@ static int serverworks_create_gatt_table /* Create a fake scratch directory */ @@ -6185,7 +6185,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #endif #endif /* IOATDMA_H */ ---- head.orig/drivers/dma/ioat/dma_v2.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/dma/ioat/dma_v2.h 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/dma/ioat/dma_v2.h 2013-05-23 17:33:47.000000000 +0200 @@ -178,4 +178,10 @@ int ioat2_quiesce(struct ioat_chan_commo int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); @@ -6198,7 +6198,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches +#endif + #endif /* IOATDMA_V2_H */ ---- head.orig/drivers/dma/ioat/hw.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/dma/ioat/hw.h 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/dma/ioat/hw.h 2013-05-23 17:33:52.000000000 +0200 @@ -64,7 +64,11 @@ #define IOAT_VER_3_3 0x33 /* Version 3.3 */ @@ -6212,9 +6212,9 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches struct ioat_dma_descriptor { uint32_t size; ---- head.orig/drivers/gpu/drm/radeon/radeon_device.c 2013-06-20 14:56:42.000000000 +0200 -+++ head/drivers/gpu/drm/radeon/radeon_device.c 2013-06-04 13:57:29.000000000 +0200 -@@ -590,6 +590,18 @@ int radeon_dummy_page_init(struct radeon +--- head.orig/drivers/gpu/drm/radeon/radeon_device.c 2013-07-02 09:20:53.000000000 +0200 ++++ head/drivers/gpu/drm/radeon/radeon_device.c 2013-07-02 09:41:04.000000000 +0200 +@@ -585,6 +585,18 @@ int radeon_dummy_page_init(struct radeon rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); if (rdev->dummy_page.page == NULL) return -ENOMEM; @@ -6484,7 +6484,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by 
xen-port-patches nr++; for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { ---- head.orig/drivers/sfi/sfi_core.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/sfi/sfi_core.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/sfi/sfi_core.c 2011-02-01 14:54:13.000000000 +0100 @@ -486,6 +486,11 @@ void __init sfi_init(void) if (!acpi_disabled) @@ -6788,7 +6788,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches if (err) { pr_err("Xen suspend can't run on CPU0 (%d)\n", err); goto fail; ---- head.orig/drivers/xen/dbgp.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/drivers/xen/dbgp.c 2013-07-02 09:20:53.000000000 +0200 +++ head/drivers/xen/dbgp.c 2012-05-02 15:15:24.000000000 +0200 @@ -2,7 +2,11 @@ #include <linux/usb.h> @@ -7048,7 +7048,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches #include <linux/proc_fs.h> #include <linux/notifier.h> #include <linux/mutex.h> ---- head.orig/fs/proc/kcore.c 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/fs/proc/kcore.c 2013-07-02 09:20:53.000000000 +0200 +++ head/fs/proc/kcore.c 2013-05-23 17:32:56.000000000 +0200 @@ -134,7 +134,7 @@ static void __kcore_update_ram(struct li } @@ -7071,7 +7071,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches ent->type = KCORE_RAM; list_add(&ent->list, &head); __kcore_update_ram(&head); ---- head.orig/include/linux/nmi.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/include/linux/nmi.h 2013-07-02 09:20:53.000000000 +0200 +++ head/include/linux/nmi.h 2011-02-16 09:06:03.000000000 +0100 @@ -18,6 +18,9 @@ #include <asm/nmi.h> @@ -7083,7 +7083,7 @@ Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); ---- head.orig/include/linux/usb/ehci_def.h 2013-06-20 14:56:42.000000000 +0200 +--- head.orig/include/linux/usb/ehci_def.h 2013-07-02 09:20:53.000000000 +0200 +++ head/include/linux/usb/ehci_def.h 2012-10-23 15:27:11.000000000 +0200 @@ -223,7 +223,7 @@ extern struct console early_dbgp_console diff --git a/patches.xen/xen3-patch-3.10-rc6 b/patches.xen/xen3-patch-3.10 index 90b2ca8cf9..6987b887ee 100644 --- a/patches.xen/xen3-patch-3.10-rc6 +++ b/patches.xen/xen3-patch-3.10 @@ -1,10 +1,10 @@ From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org> -Subject: Linux: 3.10-rc6 -Patch-mainline: 3.10-rc6 +Subject: Linux: 3.10 +Patch-mainline: 3.10 - This patch contains the differences between 3.9 and 3.10-rc6. + This patch contains the differences between 3.9 and 3.10. 
-Automatically created from "patch-3.10-rc4" by xen-port-patches.py +Automatically created from "patch-3.10" by xen-port-patches.py Acked-by: jbeulich@suse.com --- head.orig/arch/x86/include/asm/hypervisor.h 2013-05-23 17:21:57.000000000 +0200 @@ -258,7 +258,7 @@ Acked-by: jbeulich@suse.com #endif --- head.orig/arch/x86/kernel/process-xen.c 2013-03-25 09:13:57.000000000 +0100 -+++ head/arch/x86/kernel/process-xen.c 2013-05-27 17:24:56.000000000 +0200 ++++ head/arch/x86/kernel/process-xen.c 2013-07-02 10:03:05.000000000 +0200 @@ -125,30 +125,6 @@ void exit_thread(void) drop_fpu(me); } @@ -300,7 +300,7 @@ Acked-by: jbeulich@suse.com #ifndef CONFIG_SMP static inline void play_dead(void) -@@ -290,13 +268,7 @@ void exit_idle(void) +@@ -290,87 +268,40 @@ void exit_idle(void) } #endif @@ -311,14 +311,16 @@ Acked-by: jbeulich@suse.com - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -+void arch_cpu_idle_prepare(void) ++void arch_cpu_idle_enter(void) { - /* - * If we're the non-boot CPU, nothing set the stack canary up -@@ -306,71 +278,42 @@ void cpu_idle(void) - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); +- /* +- * If we're the non-boot CPU, nothing set the stack canary up +- * for us. CPU0 already has it initialized but no harm in +- * doing it again. This is a good place for updating it, as +- * we wont ever return from this function (so the invalid +- * canaries already on the stack wont ever trigger). +- */ +- boot_init_stack_canary(); - current_thread_info()->status |= TS_POLLING; - - while (1) { @@ -345,12 +347,9 @@ Acked-by: jbeulich@suse.com - - /* enter_idle() needs rcu for notifiers */ - rcu_idle_enter(); -+} - +- - if (cpuidle_idle_call()) - xen_idle(); -+void arch_cpu_idle_enter(void) -+{ + local_touch_nmi(); + enter_idle(); +} @@ -412,7 +411,7 @@ Acked-by: jbeulich@suse.com trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_APM_MODULE -@@ -400,20 +343,6 @@ void stop_this_cpu(void *dummy) +@@ -400,20 +331,6 @@ void stop_this_cpu(void *dummy) halt(); } @@ -433,7 +432,7 @@ Acked-by: jbeulich@suse.com #ifndef CONFIG_XEN bool amd_e400_c1e_detected; EXPORT_SYMBOL(amd_e400_c1e_detected); -@@ -433,9 +362,6 @@ void amd_e400_remove_cpu(int cpu) +@@ -433,9 +350,6 @@ void amd_e400_remove_cpu(int cpu) */ static void amd_e400_idle(void) { @@ -443,7 +442,7 @@ Acked-by: jbeulich@suse.com if (!amd_e400_c1e_detected) { u32 lo, hi; -@@ -481,13 +407,13 @@ void __cpuinit select_idle_routine(const +@@ -481,13 +395,13 @@ void __cpuinit select_idle_routine(const { #ifndef CONFIG_XEN #ifdef CONFIG_SMP @@ -460,7 +459,7 @@ Acked-by: jbeulich@suse.com /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); x86_idle = amd_e400_idle; -@@ -512,8 +438,8 @@ static int __init idle_setup(char *str) +@@ -512,8 +426,8 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { pr_info("using polling idle threads\n"); @@ -1123,7 +1122,7 @@ Acked-by: jbeulich@suse.com return -ENODEV; } return 0; ---- head.orig/drivers/net/caif/Kconfig 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/net/caif/Kconfig 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/net/caif/Kconfig 2013-06-05 14:20:37.000000000 +0200 @@ -43,7 +43,7 @@ config CAIF_HSI @@ -1414,7 +1413,7 @@ Acked-by: jbeulich@suse.com + if (dev->msix_cap) + msix_set_enable(dev, 0); } ---- head.orig/drivers/platform/x86/Kconfig 2013-06-20 14:56:41.000000000 +0200 +--- 
head.orig/drivers/platform/x86/Kconfig 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/platform/x86/Kconfig 2013-05-27 16:24:27.000000000 +0200 @@ -783,7 +783,7 @@ config APPLE_GMUX @@ -1425,7 +1424,7 @@ Acked-by: jbeulich@suse.com ---help--- This driver provides support for the pvpanic device. pvpanic is a paravirtualized device provided by QEMU; it lets a virtual machine ---- head.orig/drivers/scsi/lpfc/lpfc_init.c 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/scsi/lpfc/lpfc_init.c 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/scsi/lpfc/lpfc_init.c 2013-06-05 14:51:48.000000000 +0200 @@ -8414,9 +8414,6 @@ lpfc_sli4_set_affinity(struct lpfc_hba * int i, idx, saved_chann, used_chann, cpu, phys_id; @@ -1683,7 +1682,7 @@ Acked-by: jbeulich@suse.com } #ifdef MODULE ---- head.orig/drivers/xen/netback/netback.c 2013-06-13 12:14:42.000000000 +0200 +--- head.orig/drivers/xen/netback/netback.c 2013-07-02 09:43:02.000000000 +0200 +++ head/drivers/xen/netback/netback.c 2013-06-13 12:15:54.000000000 +0200 @@ -1714,6 +1714,8 @@ static void net_tx_action(unsigned long continue; @@ -2084,9 +2083,15 @@ Acked-by: jbeulich@suse.com } return pollflags; ---- head.orig/include/xen/net-util.h 2013-05-24 14:47:36.000000000 +0200 -+++ head/include/xen/net-util.h 2013-05-24 14:43:10.000000000 +0200 -@@ -11,7 +11,6 @@ static inline int skb_checksum_setup(str +--- head.orig/include/xen/net-util.h 2013-07-02 09:55:33.000000000 +0200 ++++ head/include/xen/net-util.h 2013-07-02 09:59:59.000000000 +0200 +@@ -6,13 +6,11 @@ + #include <linux/tcp.h> + #include <linux/udp.h> + #include <net/ip.h> +-#include <net/flow_keys.h> + + static inline int skb_checksum_setup(struct sk_buff *skb, unsigned long *fixup_counter) { struct iphdr *iph = (void *)skb->data; @@ -2094,7 +2099,7 @@ Acked-by: jbeulich@suse.com __be16 *csum = NULL; int err = -EPROTO; -@@ -33,21 +32,20 @@ static inline int skb_checksum_setup(str +@@ -35,22 +33,20 @@ static inline int skb_checksum_setup(str if (skb->protocol != htons(ETH_P_IP)) goto out; @@ -2102,6 +2107,7 @@ Acked-by: jbeulich@suse.com - if (th >= skb_tail_pointer(skb)) - goto out; - +- skb_set_transport_header(skb, 4 * iph->ihl); - skb->csum_start = th - skb->head; switch (iph->protocol) { case IPPROTO_TCP: @@ -2124,7 +2130,7 @@ Acked-by: jbeulich@suse.com break; default: net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet," -@@ -56,9 +54,6 @@ static inline int skb_checksum_setup(str +@@ -59,9 +55,6 @@ static inline int skb_checksum_setup(str goto out; } @@ -2134,6 +2140,22 @@ Acked-by: jbeulich@suse.com if (csum) { *csum = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, +@@ -69,14 +62,7 @@ static inline int skb_checksum_setup(str + skb->ip_summed = CHECKSUM_PARTIAL; + } + +- if (!skb_transport_header_was_set(skb)) { +- struct flow_keys keys; +- +- if (skb_flow_dissect(skb, &keys)) +- skb_set_transport_header(skb, keys.thoff); +- else +- skb_reset_transport_header(skb); +- } ++ skb_probe_transport_header(skb, 0); + + err = 0; + out: --- head.orig/include/xen/xen_proc.h 2007-06-12 13:14:19.000000000 +0200 +++ head/include/xen/xen_proc.h 2013-05-27 17:41:43.000000000 +0200 @@ -4,9 +4,9 @@ diff --git a/patches.xen/xen3-patch-3.2 b/patches.xen/xen3-patch-3.2 index 00a970ea15..20c35e9155 100644 --- a/patches.xen/xen3-patch-3.2 +++ b/patches.xen/xen3-patch-3.2 @@ -3141,9 +3141,10 @@ Acked-by: jbeulich@suse.com + spinning->ticket == ticket) { #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING - token = spinning->irq_count +- < per_cpu(_irq_count, cpu) +- ? 
ticket_drop(spinning, token, cpu) : -2; + ticket = spinning->irq_count - < per_cpu(_irq_count, cpu) -- ? ticket_drop(spinning, token, cpu) : -2; ++ < per_cpu(_irq_count, cpu) + ? ticket_drop(spinning, ticket, cpu) : -2; #endif break; diff --git a/patches.xen/xen3-patch-3.3 b/patches.xen/xen3-patch-3.3 index 7ce1dfb4c9..cacf2172a4 100644 --- a/patches.xen/xen3-patch-3.3 +++ b/patches.xen/xen3-patch-3.3 @@ -3966,7 +3966,7 @@ Acked-by: jbeulich@suse.com netif_t *netif = netdev_priv(dev); --- head.orig/drivers/xen/netback/netback.c 2013-06-13 08:56:09.000000000 +0200 -+++ head/drivers/xen/netback/netback.c 2013-06-13 14:28:46.000000000 +0200 ++++ head/drivers/xen/netback/netback.c 2013-06-24 12:44:29.000000000 +0200 @@ -51,6 +51,12 @@ struct netbk_rx_meta { u8 copy:1; }; @@ -4162,15 +4162,14 @@ Acked-by: jbeulich@suse.com GNTMAP_host_map | GNTMAP_readonly, txp->gref, netif->domid); -@@ -1131,14 +1216,18 @@ static gnttab_map_grant_ref_t *netbk_get +@@ -1131,14 +1216,17 @@ static gnttab_map_grant_ref_t *netbk_get frag_set_pending_idx(&frags[i], pending_idx); } - return mop; -+ if ((void *)gop->map > (void *)gop->copy) -+ net_warn_ratelimited("%s: Grant op overrun (%p > %p)\n", -+ netdev_name(netif->dev), -+ gop->map, gop->copy); ++ if ((void *)gop->map > (void *)gop->copy && net_ratelimit()) ++ netdev_warn(netif->dev, "Grant op overrun (%p > %p)\n", ++ gop->map, gop->copy); } -static int netbk_tx_check_mop(struct sk_buff *skb, @@ -4266,9 +4265,9 @@ Acked-by: jbeulich@suse.com - *mopp = mop + 1; + gop->map = mop; + gop->copy = cop; -+ if ((void *)mop > (void *)cop) -+ net_warn_ratelimited("%s: Grant op check overrun (%p > %p)\n", -+ netdev_name(netif->dev), mop, cop); ++ if ((void *)mop > (void *)cop && net_ratelimit()) ++ netdev_warn(netif->dev, "Grant op check overrun (%p > %p)\n", ++ mop, cop); return err; } diff --git a/patches.xen/xen3-patch-3.7 b/patches.xen/xen3-patch-3.7 index b3578fd1f2..4c9c3c8dcb 100644 --- a/patches.xen/xen3-patch-3.7 +++ b/patches.xen/xen3-patch-3.7 @@ -7,9 +7,9 @@ Patch-mainline: 3.7 Automatically created from "patch-3.7" by xen-port-patches.py Acked-by: jbeulich@suse.com ---- head.orig/arch/arm/Kconfig 2013-06-20 14:56:41.000000000 +0200 -+++ head/arch/arm/Kconfig 2013-05-23 17:55:07.000000000 +0200 -@@ -1783,9 +1783,9 @@ config CC_STACKPROTECTOR +--- head.orig/arch/arm/Kconfig 2013-07-02 09:20:52.000000000 +0200 ++++ head/arch/arm/Kconfig 2013-07-02 09:43:11.000000000 +0200 +@@ -1807,9 +1807,9 @@ config CC_STACKPROTECTOR config XEN_DOM0 def_bool y @@ -21,7 +21,7 @@ Acked-by: jbeulich@suse.com bool "Xen guest support on ARM (EXPERIMENTAL)" depends on ARM && AEABI && OF depends on CPU_V7 && !CPU_V6 ---- head.orig/arch/arm/Makefile 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/arch/arm/Makefile 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/arm/Makefile 2013-05-23 17:55:09.000000000 +0200 @@ -248,7 +248,7 @@ endif core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ @@ -32,7 +32,7 @@ Acked-by: jbeulich@suse.com core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ # If we have a machine-specific directory, then include it in the build. 
---- head.orig/arch/arm/include/asm/xen/interface.h 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/arch/arm/include/asm/xen/interface.h 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/arm/include/asm/xen/interface.h 2012-10-31 11:29:25.000000000 +0100 @@ -11,14 +11,14 @@ @@ -2640,7 +2640,7 @@ Acked-by: jbeulich@suse.com }; static int adjust_tjmax(struct platform_data *c, u32 id, struct device *dev) ---- head.orig/drivers/remoteproc/Kconfig 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/remoteproc/Kconfig 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/remoteproc/Kconfig 2012-11-02 12:55:30.000000000 +0100 @@ -1,4 +1,5 @@ menu "Remoteproc drivers" @@ -2923,7 +2923,7 @@ Acked-by: jbeulich@suse.com return rc; } ---- head.orig/drivers/xen/fallback.c 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/xen/fallback.c 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/xen/fallback.c 2013-04-03 10:48:55.000000000 +0200 @@ -3,7 +3,16 @@ #include <linux/bug.h> @@ -2970,8 +2970,8 @@ Acked-by: jbeulich@suse.com /* This flag prevents this VM area being copied on a fork(). A better * behaviour might be to explicitly carry out the appropriate mappings ---- head.orig/drivers/xen/netback/netback.c 2013-06-13 14:28:46.000000000 +0200 -+++ head/drivers/xen/netback/netback.c 2013-06-13 12:14:42.000000000 +0200 +--- head.orig/drivers/xen/netback/netback.c 2013-06-24 12:44:29.000000000 +0200 ++++ head/drivers/xen/netback/netback.c 2013-07-02 09:43:02.000000000 +0200 @@ -36,6 +36,7 @@ #include "common.h" @@ -3163,6 +3163,33 @@ Acked-by: jbeulich@suse.com RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); irq = netif->irq; +@@ -1216,9 +1256,10 @@ void netbk_get_requests(netif_t *netif, + frag_set_pending_idx(&frags[i], pending_idx); + } + +- if ((void *)gop->map > (void *)gop->copy && net_ratelimit()) +- netdev_warn(netif->dev, "Grant op overrun (%p > %p)\n", +- gop->map, gop->copy); ++ if ((void *)gop->map > (void *)gop->copy) ++ net_warn_ratelimited("%s: Grant op overrun (%p > %p)\n", ++ netdev_name(netif->dev), ++ gop->map, gop->copy); + } + + static int netbk_tx_check_gop(struct sk_buff *skb, +@@ -1317,9 +1358,9 @@ static int netbk_tx_check_gop(struct sk_ + + gop->map = mop; + gop->copy = cop; +- if ((void *)mop > (void *)cop && net_ratelimit()) +- netdev_warn(netif->dev, "Grant op check overrun (%p > %p)\n", +- mop, cop); ++ if ((void *)mop > (void *)cop) ++ net_warn_ratelimited("%s: Grant op check overrun (%p > %p)\n", ++ netdev_name(netif->dev), mop, cop); + return err; + } + --- head.orig/drivers/xen/netfront/netfront.c 2013-05-10 14:37:51.000000000 +0200 +++ head/drivers/xen/netfront/netfront.c 2013-05-10 14:38:37.000000000 +0200 @@ -572,7 +572,7 @@ static void backend_changed(struct xenbu @@ -3581,7 +3608,7 @@ Acked-by: jbeulich@suse.com err = xenbus_scanf(XBT_NIL, "control", "platform-feature-xs_reset_watches", "%d", &supported); ---- head.orig/include/uapi/xen/evtchn.h 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/include/uapi/xen/evtchn.h 2013-07-02 09:20:52.000000000 +0200 +++ head/include/uapi/xen/evtchn.h 2012-10-31 16:44:01.000000000 +0100 @@ -1,88 +1 @@ -/****************************************************************************** diff --git a/patches.xen/xen3-patch-3.9 b/patches.xen/xen3-patch-3.9 index 7c5bafc313..faf34921a9 100644 --- a/patches.xen/xen3-patch-3.9 +++ b/patches.xen/xen3-patch-3.9 @@ -44,7 +44,7 @@ Acked-by: jbeulich@suse.com ALIGN GLOBAL(stub32_clone) ---- head.orig/arch/x86/include/asm/efi.h 2013-06-20 14:56:41.000000000 +0200 +--- 
head.orig/arch/x86/include/asm/efi.h 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/x86/include/asm/efi.h 2013-06-20 15:32:48.000000000 +0200 @@ -106,7 +106,11 @@ extern void efi_memory_uc(u64 addr, unsi @@ -75,7 +75,7 @@ Acked-by: jbeulich@suse.com static inline unsigned long __phys_addr_nodebug(unsigned long x) { ---- head.orig/arch/x86/include/asm/processor.h 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/arch/x86/include/asm/processor.h 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/x86/include/asm/processor.h 2013-05-23 17:57:56.000000000 +0200 @@ -974,7 +974,7 @@ extern unsigned long arch_align_stack(un extern void free_init_pages(char *what, unsigned long begin, unsigned long end); @@ -1629,7 +1629,7 @@ Acked-by: jbeulich@suse.com - start_kernel(); } ---- head.orig/arch/x86/kernel/head_64.S 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/arch/x86/kernel/head_64.S 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/x86/kernel/head_64.S 2013-06-04 13:59:45.000000000 +0200 @@ -465,7 +465,7 @@ NEXT_PAGE(early_dynamic_pgts) @@ -1679,9 +1679,9 @@ Acked-by: jbeulich@suse.com { struct thread_struct *t = ¤t->thread; unsigned int old = t->iopl >> 12; ---- head.orig/arch/x86/kernel/process.c 2013-06-20 14:56:41.000000000 +0200 -+++ head/arch/x86/kernel/process.c 2013-05-23 17:57:32.000000000 +0200 -@@ -329,7 +329,7 @@ void default_idle(void) +--- head.orig/arch/x86/kernel/process.c 2013-07-02 09:20:52.000000000 +0200 ++++ head/arch/x86/kernel/process.c 2013-07-02 09:49:24.000000000 +0200 +@@ -317,7 +317,7 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif @@ -4534,7 +4534,7 @@ Acked-by: jbeulich@suse.com void __meminit vmemmap_populate_print_last(void) { if (p_start) { ---- head.orig/arch/x86/mm/mm_internal.h 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/arch/x86/mm/mm_internal.h 2013-07-02 09:20:52.000000000 +0200 +++ head/arch/x86/mm/mm_internal.h 2013-04-03 17:43:19.000000000 +0200 @@ -14,6 +14,8 @@ unsigned long kernel_physical_mapping_in unsigned long page_size_mask); @@ -5010,7 +5010,7 @@ Acked-by: jbeulich@suse.com { 0x36, ANY, 100000 }, /* Atom Cedar Trail/Cedarview (N2xxx, D2xxx) */ }; ---- head.orig/drivers/misc/vmw_vmci/Kconfig 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/misc/vmw_vmci/Kconfig 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/misc/vmw_vmci/Kconfig 2013-06-04 13:59:32.000000000 +0200 @@ -4,7 +4,7 @@ @@ -5056,7 +5056,7 @@ Acked-by: jbeulich@suse.com void pci_msi_shutdown(struct pci_dev *dev) { int pirq, pos; ---- head.orig/drivers/thermal/Kconfig 2013-06-20 14:56:41.000000000 +0200 +--- head.orig/drivers/thermal/Kconfig 2013-07-02 09:20:52.000000000 +0200 +++ head/drivers/thermal/Kconfig 2013-05-23 17:57:14.000000000 +0200 @@ -162,7 +162,7 @@ config DB8500_CPUFREQ_COOLING config INTEL_POWERCLAMP @@ -5298,32 +5298,22 @@ Acked-by: jbeulich@suse.com } u; } __attribute__((__packed__)); #endif ---- head.orig/kernel/context_tracking.c 2013-06-20 14:56:41.000000000 +0200 -+++ head/kernel/context_tracking.c 2013-06-05 15:05:02.000000000 +0200 -@@ -15,6 +15,7 @@ - */ - - #include <linux/context_tracking.h> -+#include <linux/kconfig.h> - #include <linux/rcupdate.h> - #include <linux/sched.h> - #include <linux/hardirq.h> -@@ -103,6 +103,10 @@ void user_exit(void) +--- head.orig/kernel/context_tracking.c 2013-07-02 09:20:52.000000000 +0200 ++++ head/kernel/context_tracking.c 2013-07-02 09:49:19.000000000 +0200 +@@ -142,6 +142,8 @@ void user_exit(void) local_irq_restore(flags); } -+#if IS_ENABLED(CONFIG_KVM) -+ -+#include 
<linux/kvm_host.h> ++#ifndef CONFIG_XEN + void guest_enter(void) { if (vtime_accounting_enabled()) -@@ -121,6 +125,7 @@ void guest_exit(void) +@@ -160,6 +162,7 @@ void guest_exit(void) } EXPORT_SYMBOL_GPL(guest_exit); -+#endif ++#endif /* CONFIG_XEN */ /** * context_tracking_task_switch - context switch the syscall callbacks @@ -5539,3 +5529,45 @@ Acked-by: jbeulich@suse.com mask = dma_get_seg_boundary(hwdev); offset_slots = -IO_TLB_SEGSIZE; +--- head.orig/include/xen/net-util.h 2013-05-24 14:47:36.000000000 +0200 ++++ head/include/xen/net-util.h 2013-07-02 09:55:33.000000000 +0200 +@@ -6,6 +6,7 @@ + #include <linux/tcp.h> + #include <linux/udp.h> + #include <net/ip.h> ++#include <net/flow_keys.h> + + static inline int skb_checksum_setup(struct sk_buff *skb, + unsigned long *fixup_counter) +@@ -15,6 +16,7 @@ static inline int skb_checksum_setup(str + __be16 *csum = NULL; + int err = -EPROTO; + ++ skb_reset_network_header(skb); + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (!skb_is_gso(skb)) +@@ -37,6 +39,7 @@ static inline int skb_checksum_setup(str + if (th >= skb_tail_pointer(skb)) + goto out; + ++ skb_set_transport_header(skb, 4 * iph->ihl); + skb->csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: +@@ -66,6 +69,15 @@ static inline int skb_checksum_setup(str + skb->ip_summed = CHECKSUM_PARTIAL; + } + ++ if (!skb_transport_header_was_set(skb)) { ++ struct flow_keys keys; ++ ++ if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_reset_transport_header(skb); ++ } ++ + err = 0; + out: + return err; diff --git a/series.conf b/series.conf index e00f331756..ea032e4c57 100644 --- a/series.conf +++ b/series.conf @@ -602,7 +602,7 @@ patches.xen/xen3-patch-3.7 patches.xen/xen3-patch-3.8 patches.xen/xen3-patch-3.9 - patches.xen/xen3-patch-3.10-rc6 + patches.xen/xen3-patch-3.10 # ports of other patches patches.xen/xen3-010-acpi_initrd_override_tables.patch |
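The include/xen/net-util.h hunks above tell a small story across patch levels: the 3.9 port open-codes transport-header discovery with skb_flow_dissect(), and the 3.10 refresh collapses that block to a single skb_probe_transport_header(skb, 0) call. The checksum seeding itself is unchanged throughout: *csum is loaded with the complement of csum_tcpudp_magic() over the IPv4 pseudo-header so the transport words can be folded in later. For readers who want that arithmetic spelled out, below is a minimal user-space sketch of the pseudo-header sum. It is illustrative only, not kernel code; the names csum_fold16 and pseudo_hdr_csum are our own, and byte-order handling is deliberately glossed over (the kernel operates on big-endian quantities).

/*
 * Sketch of the IPv4 pseudo-header checksum arithmetic behind
 * csum_tcpudp_magic() in the hunks above. Illustrative user-space
 * code under stated assumptions; not a kernel API.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Fold a 32-bit accumulator into 16 bits, ones'-complement style. */
static uint16_t csum_fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/*
 * Ones'-complement sum over the TCP/UDP-over-IPv4 pseudo-header:
 * source address, destination address, zero byte plus protocol, and
 * transport length. Returns the folded, uncomplemented sum; callers
 * complement it as the checksum field requires.
 */
static uint16_t pseudo_hdr_csum(uint32_t saddr, uint32_t daddr,
				uint8_t proto, uint16_t len)
{
	uint32_t sum = 0;

	sum += saddr >> 16;
	sum += saddr & 0xffff;
	sum += daddr >> 16;
	sum += daddr & 0xffff;
	sum += proto;	/* the zero pad byte contributes nothing */
	sum += len;

	return csum_fold16(sum);
}

int main(void)
{
	uint32_t src = ntohl(inet_addr("192.0.2.1"));
	uint32_t dst = ntohl(inet_addr("192.0.2.2"));

	/* 6 = TCP, 20-byte header, no payload. */
	printf("pseudo-header csum: 0x%04x\n",
	       (unsigned)(uint16_t)~pseudo_hdr_csum(src, dst, 6, 20));
	return 0;
}

Compiled with any C compiler, this prints roughly the seed value the CHECKSUM_PARTIAL path stores in the packet; the device (or software fallback) then sums the transport header and payload on top before the final complement.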
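Similarly mechanical but easy to misread: the xen3-patch-3.3 and xen3-patch-3.7 hunks move the grant-op overrun warnings back and forth between net_warn_ratelimited(...) and the older if (net_ratelimit()) netdev_warn(...) spelling as the message migrates across kernel versions. Both forms gate on the same net_ratelimit() state; they differ only in which helper formats the device-name prefix. As a rough user-space analogue of that gating (burst and interval values below are illustrative, not the kernel's):

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Allow at most `burst` messages per `interval`-second window. */
static bool ratelimit_ok(void)
{
	static time_t window_start;
	static int seen;
	const int burst = 10, interval = 5;	/* illustrative values */
	time_t now = time(NULL);

	if (now - window_start >= interval) {
		window_start = now;
		seen = 0;
	}
	return seen++ < burst;
}

/* Counterpart of net_warn_ratelimited(): format only when allowed. */
static void warn_ratelimited(const char *fmt, ...)
{
	va_list ap;

	if (!ratelimit_ok())
		return;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

int main(void)
{
	/* Only the first `burst` of these reach stderr. */
	for (int i = 0; i < 100; i++)
		warn_ratelimited("Grant op overrun (%d)\n", i);
	return 0;
}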