Thread: [PATCH 00/35] KVM updates for the 2.6.26 merge window (part II)

  1. [PATCH 00/35] KVM updates for the 2.6.26 merge window (part II)

    These thirty-five patches comprise the second batch of the updates I have
    queued for 2.6.26. Please review.

    arch/x86/Kconfig | 8 +
    arch/x86/kernel/Makefile | 1 +
    arch/x86/kernel/crash.c | 3 +-
    arch/x86/kernel/kvm.c | 247 ++++++++++++++
    arch/x86/kernel/kvmclock.c | 27 ++
    arch/x86/kernel/reboot.c | 13 +-
    arch/x86/kernel/setup_32.c | 1 +
    arch/x86/kernel/setup_64.c | 2 +
    arch/x86/kvm/Kconfig | 2 +-
    arch/x86/kvm/Makefile | 3 +-
    arch/x86/kvm/i8254.c | 600 ++++++++++++++++++++++++++++++++++
    arch/x86/kvm/i8254.h | 62 ++++
    arch/x86/kvm/irq.c | 3 +
    arch/x86/kvm/lapic.c | 8 +-
    arch/x86/kvm/mmu.c | 426 ++++++++++++++++++++++---
    arch/x86/kvm/paging_tmpl.h | 46 ++-
    arch/x86/kvm/segment_descriptor.h | 29 --
    arch/x86/kvm/svm.c | 41 ++-
    arch/x86/kvm/svm.h | 3 +
    arch/x86/kvm/tss.h | 59 ++++
    arch/x86/kvm/vmx.c | 70 +++-
    arch/x86/kvm/x86.c | 647 +++++++++++++++++++++++++++++++++----
    arch/x86/kvm/x86_emulate.c | 4 +-
    include/asm-x86/kvm.h | 21 ++
    include/asm-x86/kvm_host.h | 54 +++-
    include/asm-x86/kvm_para.h | 32 ++-
    include/asm-x86/reboot.h | 2 +
    include/linux/kvm.h | 7 +
    include/linux/kvm_host.h | 21 +-
    include/linux/kvm_para.h | 11 +-
    virt/kvm/kvm_main.c | 78 ++++-
    31 files changed, 2302 insertions(+), 229 deletions(-)

  2. [PATCH 04/35] KVM: SVM: make iopm_base static

    From: Harvey Harrison

    Fixes the following sparse warning as well:
    arch/x86/kvm/svm.c:69:15: warning: symbol 'iopm_base' was not declared. Should it be static?

    Signed-off-by: Harvey Harrison
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/svm.c | 2 +-
    1 files changed, 1 insertions(+), 1 deletions(-)

    diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
    index 7d73e93..ff6e5c8 100644
    --- a/arch/x86/kvm/svm.c
    +++ b/arch/x86/kvm/svm.c
    @@ -66,7 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
    return container_of(vcpu, struct vcpu_svm, vcpu);
    }

    -unsigned long iopm_base;
    +static unsigned long iopm_base;

    struct kvm_ldttss_desc {
    u16 limit0;
    --
    1.5.4.5


  3. [PATCH 19/35] x86: KVM guest: add basic paravirt support

    From: Marcelo Tosatti

    Add basic KVM paravirt support. Avoid VM exits on I/O delays.
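
    For context, a minimal sketch of the CPUID-based detection that the
    kvm_para_available() call used below performs. The real helper lives in
    include/asm-x86/kvm_para.h; the leaf number and signature string here are
    quoted from memory and should be treated as illustrative, not as part of
    this patch:

    #include <linux/string.h>
    #include <asm/processor.h>

    /*
     * Illustrative sketch only: the guest queries the hypervisor CPUID
     * signature leaf and compares the returned bytes against "KVMKVMKVM".
     */
    static int kvm_detect_sketch(void)
    {
            unsigned int eax, ebx, ecx, edx;
            char signature[13];

            cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
            memcpy(signature + 0, &ebx, 4);
            memcpy(signature + 4, &ecx, 4);
            memcpy(signature + 8, &edx, 4);
            signature[12] = 0;

            return strcmp(signature, "KVMKVMKVM") == 0;
    }

    If the signature matches, kvm_guest_init() goes on to replace the default
    io_delay (an outb to port 0x80, which would trap to the hypervisor) with
    the empty kvm_io_delay() below, eliminating those VM exits.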

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Avi Kivity
    ---
    arch/x86/Kconfig | 8 ++++++
    arch/x86/kernel/Makefile | 1 +
    arch/x86/kernel/kvm.c | 52 ++++++++++++++++++++++++++++++++++++++++++++
    arch/x86/kernel/setup_32.c | 1 +
    arch/x86/kernel/setup_64.c | 2 +
    include/linux/kvm_para.h | 6 +++++
    6 files changed, 70 insertions(+), 0 deletions(-)
    create mode 100644 arch/x86/kernel/kvm.c

    diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
    index e59ea05..75d2700 100644
    --- a/arch/x86/Kconfig
    +++ b/arch/x86/Kconfig
    @@ -381,6 +381,14 @@ config KVM_CLOCK
    provides the guest with timing infrastructure such as time of day, and
    system time

    +config KVM_GUEST
    + bool "KVM Guest support"
    + select PARAVIRT
    + depends on !(X86_VISWS || X86_VOYAGER)
    + help
    + This option enables various optimizations for running under the KVM
    + hypervisor.
    +
    source "arch/x86/lguest/Kconfig"

    config PARAVIRT
    diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
    index a3379a3..1cc9d42 100644
    --- a/arch/x86/kernel/Makefile
    +++ b/arch/x86/kernel/Makefile
    @@ -77,6 +77,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
    obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o

    obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
    +obj-$(CONFIG_KVM_GUEST) += kvm.o
    obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
    obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o

    diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
    new file mode 100644
    index 0000000..a8e36da
    --- /dev/null
    +++ b/arch/x86/kernel/kvm.c
    @@ -0,0 +1,52 @@
    +/*
    + * KVM paravirt_ops implementation
    + *
    + * This program is free software; you can redistribute it and/or modify
    + * it under the terms of the GNU General Public License as published by
    + * the Free Software Foundation; either version 2 of the License, or
    + * (at your option) any later version.
    + *
    + * This program is distributed in the hope that it will be useful,
    + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    + * GNU General Public License for more details.
    + *
    + * You should have received a copy of the GNU General Public License
    + * along with this program; if not, write to the Free Software
    + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
    + *
    + * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar
    + * Copyright IBM Corporation, 2007
    + * Authors: Anthony Liguori
    + */
    +
    +#include
    +#include
    +#include
    +#include
    +#include
    +
    +/*
    + * No need for any "IO delay" on KVM
    + */
    +static void kvm_io_delay(void)
    +{
    +}
    +
    +static void paravirt_ops_setup(void)
    +{
    + pv_info.name = "KVM";
    + pv_info.paravirt_enabled = 1;
    +
    + if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
    + pv_cpu_ops.io_delay = kvm_io_delay;
    +
    +}
    +
    +void __init kvm_guest_init(void)
    +{
    + if (!kvm_para_available())
    + return;
    +
    + paravirt_ops_setup();
    +}
    diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
    index 3ef92a2..65f3a23 100644
    --- a/arch/x86/kernel/setup_32.c
    +++ b/arch/x86/kernel/setup_32.c
    @@ -782,6 +782,7 @@ void __init setup_arch(char **cmdline_p)
    */
    vmi_init();
    #endif
    + kvm_guest_init();

    /*
    * NOTE: before this point _nobody_ is allowed to allocate
    diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
    index 26b676f..10a8ff5 100644
    --- a/arch/x86/kernel/setup_64.c
    +++ b/arch/x86/kernel/setup_64.c
    @@ -452,6 +452,8 @@ void __init setup_arch(char **cmdline_p)
    init_apic_mappings();
    ioapic_init_mappings();

    + kvm_guest_init();
    +
    /*
    * We trust e820 completely. No explicit ROM probing in memory.
    */
    diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
    index 5497aac..9c462c9 100644
    --- a/include/linux/kvm_para.h
    +++ b/include/linux/kvm_para.h
    @@ -20,6 +20,12 @@
    #include

    #ifdef __KERNEL__
    +#ifdef CONFIG_KVM_GUEST
    +void __init kvm_guest_init(void);
    +#else
    +#define kvm_guest_init() do { } while (0)
    +#endif
    +
    static inline int kvm_para_has_feature(unsigned int feature)
    {
    if (kvm_arch_para_features() & (1UL << feature))
    --
    1.5.4.5


  4. [PATCH 11/35] KVM: detect if VCPU triple faults

    From: Joerg Roedel

    In the current inject_page_fault path, KVM only checks whether another PF is
    already pending and, if so, injects a DF. It also has to check for a pending
    DF to detect a shutdown condition in the VCPU. If this is not detected, the
    VCPU gets stuck in a PF -> DF -> PF loop when it should triple fault instead.
    This patch detects the condition and handles it with a KVM_EXIT_SHUTDOWN exit
    to userspace.
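
    For illustration, a minimal userspace sketch of how a VMM's run loop might
    react to this exit reason. The vcpu_fd and the mmap'ed kvm_run structure
    are assumed to have been set up by the usual KVM_CREATE_VCPU/mmap sequence;
    none of this is part of the patch itself:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    /* Hypothetical run loop: stop the VM when the guest triple faults. */
    static void run_vcpu(int vcpu_fd, struct kvm_run *run)
    {
            for (;;) {
                    if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                            perror("KVM_RUN");
                            return;
                    }
                    switch (run->exit_reason) {
                    case KVM_EXIT_SHUTDOWN:
                            /* Guest triple-faulted: reset or tear the VM down. */
                            fprintf(stderr, "guest shutdown\n");
                            return;
                    default:
                            /* I/O, MMIO, ... handled as before. */
                            break;
                    }
            }
    }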

    Signed-off-by: Joerg Roedel
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/x86.c | 20 +++++++++++++++-----
    include/linux/kvm_host.h | 1 +
    2 files changed, 16 insertions(+), 5 deletions(-)

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index dbcff38..491eda3 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -155,11 +155,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
    u32 error_code)
    {
    ++vcpu->stat.pf_guest;
    - if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
    - printk(KERN_DEBUG "kvm: inject_page_fault:"
    - " double fault 0x%lx\n", addr);
    - vcpu->arch.exception.nr = DF_VECTOR;
    - vcpu->arch.exception.error_code = 0;
    + if (vcpu->arch.exception.pending) {
    + if (vcpu->arch.exception.nr == PF_VECTOR) {
    + printk(KERN_DEBUG "kvm: inject_page_fault:"
    + " double fault 0x%lx\n", addr);
    + vcpu->arch.exception.nr = DF_VECTOR;
    + vcpu->arch.exception.error_code = 0;
    + } else if (vcpu->arch.exception.nr == DF_VECTOR) {
    + /* triple fault -> shutdown */
    + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
    + }
    return;
    }
    vcpu->arch.cr2 = addr;
    @@ -2676,6 +2681,11 @@ again:
    r = 0;
    goto out;
    }
    + if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
    + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
    + r = 0;
    + goto out;
    + }
    }

    kvm_inject_pending_timer_irqs(vcpu);
    diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
    index 9750bb3..958e003 100644
    --- a/include/linux/kvm_host.h
    +++ b/include/linux/kvm_host.h
    @@ -38,6 +38,7 @@
    #define KVM_REQ_MIGRATE_TIMER 1
    #define KVM_REQ_REPORT_TPR_ACCESS 2
    #define KVM_REQ_MMU_RELOAD 3
    +#define KVM_REQ_TRIPLE_FAULT 4

    struct kvm_vcpu;
    extern struct kmem_cache *kvm_vcpu_cache;
    --
    1.5.4.5


  5. [PATCH 09/35] KVM: Prefix control register accessors with kvm_ to avoid namespace pollution

    Names like 'set_cr3()' look dangerously close to affecting the host.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/vmx.c | 14 ++++++------
    arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++----------------------
    include/asm-x86/kvm_host.h | 12 +++++-----
    3 files changed, 36 insertions(+), 36 deletions(-)

    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index f46ad03..5034503 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -1683,7 +1683,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
    vmx->vcpu.arch.rmode.active = 0;

    vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
    - set_cr8(&vmx->vcpu, 0);
    + kvm_set_cr8(&vmx->vcpu, 0);
    msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
    if (vmx->vcpu.vcpu_id == 0)
    msr |= MSR_IA32_APICBASE_BSP;
    @@ -2026,22 +2026,22 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    switch (cr) {
    case 0:
    vcpu_load_rsp_rip(vcpu);
    - set_cr0(vcpu, vcpu->arch.regs[reg]);
    + kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
    skip_emulated_instruction(vcpu);
    return 1;
    case 3:
    vcpu_load_rsp_rip(vcpu);
    - set_cr3(vcpu, vcpu->arch.regs[reg]);
    + kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
    skip_emulated_instruction(vcpu);
    return 1;
    case 4:
    vcpu_load_rsp_rip(vcpu);
    - set_cr4(vcpu, vcpu->arch.regs[reg]);
    + kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
    skip_emulated_instruction(vcpu);
    return 1;
    case 8:
    vcpu_load_rsp_rip(vcpu);
    - set_cr8(vcpu, vcpu->arch.regs[reg]);
    + kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
    skip_emulated_instruction(vcpu);
    if (irqchip_in_kernel(vcpu->kvm))
    return 1;
    @@ -2067,14 +2067,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    return 1;
    case 8:
    vcpu_load_rsp_rip(vcpu);
    - vcpu->arch.regs[reg] = get_cr8(vcpu);
    + vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
    vcpu_put_rsp_rip(vcpu);
    skip_emulated_instruction(vcpu);
    return 1;
    }
    break;
    case 3: /* lmsw */
    - lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
    + kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);

    skip_emulated_instruction(vcpu);
    return 1;
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 0458bd5..dbcff38 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -237,7 +237,7 @@ out:
    return changed;
    }

    -void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
    +void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
    {
    if (cr0 & CR0_RESERVED_BITS) {
    printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
    @@ -295,15 +295,15 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
    kvm_mmu_reset_context(vcpu);
    return;
    }
    -EXPORT_SYMBOL_GPL(set_cr0);
    +EXPORT_SYMBOL_GPL(kvm_set_cr0);

    -void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
    +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
    {
    - set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
    + kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
    }
    -EXPORT_SYMBOL_GPL(lmsw);
    +EXPORT_SYMBOL_GPL(kvm_lmsw);

    -void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
    +void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
    {
    if (cr4 & CR4_RESERVED_BITS) {
    printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
    @@ -334,9 +334,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
    vcpu->arch.cr4 = cr4;
    kvm_mmu_reset_context(vcpu);
    }
    -EXPORT_SYMBOL_GPL(set_cr4);
    +EXPORT_SYMBOL_GPL(kvm_set_cr4);

    -void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
    +void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
    {
    if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
    kvm_mmu_flush_tlb(vcpu);
    @@ -388,9 +388,9 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
    }
    up_read(&vcpu->kvm->slots_lock);
    }
    -EXPORT_SYMBOL_GPL(set_cr3);
    +EXPORT_SYMBOL_GPL(kvm_set_cr3);

    -void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
    +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
    {
    if (cr8 & CR8_RESERVED_BITS) {
    printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
    @@ -402,16 +402,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
    else
    vcpu->arch.cr8 = cr8;
    }
    -EXPORT_SYMBOL_GPL(set_cr8);
    +EXPORT_SYMBOL_GPL(kvm_set_cr8);

    -unsigned long get_cr8(struct kvm_vcpu *vcpu)
    +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
    {
    if (irqchip_in_kernel(vcpu->kvm))
    return kvm_lapic_get_cr8(vcpu);
    else
    return vcpu->arch.cr8;
    }
    -EXPORT_SYMBOL_GPL(get_cr8);
    +EXPORT_SYMBOL_GPL(kvm_get_cr8);

    /*
    * List of msr numbers which we expose to userspace through KVM_GET_MSRS
    @@ -2462,7 +2462,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
    unsigned long *rflags)
    {
    - lmsw(vcpu, msw);
    + kvm_lmsw(vcpu, msw);
    *rflags = kvm_x86_ops->get_rflags(vcpu);
    }

    @@ -2479,7 +2479,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
    case 4:
    return vcpu->arch.cr4;
    case 8:
    - return get_cr8(vcpu);
    + return kvm_get_cr8(vcpu);
    default:
    vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
    return 0;
    @@ -2491,20 +2491,20 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
    {
    switch (cr) {
    case 0:
    - set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
    + kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
    *rflags = kvm_x86_ops->get_rflags(vcpu);
    break;
    case 2:
    vcpu->arch.cr2 = val;
    break;
    case 3:
    - set_cr3(vcpu, val);
    + kvm_set_cr3(vcpu, val);
    break;
    case 4:
    - set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
    + kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
    break;
    case 8:
    - set_cr8(vcpu, val & 0xfUL);
    + kvm_set_cr8(vcpu, val & 0xfUL);
    break;
    default:
    vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
    @@ -2602,7 +2602,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
    struct kvm_run *kvm_run)
    {
    kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
    - kvm_run->cr8 = get_cr8(vcpu);
    + kvm_run->cr8 = kvm_get_cr8(vcpu);
    kvm_run->apic_base = kvm_get_apic_base(vcpu);
    if (irqchip_in_kernel(vcpu->kvm))
    kvm_run->ready_for_interrupt_injection = 1;
    @@ -2803,7 +2803,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

    /* re-sync apic's tpr */
    if (!irqchip_in_kernel(vcpu->kvm))
    - set_cr8(vcpu, kvm_run->cr8);
    + kvm_set_cr8(vcpu, kvm_run->cr8);

    if (vcpu->arch.pio.cur_count) {
    r = complete_pio(vcpu);
    @@ -2961,7 +2961,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    sregs->cr2 = vcpu->arch.cr2;
    sregs->cr3 = vcpu->arch.cr3;
    sregs->cr4 = vcpu->arch.cr4;
    - sregs->cr8 = get_cr8(vcpu);
    + sregs->cr8 = kvm_get_cr8(vcpu);
    sregs->efer = vcpu->arch.shadow_efer;
    sregs->apic_base = kvm_get_apic_base(vcpu);

    @@ -3007,7 +3007,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
    vcpu->arch.cr3 = sregs->cr3;

    - set_cr8(vcpu, sregs->cr8);
    + kvm_set_cr8(vcpu, sregs->cr8);

    mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
    kvm_x86_ops->set_efer(vcpu, sregs->efer);
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 58291a6..024b57c 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -469,12 +469,12 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
    int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
    unsigned long value);

    -void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
    -void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
    -void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
    -void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
    -unsigned long get_cr8(struct kvm_vcpu *vcpu);
    -void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
    +void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
    +void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
    +void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
    +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
    +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
    +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);

    int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
    --
    1.5.4.5


  6. [PATCH 12/35] KVM: replace remaining __FUNCTION__ occurrences

    From: Harvey Harrison

    __FUNCTION__ is gcc-specific, use __func__
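
    A trivial sketch of why the substitution is mechanical (the function name
    is made up for the example): both identifiers expand to the name of the
    enclosing function, but __func__ is standard C99 while __FUNCTION__ is a
    gcc extension.

    #include <linux/kernel.h>

    static void frobnicate(void)
    {
            /* Both lines print "frobnicate: called"; only __func__ is standard C99. */
            printk(KERN_DEBUG "%s: called\n", __func__);
            printk(KERN_DEBUG "%s: called\n", __FUNCTION__);
    }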

    Signed-off-by: Harvey Harrison
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/lapic.c | 8 ++++----
    arch/x86/kvm/mmu.c | 35 +++++++++++++++++------------------
    arch/x86/kvm/paging_tmpl.h | 14 +++++++-------
    arch/x86/kvm/svm.c | 14 +++++++-------
    arch/x86/kvm/vmx.c | 6 +++---
    arch/x86/kvm/x86.c | 12 ++++++------
    6 files changed, 44 insertions(+), 45 deletions(-)

    diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
    index 68a6b15..31280df 100644
    --- a/arch/x86/kvm/lapic.c
    +++ b/arch/x86/kvm/lapic.c
    @@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
    apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
    PRIx64 ", "
    "timer initial count 0x%x, period %lldns, "
    - "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__,
    + "expire @ 0x%016" PRIx64 ".\n", __func__,
    APIC_BUS_CYCLE_NS, ktime_to_ns(now),
    apic_get_reg(apic, APIC_TMICT),
    apic->timer.period,
    @@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
    /* too common printing */
    if (offset != APIC_EOI)
    apic_debug("%s: offset 0x%x with length 0x%x, and value is "
    - "0x%x\n", __FUNCTION__, offset, len, val);
    + "0x%x\n", __func__, offset, len, val);

    offset &= 0xff0;

    @@ -869,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
    struct kvm_lapic *apic;
    int i;

    - apic_debug("%s\n", __FUNCTION__);
    + apic_debug("%s\n", __func__);

    ASSERT(vcpu);
    apic = vcpu->arch.apic;
    @@ -907,7 +907,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
    apic_update_ppr(apic);

    apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
    - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
    + "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
    vcpu, kvm_apic_id(apic),
    vcpu->arch.apic_base, apic->base_address);
    }
    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index 1932a3a..414405b 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -649,7 +649,7 @@ static int is_empty_shadow_page(u64 *spt)

    for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
    if (*pos != shadow_trap_nonpresent_pte) {
    - printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
    + printk(KERN_ERR "%s: %p %llx\n", __func__,
    pos, *pos);
    return 0;
    }
    @@ -772,14 +772,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
    struct kvm_mmu_page *sp;
    struct hlist_node *node;

    - pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
    + pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
    index = kvm_page_table_hashfn(gfn);
    bucket = &kvm->arch.mmu_page_hash[index];
    hlist_for_each_entry(sp, node, bucket, hash_link)
    if (sp->gfn == gfn && !sp->role.metaphysical
    && !sp->role.invalid) {
    pgprintk("%s: found role %x\n",
    - __FUNCTION__, sp->role.word);
    + __func__, sp->role.word);
    return sp;
    }
    return NULL;
    @@ -810,21 +810,21 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
    quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
    role.quadrant = quadrant;
    }
    - pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
    + pgprintk("%s: looking gfn %lx role %x\n", __func__,
    gfn, role.word);
    index = kvm_page_table_hashfn(gfn);
    bucket = &vcpu->kvm->arch.mmu_page_hash[index];
    hlist_for_each_entry(sp, node, bucket, hash_link)
    if (sp->gfn == gfn && sp->role.word == role.word) {
    mmu_page_add_parent_pte(vcpu, sp, parent_pte);
    - pgprintk("%s: found\n", __FUNCTION__);
    + pgprintk("%s: found\n", __func__);
    return sp;
    }
    ++vcpu->kvm->stat.mmu_cache_miss;
    sp = kvm_mmu_alloc_page(vcpu, parent_pte);
    if (!sp)
    return sp;
    - pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
    + pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
    sp->gfn = gfn;
    sp->role = role;
    hlist_add_head(&sp->hash_link, bucket);
    @@ -960,13 +960,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
    struct hlist_node *node, *n;
    int r;

    - pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
    + pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
    r = 0;
    index = kvm_page_table_hashfn(gfn);
    bucket = &kvm->arch.mmu_page_hash[index];
    hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
    if (sp->gfn == gfn && !sp->role.metaphysical) {
    - pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
    + pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
    sp->role.word);
    kvm_mmu_zap_page(kvm, sp);
    r = 1;
    @@ -979,7 +979,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
    struct kvm_mmu_page *sp;

    while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
    - pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
    + pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
    kvm_mmu_zap_page(kvm, sp);
    }
    }
    @@ -1021,7 +1021,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,

    pgprintk("%s: spte %llx access %x write_fault %d"
    " user_fault %d gfn %lx\n",
    - __FUNCTION__, *shadow_pte, pt_access,
    + __func__, *shadow_pte, pt_access,
    write_fault, user_fault, gfn);

    if (is_rmap_pte(*shadow_pte)) {
    @@ -1047,7 +1047,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    }
    }

    -
    /*
    * We don't set the accessed bit, since we sometimes want to see
    * whether the guest actually used the pte (in order to detect
    @@ -1081,7 +1080,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    if (shadow ||
    (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
    pgprintk("%s: found shadow page for %lx, marking ro\n",
    - __FUNCTION__, gfn);
    + __func__, gfn);
    pte_access &= ~ACC_WRITE_MASK;
    if (is_writeble_pte(spte)) {
    spte &= ~PT_WRITABLE_MASK;
    @@ -1097,7 +1096,7 @@ unshadowed:
    if (pte_access & ACC_WRITE_MASK)
    mark_page_dirty(vcpu->kvm, gfn);

    - pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
    + pgprintk("%s: setting spte %llx\n", __func__, spte);
    pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
    (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
    (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
    @@ -1317,7 +1316,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
    gfn_t gfn;
    int r;

    - pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
    + pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
    r = mmu_topup_memory_caches(vcpu);
    if (r)
    return r;
    @@ -1395,7 +1394,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)

    static void paging_new_cr3(struct kvm_vcpu *vcpu)
    {
    - pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
    + pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
    mmu_free_roots(vcpu);
    }

    @@ -1691,7 +1690,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
    int npte;
    int r;

    - pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
    + pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
    mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
    spin_lock(&vcpu->kvm->mmu_lock);
    kvm_mmu_free_some_pages(vcpu);
    @@ -2139,7 +2138,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)

    if (n_rmap != n_actual)
    printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
    - __FUNCTION__, audit_msg, n_rmap, n_actual);
    + __func__, audit_msg, n_rmap, n_actual);
    }

    static void audit_write_protection(struct kvm_vcpu *vcpu)
    @@ -2159,7 +2158,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
    if (*rmapp)
    printk(KERN_ERR "%s: (%s) shadow page has writable"
    " mappings: gfn %lx role %x\n",
    - __FUNCTION__, audit_msg, sp->gfn,
    + __func__, audit_msg, sp->gfn,
    sp->role.word);
    }
    }
    diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
    index 17f9d16..57abbd0 100644
    --- a/arch/x86/kvm/paging_tmpl.h
    +++ b/arch/x86/kvm/paging_tmpl.h
    @@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
    unsigned index, pt_access, pte_access;
    gpa_t pte_gpa;

    - pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
    + pgprintk("%s: addr %lx\n", __func__, addr);
    walk:
    walker->level = vcpu->arch.mmu.root_level;
    pte = vcpu->arch.cr3;
    @@ -155,7 +155,7 @@ walk:
    pte_gpa += index * sizeof(pt_element_t);
    walker->table_gfn[walker->level - 1] = table_gfn;
    walker->pte_gpa[walker->level - 1] = pte_gpa;
    - pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
    + pgprintk("%s: table_gfn[%d] %lx\n", __func__,
    walker->level - 1, table_gfn);

    kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
    @@ -222,7 +222,7 @@ walk:
    walker->pt_access = pt_access;
    walker->pte_access = pte_access;
    pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
    - __FUNCTION__, (u64)pte, pt_access, pte_access);
    + __func__, (u64)pte, pt_access, pte_access);
    return 1;

    not_present:
    @@ -256,7 +256,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
    set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
    return;
    }
    - pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
    + pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
    pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
    if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
    return;
    @@ -381,7 +381,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    struct page *page;
    int largepage = 0;

    - pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
    + pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
    kvm_mmu_audit(vcpu, "pre page fault");

    r = mmu_topup_memory_caches(vcpu);
    @@ -399,7 +399,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    * The page is not mapped by the guest. Let the guest handle it.
    */
    if (!r) {
    - pgprintk("%s: guest page fault\n", __FUNCTION__);
    + pgprintk("%s: guest page fault\n", __func__);
    inject_page_fault(vcpu, addr, walker.error_code);
    vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
    up_read(&vcpu->kvm->slots_lock);
    @@ -431,7 +431,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
    largepage, &write_pt, page);

    - pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
    + pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
    shadow_pte, *shadow_pte, write_pt);

    if (!write_pt)
    diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
    index ff6e5c8..b2c667f 100644
    --- a/arch/x86/kvm/svm.c
    +++ b/arch/x86/kvm/svm.c
    @@ -230,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
    struct vcpu_svm *svm = to_svm(vcpu);

    if (!svm->next_rip) {
    - printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
    + printk(KERN_DEBUG "%s: NOP\n", __func__);
    return;
    }
    if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
    printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
    - __FUNCTION__,
    + __func__,
    svm->vmcb->save.rip,
    svm->next_rip);

    @@ -996,7 +996,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
    }
    default:
    printk(KERN_DEBUG "%s: unexpected dr %u\n",
    - __FUNCTION__, dr);
    + __func__, dr);
    *exception = UD_VECTOR;
    return;
    }
    @@ -1109,7 +1109,7 @@ static int invalid_op_interception(struct vcpu_svm *svm,
    static int task_switch_interception(struct vcpu_svm *svm,
    struct kvm_run *kvm_run)
    {
    - pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__);
    + pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __func__);
    kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
    return 0;
    }
    @@ -1125,7 +1125,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
    struct kvm_run *kvm_run)
    {
    if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
    - pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
    + pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
    return 1;
    }

    @@ -1257,7 +1257,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
    case MSR_IA32_DEBUGCTLMSR:
    if (!svm_has(SVM_FEATURE_LBRV)) {
    pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
    - __FUNCTION__, data);
    + __func__, data);
    break;
    }
    if (data & DEBUGCTL_RESERVED_BITS)
    @@ -1419,7 +1419,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
    exit_code != SVM_EXIT_NPF)
    printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
    "exit_code 0x%x\n",
    - __FUNCTION__, svm->vmcb->control.exit_int_info,
    + __func__, svm->vmcb->control.exit_int_info,
    exit_code);

    if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index 5034503..7ef710a 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -1254,7 +1254,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
    guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
    if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
    printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
    - __FUNCTION__);
    + __func__);
    vmcs_write32(GUEST_TR_AR_BYTES,
    (guest_tr_ar & ~AR_TYPE_MASK)
    | AR_TYPE_BUSY_64_TSS);
    @@ -1909,7 +1909,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    if ((vect_info & VECTORING_INFO_VALID_MASK) &&
    !is_page_fault(intr_info))
    printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
    - "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
    + "intr info 0x%x\n", __func__, vect_info, intr_info);

    if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
    int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
    @@ -2275,7 +2275,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
    if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
    exit_reason != EXIT_REASON_EXCEPTION_NMI)
    printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
    - "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
    + "exit reason is 0x%x\n", __func__, exit_reason);
    if (exit_reason < kvm_vmx_max_exit_handlers
    && kvm_vmx_exit_handlers[exit_reason])
    return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 491eda3..bf78d65 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -563,15 +563,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
    break;
    case MSR_IA32_MC0_STATUS:
    pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
    - __FUNCTION__, data);
    + __func__, data);
    break;
    case MSR_IA32_MCG_STATUS:
    pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
    - __FUNCTION__, data);
    + __func__, data);
    break;
    case MSR_IA32_MCG_CTL:
    pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
    - __FUNCTION__, data);
    + __func__, data);
    break;
    case MSR_IA32_UCODE_REV:
    case MSR_IA32_UCODE_WRITE:
    @@ -1939,7 +1939,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
    *dest = kvm_x86_ops->get_dr(vcpu, dr);
    return X86EMUL_CONTINUE;
    default:
    - pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
    + pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
    return X86EMUL_UNHANDLEABLE;
    }
    }
    @@ -2486,7 +2486,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
    case 8:
    return kvm_get_cr8(vcpu);
    default:
    - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
    + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
    return 0;
    }
    }
    @@ -2512,7 +2512,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
    kvm_set_cr8(vcpu, val & 0xfUL);
    break;
    default:
    - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
    + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
    }
    }

    --
    1.5.4.5


  7. [PATCH 07/35] KVM: MMU: ignore zapped root pagetables

    From: Marcelo Tosatti

    Mark zapped root pagetables as invalid and ignore such pages during lookup.

    This is a problem with the cr3-target feature, where a zapped root table fools
    the faulting code into creating a read-only mapping. The result is a lockup
    if the instruction can't be emulated.

    Signed-off-by: Marcelo Tosatti
    Cc: Anthony Liguori
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 12 ++++++++++--
    arch/x86/kvm/x86.c | 12 ++++++++++++
    include/asm-x86/kvm_host.h | 1 +
    include/linux/kvm_host.h | 2 ++
    virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++
    5 files changed, 48 insertions(+), 2 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index f7541fe..103d008 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -667,7 +667,8 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
    index = kvm_page_table_hashfn(gfn);
    bucket = &kvm->arch.mmu_page_hash[index];
    hlist_for_each_entry(sp, node, bucket, hash_link)
    - if (sp->gfn == gfn && !sp->role.metaphysical) {
    + if (sp->gfn == gfn && !sp->role.metaphysical
    + && !sp->role.invalid) {
    pgprintk("%s: found role %x\n",
    __FUNCTION__, sp->role.word);
    return sp;
    @@ -792,8 +793,11 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
    if (!sp->root_count) {
    hlist_del(&sp->hash_link);
    kvm_mmu_free_page(kvm, sp);
    - } else
    + } else {
    list_move(&sp->link, &kvm->arch.active_mmu_pages);
    + sp->role.invalid = 1;
    + kvm_reload_remote_mmus(kvm);
    + }
    kvm_mmu_reset_last_pte_updated(kvm);
    }

    @@ -1073,6 +1077,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)

    sp = page_header(root);
    --sp->root_count;
    + if (!sp->root_count && sp->role.invalid)
    + kvm_mmu_zap_page(vcpu->kvm, sp);
    vcpu->arch.mmu.root_hpa = INVALID_PAGE;
    spin_unlock(&vcpu->kvm->mmu_lock);
    return;
    @@ -1085,6 +1091,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
    root &= PT64_BASE_ADDR_MASK;
    sp = page_header(root);
    --sp->root_count;
    + if (!sp->root_count && sp->role.invalid)
    + kvm_mmu_zap_page(vcpu->kvm, sp);
    }
    vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
    }
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 0dd038e..e8e6492 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -2658,6 +2658,10 @@ preempted:
    kvm_x86_ops->guest_debug_pre(vcpu);

    again:
    + if (vcpu->requests)
    + if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
    + kvm_mmu_unload(vcpu);
    +
    r = kvm_mmu_reload(vcpu);
    if (unlikely(r))
    goto out;
    @@ -2689,6 +2693,14 @@ again:
    goto out;
    }

    + if (vcpu->requests)
    + if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
    + local_irq_enable();
    + preempt_enable();
    + r = 1;
    + goto out;
    + }
    +
    if (signal_pending(current)) {
    local_irq_enable();
    preempt_enable();
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 4f5a71a..7535839 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -140,6 +140,7 @@ union kvm_mmu_page_role {
    unsigned pad_for_nice_hex_output : 6;
    unsigned metaphysical : 1;
    unsigned access : 3;
    + unsigned invalid : 1;
    };
    };

    diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
    index eb88d32..994278f 100644
    --- a/include/linux/kvm_host.h
    +++ b/include/linux/kvm_host.h
    @@ -37,6 +37,7 @@
    #define KVM_REQ_TLB_FLUSH 0
    #define KVM_REQ_MIGRATE_TIMER 1
    #define KVM_REQ_REPORT_TPR_ACCESS 2
    +#define KVM_REQ_MMU_RELOAD 3

    struct kvm_vcpu;
    extern struct kmem_cache *kvm_vcpu_cache;
    @@ -190,6 +191,7 @@ void kvm_resched(struct kvm_vcpu *vcpu);
    void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
    void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
    void kvm_flush_remote_tlbs(struct kvm *kvm);
    +void kvm_reload_remote_mmus(struct kvm *kvm);

    long kvm_arch_dev_ioctl(struct file *filp,
    unsigned int ioctl, unsigned long arg);
    diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
    index cf6df51..c41eb57 100644
    --- a/virt/kvm/kvm_main.c
    +++ b/virt/kvm/kvm_main.c
    @@ -119,6 +119,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
    smp_call_function_mask(cpus, ack_flush, NULL, 1);
    }

    +void kvm_reload_remote_mmus(struct kvm *kvm)
    +{
    + int i, cpu;
    + cpumask_t cpus;
    + struct kvm_vcpu *vcpu;
    +
    + cpus_clear(cpus);
    + for (i = 0; i < KVM_MAX_VCPUS; ++i) {
    + vcpu = kvm->vcpus[i];
    + if (!vcpu)
    + continue;
    + if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
    + continue;
    + cpu = vcpu->cpu;
    + if (cpu != -1 && cpu != raw_smp_processor_id())
    + cpu_set(cpu, cpus);
    + }
    + if (cpus_empty(cpus))
    + return;
    + smp_call_function_mask(cpus, ack_flush, NULL, 1);
    +}
    +
    +
    int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
    {
    struct page *page;
    --
    1.5.4.5


  8. [PATCH 06/35] KVM: Implement dummy values for MSR_PERF_STATUS

    From: Alexander Graf

    Darwin relies on this and ceases to work without it.
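
    As a quick worked example of the dummy value the guest will read back via
    RDMSR, reproducing the arithmetic from the hunk below (the bit positions
    are the ones the patch itself uses):

    #include <stdio.h>

    /* Reproduces the value constructed in kvm_get_msr_common() below. */
    int main(void)
    {
            unsigned long long data = 1000ULL;          /* "TSC increment by tick" */
            data |= ((unsigned long long)4ULL) << 40;   /* CPU multiplier */
            printf("IA32_PERF_STATUS = %#llx\n", data); /* 0x400000003e8 */
            return 0;
    }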

    Signed-off-by: Alexander Graf
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/x86.c | 8 +++++++-
    1 files changed, 7 insertions(+), 1 deletions(-)

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index cf6261e..0dd038e 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -426,6 +426,7 @@ static u32 msrs_to_save[] = {
    MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
    #endif
    MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
    + MSR_IA32_PERF_STATUS,
    };

    static unsigned num_msrs_to_save;
    @@ -653,7 +654,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    case MSR_IA32_MC0_MISC+12:
    case MSR_IA32_MC0_MISC+16:
    case MSR_IA32_UCODE_REV:
    - case MSR_IA32_PERF_STATUS:
    case MSR_IA32_EBL_CR_POWERON:
    /* MTRR registers */
    case 0xfe:
    @@ -669,6 +669,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    case MSR_IA32_MISC_ENABLE:
    data = vcpu->arch.ia32_misc_enable_msr;
    break;
    + case MSR_IA32_PERF_STATUS:
    + /* TSC increment by tick */
    + data = 1000ULL;
    + /* CPU multiplier */
    + data |= (((uint64_t)4ULL) << 40);
    + break;
    case MSR_EFER:
    data = vcpu->arch.shadow_efer;
    break;
    --
    1.5.4.5


  9. [PATCH 10/35] KVM: Use kzalloc to avoid allocating kvm_regs from kernel stack

    From: Xiantao Zhang

    Since struct kvm_regs is too large to allocate on the kernel stack on ia64,
    use kzalloc to allocate it from the heap instead.

    Signed-off-by: Xiantao Zhang
    Signed-off-by: Avi Kivity
    ---
    virt/kvm/kvm_main.c | 33 ++++++++++++++++++++++-----------
    1 files changed, 22 insertions(+), 11 deletions(-)

    diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
    index 0dabf58..30bf832 100644
    --- a/virt/kvm/kvm_main.c
    +++ b/virt/kvm/kvm_main.c
    @@ -849,28 +849,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
    r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
    break;
    case KVM_GET_REGS: {
    - struct kvm_regs kvm_regs;
    + struct kvm_regs *kvm_regs;

    - memset(&kvm_regs, 0, sizeof kvm_regs);
    - r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
    - if (r)
    + r = -ENOMEM;
    + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
    + if (!kvm_regs)
    goto out;
    + r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
    + if (r)
    + goto out_free1;
    r = -EFAULT;
    - if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
    - goto out;
    + if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
    + goto out_free1;
    r = 0;
    +out_free1:
    + kfree(kvm_regs);
    break;
    }
    case KVM_SET_REGS: {
    - struct kvm_regs kvm_regs;
    + struct kvm_regs *kvm_regs;

    - r = -EFAULT;
    - if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
    + r = -ENOMEM;
    + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
    + if (!kvm_regs)
    goto out;
    - r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
    + r = -EFAULT;
    + if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
    + goto out_free2;
    + r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
    if (r)
    - goto out;
    + goto out_free2;
    r = 0;
    +out_free2:
    + kfree(kvm_regs);
    break;
    }
    case KVM_GET_SREGS: {
    --
    1.5.4.5


  10. [PATCH 03/35] KVM: x86 emulator: fix sparse warnings in x86_emulate.c

    From: Harvey Harrison

    Nesting __emulate_2op_nobyte inside __emulate_2op produces many shadowed
    variable warnings on the internal variable _tmp used by both macros.

    Change the outer macro to use __tmp.

    Avoids a sparse warning like the following at every call site of __emulate_2op:
    arch/x86/kvm/x86_emulate.c:1091:3: warning: symbol '_tmp' shadows an earlier one
    arch/x86/kvm/x86_emulate.c:1091:3: originally declared here
    [18 more warnings suppressed]
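
    A compact illustration of the shadowing pattern sparse complains about
    (macro and variable names are made up for the example):

    #define INNER_OP()  do { unsigned long _tmp = 1; (void)_tmp; } while (0)

    /* Before: the outer _tmp is shadowed by INNER_OP()'s _tmp once the two
     * macros expand nested inside the same block. */
    #define OUTER_OP_BEFORE()  do { unsigned long _tmp = 0; INNER_OP(); (void)_tmp; } while (0)

    /* After: renaming the outer variable to __tmp, as this patch does for
     * __emulate_2op, removes the shadowing. */
    #define OUTER_OP_AFTER()   do { unsigned long __tmp = 0; INNER_OP(); (void)__tmp; } while (0)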

    Signed-off-by: Harvey Harrison
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/x86_emulate.c | 4 ++--
    1 files changed, 2 insertions(+), 2 deletions(-)

    diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
    index cacdcf5..f59ed93 100644
    --- a/arch/x86/kvm/x86_emulate.c
    +++ b/arch/x86/kvm/x86_emulate.c
    @@ -371,7 +371,7 @@ static u16 group2_table[] = {

    #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
    do { \
    - unsigned long _tmp; \
    + unsigned long __tmp; \
    switch ((_dst).bytes) { \
    case 1: \
    __asm__ __volatile__ ( \
    @@ -379,7 +379,7 @@ static u16 group2_table[] = {
    _op"b %"_bx"3,%1; " \
    _POST_EFLAGS("0", "4", "2") \
    : "=m" (_eflags), "=m" ((_dst).val), \
    - "=&r" (_tmp) \
    + "=&r" (__tmp) \
    : _by ((_src).val), "i" (EFLAGS_MASK)); \
    break; \
    default: \
    --
    1.5.4.5


  11. [PATCH 08/35] KVM: MMU: large page support

    From: Marcelo Tosatti

    Create large page mappings if the guest PTEs are marked as such and
    the underlying memory is hugetlbfs-backed. If the large page contains
    write-protected pages, a large pte is not used.

    Gives a consistent 2% improvement for data copies on a RAM-backed
    filesystem, without NPT/EPT.

    Anthony measures a 4% improvement on 4-way kernbench, with NPT.
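
    To make the index arithmetic in the hunks below easier to follow, a small
    standalone sketch. The 512 pages-per-hugepage value assumes 4KB base pages
    and 2MB large pages, and the gfn values are arbitrary; both are
    illustrative assumptions, not part of the patch:

    #include <stdio.h>

    #define KVM_PAGES_PER_HPAGE 512UL   /* 2MB / 4KB -- assumption for x86 */

    int main(void)
    {
            unsigned long gfn = 0x12345, base_gfn = 0x10000;

            /* First gfn of the 2MB region containing gfn. */
            unsigned long large_gfn = gfn & ~(KVM_PAGES_PER_HPAGE - 1);

            /* Per-slot largepage index, as in slot_largepage_idx() below. */
            unsigned long idx = (gfn / KVM_PAGES_PER_HPAGE)
                              - (base_gfn / KVM_PAGES_PER_HPAGE);

            printf("large_gfn=%#lx idx=%lu\n", large_gfn, idx);
            return 0;
    }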

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 222 +++++++++++++++++++++++++++++++++++++++-----
    arch/x86/kvm/paging_tmpl.h | 32 +++++-
    arch/x86/kvm/x86.c | 1 +
    include/asm-x86/kvm_host.h | 9 ++
    include/linux/kvm_host.h | 5 +
    virt/kvm/kvm_main.c | 22 ++++-
    6 files changed, 259 insertions(+), 32 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index 103d008..1932a3a 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -27,6 +27,7 @@
    #include
    #include
    #include
    +#include

    #include
    #include
    @@ -211,6 +212,11 @@ static int is_shadow_present_pte(u64 pte)
    && pte != shadow_notrap_nonpresent_pte;
    }

    +static int is_large_pte(u64 pte)
    +{
    + return pte & PT_PAGE_SIZE_MASK;
    +}
    +
    static int is_writeble_pte(unsigned long pte)
    {
    return pte & PT_WRITABLE_MASK;
    @@ -350,16 +356,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
    }

    /*
    + * Return the pointer to the largepage write count for a given
    + * gfn, handling slots that are not large page aligned.
    + */
    +static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
    +{
    + unsigned long idx;
    +
    + idx = (gfn / KVM_PAGES_PER_HPAGE) -
    + (slot->base_gfn / KVM_PAGES_PER_HPAGE);
    + return &slot->lpage_info[idx].write_count;
    +}
    +
    +static void account_shadowed(struct kvm *kvm, gfn_t gfn)
    +{
    + int *write_count;
    +
    + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
    + *write_count += 1;
    + WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
    +}
    +
    +static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
    +{
    + int *write_count;
    +
    + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
    + *write_count -= 1;
    + WARN_ON(*write_count < 0);
    +}
    +
    +static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
    +{
    + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
    + int *largepage_idx;
    +
    + if (slot) {
    + largepage_idx = slot_largepage_idx(gfn, slot);
    + return *largepage_idx;
    + }
    +
    + return 1;
    +}
    +
    +static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
    +{
    + struct vm_area_struct *vma;
    + unsigned long addr;
    +
    + addr = gfn_to_hva(kvm, gfn);
    + if (kvm_is_error_hva(addr))
    + return 0;
    +
    + vma = find_vma(current->mm, addr);
    + if (vma && is_vm_hugetlb_page(vma))
    + return 1;
    +
    + return 0;
    +}
    +
    +static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
    +{
    + struct kvm_memory_slot *slot;
    +
    + if (has_wrprotected_page(vcpu->kvm, large_gfn))
    + return 0;
    +
    + if (!host_largepage_backed(vcpu->kvm, large_gfn))
    + return 0;
    +
    + slot = gfn_to_memslot(vcpu->kvm, large_gfn);
    + if (slot && slot->dirty_bitmap)
    + return 0;
    +
    + return 1;
    +}
    +
    +/*
    * Take gfn and return the reverse mapping to it.
    * Note: gfn must be unaliased before this function get called
    */

    -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
    +static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
    {
    struct kvm_memory_slot *slot;
    + unsigned long idx;

    slot = gfn_to_memslot(kvm, gfn);
    - return &slot->rmap[gfn - slot->base_gfn];
    + if (!lpage)
    + return &slot->rmap[gfn - slot->base_gfn];
    +
    + idx = (gfn / KVM_PAGES_PER_HPAGE) -
    + (slot->base_gfn / KVM_PAGES_PER_HPAGE);
    +
    + return &slot->lpage_info[idx].rmap_pde;
    }

    /*
    @@ -371,7 +461,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
    * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
    * containing more mappings.
    */
    -static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
    +static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
    {
    struct kvm_mmu_page *sp;
    struct kvm_rmap_desc *desc;
    @@ -383,7 +473,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
    gfn = unalias_gfn(vcpu->kvm, gfn);
    sp = page_header(__pa(spte));
    sp->gfns[spte - sp->spt] = gfn;
    - rmapp = gfn_to_rmap(vcpu->kvm, gfn);
    + rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
    if (!*rmapp) {
    rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
    *rmapp = (unsigned long)spte;
    @@ -449,7 +539,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
    kvm_release_page_dirty(page);
    else
    kvm_release_page_clean(page);
    - rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
    + rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
    if (!*rmapp) {
    printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
    BUG();
    @@ -515,7 +605,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
    int write_protected = 0;

    gfn = unalias_gfn(kvm, gfn);
    - rmapp = gfn_to_rmap(kvm, gfn);
    + rmapp = gfn_to_rmap(kvm, gfn, 0);

    spte = rmap_next(kvm, rmapp, NULL);
    while (spte) {
    @@ -528,8 +618,27 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
    }
    spte = rmap_next(kvm, rmapp, spte);
    }
    + /* check for huge page mappings */
    + rmapp = gfn_to_rmap(kvm, gfn, 1);
    + spte = rmap_next(kvm, rmapp, NULL);
    + while (spte) {
    + BUG_ON(!spte);
    + BUG_ON(!(*spte & PT_PRESENT_MASK));
    + BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
    + pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
    + if (is_writeble_pte(*spte)) {
    + rmap_remove(kvm, spte);
    + --kvm->stat.lpages;
    + set_shadow_pte(spte, shadow_trap_nonpresent_pte);
    + write_protected = 1;
    + }
    + spte = rmap_next(kvm, rmapp, spte);
    + }
    +
    if (write_protected)
    kvm_flush_remote_tlbs(kvm);
    +
    + account_shadowed(kvm, gfn);
    }

    #ifdef MMU_DEBUG
    @@ -747,11 +856,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
    for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
    ent = pt[i];

    + if (is_shadow_present_pte(ent)) {
    + if (!is_large_pte(ent)) {
    + ent &= PT64_BASE_ADDR_MASK;
    + mmu_page_remove_parent_pte(page_header(ent),
    + &pt[i]);
    + } else {
    + --kvm->stat.lpages;
    + rmap_remove(kvm, &pt[i]);
    + }
    + }
    pt[i] = shadow_trap_nonpresent_pte;
    - if (!is_shadow_present_pte(ent))
    - continue;
    - ent &= PT64_BASE_ADDR_MASK;
    - mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
    }
    kvm_flush_remote_tlbs(kvm);
    }
    @@ -791,6 +906,8 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
    }
    kvm_mmu_page_unlink_children(kvm, sp);
    if (!sp->root_count) {
    + if (!sp->role.metaphysical)
    + unaccount_shadowed(kvm, sp->gfn);
    hlist_del(&sp->hash_link);
    kvm_mmu_free_page(kvm, sp);
    } else {
    @@ -894,7 +1011,8 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
    static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    unsigned pt_access, unsigned pte_access,
    int user_fault, int write_fault, int dirty,
    - int *ptwrite, gfn_t gfn, struct page *page)
    + int *ptwrite, int largepage, gfn_t gfn,
    + struct page *page)
    {
    u64 spte;
    int was_rmapped = 0;
    @@ -907,15 +1025,29 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    write_fault, user_fault, gfn);

    if (is_rmap_pte(*shadow_pte)) {
    - if (host_pfn != page_to_pfn(page)) {
    + /*
    + * If we overwrite a PTE page pointer with a 2MB PMD, unlink
    + * the parent of the now unreachable PTE.
    + */
    + if (largepage && !is_large_pte(*shadow_pte)) {
    + struct kvm_mmu_page *child;
    + u64 pte = *shadow_pte;
    +
    + child = page_header(pte & PT64_BASE_ADDR_MASK);
    + mmu_page_remove_parent_pte(child, shadow_pte);
    + } else if (host_pfn != page_to_pfn(page)) {
    pgprintk("hfn old %lx new %lx\n",
    host_pfn, page_to_pfn(page));
    rmap_remove(vcpu->kvm, shadow_pte);
    + } else {
    + if (largepage)
    + was_rmapped = is_large_pte(*shadow_pte);
    + else
    + was_rmapped = 1;
    }
    - else
    - was_rmapped = 1;
    }

    +
    /*
    * We don't set the accessed bit, since we sometimes want to see
    * whether the guest actually used the pte (in order to detect
    @@ -930,6 +1062,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    spte |= PT_PRESENT_MASK;
    if (pte_access & ACC_USER_MASK)
    spte |= PT_USER_MASK;
    + if (largepage)
    + spte |= PT_PAGE_SIZE_MASK;

    spte |= page_to_phys(page);

    @@ -944,7 +1078,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    }

    shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
    - if (shadow) {
    + if (shadow ||
    + (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
    pgprintk("%s: found shadow page for %lx, marking ro\n",
    __FUNCTION__, gfn);
    pte_access &= ~ACC_WRITE_MASK;
    @@ -963,10 +1098,17 @@ unshadowed:
    mark_page_dirty(vcpu->kvm, gfn);

    pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
    + pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
    + (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
    + (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
    set_shadow_pte(shadow_pte, spte);
    + if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
    + && (spte & PT_PRESENT_MASK))
    + ++vcpu->kvm->stat.lpages;
    +
    page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
    if (!was_rmapped) {
    - rmap_add(vcpu, shadow_pte, gfn);
    + rmap_add(vcpu, shadow_pte, gfn, largepage);
    if (!is_rmap_pte(*shadow_pte))
    kvm_release_page_clean(page);
    } else {
    @@ -984,7 +1126,8 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
    }

    static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
    - gfn_t gfn, struct page *page, int level)
    + int largepage, gfn_t gfn, struct page *page,
    + int level)
    {
    hpa_t table_addr = vcpu->arch.mmu.root_hpa;
    int pt_write = 0;
    @@ -998,7 +1141,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,

    if (level == 1) {
    mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
    - 0, write, 1, &pt_write, gfn, page);
    + 0, write, 1, &pt_write, 0, gfn, page);
    + return pt_write;
    + }
    +
    + if (largepage && level == 2) {
    + mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
    + 0, write, 1, &pt_write, 1, gfn, page);
    return pt_write;
    }

    @@ -1027,12 +1176,18 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
    static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
    {
    int r;
    + int largepage = 0;

    struct page *page;

    down_read(&vcpu->kvm->slots_lock);

    down_read(&current->mm->mmap_sem);
    + if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
    + gfn &= ~(KVM_PAGES_PER_HPAGE-1);
    + largepage = 1;
    + }
    +
    page = gfn_to_page(vcpu->kvm, gfn);
    up_read(&current->mm->mmap_sem);

    @@ -1045,7 +1200,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)

    spin_lock(&vcpu->kvm->mmu_lock);
    kvm_mmu_free_some_pages(vcpu);
    - r = __direct_map(vcpu, v, write, gfn, page, PT32E_ROOT_LEVEL);
    + r = __direct_map(vcpu, v, write, largepage, gfn, page,
    + PT32E_ROOT_LEVEL);
    spin_unlock(&vcpu->kvm->mmu_lock);

    up_read(&vcpu->kvm->slots_lock);
    @@ -1180,6 +1336,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
    {
    struct page *page;
    int r;
    + int largepage = 0;
    + gfn_t gfn = gpa >> PAGE_SHIFT;

    ASSERT(vcpu);
    ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
    @@ -1189,7 +1347,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
    return r;

    down_read(&current->mm->mmap_sem);
    - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    + if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
    + gfn &= ~(KVM_PAGES_PER_HPAGE-1);
    + largepage = 1;
    + }
    + page = gfn_to_page(vcpu->kvm, gfn);
    if (is_error_page(page)) {
    kvm_release_page_clean(page);
    up_read(&current->mm->mmap_sem);
    @@ -1198,7 +1360,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
    spin_lock(&vcpu->kvm->mmu_lock);
    kvm_mmu_free_some_pages(vcpu);
    r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
    - gpa >> PAGE_SHIFT, page, TDP_ROOT_LEVEL);
    + largepage, gfn, page, TDP_ROOT_LEVEL);
    spin_unlock(&vcpu->kvm->mmu_lock);
    up_read(&current->mm->mmap_sem);

    @@ -1397,7 +1559,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,

    pte = *spte;
    if (is_shadow_present_pte(pte)) {
    - if (sp->role.level == PT_PAGE_TABLE_LEVEL)
    + if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
    + is_large_pte(pte))
    rmap_remove(vcpu->kvm, spte);
    else {
    child = page_header(pte & PT64_BASE_ADDR_MASK);
    @@ -1405,6 +1568,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
    }
    }
    set_shadow_pte(spte, shadow_trap_nonpresent_pte);
    + if (is_large_pte(pte))
    + --vcpu->kvm->stat.lpages;
    }

    static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
    @@ -1412,7 +1577,8 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
    u64 *spte,
    const void *new)
    {
    - if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
    + if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
    + && !vcpu->arch.update_pte.largepage) {
    ++vcpu->kvm->stat.mmu_pde_zapped;
    return;
    }
    @@ -1460,6 +1626,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
    u64 gpte = 0;
    struct page *page;

    + vcpu->arch.update_pte.largepage = 0;
    +
    if (bytes != 4 && bytes != 8)
    return;

    @@ -1487,9 +1655,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
    return;
    gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;

    - down_read(&vcpu->kvm->slots_lock);
    + down_read(&current->mm->mmap_sem);
    + if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
    + gfn &= ~(KVM_PAGES_PER_HPAGE-1);
    + vcpu->arch.update_pte.largepage = 1;
    + }
    page = gfn_to_page(vcpu->kvm, gfn);
    - up_read(&vcpu->kvm->slots_lock);
    + up_read(&current->mm->mmap_sem);

    if (is_error_page(page)) {
    kvm_release_page_clean(page);
    diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
    index 4b55f46..17f9d16 100644
    --- a/arch/x86/kvm/paging_tmpl.h
    +++ b/arch/x86/kvm/paging_tmpl.h
    @@ -248,6 +248,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
    pt_element_t gpte;
    unsigned pte_access;
    struct page *npage;
    + int largepage = vcpu->arch.update_pte.largepage;

    gpte = *(const pt_element_t *)pte;
    if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
    @@ -264,7 +265,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
    return;
    get_page(npage);
    mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
    - gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
    + gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
    + npage);
    }

    /*
    @@ -272,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
    */
    static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    struct guest_walker *walker,
    - int user_fault, int write_fault, int *ptwrite,
    - struct page *page)
    + int user_fault, int write_fault, int largepage,
    + int *ptwrite, struct page *page)
    {
    hpa_t shadow_addr;
    int level;
    @@ -301,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    shadow_ent = ((u64 *)__va(shadow_addr)) + index;
    if (level == PT_PAGE_TABLE_LEVEL)
    break;
    - if (is_shadow_present_pte(*shadow_ent)) {
    +
    + if (largepage && level == PT_DIRECTORY_LEVEL)
    + break;
    +
    + if (is_shadow_present_pte(*shadow_ent)
    + && !is_large_pte(*shadow_ent)) {
    shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
    continue;
    }

    + if (is_large_pte(*shadow_ent))
    + rmap_remove(vcpu->kvm, shadow_ent);
    +
    if (level - 1 == PT_PAGE_TABLE_LEVEL
    && walker->level == PT_DIRECTORY_LEVEL) {
    metaphysical = 1;
    @@ -339,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
    user_fault, write_fault,
    walker->ptes[walker->level-1] & PT_DIRTY_MASK,
    - ptwrite, walker->gfn, page);
    + ptwrite, largepage, walker->gfn, page);

    return shadow_ent;
    }
    @@ -369,6 +379,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    int write_pt = 0;
    int r;
    struct page *page;
    + int largepage = 0;

    pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
    kvm_mmu_audit(vcpu, "pre page fault");
    @@ -396,6 +407,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    }

    down_read(&current->mm->mmap_sem);
    + if (walker.level == PT_DIRECTORY_LEVEL) {
    + gfn_t large_gfn;
    + large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
    + if (is_largepage_backed(vcpu, large_gfn)) {
    + walker.gfn = large_gfn;
    + largepage = 1;
    + }
    + }
    page = gfn_to_page(vcpu->kvm, walker.gfn);
    up_read(&current->mm->mmap_sem);

    @@ -410,7 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
    spin_lock(&vcpu->kvm->mmu_lock);
    kvm_mmu_free_some_pages(vcpu);
    shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
    - &write_pt, page);
    + largepage, &write_pt, page);
    +
    pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
    shadow_pte, *shadow_pte, write_pt);

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index e8e6492..0458bd5 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -88,6 +88,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
    { "mmu_recycled", VM_STAT(mmu_recycled) },
    { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
    { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
    + { "largepages", VM_STAT(lpages) },
    { NULL }
    };

    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 7535839..58291a6 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -38,6 +38,13 @@
    #define INVALID_PAGE (~(hpa_t)0)
    #define UNMAPPED_GVA (~(gpa_t)0)

    +/* shadow tables are PAE even on non-PAE hosts */
    +#define KVM_HPAGE_SHIFT 21
    +#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
    +#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))
    +
    +#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
    +
    #define DE_VECTOR 0
    #define UD_VECTOR 6
    #define NM_VECTOR 7
    @@ -229,6 +236,7 @@ struct kvm_vcpu_arch {
    struct {
    gfn_t gfn; /* presumed gfn during guest pte update */
    struct page *page; /* page corresponding to that gfn */
    + int largepage;
    } update_pte;

    struct i387_fxsave_struct host_fx_image;
    @@ -306,6 +314,7 @@ struct kvm_vm_stat {
    u32 mmu_recycled;
    u32 mmu_cache_miss;
    u32 remote_tlb_flush;
    + u32 lpages;
    };

    struct kvm_vcpu_stat {
    diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
    index 994278f..9750bb3 100644
    --- a/include/linux/kvm_host.h
    +++ b/include/linux/kvm_host.h
    @@ -103,6 +103,10 @@ struct kvm_memory_slot {
    unsigned long flags;
    unsigned long *rmap;
    unsigned long *dirty_bitmap;
    + struct {
    + unsigned long rmap_pde;
    + int write_count;
    + } *lpage_info;
    unsigned long userspace_addr;
    int user_alloc;
    };
    @@ -169,6 +173,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
    int user_alloc);
    gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
    struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
    +unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
    void kvm_release_page_clean(struct page *page);
    void kvm_release_page_dirty(struct page *page);
    int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
    diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
    index c41eb57..0dabf58 100644
    --- a/virt/kvm/kvm_main.c
    +++ b/virt/kvm/kvm_main.c
    @@ -212,9 +212,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
    if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
    vfree(free->dirty_bitmap);

    + if (!dont || free->lpage_info != dont->lpage_info)
    + vfree(free->lpage_info);
    +
    free->npages = 0;
    free->dirty_bitmap = NULL;
    free->rmap = NULL;
    + free->lpage_info = NULL;
    }

    void kvm_free_physmem(struct kvm *kvm)
    @@ -324,6 +328,22 @@ int __kvm_set_memory_region(struct kvm *kvm,
    new.user_alloc = user_alloc;
    new.userspace_addr = mem->userspace_addr;
    }
    + if (npages && !new.lpage_info) {
    + int largepages = npages / KVM_PAGES_PER_HPAGE;
    + if (npages % KVM_PAGES_PER_HPAGE)
    + largepages++;
    + new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
    +
    + if (!new.lpage_info)
    + goto out_free;
    +
    + memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
    +
    + if (base_gfn % KVM_PAGES_PER_HPAGE)
    + new.lpage_info[0].write_count = 1;
    + if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
    + new.lpage_info[largepages-1].write_count = 1;
    + }

    /* Allocate page dirty bitmap if needed */
    if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
    @@ -467,7 +487,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
    }
    EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

    -static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
    +unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
    {
    struct kvm_memory_slot *slot;

    --
    1.5.4.5
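    A condensed, standalone sketch of the slot-setup arithmetic in the
    __kvm_set_memory_region() hunk above: one lpage_info entry is allocated per
    KVM_PAGES_PER_HPAGE-sized region of the slot (rounded up), and the first and
    last entries are pre-marked through write_count when the slot does not start
    or end on a huge-page boundary, so the large-page paths treat those partial
    regions as unavailable. Constants and field names follow the diff; the
    program below is plain userspace C for illustration, not kernel code.

    #include <stdio.h>
    #include <stdlib.h>

    #define KVM_HPAGE_SHIFT     21	/* shadow tables are PAE: 2MB huge pages */
    #define KVM_HPAGE_SIZE      (1UL << KVM_HPAGE_SHIFT)
    #define SMALL_PAGE_SIZE     4096UL
    #define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / SMALL_PAGE_SIZE)	/* 512 */

    struct lpage_info {
    	unsigned long rmap_pde;	/* head of the huge-page rmap chain */
    	int write_count;	/* > 0: region may not be mapped large */
    };

    int main(void)
    {
    	unsigned long base_gfn = 0x1f0;		/* example: slot not 2MB aligned */
    	unsigned long npages = 0x1000;
    	unsigned long largepages = npages / KVM_PAGES_PER_HPAGE;
    	struct lpage_info *lpage_info;

    	if (npages % KVM_PAGES_PER_HPAGE)
    		largepages++;			/* partial tail region */

    	lpage_info = calloc(largepages, sizeof(*lpage_info));
    	if (!lpage_info)
    		return 1;

    	/* Regions straddling the slot edges must stay 4kB mapped. */
    	if (base_gfn % KVM_PAGES_PER_HPAGE)
    		lpage_info[0].write_count = 1;
    	if ((base_gfn + npages) % KVM_PAGES_PER_HPAGE)
    		lpage_info[largepages - 1].write_count = 1;

    	printf("%lu huge-page regions, first=%d last=%d\n", largepages,
    	       lpage_info[0].write_count, lpage_info[largepages - 1].write_count);
    	free(lpage_info);
    	return 0;
    }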


  12. [PATCH 18/35] KVM: add basic paravirt support

    From: Marcelo Tosatti

    Add basic KVM paravirt support. Avoid vm-exits on IO delays.

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/x86.c | 1 +
    include/asm-x86/kvm_para.h | 3 ++-
    include/linux/kvm.h | 1 +
    3 files changed, 4 insertions(+), 1 deletions(-)

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 621a8e3..1b9e695 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -820,6 +820,7 @@ int kvm_dev_ioctl_check_extension(long ext)
    case KVM_CAP_EXT_CPUID:
    case KVM_CAP_CLOCKSOURCE:
    case KVM_CAP_PIT:
    + case KVM_CAP_NOP_IO_DELAY:
    r = 1;
    break;
    case KVM_CAP_VAPIC:
    diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
    index 5ab7d3d..ed5df3a 100644
    --- a/include/asm-x86/kvm_para.h
    +++ b/include/asm-x86/kvm_para.h
    @@ -10,7 +10,8 @@
    * paravirtualization, the appropriate feature bit should be checked.
    */
    #define KVM_CPUID_FEATURES 0x40000001
    -#define KVM_FEATURE_CLOCKSOURCE 0
    +#define KVM_FEATURE_CLOCKSOURCE 0
    +#define KVM_FEATURE_NOP_IO_DELAY 1

    #define MSR_KVM_WALL_CLOCK 0x11
    #define MSR_KVM_SYSTEM_TIME 0x12
    diff --git a/include/linux/kvm.h b/include/linux/kvm.h
    index a2f3274..76f0947 100644
    --- a/include/linux/kvm.h
    +++ b/include/linux/kvm.h
    @@ -237,6 +237,7 @@ struct kvm_vapic_addr {
    #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
    #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
    #define KVM_CAP_PIT 11
    +#define KVM_CAP_NOP_IO_DELAY 12

    /*
    * ioctls for VM fds
    --
    1.5.4.5
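    On the guest side, this feature bit would typically be consumed by replacing
    the paravirt io_delay hook with a no-op once CPUID leaf KVM_CPUID_FEATURES
    advertises it, so the usual port 0x80 write (and the vm-exit it causes) is
    skipped. A hedged sketch follows; kvm_para_has_feature() and pv_cpu_ops are
    assumed from the existing kvm_para/paravirt infrastructure, and the actual
    guest-side wiring is a separate patch in this series.

    #include <linux/init.h>
    #include <linux/kvm_para.h>
    #include <asm/paravirt.h>

    /* Doing nothing here means no port 0x80 write, hence no vm-exit. */
    static void kvm_io_delay(void)
    {
    }

    static void __init kvm_guest_setup_io_delay(void)
    {
    	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
    		pv_cpu_ops.io_delay = kvm_io_delay;
    }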


  13. [PATCH 01/35] KVM: Use x86's segment descriptor struct instead of private definition

    The x86 desc_struct unification allows us to remove segment_descriptor.h.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/segment_descriptor.h | 29 -----------------------------
    arch/x86/kvm/vmx.c | 3 +--
    arch/x86/kvm/x86.c | 15 +++++++--------
    3 files changed, 8 insertions(+), 39 deletions(-)
    delete mode 100644 arch/x86/kvm/segment_descriptor.h

    diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
    deleted file mode 100644
    index 56fc4c8..0000000
    --- a/arch/x86/kvm/segment_descriptor.h
    +++ /dev/null
    @@ -1,29 +0,0 @@
    -#ifndef __SEGMENT_DESCRIPTOR_H
    -#define __SEGMENT_DESCRIPTOR_H
    -
    -struct segment_descriptor {
    - u16 limit_low;
    - u16 base_low;
    - u8 base_mid;
    - u8 type : 4;
    - u8 system : 1;
    - u8 dpl : 2;
    - u8 present : 1;
    - u8 limit_high : 4;
    - u8 avl : 1;
    - u8 long_mode : 1;
    - u8 default_op : 1;
    - u8 granularity : 1;
    - u8 base_high;
    -} __attribute__((packed));
    -
    -#ifdef CONFIG_X86_64
    -/* LDT or TSS descriptor in the GDT. 16 bytes. */
    -struct segment_descriptor_64 {
    - struct segment_descriptor s;
    - u32 base_higher;
    - u32 pad_zero;
    -};
    -
    -#endif
    -#endif
    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index 2d5ccec..f46ad03 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -17,7 +17,6 @@

    #include "irq.h"
    #include "vmx.h"
    -#include "segment_descriptor.h"
    #include "mmu.h"

    #include
    @@ -388,7 +387,7 @@ static void reload_tss(void)
    * VT restores TR but not its size. Useless.
    */
    struct descriptor_table gdt;
    - struct segment_descriptor *descs;
    + struct desc_struct *descs;

    get_gdt(&gdt);
    descs = (void *)gdt.base;
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index b7c32f6..a063f44 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -15,7 +15,6 @@
    */

    #include
    -#include "segment_descriptor.h"
    #include "irq.h"
    #include "mmu.h"

    @@ -29,6 +28,7 @@

    #include
    #include
    +#include

    #define MAX_IO_MSRS 256
    #define CR0_RESERVED_BITS \
    @@ -94,7 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
    unsigned long segment_base(u16 selector)
    {
    struct descriptor_table gdt;
    - struct segment_descriptor *d;
    + struct desc_struct *d;
    unsigned long table_base;
    unsigned long v;

    @@ -110,13 +110,12 @@ unsigned long segment_base(u16 selector)
    asm("sldt %0" : "=g"(ldt_selector));
    table_base = segment_base(ldt_selector);
    }
    - d = (struct segment_descriptor *)(table_base + (selector & ~7));
    - v = d->base_low | ((unsigned long)d->base_mid << 16) |
    - ((unsigned long)d->base_high << 24);
    + d = (struct desc_struct *)(table_base + (selector & ~7));
    + v = d->base0 | ((unsigned long)d->base1 << 16) |
    + ((unsigned long)d->base2 << 24);
    #ifdef CONFIG_X86_64
    - if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
    - v |= ((unsigned long) \
    - ((struct segment_descriptor_64 *)d)->base_higher) << 32;
    + if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
    + v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
    #endif
    return v;
    }
    --
    1.5.4.5
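    The rewritten segment_base() above rebuilds a descriptor's linear base from
    the split base0/base1/base2 fields of the generic desc_struct. A standalone
    illustration of that reassembly, with the struct abridged to just the fields
    involved (the real desc_struct carries the full limit and attribute bits in
    between):

    #include <stdio.h>
    #include <stdint.h>

    struct desc_base_fields {
    	uint16_t base0;	/* bits  0..15 of the base */
    	uint8_t  base1;	/* bits 16..23 */
    	uint8_t  base2;	/* bits 24..31 */
    };

    static unsigned long descriptor_base(const struct desc_base_fields *d)
    {
    	return d->base0
    	     | ((unsigned long)d->base1 << 16)
    	     | ((unsigned long)d->base2 << 24);
    }

    int main(void)
    {
    	struct desc_base_fields d = { .base0 = 0x5678, .base1 = 0x34, .base2 = 0x12 };

    	printf("base = %#lx\n", descriptor_base(&d));	/* prints 0x12345678 */
    	return 0;
    }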


  14. [PATCH 32/35] KVM: no longer EXPERIMENTAL

    Long overdue.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/Kconfig | 2 +-
    1 files changed, 1 insertions(+), 1 deletions(-)

    diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
    index 41962e7..76c70ab 100644
    --- a/arch/x86/kvm/Kconfig
    +++ b/arch/x86/kvm/Kconfig
    @@ -19,7 +19,7 @@ if VIRTUALIZATION

    config KVM
    tristate "Kernel-based Virtual Machine (KVM) support"
    - depends on HAVE_KVM && EXPERIMENTAL
    + depends on HAVE_KVM
    select PREEMPT_NOTIFIERS
    select ANON_INODES
    ---help---
    --
    1.5.4.5


  15. [PATCH 14/35] KVM: Remove pointless desc_ptr #ifdef

    The desc_struct changes left an unnecessary #ifdef; remove it.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/svm.c | 4 ----
    1 files changed, 0 insertions(+), 4 deletions(-)

    diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
    index b2c667f..51741f9 100644
    --- a/arch/x86/kvm/svm.c
    +++ b/arch/x86/kvm/svm.c
    @@ -290,11 +290,7 @@ static void svm_hardware_enable(void *garbage)

    struct svm_cpu_data *svm_data;
    uint64_t efer;
    -#ifdef CONFIG_X86_64
    - struct desc_ptr gdt_descr;
    -#else
    struct desc_ptr gdt_descr;
    -#endif
    struct desc_struct *gdt;
    int me = raw_smp_processor_id();

    --
    1.5.4.5


  16. [PATCH 35/35] KVM: x86: hardware task switching support

    From: Izik Eidus

    This emulates the x86 hardware task-switch mechanism in software, as it is
    not supported by either VMX or SVM. It allows operating systems that use it,
    such as FreeDOS, to run as KVM guests.

    Signed-off-by: Izik Eidus
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/svm.c | 15 ++-
    arch/x86/kvm/svm.h | 3 +
    arch/x86/kvm/tss.h | 59 +++++++
    arch/x86/kvm/vmx.c | 15 ++
    arch/x86/kvm/x86.c | 408 ++++++++++++++++++++++++++++++++++++++++++++
    include/asm-x86/kvm_host.h | 9 +
    6 files changed, 506 insertions(+), 3 deletions(-)
    create mode 100644 arch/x86/kvm/tss.h

    diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
    index c1c1b97..ad27346 100644
    --- a/arch/x86/kvm/svm.c
    +++ b/arch/x86/kvm/svm.c
    @@ -1112,9 +1112,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
    static int task_switch_interception(struct vcpu_svm *svm,
    struct kvm_run *kvm_run)
    {
    - pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __func__);
    - kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
    - return 0;
    + u16 tss_selector;
    +
    + tss_selector = (u16)svm->vmcb->control.exit_info_1;
    + if (svm->vmcb->control.exit_info_2 &
    + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
    + return kvm_task_switch(&svm->vcpu, tss_selector,
    + TASK_SWITCH_IRET);
    + if (svm->vmcb->control.exit_info_2 &
    + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
    + return kvm_task_switch(&svm->vcpu, tss_selector,
    + TASK_SWITCH_JMP);
    + return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
    }

    static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
    diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
    index 5fd5049..1b8afa7 100644
    --- a/arch/x86/kvm/svm.h
    +++ b/arch/x86/kvm/svm.h
    @@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
    #define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
    #define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR

    +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
    +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
    +
    #define SVM_EXIT_READ_CR0 0x000
    #define SVM_EXIT_READ_CR3 0x003
    #define SVM_EXIT_READ_CR4 0x004
    diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
    new file mode 100644
    index 0000000..622aa10
    --- /dev/null
    +++ b/arch/x86/kvm/tss.h
    @@ -0,0 +1,59 @@
    +#ifndef __TSS_SEGMENT_H
    +#define __TSS_SEGMENT_H
    +
    +struct tss_segment_32 {
    + u32 prev_task_link;
    + u32 esp0;
    + u32 ss0;
    + u32 esp1;
    + u32 ss1;
    + u32 esp2;
    + u32 ss2;
    + u32 cr3;
    + u32 eip;
    + u32 eflags;
    + u32 eax;
    + u32 ecx;
    + u32 edx;
    + u32 ebx;
    + u32 esp;
    + u32 ebp;
    + u32 esi;
    + u32 edi;
    + u32 es;
    + u32 cs;
    + u32 ss;
    + u32 ds;
    + u32 fs;
    + u32 gs;
    + u32 ldt_selector;
    + u16 t;
    + u16 io_map;
    +};
    +
    +struct tss_segment_16 {
    + u16 prev_task_link;
    + u16 sp0;
    + u16 ss0;
    + u16 sp1;
    + u16 ss1;
    + u16 sp2;
    + u16 ss2;
    + u16 ip;
    + u16 flag;
    + u16 ax;
    + u16 cx;
    + u16 dx;
    + u16 bx;
    + u16 sp;
    + u16 bp;
    + u16 si;
    + u16 di;
    + u16 es;
    + u16 cs;
    + u16 ss;
    + u16 ds;
    + u16 ldt;
    +};
    +
    +#endif
    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index 9b56032..cbca46a 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -2249,6 +2249,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    return 1;
    }

    +static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    +{
    + unsigned long exit_qualification;
    + u16 tss_selector;
    + int reason;
    +
    + exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
    +
    + reason = (u32)exit_qualification >> 30;
    + tss_selector = exit_qualification;
    +
    + return kvm_task_switch(vcpu, tss_selector, reason);
    +}
    +
    /*
    * The exit handlers return 1 if the exit was handled fully and guest execution
    * may resume. Otherwise they set the kvm_run parameter to indicate what needs
    @@ -2271,6 +2285,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
    [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
    [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
    [EXIT_REASON_WBINVD] = handle_wbinvd,
    + [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
    };

    static const int kvm_vmx_max_exit_handlers =
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 63afca1..acecde4 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -18,6 +18,7 @@
    #include "irq.h"
    #include "mmu.h"
    #include "i8254.h"
    +#include "tss.h"

    #include
    #include
    @@ -3077,6 +3078,413 @@ static void set_segment(struct kvm_vcpu *vcpu,
    kvm_x86_ops->set_segment(vcpu, var, seg);
    }

    +static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
    + struct kvm_segment *kvm_desct)
    +{
    + kvm_desct->base = seg_desc->base0;
    + kvm_desct->base |= seg_desc->base1 << 16;
    + kvm_desct->base |= seg_desc->base2 << 24;
    + kvm_desct->limit = seg_desc->limit0;
    + kvm_desct->limit |= seg_desc->limit << 16;
    + kvm_desct->selector = selector;
    + kvm_desct->type = seg_desc->type;
    + kvm_desct->present = seg_desc->p;
    + kvm_desct->dpl = seg_desc->dpl;
    + kvm_desct->db = seg_desc->d;
    + kvm_desct->s = seg_desc->s;
    + kvm_desct->l = seg_desc->l;
    + kvm_desct->g = seg_desc->g;
    + kvm_desct->avl = seg_desc->avl;
    + if (!selector)
    + kvm_desct->unusable = 1;
    + else
    + kvm_desct->unusable = 0;
    + kvm_desct->padding = 0;
    +}
    +
    +static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
    + u16 selector,
    + struct descriptor_table *dtable)
    +{
    + if (selector & 1 << 2) {
    + struct kvm_segment kvm_seg;
    +
    + get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
    +
    + if (kvm_seg.unusable)
    + dtable->limit = 0;
    + else
    + dtable->limit = kvm_seg.limit;
    + dtable->base = kvm_seg.base;
    + }
    + else
    + kvm_x86_ops->get_gdt(vcpu, dtable);
    +}
    +
    +/* allowed just for 8 bytes segments */
    +static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    + struct desc_struct *seg_desc)
    +{
    + struct descriptor_table dtable;
    + u16 index = selector >> 3;
    +
    + get_segment_descritptor_dtable(vcpu, selector, &dtable);
    +
    + if (dtable.limit < index * 8 + 7) {
    + kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
    + return 1;
    + }
    + return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
    +}
    +
    +/* allowed just for 8 bytes segments */
    +static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    + struct desc_struct *seg_desc)
    +{
    + struct descriptor_table dtable;
    + u16 index = selector >> 3;
    +
    + get_segment_descritptor_dtable(vcpu, selector, &dtable);
    +
    + if (dtable.limit < index * 8 + 7)
    + return 1;
    + return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
    +}
    +
    +static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
    + struct desc_struct *seg_desc)
    +{
    + u32 base_addr;
    +
    + base_addr = seg_desc->base0;
    + base_addr |= (seg_desc->base1 << 16);
    + base_addr |= (seg_desc->base2 << 24);
    +
    + return base_addr;
    +}
    +
    +static int load_tss_segment32(struct kvm_vcpu *vcpu,
    + struct desc_struct *seg_desc,
    + struct tss_segment_32 *tss)
    +{
    + u32 base_addr;
    +
    + base_addr = get_tss_base_addr(vcpu, seg_desc);
    +
    + return kvm_read_guest(vcpu->kvm, base_addr, tss,
    + sizeof(struct tss_segment_32));
    +}
    +
    +static int save_tss_segment32(struct kvm_vcpu *vcpu,
    + struct desc_struct *seg_desc,
    + struct tss_segment_32 *tss)
    +{
    + u32 base_addr;
    +
    + base_addr = get_tss_base_addr(vcpu, seg_desc);
    +
    + return kvm_write_guest(vcpu->kvm, base_addr, tss,
    + sizeof(struct tss_segment_32));
    +}
    +
    +static int load_tss_segment16(struct kvm_vcpu *vcpu,
    + struct desc_struct *seg_desc,
    + struct tss_segment_16 *tss)
    +{
    + u32 base_addr;
    +
    + base_addr = get_tss_base_addr(vcpu, seg_desc);
    +
    + return kvm_read_guest(vcpu->kvm, base_addr, tss,
    + sizeof(struct tss_segment_16));
    +}
    +
    +static int save_tss_segment16(struct kvm_vcpu *vcpu,
    + struct desc_struct *seg_desc,
    + struct tss_segment_16 *tss)
    +{
    + u32 base_addr;
    +
    + base_addr = get_tss_base_addr(vcpu, seg_desc);
    +
    + return kvm_write_guest(vcpu->kvm, base_addr, tss,
    + sizeof(struct tss_segment_16));
    +}
    +
    +static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
    +{
    + struct kvm_segment kvm_seg;
    +
    + get_segment(vcpu, &kvm_seg, seg);
    + return kvm_seg.selector;
    +}
    +
    +static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
    + u16 selector,
    + struct kvm_segment *kvm_seg)
    +{
    + struct desc_struct seg_desc;
    +
    + if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
    + return 1;
    + seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
    + return 0;
    +}
    +
    +static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    + int type_bits, int seg)
    +{
    + struct kvm_segment kvm_seg;
    +
    + if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
    + return 1;
    + kvm_seg.type |= type_bits;
    +
    + if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
    + seg != VCPU_SREG_LDTR)
    + if (!kvm_seg.s)
    + kvm_seg.unusable = 1;
    +
    + set_segment(vcpu, &kvm_seg, seg);
    + return 0;
    +}
    +
    +static void save_state_to_tss32(struct kvm_vcpu *vcpu,
    + struct tss_segment_32 *tss)
    +{
    + tss->cr3 = vcpu->arch.cr3;
    + tss->eip = vcpu->arch.rip;
    + tss->eflags = kvm_x86_ops->get_rflags(vcpu);
    + tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
    + tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
    + tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
    + tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
    + tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
    + tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
    + tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
    + tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
    +
    + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    + tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
    + tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
    + tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    + tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    +}
    +
    +static int load_state_from_tss32(struct kvm_vcpu *vcpu,
    + struct tss_segment_32 *tss)
    +{
    + kvm_set_cr3(vcpu, tss->cr3);
    +
    + vcpu->arch.rip = tss->eip;
    + kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
    +
    + vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
    + vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
    + vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
    + vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
    + vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
    + vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
    + vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
    + vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
    +
    + if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
    + return 1;
    + return 0;
    +}
    +
    +static void save_state_to_tss16(struct kvm_vcpu *vcpu,
    + struct tss_segment_16 *tss)
    +{
    + tss->ip = vcpu->arch.rip;
    + tss->flag = kvm_x86_ops->get_rflags(vcpu);
    + tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
    + tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
    + tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
    + tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
    + tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
    + tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
    + tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
    + tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
    +
    + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    + tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    + tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    +}
    +
    +static int load_state_from_tss16(struct kvm_vcpu *vcpu,
    + struct tss_segment_16 *tss)
    +{
    + vcpu->arch.rip = tss->ip;
    + kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
    + vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
    + vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
    + vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
    + vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
    + vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
    + vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
    + vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
    + vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
    +
    + if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
    + return 1;
    +
    + if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
    + return 1;
    + return 0;
    +}
    +
    +int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
    + struct desc_struct *cseg_desc,
    + struct desc_struct *nseg_desc)
    +{
    + struct tss_segment_16 tss_segment_16;
    + int ret = 0;
    +
    + if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
    + goto out;
    +
    + save_state_to_tss16(vcpu, &tss_segment_16);
    + save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
    +
    + load_tss_segment16(vcpu, nseg_desc, &tss_segment_16);
    + if (load_state_from_tss16(vcpu, &tss_segment_16))
    + goto out;
    +
    + ret = 1;
    +out:
    + return ret;
    +}
    +
    +int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
    + struct desc_struct *cseg_desc,
    + struct desc_struct *nseg_desc)
    +{
    + struct tss_segment_32 tss_segment_32;
    + int ret = 0;
    +
    + if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
    + goto out;
    +
    + save_state_to_tss32(vcpu, &tss_segment_32);
    + save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
    +
    + if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
    + goto out;
    + if (load_state_from_tss32(vcpu, &tss_segment_32))
    + goto out;
    +
    + ret = 1;
    +out:
    + return ret;
    +}
    +
    +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
    +{
    + struct kvm_segment tr_seg;
    + struct desc_struct cseg_desc;
    + struct desc_struct nseg_desc;
    + int ret = 0;
    +
    + get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
    +
    + if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
    + goto out;
    +
    + if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
    + goto out;
    +
    +
    + if (reason != TASK_SWITCH_IRET) {
    + int cpl;
    +
    + cpl = kvm_x86_ops->get_cpl(vcpu);
    + if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
    + kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
    + return 1;
    + }
    + }
    +
    + if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
    + kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
    + return 1;
    + }
    +
    + if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
    + cseg_desc.type &= ~(1 << 8); //clear the B flag
    + save_guest_segment_descriptor(vcpu, tr_seg.selector,
    + &cseg_desc);
    + }
    +
    + if (reason == TASK_SWITCH_IRET) {
    + u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    + kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
    + }
    +
    + kvm_x86_ops->skip_emulated_instruction(vcpu);
    + kvm_x86_ops->cache_regs(vcpu);
    +
    + if (nseg_desc.type & 8)
    + ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
    + &nseg_desc);
    + else
    + ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
    + &nseg_desc);
    +
    + if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
    + u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    + kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
    + }
    +
    + if (TASK_SWITCH_IRET != 1) {
    + nseg_desc.type |= (1 << 8);
    + save_guest_segment_descriptor(vcpu, tss_selector,
    + &nseg_desc);
    + }
    +
    + kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
    + seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
    + tr_seg.type = 11;
    + set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
    +out:
    + kvm_x86_ops->decache_regs(vcpu);
    + return ret;
    +}
    +EXPORT_SYMBOL_GPL(kvm_task_switch);
    +
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    struct kvm_sregs *sregs)
    {
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 06bd154..4382ca0 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -491,6 +491,8 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
    int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
    unsigned long value);

    +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
    +
    void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
    void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
    void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
    @@ -655,4 +657,11 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
    #define TSS_REDIRECTION_SIZE (256 / 8)
    #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)

    +enum {
    + TASK_SWITCH_CALL = 0,
    + TASK_SWITCH_IRET = 1,
    + TASK_SWITCH_JMP = 2,
    + TASK_SWITCH_GATE = 3,
    +};
    +
    #endif
    --
    1.5.4.5
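    For reference, the VMX handler above pulls the two pieces of information the
    emulator needs out of the task-switch exit qualification: the new TSS
    selector in bits 15:0 and the switch reason in bits 31:30 (SVM reports the
    same data through exit_info_1/exit_info_2). A minimal decoding sketch of
    that bit layout, written as plain C rather than kernel code:

    #include <stdio.h>
    #include <stdint.h>

    enum { TASK_SWITCH_CALL = 0, TASK_SWITCH_IRET = 1,
           TASK_SWITCH_JMP = 2, TASK_SWITCH_GATE = 3 };

    static void decode_task_switch(unsigned long exit_qualification,
    			       uint16_t *tss_selector, int *reason)
    {
    	*tss_selector = (uint16_t)exit_qualification;	/* bits 15:0 */
    	*reason = (uint32_t)exit_qualification >> 30;	/* bits 31:30 */
    }

    int main(void)
    {
    	uint16_t sel;
    	int reason;

    	/* Example: selector 0x28, reason field 3 (switch through a task gate). */
    	decode_task_switch((3UL << 30) | 0x28, &sel, &reason);
    	printf("selector=%#x reason=%d gate=%d\n", sel, reason,
    	       reason == TASK_SWITCH_GATE);
    	return 0;
    }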


  17. [PATCH 31/35] KVM: MMU: Introduce and use spte_to_page()

    Encapsulate the pte mask'n'shift in a function.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 17 ++++++++++++-----
    1 files changed, 12 insertions(+), 5 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index dd4b95b..6fc3421 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -240,6 +240,13 @@ static int is_rmap_pte(u64 pte)
    return is_shadow_present_pte(pte);
    }

    +static struct page *spte_to_page(u64 pte)
    +{
    + hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
    +
    + return pfn_to_page(hfn);
    +}
    +
    static gfn_t pse36_gfn_delta(u32 gpte)
    {
    int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
    @@ -541,7 +548,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
    if (!is_rmap_pte(*spte))
    return;
    sp = page_header(__pa(spte));
    - page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
    + page = spte_to_page(*spte);
    mark_page_accessed(page);
    if (is_writeble_pte(*spte))
    kvm_release_page_dirty(page);
    @@ -630,7 +637,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
    struct page *page;

    spte = rmap_next(kvm, rmapp, NULL);
    - page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
    + page = spte_to_page(*spte);
    SetPageDirty(page);
    }

    @@ -1033,7 +1040,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    u64 spte;
    int was_rmapped = 0;
    int was_writeble = is_writeble_pte(*shadow_pte);
    - hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;

    pgprintk("%s: spte %llx access %x write_fault %d"
    " user_fault %d gfn %lx\n",
    @@ -1051,9 +1057,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,

    child = page_header(pte & PT64_BASE_ADDR_MASK);
    mmu_page_remove_parent_pte(child, shadow_pte);
    - } else if (host_pfn != page_to_pfn(page)) {
    + } else if (page != spte_to_page(*shadow_pte)) {
    pgprintk("hfn old %lx new %lx\n",
    - host_pfn, page_to_pfn(page));
    + page_to_pfn(spte_to_page(*shadow_pte)),
    + page_to_pfn(page));
    rmap_remove(vcpu->kvm, shadow_pte);
    } else {
    if (largepage)
    --
    1.5.4.5


  18. [PATCH 21/35] KVM: MMU: hypercall based pte updates and TLB flushes

    From: Marcelo Tosatti

    Hypercall-based pte updates are faster than faults, and also allow use of
    the lazy MMU mode to batch operations.

    Don't report the feature if two-dimensional paging (TDP) is enabled.

    [avi:
    - one mmu_op hypercall instead of one per op
    - allow 64-bit gpa on hypercall
    - don't pass host errors (-ENOMEM) to guest]

    [akpm: warning fix on i386]

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Andrew Morton
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 136 +++++++++++++++++++++++++++++++++++++++++++-
    arch/x86/kvm/x86.c | 18 ++++++-
    include/asm-x86/kvm_host.h | 4 +
    include/asm-x86/kvm_para.h | 29 +++++++++
    include/linux/kvm.h | 1 +
    include/linux/kvm_para.h | 5 +-
    6 files changed, 190 insertions(+), 3 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index 414405b..072e942 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -28,6 +28,7 @@
    #include
    #include
    #include
    +#include

    #include
    #include
    @@ -40,7 +41,7 @@
    * 2. while doing 1. it walks guest-physical to host-physical
    * If the hardware supports that we don't need to do shadow paging.
    */
    -static bool tdp_enabled = false;
    +bool tdp_enabled = false;

    #undef MMU_DEBUG

    @@ -167,6 +168,13 @@ static int dbg = 1;
    #define ACC_USER_MASK PT_USER_MASK
    #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

    +struct kvm_pv_mmu_op_buffer {
    + void *ptr;
    + unsigned len;
    + unsigned processed;
    + char buf[512] __aligned(sizeof(long));
    +};
    +
    struct kvm_rmap_desc {
    u64 *shadow_ptes[RMAP_EXT];
    struct kvm_rmap_desc *more;
    @@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
    return nr_mmu_pages;
    }

    +static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
    + unsigned len)
    +{
    + if (len > buffer->len)
    + return NULL;
    + return buffer->ptr;
    +}
    +
    +static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
    + unsigned len)
    +{
    + void *ret;
    +
    + ret = pv_mmu_peek_buffer(buffer, len);
    + if (!ret)
    + return ret;
    + buffer->ptr += len;
    + buffer->len -= len;
    + buffer->processed += len;
    + return ret;
    +}
    +
    +static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
    + gpa_t addr, gpa_t value)
    +{
    + int bytes = 8;
    + int r;
    +
    + if (!is_long_mode(vcpu) && !is_pae(vcpu))
    + bytes = 4;
    +
    + r = mmu_topup_memory_caches(vcpu);
    + if (r)
    + return r;
    +
    + if (!__emulator_write_phys(vcpu, addr, &value, bytes))
    + return -EFAULT;
    +
    + return 1;
    +}
    +
    +static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
    +{
    + kvm_x86_ops->tlb_flush(vcpu);
    + return 1;
    +}
    +
    +static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
    +{
    + spin_lock(&vcpu->kvm->mmu_lock);
    + mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
    + spin_unlock(&vcpu->kvm->mmu_lock);
    + return 1;
    +}
    +
    +static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
    + struct kvm_pv_mmu_op_buffer *buffer)
    +{
    + struct kvm_mmu_op_header *header;
    +
    + header = pv_mmu_peek_buffer(buffer, sizeof *header);
    + if (!header)
    + return 0;
    + switch (header->op) {
    + case KVM_MMU_OP_WRITE_PTE: {
    + struct kvm_mmu_op_write_pte *wpte;
    +
    + wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
    + if (!wpte)
    + return 0;
    + return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
    + wpte->pte_val);
    + }
    + case KVM_MMU_OP_FLUSH_TLB: {
    + struct kvm_mmu_op_flush_tlb *ftlb;
    +
    + ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
    + if (!ftlb)
    + return 0;
    + return kvm_pv_mmu_flush_tlb(vcpu);
    + }
    + case KVM_MMU_OP_RELEASE_PT: {
    + struct kvm_mmu_op_release_pt *rpt;
    +
    + rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
    + if (!rpt)
    + return 0;
    + return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
    + }
    + default: return 0;
    + }
    +}
    +
    +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
    + gpa_t addr, unsigned long *ret)
    +{
    + int r;
    + struct kvm_pv_mmu_op_buffer buffer;
    +
    + down_read(&vcpu->kvm->slots_lock);
    + down_read(&current->mm->mmap_sem);
    +
    + buffer.ptr = buffer.buf;
    + buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
    + buffer.processed = 0;
    +
    + r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
    + if (r)
    + goto out;
    +
    + while (buffer.len) {
    + r = kvm_pv_mmu_op_one(vcpu, &buffer);
    + if (r < 0)
    + goto out;
    + if (r == 0)
    + break;
    + }
    +
    + r = 1;
    +out:
    + *ret = buffer.processed;
    + up_read(&current->mm->mmap_sem);
    + up_read(&vcpu->kvm->slots_lock);
    + return r;
    +}
    +
    #ifdef AUDIT

    static const char *audit_msg;
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 03ba402..63afca1 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
    case KVM_CAP_NR_MEMSLOTS:
    r = KVM_MEMORY_SLOTS;
    break;
    + case KVM_CAP_PV_MMU:
    + r = !tdp_enabled;
    + break;
    default:
    r = 0;
    break;
    @@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_halt);

    +static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
    + unsigned long a1)
    +{
    + if (is_long_mode(vcpu))
    + return a0;
    + else
    + return a0 | ((gpa_t)a1 << 32);
    +}
    +
    int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    {
    unsigned long nr, a0, a1, a2, a3, ret;
    + int r = 1;

    kvm_x86_ops->cache_regs(vcpu);

    @@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    case KVM_HC_VAPIC_POLL_IRQ:
    ret = 0;
    break;
    + case KVM_HC_MMU_OP:
    + r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
    + break;
    default:
    ret = -KVM_ENOSYS;
    break;
    @@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    vcpu->arch.regs[VCPU_REGS_RAX] = ret;
    kvm_x86_ops->decache_regs(vcpu);
    ++vcpu->stat.hypercalls;
    - return 0;
    + return r;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index c8e51f8..52e276c 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -433,6 +433,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);

    int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    const void *val, int bytes);
    +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
    + gpa_t addr, unsigned long *ret);
    +
    +extern bool tdp_enabled;

    enum emulation_result {
    EMULATE_DONE, /* no further processing */
    diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
    index ed5df3a..5098459 100644
    --- a/include/asm-x86/kvm_para.h
    +++ b/include/asm-x86/kvm_para.h
    @@ -12,10 +12,39 @@
    #define KVM_CPUID_FEATURES 0x40000001
    #define KVM_FEATURE_CLOCKSOURCE 0
    #define KVM_FEATURE_NOP_IO_DELAY 1
    +#define KVM_FEATURE_MMU_OP 2

    #define MSR_KVM_WALL_CLOCK 0x11
    #define MSR_KVM_SYSTEM_TIME 0x12

    +#define KVM_MAX_MMU_OP_BATCH 32
    +
    +/* Operations for KVM_HC_MMU_OP */
    +#define KVM_MMU_OP_WRITE_PTE 1
    +#define KVM_MMU_OP_FLUSH_TLB 2
    +#define KVM_MMU_OP_RELEASE_PT 3
    +
    +/* Payload for KVM_HC_MMU_OP */
    +struct kvm_mmu_op_header {
    + __u32 op;
    + __u32 pad;
    +};
    +
    +struct kvm_mmu_op_write_pte {
    + struct kvm_mmu_op_header header;
    + __u64 pte_phys;
    + __u64 pte_val;
    +};
    +
    +struct kvm_mmu_op_flush_tlb {
    + struct kvm_mmu_op_header header;
    +};
    +
    +struct kvm_mmu_op_release_pt {
    + struct kvm_mmu_op_header header;
    + __u64 pt_phys;
    +};
    +
    #ifdef __KERNEL__
    #include

    diff --git a/include/linux/kvm.h b/include/linux/kvm.h
    index 76f0947..c1b502a 100644
    --- a/include/linux/kvm.h
    +++ b/include/linux/kvm.h
    @@ -238,6 +238,7 @@ struct kvm_vapic_addr {
    #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
    #define KVM_CAP_PIT 11
    #define KVM_CAP_NOP_IO_DELAY 12
    +#define KVM_CAP_PV_MMU 13

    /*
    * ioctls for VM fds
    diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
    index 9c462c9..3ddce03 100644
    --- a/include/linux/kvm_para.h
    +++ b/include/linux/kvm_para.h
    @@ -11,8 +11,11 @@

    /* Return values for hypercalls */
    #define KVM_ENOSYS 1000
    +#define KVM_EFAULT EFAULT
    +#define KVM_E2BIG E2BIG

    -#define KVM_HC_VAPIC_POLL_IRQ 1
    +#define KVM_HC_VAPIC_POLL_IRQ 1
    +#define KVM_HC_MMU_OP 2

    /*
    * hypercalls use architecture specific
    --
    1.5.4.5
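    Reading the host handler above, the hypercall ABI is: RAX selects
    KVM_HC_MMU_OP, the first argument carries the byte count of the operation
    records, the second and third carry the guest-physical address of the record
    buffer (low/high halves for 32-bit guests, see hc_gpa()), and the number of
    bytes processed comes back in RAX. A hedged guest-side sketch of packing one
    write-pte record and issuing the call; kvm_hypercall3() and __pa() are
    assumed from the guest kvm_para/page headers, and the real guest
    implementation (including batching) appears later in the series.

    #include <linux/types.h>
    #include <linux/kvm_para.h>
    #include <asm/page.h>

    /* Submit one KVM_MMU_OP_WRITE_PTE record; returns bytes processed. */
    static unsigned long kvm_write_pte_hypercall(void *pte_slot, u64 new_pte)
    {
    	struct kvm_mmu_op_write_pte wpte = {
    		.header.op = KVM_MMU_OP_WRITE_PTE,
    		.pte_phys  = (u64)__pa(pte_slot),
    		.pte_val   = new_pte,
    	};
    	u64 buf = (u64)__pa(&wpte);

    	/* a0 = length in bytes, a1/a2 = gpa of the record buffer (lo, hi) */
    	return kvm_hypercall3(KVM_HC_MMU_OP, sizeof(wpte),
    			      (u32)buf, (u32)(buf >> 32));
    }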


  19. [PATCH 23/35] x86: KVM guest: hypercall batching

    From: Marcelo Tosatti

    Batch pte updates and tlb flushes in lazy MMU mode.

    [avi:
    - adjust to mmu_op
    - helper for getting para_state without debug warnings]

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kernel/kvm.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-
    1 files changed, 60 insertions(+), 2 deletions(-)

    diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
    index 1bb6e97..d9121f9 100644
    --- a/arch/x86/kernel/kvm.c
    +++ b/arch/x86/kernel/kvm.c
    @@ -26,6 +26,22 @@
    #include
    #include
    #include
    +#include
    +
    +#define MMU_QUEUE_SIZE 1024
    +
    +struct kvm_para_state {
    + u8 mmu_queue[MMU_QUEUE_SIZE];
    + int mmu_queue_len;
    + enum paravirt_lazy_mode mode;
    +};
    +
    +static DEFINE_PER_CPU(struct kvm_para_state, para_state);
    +
    +static struct kvm_para_state *kvm_para_state(void)
    +{
    + return &per_cpu(para_state, raw_smp_processor_id());
    +}

    /*
    * No need for any "IO delay" on KVM
    @@ -48,6 +64,28 @@ static void kvm_mmu_op(void *buffer, unsigned len)
    } while (len);
    }

    +static void mmu_queue_flush(struct kvm_para_state *state)
    +{
    + if (state->mmu_queue_len) {
    + kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
    + state->mmu_queue_len = 0;
    + }
    +}
    +
    +static void kvm_deferred_mmu_op(void *buffer, int len)
    +{
    + struct kvm_para_state *state = kvm_para_state();
    +
    + if (state->mode != PARAVIRT_LAZY_MMU) {
    + kvm_mmu_op(buffer, len);
    + return;
    + }
    + if (state->mmu_queue_len + len > sizeof state->mmu_queue)
    + mmu_queue_flush(state);
    + memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
    + state->mmu_queue_len += len;
    +}
    +
    static void kvm_mmu_write(void *dest, u64 val)
    {
    __u64 pte_phys;
    @@ -68,7 +106,7 @@ static void kvm_mmu_write(void *dest, u64 val)
    wpte.pte_val = val;
    wpte.pte_phys = pte_phys;

    - kvm_mmu_op(&wpte, sizeof wpte);
    + kvm_deferred_mmu_op(&wpte, sizeof wpte);
    }

    /*
    @@ -137,7 +175,7 @@ static void kvm_flush_tlb(void)
    .header.op = KVM_MMU_OP_FLUSH_TLB,
    };

    - kvm_mmu_op(&ftlb, sizeof ftlb);
    + kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
    }

    static void kvm_release_pt(u32 pfn)
    @@ -150,6 +188,23 @@ static void kvm_release_pt(u32 pfn)
    kvm_mmu_op(&rpt, sizeof rpt);
    }

    +static void kvm_enter_lazy_mmu(void)
    +{
    + struct kvm_para_state *state = kvm_para_state();
    +
    + paravirt_enter_lazy_mmu();
    + state->mode = paravirt_get_lazy_mode();
    +}
    +
    +static void kvm_leave_lazy_mmu(void)
    +{
    + struct kvm_para_state *state = kvm_para_state();
    +
    + mmu_queue_flush(state);
    + paravirt_leave_lazy(paravirt_get_lazy_mode());
    + state->mode = paravirt_get_lazy_mode();
    +}
    +
    static void paravirt_ops_setup(void)
    {
    pv_info.name = "KVM";
    @@ -177,6 +232,9 @@ static void paravirt_ops_setup(void)
    pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
    pv_mmu_ops.release_pt = kvm_release_pt;
    pv_mmu_ops.release_pd = kvm_release_pt;
    +
    + pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
    + pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
    }
    }

    --
    1.5.4.5
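    With the lazy-mode hooks above in place, the win comes from code paths that
    the generic mm layer already brackets with the lazy-MMU hooks: every pte
    write inside such a section is queued by kvm_deferred_mmu_op() and a single
    KVM_HC_MMU_OP hypercall goes out when the section ends. A hedged usage
    sketch, assuming the kvm pv_mmu_ops pte hooks installed earlier in the
    series route set_pte_at() to kvm_mmu_write():

    #include <linux/mm.h>
    #include <asm/pgtable.h>

    /* N pte updates, one hypercall: flushed at arch_leave_lazy_mmu_mode(). */
    static void remap_range(struct mm_struct *mm, unsigned long addr,
    			pte_t *ptep, pte_t *new_ptes, int n)
    {
    	int i;

    	arch_enter_lazy_mmu_mode();
    	for (i = 0; i < n; i++)
    		set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, new_ptes[i]);
    	arch_leave_lazy_mmu_mode();
    }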


  20. [PATCH 29/35] KVM: Move some x86 specific constants and structures to include/asm-x86

    Signed-off-by: Avi Kivity
    ---
    include/asm-x86/kvm_host.h | 13 +++++++++++++
    include/linux/kvm_host.h | 13 -------------
    2 files changed, 13 insertions(+), 13 deletions(-)

    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 52e276c..2773f91 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -20,6 +20,13 @@

    #include

    +#define KVM_MAX_VCPUS 16
    +#define KVM_MEMORY_SLOTS 32
    +/* memory slots that does not exposed to userspace */
    +#define KVM_PRIVATE_MEM_SLOTS 4
    +
    +#define KVM_PIO_PAGE_OFFSET 1
    +
    #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
    #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
    #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL)
    @@ -113,6 +120,12 @@ enum {

    #define KVM_NR_MEM_OBJS 40

    +struct kvm_guest_debug {
    + int enabled;
    + unsigned long bp[4];
    + int singlestep;
    +};
    +
    /*
    * We don't want allocation failures within the mmu code, so we preallocate
    * enough memory for a single page fault in a cache.
    diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
    index 958e003..f4e1436 100644
    --- a/include/linux/kvm_host.h
    +++ b/include/linux/kvm_host.h
    @@ -24,13 +24,6 @@

    #include

    -#define KVM_MAX_VCPUS 16
    -#define KVM_MEMORY_SLOTS 32
    -/* memory slots that does not exposed to userspace */
    -#define KVM_PRIVATE_MEM_SLOTS 4
    -
    -#define KVM_PIO_PAGE_OFFSET 1
    -
    /*
    * vcpu->requests bit members
    */
    @@ -43,12 +36,6 @@
    struct kvm_vcpu;
    extern struct kmem_cache *kvm_vcpu_cache;

    -struct kvm_guest_debug {
    - int enabled;
    - unsigned long bp[4];
    - int singlestep;
    -};
    -
    /*
    * It would be nice to use something smarter than a linear search, TBD...
    * Thankfully we dont expect many devices to register (famous last words ,
    --
    1.5.4.5

