[PATCH 00/35] KVM updates for the 2.6.26 merge window (part II) - Kernel

This is a discussion on [PATCH 00/35] KVM updates for the 2.6.26 merge window (part II) - Kernel ; Useful for debugging. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fb0389d..0155931 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -39,6 +39,9 @@ module_param(bypass_guest_pf, bool, 0); static int enable_vpid = ...

+ Reply to Thread
Page 2 of 2 FirstFirst 1 2
Results 21 to 32 of 32

Thread: [PATCH 00/35] KVM updates for the 2.6.26 merge window (part II)

  1. [PATCH 33/35] KVM: VMX: Add module option to disable flexpriority

    Useful for debugging.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/vmx.c | 8 ++++++--
    1 files changed, 6 insertions(+), 2 deletions(-)

    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index fb0389d..0155931 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -39,6 +39,9 @@ module_param(bypass_guest_pf, bool, 0);
    static int enable_vpid = 1;
    module_param(enable_vpid, bool, 0);

    +static int flexpriority_enabled = 1;
    +module_param(flexpriority_enabled, bool, 0);
    +
    struct vmcs {
    u32 revision_id;
    u32 abort;
    @@ -200,8 +203,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)

    static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
    {
    - return (vmcs_config.cpu_based_2nd_exec_ctrl &
    - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
    + return flexpriority_enabled
    + && (vmcs_config.cpu_based_2nd_exec_ctrl &
    + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
    }

    static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. [PATCH 27/35] KVM: kvm.h: __user requires compiler.h

    From: Christian Borntraeger

    include/linux/kvm.h defines struct kvm_dirty_log to
    [...]
    union {
    void __user *dirty_bitmap; /* one bit per page */
    __u64 padding;
    };

    __user requires compiler.h to compile. Currently, this works on x86
    only coincidentally due to other include files. This patch makes
    kvm.h compile in all cases.

    Signed-off-by: Christian Borntraeger
    Signed-off-by: Avi Kivity
    ---
    include/linux/kvm.h | 1 +
    1 files changed, 1 insertions(+), 0 deletions(-)

    diff --git a/include/linux/kvm.h b/include/linux/kvm.h
    index c1b502a..3bd3828 100644
    --- a/include/linux/kvm.h
    +++ b/include/linux/kvm.h
    @@ -8,6 +8,7 @@
    */

    #include
    +#include <linux/compiler.h>
    #include
    #include

    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. [PATCH 30/35] KVM: MMU: fix dirty bit setting when removing write permissions

    From: Izik Eidus

    When mmu_set_spte() checks if a page related to a spte should be released as
    dirty or clean, it checks if the shadow pte was writable, but in case
    rmap_write_protect() is called it is possible for shadow ptes that were
    writable to become readonly and therefore mmu_set_spte will release the pages
    as clean.

    This patch fixes the issue by marking the page as dirty inside
    rmap_write_protect().

    Signed-off-by: Izik Eidus
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 8 ++++++++
    1 files changed, 8 insertions(+), 0 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index a5872b3..dd4b95b 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -626,6 +626,14 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
    }
    spte = rmap_next(kvm, rmapp, spte);
    }
    + if (write_protected) {
    + struct page *page;
    +
    + spte = rmap_next(kvm, rmapp, NULL);
    + page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
    + SetPageDirty(page);
    + }
    +
    /* check for huge page mappings */
    rmapp = gfn_to_rmap(kvm, gfn, 1);
    spte = rmap_next(kvm, rmapp, NULL);
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. [PATCH 26/35] x86: KVM guest: disable clock before rebooting.

    From: Glauber Costa

    This patch writes 0 (actually, what really matters is that the
    LSB is cleared) to the system time msr before shutting down
    the machine for kexec.

    Without it, we can have a random memory location being written
    when the guest comes back

    It overrides the functions shutdown, used in the path of kernel_kexec() (sys.c)
    and crash_shutdown, used in the path of crash_kexec() (kexec.c)

    Signed-off-by: Glauber Costa
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kernel/kvmclock.c | 27 +++++++++++++++++++++++++++
    1 files changed, 27 insertions(+), 0 deletions(-)

    diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
    index b999f5e..ddee040 100644
    --- a/arch/x86/kernel/kvmclock.c
    +++ b/arch/x86/kernel/kvmclock.c
    @@ -22,6 +22,7 @@
    #include
    #include
    #include
    +#include

    #define KVM_SCALE 22

    @@ -143,6 +144,28 @@ static void kvm_setup_secondary_clock(void)
    setup_secondary_APIC_clock();
    }

    +/*
    + * After the clock is registered, the host will keep writing to the
    + * registered memory location. If the guest happens to shutdown, this memory
    + * won't be valid. In cases like kexec, in which you install a new kernel, this
    + * means a random memory location will be kept being written. So before any
    + * kind of shutdown from our side, we unregister the clock by writting anything
    + * that does not have the 'enable' bit set in the msr
    + */
    +#ifdef CONFIG_KEXEC
    +static void kvm_crash_shutdown(struct pt_regs *regs)
    +{
    + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
    + native_machine_crash_shutdown(regs);
    +}
    +#endif
    +
    +static void kvm_shutdown(void)
    +{
    + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
    + native_machine_shutdown();
    +}
    +
    void __init kvmclock_init(void)
    {
    if (!kvm_para_available())
    @@ -155,6 +178,10 @@ void __init kvmclock_init(void)
    pv_time_ops.set_wallclock = kvm_set_wallclock;
    pv_time_ops.sched_clock = kvm_clock_read;
    pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
    + machine_ops.shutdown = kvm_shutdown;
    +#ifdef CONFIG_KEXEC
    + machine_ops.crash_shutdown = kvm_crash_shutdown;
    +#endif
    clocksource_register(&kvm_clock);
    }
    }
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. [PATCH 22/35] x86: KVM guest: hypercall based pte updates and TLB flushes

    From: Marcelo Tosatti

    Hypercall based pte updates are faster than faults, and also allow use
    of the lazy MMU mode to batch operations.

    Don't report the feature if two dimensional paging is enabled.

    [avi:
    - guest/host split
    - fix 32-bit truncation issues
    - adjust to mmu_op]

    Signed-off-by: Marcelo Tosatti
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kernel/kvm.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++
    1 files changed, 137 insertions(+), 0 deletions(-)

    diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
    index a8e36da..1bb6e97 100644
    --- a/arch/x86/kernel/kvm.c
    +++ b/arch/x86/kernel/kvm.c
    @@ -25,6 +25,7 @@
    #include
    #include
    #include
    +#include

    /*
    * No need for any "IO delay" on KVM
    @@ -33,6 +34,122 @@ static void kvm_io_delay(void)
    {
    }

    +static void kvm_mmu_op(void *buffer, unsigned len)
    +{
    + int r;
    + unsigned long a1, a2;
    +
    + do {
    + a1 = __pa(buffer);
    + a2 = 0; /* on i386 __pa() always returns <4G */
    + r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
    + buffer += r;
    + len -= r;
    + } while (len);
    +}
    +
    +static void kvm_mmu_write(void *dest, u64 val)
    +{
    + __u64 pte_phys;
    + struct kvm_mmu_op_write_pte wpte;
    +
    +#ifdef CONFIG_HIGHPTE
    + struct page *page;
    + unsigned long dst = (unsigned long) dest;
    +
    + page = kmap_atomic_to_page(dest);
    + pte_phys = page_to_pfn(page);
    + pte_phys <<= PAGE_SHIFT;
    + pte_phys += (dst & ~(PAGE_MASK));
    +#else
    + pte_phys = (unsigned long)__pa(dest);
    +#endif
    + wpte.header.op = KVM_MMU_OP_WRITE_PTE;
    + wpte.pte_val = val;
    + wpte.pte_phys = pte_phys;
    +
    + kvm_mmu_op(&wpte, sizeof wpte);
    +}
    +
    +/*
    + * We only need to hook operations that are MMU writes. We hook these so that
    + * we can use lazy MMU mode to batch these operations. We could probably
    + * improve the performance of the host code if we used some of the information
    + * here to simplify processing of batched writes.
    + */
    +static void kvm_set_pte(pte_t *ptep, pte_t pte)
    +{
    + kvm_mmu_write(ptep, pte_val(pte));
    +}
    +
    +static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
    + pte_t *ptep, pte_t pte)
    +{
    + kvm_mmu_write(ptep, pte_val(pte));
    +}
    +
    +static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
    +{
    + kvm_mmu_write(pmdp, pmd_val(pmd));
    +}
    +
    +#if PAGETABLE_LEVELS >= 3
    +#ifdef CONFIG_X86_PAE
    +static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
    +{
    + kvm_mmu_write(ptep, pte_val(pte));
    +}
    +
    +static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
    + pte_t *ptep, pte_t pte)
    +{
    + kvm_mmu_write(ptep, pte_val(pte));
    +}
    +
    +static void kvm_pte_clear(struct mm_struct *mm,
    + unsigned long addr, pte_t *ptep)
    +{
    + kvm_mmu_write(ptep, 0);
    +}
    +
    +static void kvm_pmd_clear(pmd_t *pmdp)
    +{
    + kvm_mmu_write(pmdp, 0);
    +}
    +#endif
    +
    +static void kvm_set_pud(pud_t *pudp, pud_t pud)
    +{
    + kvm_mmu_write(pudp, pud_val(pud));
    +}
    +
    +#if PAGETABLE_LEVELS == 4
    +static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
    +{
    + kvm_mmu_write(pgdp, pgd_val(pgd));
    +}
    +#endif
    +#endif /* PAGETABLE_LEVELS >= 3 */
    +
    +static void kvm_flush_tlb(void)
    +{
    + struct kvm_mmu_op_flush_tlb ftlb = {
    + .header.op = KVM_MMU_OP_FLUSH_TLB,
    + };
    +
    + kvm_mmu_op(&ftlb, sizeof ftlb);
    +}
    +
    +static void kvm_release_pt(u32 pfn)
    +{
    + struct kvm_mmu_op_release_pt rpt = {
    + .header.op = KVM_MMU_OP_RELEASE_PT,
    + .pt_phys = (u64)pfn << PAGE_SHIFT,
    + };
    +
    + kvm_mmu_op(&rpt, sizeof rpt);
    +}
    +
    static void paravirt_ops_setup(void)
    {
    pv_info.name = "KVM";
    @@ -41,6 +158,26 @@ static void paravirt_ops_setup(void)
    if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
    pv_cpu_ops.io_delay = kvm_io_delay;

    + if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
    + pv_mmu_ops.set_pte = kvm_set_pte;
    + pv_mmu_ops.set_pte_at = kvm_set_pte_at;
    + pv_mmu_ops.set_pmd = kvm_set_pmd;
    +#if PAGETABLE_LEVELS >= 3
    +#ifdef CONFIG_X86_PAE
    + pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
    + pv_mmu_ops.set_pte_present = kvm_set_pte_present;
    + pv_mmu_ops.pte_clear = kvm_pte_clear;
    + pv_mmu_ops.pmd_clear = kvm_pmd_clear;
    +#endif
    + pv_mmu_ops.set_pud = kvm_set_pud;
    +#if PAGETABLE_LEVELS == 4
    + pv_mmu_ops.set_pgd = kvm_set_pgd;
    +#endif
    +#endif
    + pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
    + pv_mmu_ops.release_pt = kvm_release_pt;
    + pv_mmu_ops.release_pd = kvm_release_pt;
    + }
    }

    void __init kvm_guest_init(void)
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. [PATCH 16/35] KVM: Add save/restore supporting of in kernel PIT

    From: Sheng Yang

    Signed-off-by: Sheng Yang
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/i8254.c | 7 +++++++
    arch/x86/kvm/i8254.h | 1 +
    arch/x86/kvm/x86.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
    include/asm-x86/kvm.h | 21 +++++++++++++++++++++
    include/linux/kvm.h | 2 ++
    5 files changed, 79 insertions(+), 0 deletions(-)

    diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
    index 1031901..7776f50 100644
    --- a/arch/x86/kvm/i8254.c
    +++ b/arch/x86/kvm/i8254.c
    @@ -286,6 +286,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
    }
    }

    +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
    +{
    + mutex_lock(&kvm->arch.vpit->pit_state.lock);
    + pit_load_count(kvm, channel, val);
    + mutex_unlock(&kvm->arch.vpit->pit_state.lock);
    +}
    +
    static void pit_ioport_write(struct kvm_io_device *this,
    gpa_t addr, int len, const void *data)
    {
    diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
    index 38184d5..586bbf0 100644
    --- a/arch/x86/kvm/i8254.h
    +++ b/arch/x86/kvm/i8254.h
    @@ -54,6 +54,7 @@ struct kvm_pit {

    void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
    void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
    +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
    struct kvm_pit *kvm_create_pit(struct kvm *kvm);
    void kvm_free_pit(struct kvm *kvm);

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index c33a457..621a8e3 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -1504,6 +1504,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    return r;
    }

    +static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
    +{
    + int r = 0;
    +
    + memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
    + return r;
    +}
    +
    +static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
    +{
    + int r = 0;
    +
    + memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
    + kvm_pit_load_count(kvm, 0, ps->channels[0].count);
    + return r;
    +}
    +
    /*
    * Get (and clear) the dirty memory log for a memory slot.
    */
    @@ -1657,6 +1674,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
    r = 0;
    break;
    }
    + case KVM_GET_PIT: {
    + struct kvm_pit_state ps;
    + r = -EFAULT;
    + if (copy_from_user(&ps, argp, sizeof ps))
    + goto out;
    + r = -ENXIO;
    + if (!kvm->arch.vpit)
    + goto out;
    + r = kvm_vm_ioctl_get_pit(kvm, &ps);
    + if (r)
    + goto out;
    + r = -EFAULT;
    + if (copy_to_user(argp, &ps, sizeof ps))
    + goto out;
    + r = 0;
    + break;
    + }
    + case KVM_SET_PIT: {
    + struct kvm_pit_state ps;
    + r = -EFAULT;
    + if (copy_from_user(&ps, argp, sizeof ps))
    + goto out;
    + r = -ENXIO;
    + if (!kvm->arch.vpit)
    + goto out;
    + r = kvm_vm_ioctl_set_pit(kvm, &ps);
    + if (r)
    + goto out;
    + r = 0;
    + break;
    + }
    default:
    ;
    }
    diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
    index 7a71120..12b4b25 100644
    --- a/include/asm-x86/kvm.h
    +++ b/include/asm-x86/kvm.h
    @@ -188,4 +188,25 @@ struct kvm_cpuid2 {
    struct kvm_cpuid_entry2 entries[0];
    };

    +/* for KVM_GET_PIT and KVM_SET_PIT */
    +struct kvm_pit_channel_state {
    + __u32 count; /* can be 65536 */
    + __u16 latched_count;
    + __u8 count_latched;
    + __u8 status_latched;
    + __u8 status;
    + __u8 read_state;
    + __u8 write_state;
    + __u8 write_latch;
    + __u8 rw_mode;
    + __u8 mode;
    + __u8 bcd;
    + __u8 gate;
    + __s64 count_load_time;
    +};
    +
    +struct kvm_pit_state {
    + struct kvm_pit_channel_state channels[3];
    +};
    +
    #endif
    diff --git a/include/linux/kvm.h b/include/linux/kvm.h
    index cefa9a2..a2f3274 100644
    --- a/include/linux/kvm.h
    +++ b/include/linux/kvm.h
    @@ -260,6 +260,8 @@ struct kvm_vapic_addr {
    #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
    #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
    #define KVM_CREATE_PIT _IO(KVMIO, 0x64)
    +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
    +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)

    /*
    * ioctls for vcpu fds
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. [PATCH 15/35] KVM: In kernel PIT model

    From: Sheng Yang

    The patch moves the PIT from userspace to the kernel, and greatly increases the timer accuracy.

    Signed-off-by: Sheng Yang
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/Makefile | 3 +-
    arch/x86/kvm/i8254.c | 585 ++++++++++++++++++++++++++++++++++++++++++++
    arch/x86/kvm/i8254.h | 60 +++++
    arch/x86/kvm/irq.c | 3 +
    arch/x86/kvm/x86.c | 9 +
    include/asm-x86/kvm_host.h | 1 +
    include/linux/kvm.h | 2 +
    7 files changed, 662 insertions(+), 1 deletions(-)
    create mode 100644 arch/x86/kvm/i8254.c
    create mode 100644 arch/x86/kvm/i8254.h

    diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
    index ffdd0b3..4d0c22e 100644
    --- a/arch/x86/kvm/Makefile
    +++ b/arch/x86/kvm/Makefile
    @@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)

    EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

    -kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
    +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
    + i8254.o
    obj-$(CONFIG_KVM) += kvm.o
    kvm-intel-objs = vmx.o
    obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
    diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
    new file mode 100644
    index 0000000..1031901
    --- /dev/null
    +++ b/arch/x86/kvm/i8254.c
    @@ -0,0 +1,585 @@
    +/*
    + * 8253/8254 interval timer emulation
    + *
    + * Copyright (c) 2003-2004 Fabrice Bellard
    + * Copyright (c) 2006 Intel Corporation
    + * Copyright (c) 2007 Keir Fraser, XenSource Inc
    + * Copyright (c) 2008 Intel Corporation
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a copy
    + * of this software and associated documentation files (the "Software"), to deal
    + * in the Software without restriction, including without limitation the rights
    + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    + * copies of the Software, and to permit persons to whom the Software is
    + * furnished to do so, subject to the following conditions:
    + *
    + * The above copyright notice and this permission notice shall be included in
    + * all copies or substantial portions of the Software.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    + * THE SOFTWARE.
    + *
    + * Authors:
    + * Sheng Yang
    + * Based on QEMU and Xen.
    + */
    +
    +#include <linux/kvm_host.h>
    +
    +#include "irq.h"
    +#include "i8254.h"
    +
    +#ifndef CONFIG_X86_64
    +#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
    +#else
    +#define mod_64(x, y) ((x) % (y))
    +#endif
    +
    +#define RW_STATE_LSB 1
    +#define RW_STATE_MSB 2
    +#define RW_STATE_WORD0 3
    +#define RW_STATE_WORD1 4
    +
    +/* Compute with 96 bit intermediate result: (a*b)/c */
    +static u64 muldiv64(u64 a, u32 b, u32 c)
    +{
    + union {
    + u64 ll;
    + struct {
    + u32 low, high;
    + } l;
    + } u, res;
    + u64 rl, rh;
    +
    + u.ll = a;
    + rl = (u64)u.l.low * (u64)b;
    + rh = (u64)u.l.high * (u64)b;
    + rh += (rl >> 32);
    + res.l.high = div64_64(rh, c);
    + res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
    + return res.ll;
    +}
    +
    +static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
    +{
    + struct kvm_kpit_channel_state *c =
    + &kvm->arch.vpit->pit_state.channels[channel];
    +
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + switch (c->mode) {
    + default:
    + case 0:
    + case 4:
    + /* XXX: just disable/enable counting */
    + break;
    + case 1:
    + case 2:
    + case 3:
    + case 5:
    + /* Restart counting on rising edge. */
    + if (c->gate < val)
    + c->count_load_time = ktime_get();
    + break;
    + }
    +
    + c->gate = val;
    +}
    +
    +int pit_get_gate(struct kvm *kvm, int channel)
    +{
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + return kvm->arch.vpit->pit_state.channels[channel].gate;
    +}
    +
    +static int pit_get_count(struct kvm *kvm, int channel)
    +{
    + struct kvm_kpit_channel_state *c =
    + &kvm->arch.vpit->pit_state.channels[channel];
    + s64 d, t;
    + int counter;
    +
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
    + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
    +
    + switch (c->mode) {
    + case 0:
    + case 1:
    + case 4:
    + case 5:
    + counter = (c->count - d) & 0xffff;
    + break;
    + case 3:
    + /* XXX: may be incorrect for odd counts */
    + counter = c->count - (mod_64((2 * d), c->count));
    + break;
    + default:
    + counter = c->count - mod_64(d, c->count);
    + break;
    + }
    + return counter;
    +}
    +
    +static int pit_get_out(struct kvm *kvm, int channel)
    +{
    + struct kvm_kpit_channel_state *c =
    + &kvm->arch.vpit->pit_state.channels[channel];
    + s64 d, t;
    + int out;
    +
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
    + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
    +
    + switch (c->mode) {
    + default:
    + case 0:
    + out = (d >= c->count);
    + break;
    + case 1:
    + out = (d < c->count);
    + break;
    + case 2:
    + out = ((mod_64(d, c->count) == 0) && (d != 0));
    + break;
    + case 3:
    + out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
    + break;
    + case 4:
    + case 5:
    + out = (d == c->count);
    + break;
    + }
    +
    + return out;
    +}
    +
    +static void pit_latch_count(struct kvm *kvm, int channel)
    +{
    + struct kvm_kpit_channel_state *c =
    + &kvm->arch.vpit->pit_state.channels[channel];
    +
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + if (!c->count_latched) {
    + c->latched_count = pit_get_count(kvm, channel);
    + c->count_latched = c->rw_mode;
    + }
    +}
    +
    +static void pit_latch_status(struct kvm *kvm, int channel)
    +{
    + struct kvm_kpit_channel_state *c =
    + &kvm->arch.vpit->pit_state.channels[channel];
    +
    + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
    +
    + if (!c->status_latched) {
    + /* TODO: Return NULL COUNT (bit 6). */
    + c->status = ((pit_get_out(kvm, channel) << 7) |
    + (c->rw_mode << 4) |
    + (c->mode << 1) |
    + c->bcd);
    + c->status_latched = 1;
    + }
    +}
    +
    +int __pit_timer_fn(struct kvm_kpit_state *ps)
    +{
    + struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
    + struct kvm_kpit_timer *pt = &ps->pit_timer;
    +
    + atomic_inc(&pt->pending);
    + if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
    + vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
    + wake_up_interruptible(&vcpu0->wq);
    + }
    +
    + pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
    + pt->scheduled = ktime_to_ns(pt->timer.expires);
    +
    + return (pt->period == 0 ? 0 : 1);
    +}
    +
    +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
    +{
    + struct kvm_kpit_state *ps;
    + int restart_timer = 0;
    +
    + ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
    +
    + restart_timer = __pit_timer_fn(ps);
    +
    + if (restart_timer)
    + return HRTIMER_RESTART;
    + else
    + return HRTIMER_NORESTART;
    +}
    +
    +static void destroy_pit_timer(struct kvm_kpit_timer *pt)
    +{
    + pr_debug("pit: execute del timer!\n");
    + hrtimer_cancel(&pt->timer);
    +}
    +
    +static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
    +{
    + s64 interval;
    +
    + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
    +
    + pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
    +
    + /* TODO The new value only affected after the retriggered */
    + hrtimer_cancel(&pt->timer);
    + pt->period = (is_period == 0) ? 0 : interval;
    + pt->timer.function = pit_timer_fn;
    + atomic_set(&pt->pending, 0);
    +
    + hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
    + HRTIMER_MODE_ABS);
    +}
    +
    +static void pit_load_count(struct kvm *kvm, int channel, u32 val)
    +{
    + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
    +
    + WARN_ON(!mutex_is_locked(&ps->lock));
    +
    + pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
    +
    + /*
    + * Though spec said the state of 8254 is undefined after power-up,
    + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
    + * when booting up.
    + * So here setting initialize rate for it, and not a specific number
    + */
    + if (val == 0)
    + val = 0x10000;
    +
    + ps->channels[channel].count_load_time = ktime_get();
    + ps->channels[channel].count = val;
    +
    + if (channel != 0)
    + return;
    +
    + /* Two types of timer
    + * mode 1 is one shot, mode 2 is period, otherwise del timer */
    + switch (ps->channels[0].mode) {
    + case 1:
    + create_pit_timer(&ps->pit_timer, val, 0);
    + break;
    + case 2:
    + create_pit_timer(&ps->pit_timer, val, 1);
    + break;
    + default:
    + destroy_pit_timer(&ps->pit_timer);
    + }
    +}
    +
    +static void pit_ioport_write(struct kvm_io_device *this,
    + gpa_t addr, int len, const void *data)
    +{
    + struct kvm_pit *pit = (struct kvm_pit *)this->private;
    + struct kvm_kpit_state *pit_state = &pit->pit_state;
    + struct kvm *kvm = pit->kvm;
    + int channel, access;
    + struct kvm_kpit_channel_state *s;
    + u32 val = *(u32 *) data;
    +
    + val &= 0xff;
    + addr &= KVM_PIT_CHANNEL_MASK;
    +
    + mutex_lock(&pit_state->lock);
    +
    + if (val != 0)
    + pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
    + (unsigned int)addr, len, val);
    +
    + if (addr == 3) {
    + channel = val >> 6;
    + if (channel == 3) {
    + /* Read-Back Command. */
    + for (channel = 0; channel < 3; channel++) {
    + s = &pit_state->channels[channel];
    + if (val & (2 << channel)) {
    + if (!(val & 0x20))
    + pit_latch_count(kvm, channel);
    + if (!(val & 0x10))
    + pit_latch_status(kvm, channel);
    + }
    + }
    + } else {
    + /* Select Counter . */
    + s = &pit_state->channels[channel];
    + access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
    + if (access == 0) {
    + pit_latch_count(kvm, channel);
    + } else {
    + s->rw_mode = access;
    + s->read_state = access;
    + s->write_state = access;
    + s->mode = (val >> 1) & 7;
    + if (s->mode > 5)
    + s->mode -= 4;
    + s->bcd = val & 1;
    + }
    + }
    + } else {
    + /* Write Count. */
    + s = &pit_state->channels[addr];
    + switch (s->write_state) {
    + default:
    + case RW_STATE_LSB:
    + pit_load_count(kvm, addr, val);
    + break;
    + case RW_STATE_MSB:
    + pit_load_count(kvm, addr, val << 8);
    + break;
    + case RW_STATE_WORD0:
    + s->write_latch = val;
    + s->write_state = RW_STATE_WORD1;
    + break;
    + case RW_STATE_WORD1:
    + pit_load_count(kvm, addr, s->write_latch | (val << 8));
    + s->write_state = RW_STATE_WORD0;
    + break;
    + }
    + }
    +
    + mutex_unlock(&pit_state->lock);
    +}
    +
    +static void pit_ioport_read(struct kvm_io_device *this,
    + gpa_t addr, int len, void *data)
    +{
    + struct kvm_pit *pit = (struct kvm_pit *)this->private;
    + struct kvm_kpit_state *pit_state = &pit->pit_state;
    + struct kvm *kvm = pit->kvm;
    + int ret, count;
    + struct kvm_kpit_channel_state *s;
    +
    + addr &= KVM_PIT_CHANNEL_MASK;
    + s = &pit_state->channels[addr];
    +
    + mutex_lock(&pit_state->lock);
    +
    + if (s->status_latched) {
    + s->status_latched = 0;
    + ret = s->status;
    + } else if (s->count_latched) {
    + switch (s->count_latched) {
    + default:
    + case RW_STATE_LSB:
    + ret = s->latched_count & 0xff;
    + s->count_latched = 0;
    + break;
    + case RW_STATE_MSB:
    + ret = s->latched_count >> 8;
    + s->count_latched = 0;
    + break;
    + case RW_STATE_WORD0:
    + ret = s->latched_count & 0xff;
    + s->count_latched = RW_STATE_MSB;
    + break;
    + }
    + } else {
    + switch (s->read_state) {
    + default:
    + case RW_STATE_LSB:
    + count = pit_get_count(kvm, addr);
    + ret = count & 0xff;
    + break;
    + case RW_STATE_MSB:
    + count = pit_get_count(kvm, addr);
    + ret = (count >> 8) & 0xff;
    + break;
    + case RW_STATE_WORD0:
    + count = pit_get_count(kvm, addr);
    + ret = count & 0xff;
    + s->read_state = RW_STATE_WORD1;
    + break;
    + case RW_STATE_WORD1:
    + count = pit_get_count(kvm, addr);
    + ret = (count >> 8) & 0xff;
    + s->read_state = RW_STATE_WORD0;
    + break;
    + }
    + }
    +
    + if (len > sizeof(ret))
    + len = sizeof(ret);
    + memcpy(data, (char *)&ret, len);
    +
    + mutex_unlock(&pit_state->lock);
    +}
    +
    +static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
    +{
    + return ((addr >= KVM_PIT_BASE_ADDRESS) &&
    + (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
    +}
    +
    +static void speaker_ioport_write(struct kvm_io_device *this,
    + gpa_t addr, int len, const void *data)
    +{
    + struct kvm_pit *pit = (struct kvm_pit *)this->private;
    + struct kvm_kpit_state *pit_state = &pit->pit_state;
    + struct kvm *kvm = pit->kvm;
    + u32 val = *(u32 *) data;
    +
    + mutex_lock(&pit_state->lock);
    + pit_state->speaker_data_on = (val >> 1) & 1;
    + pit_set_gate(kvm, 2, val & 1);
    + mutex_unlock(&pit_state->lock);
    +}
    +
    +static void speaker_ioport_read(struct kvm_io_device *this,
    + gpa_t addr, int len, void *data)
    +{
    + struct kvm_pit *pit = (struct kvm_pit *)this->private;
    + struct kvm_kpit_state *pit_state = &pit->pit_state;
    + struct kvm *kvm = pit->kvm;
    + unsigned int refresh_clock;
    + int ret;
    +
    + /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
    + refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
    +
    + mutex_lock(&pit_state->lock);
    + ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
    + (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
    + if (len > sizeof(ret))
    + len = sizeof(ret);
    + memcpy(data, (char *)&ret, len);
    + mutex_unlock(&pit_state->lock);
    +}
    +
    +static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
    +{
    + return (addr == KVM_SPEAKER_BASE_ADDRESS);
    +}
    +
    +struct kvm_pit *kvm_create_pit(struct kvm *kvm)
    +{
    + int i;
    + struct kvm_pit *pit;
    + struct kvm_kpit_state *pit_state;
    + struct kvm_kpit_channel_state *c;
    +
    + pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
    + if (!pit)
    + return NULL;
    +
    + mutex_init(&pit->pit_state.lock);
    + mutex_lock(&pit->pit_state.lock);
    +
    + /* Initialize PIO device */
    + pit->dev.read = pit_ioport_read;
    + pit->dev.write = pit_ioport_write;
    + pit->dev.in_range = pit_in_range;
    + pit->dev.private = pit;
    + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
    +
    + pit->speaker_dev.read = speaker_ioport_read;
    + pit->speaker_dev.write = speaker_ioport_write;
    + pit->speaker_dev.in_range = speaker_in_range;
    + pit->speaker_dev.private = pit;
    + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
    +
    + kvm->arch.vpit = pit;
    + pit->kvm = kvm;
    +
    + pit_state = &pit->pit_state;
    + pit_state->pit = pit;
    + hrtimer_init(&pit_state->pit_timer.timer,
    + CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
    + atomic_set(&pit_state->pit_timer.pending, 0);
    + for (i = 0; i < 3; i++) {
    + c = &pit_state->channels[i];
    + c->mode = 0xff;
    + c->gate = (i != 2);
    + pit_load_count(kvm, i, 0);
    + }
    +
    + mutex_unlock(&pit->pit_state.lock);
    +
    + pit->pit_state.inject_pending = 1;
    +
    + return pit;
    +}
    +
    +void kvm_free_pit(struct kvm *kvm)
    +{
    + struct hrtimer *timer;
    +
    + if (kvm->arch.vpit) {
    + mutex_lock(&kvm->arch.vpit->pit_state.lock);
    + timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
    + hrtimer_cancel(timer);
    + mutex_unlock(&kvm->arch.vpit->pit_state.lock);
    + kfree(kvm->arch.vpit);
    + }
    +}
    +
    +void __inject_pit_timer_intr(struct kvm *kvm)
    +{
    + mutex_lock(&kvm->lock);
    + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
    + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
    + kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
    + kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
    + mutex_unlock(&kvm->lock);
    +}
    +
    +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
    +{
    + struct kvm_pit *pit = vcpu->kvm->arch.vpit;
    + struct kvm *kvm = vcpu->kvm;
    + struct kvm_kpit_state *ps;
    + static unsigned long last_injected_time;
    +
    + if (vcpu && pit) {
    + ps = &pit->pit_state;
    +
    + /* Try to inject pending interrupts when:
    + * 1. Pending exists
    + * 2. Last interrupt was accepted or waited for too long time*/
    + if (atomic_read(&ps->pit_timer.pending) &&
    + (ps->inject_pending ||
    + (jiffies - last_injected_time
    + >= KVM_MAX_PIT_INTR_INTERVAL))) {
    + ps->inject_pending = 0;
    + __inject_pit_timer_intr(kvm);
    + last_injected_time = jiffies;
    + }
    + }
    +}
    +
    +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
    +{
    + struct kvm_arch *arch = &vcpu->kvm->arch;
    + struct kvm_kpit_state *ps;
    +
    + if (vcpu && arch->vpit) {
    + ps = &arch->vpit->pit_state;
    + if (atomic_read(&ps->pit_timer.pending) &&
    + (((arch->vpic->pics[0].imr & 1) == 0 &&
    + arch->vpic->pics[0].irq_base == vec) ||
    + (arch->vioapic->redirtbl[0].fields.vector == vec &&
    + arch->vioapic->redirtbl[0].fields.mask != 1))) {
    + ps->inject_pending = 1;
    + atomic_dec(&ps->pit_timer.pending);
    + ps->channels[0].count_load_time = ktime_get();
    + }
    + }
    +}
    diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
    new file mode 100644
    index 0000000..38184d5
    --- /dev/null
    +++ b/arch/x86/kvm/i8254.h
    @@ -0,0 +1,60 @@
    +#ifndef __I8254_H
    +#define __I8254_H
    +
    +#include "iodev.h"
    +
    +struct kvm_kpit_timer {
    + struct hrtimer timer;
    + int irq;
    + s64 period; /* unit: ns */
    + s64 scheduled;
    + ktime_t last_update;
    + atomic_t pending;
    +};
    +
    +struct kvm_kpit_channel_state {
    + u32 count; /* can be 65536 */
    + u16 latched_count;
    + u8 count_latched;
    + u8 status_latched;
    + u8 status;
    + u8 read_state;
    + u8 write_state;
    + u8 write_latch;
    + u8 rw_mode;
    + u8 mode;
    + u8 bcd; /* not supported */
    + u8 gate; /* timer start */
    + ktime_t count_load_time;
    +};
    +
    +struct kvm_kpit_state {
    + struct kvm_kpit_channel_state channels[3];
    + struct kvm_kpit_timer pit_timer;
    + u32 speaker_data_on;
    + struct mutex lock;
    + struct kvm_pit *pit;
    + bool inject_pending; /* if inject pending interrupts */
    +};
    +
    +struct kvm_pit {
    + unsigned long base_addresss;
    + struct kvm_io_device dev;
    + struct kvm_io_device speaker_dev;
    + struct kvm *kvm;
    + struct kvm_kpit_state pit_state;
    +};
    +
    +#define KVM_PIT_BASE_ADDRESS 0x40
    +#define KVM_SPEAKER_BASE_ADDRESS 0x61
    +#define KVM_PIT_MEM_LENGTH 4
    +#define KVM_PIT_FREQ 1193181
    +#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
    +#define KVM_PIT_CHANNEL_MASK 0x3
    +
    +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
    +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
    +struct kvm_pit *kvm_create_pit(struct kvm *kvm);
    +void kvm_free_pit(struct kvm *kvm);
    +
    +#endif
    diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
    index e571475..dbfe21c 100644
    --- a/arch/x86/kvm/irq.c
    +++ b/arch/x86/kvm/irq.c
    @@ -23,6 +23,7 @@
    #include

    #include "irq.h"
    +#include "i8254.h"

    /*
    * check if there is pending interrupt without
    @@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
    void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
    {
    kvm_inject_apic_timer_irqs(vcpu);
    + kvm_inject_pit_timer_irqs(vcpu);
    /* TODO: PIT, RTC etc. */
    }
    EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
    @@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
    void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
    {
    kvm_apic_timer_intr_post(vcpu, vec);
    + kvm_pit_timer_intr_post(vcpu, vec);
    /* TODO: PIT, RTC etc. */
    }
    EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index bf78d65..c33a457 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -17,6 +17,7 @@
    #include
    #include "irq.h"
    #include "mmu.h"
    +#include "i8254.h"

    #include
    #include
    @@ -818,6 +819,7 @@ int kvm_dev_ioctl_check_extension(long ext)
    case KVM_CAP_SET_TSS_ADDR:
    case KVM_CAP_EXT_CPUID:
    case KVM_CAP_CLOCKSOURCE:
    + case KVM_CAP_PIT:
    r = 1;
    break;
    case KVM_CAP_VAPIC:
    @@ -1594,6 +1596,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
    } else
    goto out;
    break;
    + case KVM_CREATE_PIT:
    + r = -ENOMEM;
    + kvm->arch.vpit = kvm_create_pit(kvm);
    + if (kvm->arch.vpit)
    + r = 0;
    + break;
    case KVM_IRQ_LINE: {
    struct kvm_irq_level irq_event;

    @@ -3372,6 +3380,7 @@ static void kvm_free_vcpus(struct kvm *kvm)

    void kvm_arch_destroy_vm(struct kvm *kvm)
    {
    + kvm_free_pit(kvm);
    kfree(kvm->arch.vpic);
    kfree(kvm->arch.vioapic);
    kvm_free_vcpus(kvm);
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 024b57c..12932bb 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -297,6 +297,7 @@ struct kvm_arch{
    struct list_head active_mmu_pages;
    struct kvm_pic *vpic;
    struct kvm_ioapic *vioapic;
    + struct kvm_pit *vpit;

    int round_robin_prev_vcpu;
    unsigned int tss_addr;
    diff --git a/include/linux/kvm.h b/include/linux/kvm.h
    index e92e703..cefa9a2 100644
    --- a/include/linux/kvm.h
    +++ b/include/linux/kvm.h
    @@ -236,6 +236,7 @@ struct kvm_vapic_addr {
    #define KVM_CAP_CLOCKSOURCE 8
    #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
    #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
    +#define KVM_CAP_PIT 11

    /*
    * ioctls for VM fds
    @@ -258,6 +259,7 @@ struct kvm_vapic_addr {
    #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
    #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
    #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
    +#define KVM_CREATE_PIT _IO(KVMIO, 0x64)

    /*
    * ioctls for vcpu fds
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. [PATCH 34/35] KVM: x86: add functions to get the cpl of vcpu

    From: Izik Eidus

    Signed-off-by: Izik Eidus
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/svm.c | 8 ++++++++
    arch/x86/kvm/vmx.c | 15 +++++++++++++++
    include/asm-x86/kvm_host.h | 1 +
    3 files changed, 24 insertions(+), 0 deletions(-)

    diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
    index 51741f9..c1c1b97 100644
    --- a/arch/x86/kvm/svm.c
    +++ b/arch/x86/kvm/svm.c
    @@ -792,6 +792,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
    var->unusable = !var->present;
    }

    +static int svm_get_cpl(struct kvm_vcpu *vcpu)
    +{
    + struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
    +
    + return save->cpl;
    +}
    +
    static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
    {
    struct vcpu_svm *svm = to_svm(vcpu);
    @@ -1822,6 +1829,7 @@ static struct kvm_x86_ops svm_x86_ops = {
    .get_segment_base = svm_get_segment_base,
    .get_segment = svm_get_segment,
    .set_segment = svm_set_segment,
    + .get_cpl = svm_get_cpl,
    .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
    .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
    .set_cr0 = svm_set_cr0,
    diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
    index 0155931..9b56032 100644
    --- a/arch/x86/kvm/vmx.c
    +++ b/arch/x86/kvm/vmx.c
    @@ -1395,6 +1395,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
    var->unusable = (ar >> 16) & 1;
    }

    +static int vmx_get_cpl(struct kvm_vcpu *vcpu)
    +{
    + struct kvm_segment kvm_seg;
    +
    + if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
    + return 0;
    +
    + if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
    + return 3;
    +
    + vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
    + return kvm_seg.selector & 3;
    +}
    +
    static u32 vmx_segment_access_rights(struct kvm_segment *var)
    {
    u32 ar;
    @@ -2665,6 +2679,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
    .get_segment_base = vmx_get_segment_base,
    .get_segment = vmx_get_segment,
    .set_segment = vmx_set_segment,
    + .get_cpl = vmx_get_cpl,
    .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
    .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
    .set_cr0 = vmx_set_cr0,
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 2773f91..06bd154 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -387,6 +387,7 @@ struct kvm_x86_ops {
    u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
    void (*get_segment)(struct kvm_vcpu *vcpu,
    struct kvm_segment *var, int seg);
    + int (*get_cpl)(struct kvm_vcpu *vcpu);
    void (*set_segment)(struct kvm_vcpu *vcpu,
    struct kvm_segment *var, int seg);
    void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. [PATCH 24/35] x86: allow machine_crash_shutdown to be replaced

    From: Glauber Costa

    This patch allows machine_crash_shutdown to
    be replaced, just like any of the other functions
    in machine_ops

    Signed-off-by: Glauber Costa
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kernel/crash.c | 3 ++-
    arch/x86/kernel/reboot.c | 11 ++++++++++-
    include/asm-x86/reboot.h | 1 +
    3 files changed, 13 insertions(+), 2 deletions(-)

    diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
    index 9a5fa0a..d262306 100644
    --- a/arch/x86/kernel/crash.c
    +++ b/arch/x86/kernel/crash.c
    @@ -25,6 +25,7 @@
    #include
    #include
    #include
    +#include &lt;asm/reboot.h&gt;

    #ifdef CONFIG_X86_32
    #include
    @@ -121,7 +122,7 @@ static void nmi_shootdown_cpus(void)
    }
    #endif

    -void machine_crash_shutdown(struct pt_regs *regs)
    +void native_machine_crash_shutdown(struct pt_regs *regs)
    {
    /* This function is only called after the system
    * has panicked or is otherwise in a critical state.
    diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
    index 484c4a8..708d6f8 100644
    --- a/arch/x86/kernel/reboot.c
    +++ b/arch/x86/kernel/reboot.c
    @@ -471,7 +471,10 @@ struct machine_ops machine_ops = {
    .shutdown = native_machine_shutdown,
    .emergency_restart = native_machine_emergency_restart,
    .restart = native_machine_restart,
    - .halt = native_machine_halt
    + .halt = native_machine_halt,
    +#ifdef CONFIG_KEXEC
    + .crash_shutdown = native_machine_crash_shutdown,
    +#endif
    };

    void machine_power_off(void)
    @@ -499,3 +502,9 @@ void machine_halt(void)
    machine_ops.halt();
    }

    +#ifdef CONFIG_KEXEC
    +void machine_crash_shutdown(struct pt_regs *regs)
    +{
    + machine_ops.crash_shutdown(regs);
    +}
    +#endif
    diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
    index e9e3ffc..ff9b546 100644
    --- a/include/asm-x86/reboot.h
    +++ b/include/asm-x86/reboot.h
    @@ -16,5 +16,6 @@ struct machine_ops
    extern struct machine_ops machine_ops;

    void machine_real_restart(unsigned char *code, int length);
    +void native_machine_crash_shutdown(struct pt_regs *regs);

    #endif /* _ASM_REBOOT_H */
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. [PATCH 25/35] x86: make native_machine_shutdown non-static

    From: Glauber Costa

    It will allow external users to call it. It is mainly
    useful for routines that will override its machine_ops
    field for their own special purposes, but want to call the
    normal shutdown routine after they're done.

    Signed-off-by: Glauber Costa
    Signed-off-by: Avi Kivity
    ---
    arch/x86/kernel/reboot.c | 2 +-
    include/asm-x86/reboot.h | 1 +
    2 files changed, 2 insertions(+), 1 deletions(-)

    diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
    index 708d6f8..1481d85 100644
    --- a/arch/x86/kernel/reboot.c
    +++ b/arch/x86/kernel/reboot.c
    @@ -400,7 +400,7 @@ static void native_machine_emergency_restart(void)
    }
    }

    -static void native_machine_shutdown(void)
    +void native_machine_shutdown(void)
    {
    /* Stop the cpus and apics */
    #ifdef CONFIG_SMP
    diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
    index ff9b546..c5e8722 100644
    --- a/include/asm-x86/reboot.h
    +++ b/include/asm-x86/reboot.h
    @@ -17,5 +17,6 @@ extern struct machine_ops machine_ops;

    void machine_real_restart(unsigned char *code, int length);
    void native_machine_crash_shutdown(struct pt_regs *regs);
    +void native_machine_shutdown(void);

    #endif /* _ASM_REBOOT_H */
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. [PATCH 20/35] KVM: Provide unlocked version of emulator_write_phys()

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/x86.c | 21 ++++++++++++++-------
    include/asm-x86/kvm_host.h | 3 +++
    2 files changed, 17 insertions(+), 7 deletions(-)

    diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
    index 1b9e695..03ba402 100644
    --- a/arch/x86/kvm/x86.c
    +++ b/arch/x86/kvm/x86.c
    @@ -1840,22 +1840,29 @@ mmio:
    return X86EMUL_UNHANDLEABLE;
    }

    -static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    - const void *val, int bytes)
    +int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    + const void *val, int bytes)
    {
    int ret;

    - down_read(&vcpu->kvm->slots_lock);
    ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
    - if (ret < 0) {
    - up_read(&vcpu->kvm->slots_lock);
    + if (ret < 0)
    return 0;
    - }
    kvm_mmu_pte_write(vcpu, gpa, val, bytes);
    - up_read(&vcpu->kvm->slots_lock);
    return 1;
    }

    +static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    + const void *val, int bytes)
    +{
    + int ret;
    +
    + down_read(&vcpu->kvm->slots_lock);
    + ret =__emulator_write_phys(vcpu, gpa, val, bytes);
    + up_read(&vcpu->kvm->slots_lock);
    + return ret;
    +}
    +
    static int emulator_write_emulated_onepage(unsigned long addr,
    const void *val,
    unsigned int bytes,
    diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
    index 12932bb..c8e51f8 100644
    --- a/include/asm-x86/kvm_host.h
    +++ b/include/asm-x86/kvm_host.h
    @@ -431,6 +431,9 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);

    int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);

    +int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    + const void *val, int bytes);
    +
    enum emulation_result {
    EMULATE_DONE, /* no further processing */
    EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. [PATCH 28/35] KVM: MMU: Set the accessed bit on non-speculative shadow ptes

    If we populate a shadow pte due to a fault (and not speculatively due to a
    pte write) then we can set the accessed bit on it, as we know it will be
    set immediately on the next guest instruction. This saves a read-modify-write
    operation.

    Signed-off-by: Avi Kivity
    ---
    arch/x86/kvm/mmu.c | 8 +++++---
    arch/x86/kvm/paging_tmpl.h | 4 ++--
    2 files changed, 7 insertions(+), 5 deletions(-)

    diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
    index 072e942..a5872b3 100644
    --- a/arch/x86/kvm/mmu.c
    +++ b/arch/x86/kvm/mmu.c
    @@ -1020,7 +1020,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    unsigned pt_access, unsigned pte_access,
    int user_fault, int write_fault, int dirty,
    int *ptwrite, int largepage, gfn_t gfn,
    - struct page *page)
    + struct page *page, bool speculative)
    {
    u64 spte;
    int was_rmapped = 0;
    @@ -1061,6 +1061,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
    * demand paging).
    */
    spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
    + if (!speculative)
    + pte_access |= PT_ACCESSED_MASK;
    if (!dirty)
    pte_access &= ~ACC_WRITE_MASK;
    if (!(pte_access & ACC_EXEC_MASK))
    @@ -1148,13 +1150,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,

    if (level == 1) {
    mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
    - 0, write, 1, &pt_write, 0, gfn, page);
    + 0, write, 1, &pt_write, 0, gfn, page, false);
    return pt_write;
    }

    if (largepage && level == 2) {
    mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
    - 0, write, 1, &pt_write, 1, gfn, page);
    + 0, write, 1, &pt_write, 1, gfn, page, false);
    return pt_write;
    }

    diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
    index 57abbd0..e9ae5db 100644
    --- a/arch/x86/kvm/paging_tmpl.h
    +++ b/arch/x86/kvm/paging_tmpl.h
    @@ -266,7 +266,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
    get_page(npage);
    mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
    gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
    - npage);
    + npage, true);
    }

    /*
    @@ -349,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
    user_fault, write_fault,
    walker->ptes[walker->level-1] & PT_DIRTY_MASK,
    - ptwrite, largepage, walker->gfn, page);
    + ptwrite, largepage, walker->gfn, page, false);

    return shadow_ent;
    }
    --
    1.5.4.5

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread
Page 2 of 2 FirstFirst 1 2