[PATCH v2 0/4] I/OAT: watchdog/reset, tcp_dma_copybreak and I/OAT ver.3 support - Kernel

This is a discussion on [PATCH v2 0/4] I/OAT: watchdog/reset, tcp_dma_copybreak and I/OAT ver.3 support - Kernel ; This is version 2 of ioatdma patchset which has been updated after first review comments. The first three patches are a result of ioatdma sustaining and performance tuning efforts for both I/OAT versions 1.2 and 2.0. The fourth patch is ...

+ Reply to Thread
Results 1 to 6 of 6

Thread: [PATCH v2 0/4] I/OAT: watchdog/reset, tcp_dma_copybreak and I/OAT ver.3 support

  1. [PATCH v2 0/4] I/OAT: watchdog/reset, tcp_dma_copybreak and I/OAT ver.3 support

    This is version 2 of ioatdma patchset
    which has been updated after first review comments.
    The first three patches are a result of
    ioatdma sustaining and performance tuning efforts
    for both I/OAT versions 1.2 and 2.0.
    The fourth patch is a new patch in this set.
    It adds I/OAT version 3 support to ioatdma driver.

    The main differences between v1 and v2 of this patchset are:
    * "Increase sleep time in ioat_dma_self_test" change
    has been separated from
    "Add watchdog/reset functionality to ioatdma driver" patch
    * tcp_dma_copybreak default value setting has been moved
    from dmaengine to ioatdma driver
    * "I/OAT version 3.0 support" patch has been added
    * checkpatch problems have been resolved

    These patches apply to kernel 2.6.26-rc9.

    Maciej Sosnowski (4):
    I/OAT: Add watchdog/reset functionality to ioatdma driver
    I/OAT: Increase sleep time in ioat_dma_self_test
    I/OAT: tcp_dma_copybreak default value dependent on I/OAT version
    I/OAT: I/OAT version 3.0 support

    drivers/dca/dca-core.c | 131 +++++++--
    drivers/dca/dca-sysfs.c | 3
    drivers/dma/ioat.c | 15 +
    drivers/dma/ioat_dca.c | 244 ++++++++++++++++++
    drivers/dma/ioat_dma.c | 395 ++++++++++++++++++++++++++++--
    drivers/dma/ioatdma.h | 14 -
    drivers/dma/ioatdma_hw.h | 1
    drivers/dma/ioatdma_registers.h | 20 +
    include/linux/dca.h | 7
    include/linux/pci_ids.h | 8
    net/core/user_dma.c | 1
    11 files changed, 783 insertions(+), 56 deletions(-)

    --
    Maciej
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. [PATCH v2 1/4] I/OAT: Add watchdog/reset functionality to ioatdma driver

    Due to occasional DMA channel hangs observed for I/OAT versions 1.2 and 2.0
    a watchdog has been introduced to check every 2 seconds
    if all channels progress normally.
    If a stuck channel is detected, the driver resets it.
    The reset is done in two parts. The second part is scheduled
    by the first one to reinitialize the channel after the restart.

    Signed-off-by: Maciej Sosnowski
    ---

    drivers/dma/ioat_dma.c | 267 +++++++++++++++++++++++++++++++++++++++++++++++-
    drivers/dma/ioatdma.h | 10 ++
    2 files changed, 271 insertions(+), 6 deletions(-)

    diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
    index 318e8a2..f1d6382 100644
    --- a/drivers/dma/ioat_dma.c
    +++ b/drivers/dma/ioat_dma.c
    @@ -32,6 +32,7 @@ #include
    #include
    #include
    #include
    +#include
    #include "ioatdma.h"
    #include "ioatdma_registers.h"
    #include "ioatdma_hw.h"
    @@ -41,11 +42,17 @@ #define to_ioatdma_device(dev) container
    #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
    #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

    +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
    static int ioat_pending_level = 4;
    module_param(ioat_pending_level, int, 0644);
    MODULE_PARM_DESC(ioat_pending_level,
    "high-water mark for pushing ioat descriptors (default: 4)");

    +#define RESET_DELAY msecs_to_jiffies(100)
    +#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
    +static void ioat_dma_chan_reset_part2(struct work_struct *work);
    +static void ioat_dma_chan_watchdog(struct work_struct *work);
    +
    /* internal functions */
    static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
    static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
    @@ -137,6 +144,7 @@ static int ioat_dma_enumerate_channels(s
    ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
    ioat_chan->xfercap = xfercap;
    ioat_chan->desccount = 0;
    + INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
    if (ioat_chan->device->version != IOAT_VER_1_2) {
    writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
    | IOAT_DMA_DCA_ANY_CPU,
    @@ -175,7 +183,7 @@ static void ioat1_dma_memcpy_issue_pendi
    {
    struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

    - if (ioat_chan->pending != 0) {
    + if (ioat_chan->pending > 0) {
    spin_lock_bh(&ioat_chan->desc_lock);
    __ioat1_dma_memcpy_issue_pending(ioat_chan);
    spin_unlock_bh(&ioat_chan->desc_lock);
    @@ -194,13 +202,228 @@ static void ioat2_dma_memcpy_issue_pendi
    {
    struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

    - if (ioat_chan->pending != 0) {
    + if (ioat_chan->pending > 0) {
    spin_lock_bh(&ioat_chan->desc_lock);
    __ioat2_dma_memcpy_issue_pending(ioat_chan);
    spin_unlock_bh(&ioat_chan->desc_lock);
    }
    }

    +
    +/**
    + * ioat_dma_chan_reset_part2 - reinit the channel after a reset
    + */
    +static void ioat_dma_chan_reset_part2(struct work_struct *work)
    +{
    + struct ioat_dma_chan *ioat_chan =
    + container_of(work, struct ioat_dma_chan, work.work);
    + struct ioat_desc_sw *desc;
    +
    + spin_lock_bh(&ioat_chan->cleanup_lock);
    + spin_lock_bh(&ioat_chan->desc_lock);
    +
    + ioat_chan->completion_virt->low = 0;
    + ioat_chan->completion_virt->high = 0;
    + ioat_chan->pending = 0;
    +
    + /*
    + * count the descriptors waiting, and be sure to do it
    + * right for both the CB1 line and the CB2 ring
    + */
    + ioat_chan->dmacount = 0;
    + if (ioat_chan->used_desc.prev) {
    + desc = to_ioat_desc(ioat_chan->used_desc.prev);
    + do {
    + ioat_chan->dmacount++;
    + desc = to_ioat_desc(desc->node.next);
    + } while (&desc->node != ioat_chan->used_desc.next);
    + }
    +
    + /*
    + * write the new starting descriptor address
    + * this puts channel engine into ARMED state
    + */
    + desc = to_ioat_desc(ioat_chan->used_desc.prev);
    + switch (ioat_chan->device->version) {
    + case IOAT_VER_1_2:
    + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
    + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
    + writel(((u64) desc->async_tx.phys) >> 32,
    + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
    +
    + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
    + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
    + break;
    + case IOAT_VER_2_0:
    + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
    + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
    + writel(((u64) desc->async_tx.phys) >> 32,
    + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
    +
    + /* tell the engine to go with what's left to be done */
    + writew(ioat_chan->dmacount,
    + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
    +
    + break;
    + }
    + dev_err(&ioat_chan->device->pdev->dev,
    + "chan%d reset - %d descs waiting, %d total desc\n",
    + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
    +
    + spin_unlock_bh(&ioat_chan->desc_lock);
    + spin_unlock_bh(&ioat_chan->cleanup_lock);
    +}
    +
    +/**
    + * ioat_dma_reset_channel - restart a channel
    + * @ioat_chan: IOAT DMA channel handle
    + */
    +static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
    +{
    + u32 chansts, chanerr;
    +
    + if (!ioat_chan->used_desc.prev)
    + return;
    +
    + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
    + chansts = (ioat_chan->completion_virt->low
    + & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
    + if (chanerr) {
    + dev_err(&ioat_chan->device->pdev->dev,
    + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
    + chan_num(ioat_chan), chansts, chanerr);
    + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
    + }
    +
    + /*
    + * whack it upside the head with a reset
    + * and wait for things to settle out.
    + * force the pending count to a really big negative
    + * to make sure no one forces an issue_pending
    + * while we're waiting.
    + */
    +
    + spin_lock_bh(&ioat_chan->desc_lock);
    + ioat_chan->pending = INT_MIN;
    + writeb(IOAT_CHANCMD_RESET,
    + ioat_chan->reg_base
    + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
    + spin_unlock_bh(&ioat_chan->desc_lock);
    +
    + /* schedule the 2nd half instead of sleeping a long time */
    + schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
    +}
    +
    +/**
    + * ioat_dma_chan_watchdog - watch for stuck channels
    + */
    +static void ioat_dma_chan_watchdog(struct work_struct *work)
    +{
    + struct ioatdma_device *device =
    + container_of(work, struct ioatdma_device, work.work);
    + struct ioat_dma_chan *ioat_chan;
    + int i;
    +
    + union {
    + u64 full;
    + struct {
    + u32 low;
    + u32 high;
    + };
    + } completion_hw;
    + unsigned long compl_desc_addr_hw;
    +
    + for (i = 0; i < device->common.chancnt; i++) {
    + ioat_chan = ioat_lookup_chan_by_index(device, i);
    +
    + if (ioat_chan->device->version == IOAT_VER_1_2
    + /* have we started processing anything yet */
    + && ioat_chan->last_completion
    + /* have we completed any since last watchdog cycle? */
    + && (ioat_chan->last_completion ==
    + ioat_chan->watchdog_completion)
    + /* has TCP stuck on one cookie since last watchdog? */
    + && (ioat_chan->watchdog_tcp_cookie ==
    + ioat_chan->watchdog_last_tcp_cookie)
    + && (ioat_chan->watchdog_tcp_cookie !=
    + ioat_chan->completed_cookie)
    + /* is there something in the chain to be processed? */
    + /* CB1 chain always has at least the last one processed */
    + && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
    + && ioat_chan->pending == 0) {
    +
    + /*
    + * check CHANSTS register for completed
    + * descriptor address.
    + * if it is different than completion writeback,
    + * it is not zero
    + * and it has changed since the last watchdog
    + * we can assume that channel
    + * is still working correctly
    + * and the problem is in completion writeback.
    + * update completion writeback
    + * with actual CHANSTS value
    + * else
    + * try resetting the channel
    + */
    +
    + completion_hw.low = readl(ioat_chan->reg_base +
    + IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
    + completion_hw.high = readl(ioat_chan->reg_base +
    + IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
    +#if (BITS_PER_LONG == 64)
    + compl_desc_addr_hw =
    + completion_hw.full
    + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
    +#else
    + compl_desc_addr_hw =
    + completion_hw.low & IOAT_LOW_COMPLETION_MASK;
    +#endif
    +
    + if ((compl_desc_addr_hw != 0)
    + && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
    + && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
    + ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
    + ioat_chan->completion_virt->low = completion_hw.low;
    + ioat_chan->completion_virt->high = completion_hw.high;
    + } else {
    + ioat_dma_reset_channel(ioat_chan);
    + ioat_chan->watchdog_completion = 0;
    + ioat_chan->last_compl_desc_addr_hw = 0;
    + }
    +
    + /*
    + * for version 2.0 if there are descriptors yet to be processed
    + * and the last completed hasn't changed since the last watchdog
    + * if they haven't hit the pending level
    + * issue the pending to push them through
    + * else
    + * try resetting the channel
    + */
    + } else if (ioat_chan->device->version == IOAT_VER_2_0
    + && ioat_chan->used_desc.prev
    + && ioat_chan->last_completion
    + && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
    +
    + if (ioat_chan->pending < ioat_pending_level)
    + ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
    + else {
    + ioat_dma_reset_channel(ioat_chan);
    + ioat_chan->watchdog_completion = 0;
    + }
    + } else {
    + ioat_chan->last_compl_desc_addr_hw = 0;
    + ioat_chan->watchdog_completion
    + = ioat_chan->last_completion;
    + }
    +
    + ioat_chan->watchdog_last_tcp_cookie =
    + ioat_chan->watchdog_tcp_cookie;
    + }
    +
    + schedule_delayed_work(&device->work, WATCHDOG_DELAY);
    +}
    +
    static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
    {
    struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
    @@ -585,6 +808,10 @@ static void ioat_dma_free_chan_resources
    ioat_chan->last_completion = ioat_chan->completion_addr = 0;
    ioat_chan->pending = 0;
    ioat_chan->dmacount = 0;
    + ioat_chan->watchdog_completion = 0;
    + ioat_chan->last_compl_desc_addr_hw = 0;
    + ioat_chan->watchdog_tcp_cookie =
    + ioat_chan->watchdog_last_tcp_cookie = 0;
    }

    /**
    @@ -716,8 +943,12 @@ static struct dma_async_tx_descriptor *i
    new->src = dma_src;
    new->async_tx.flags = flags;
    return &new->async_tx;
    - } else
    + } else {
    + dev_err(&ioat_chan->device->pdev->dev,
    + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
    + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
    return NULL;
    + }
    }

    static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
    @@ -744,8 +975,13 @@ static struct dma_async_tx_descriptor *i
    new->src = dma_src;
    new->async_tx.flags = flags;
    return &new->async_tx;
    - } else
    + } else {
    + spin_unlock_bh(&ioat_chan->desc_lock);
    + dev_err(&ioat_chan->device->pdev->dev,
    + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
    + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
    return NULL;
    + }
    }

    static void ioat_dma_cleanup_tasklet(unsigned long data)
    @@ -799,11 +1035,25 @@ #endif

    if (phys_complete == ioat_chan->last_completion) {
    spin_unlock_bh(&ioat_chan->cleanup_lock);
    + /*
    + * perhaps we're stuck so hard that the watchdog can't go off?
    + * try to catch it after 2 seconds
    + */
    + if (time_after(jiffies,
    + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
    + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
    + ioat_chan->last_completion_time = jiffies;
    + }
    return;
    }
    + ioat_chan->last_completion_time = jiffies;

    cookie = 0;
    - spin_lock_bh(&ioat_chan->desc_lock);
    + if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
    + spin_unlock_bh(&ioat_chan->cleanup_lock);
    + return;
    + }
    +
    switch (ioat_chan->device->version) {
    case IOAT_VER_1_2:
    list_for_each_entry_safe(desc, _desc,
    @@ -943,6 +1193,7 @@ static enum dma_status ioat_dma_is_compl

    last_used = chan->cookie;
    last_complete = ioat_chan->completed_cookie;
    + ioat_chan->watchdog_tcp_cookie = cookie;

    if (done)
    *done = last_complete;
    @@ -1333,6 +1584,10 @@ struct ioatdma_device *ioat_dma_probe(st

    dma_async_device_register(&device->common);

    + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
    + schedule_delayed_work(&device->work,
    + WATCHDOG_DELAY);
    +
    return device;

    err_self_test:
    @@ -1365,6 +1620,8 @@ void ioat_dma_remove(struct ioatdma_devi
    pci_release_regions(device->pdev);
    pci_disable_device(device->pdev);

    + cancel_delayed_work(&device->work);
    +
    list_for_each_entry_safe(chan, _chan,
    &device->common.channels, device_node) {
    ioat_chan = to_ioat_chan(chan);
    diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
    index f2c7fed..c6ec933 100644
    --- a/drivers/dma/ioatdma.h
    +++ b/drivers/dma/ioatdma.h
    @@ -28,7 +28,7 @@ #include
    #include
    #include

    -#define IOAT_DMA_VERSION "2.04"
    +#define IOAT_DMA_VERSION "2.18"

    enum ioat_interrupt {
    none = 0,
    @@ -40,6 +40,7 @@ enum ioat_interrupt {

    #define IOAT_LOW_COMPLETION_MASK 0xffffffc0
    #define IOAT_DMA_DCA_ANY_CPU ~0
    +#define IOAT_WATCHDOG_PERIOD (2 * HZ)


    /**
    @@ -62,6 +63,7 @@ struct ioatdma_device {
    struct dma_device common;
    u8 version;
    enum ioat_interrupt irq_mode;
    + struct delayed_work work;
    struct msix_entry msix_entries[4];
    struct ioat_dma_chan *idx[4];
    };
    @@ -75,6 +77,7 @@ struct ioat_dma_chan {

    dma_cookie_t completed_cookie;
    unsigned long last_completion;
    + unsigned long last_completion_time;

    size_t xfercap; /* XFERCAP register value expanded out */

    @@ -82,6 +85,10 @@ struct ioat_dma_chan {
    spinlock_t desc_lock;
    struct list_head free_desc;
    struct list_head used_desc;
    + unsigned long watchdog_completion;
    + int watchdog_tcp_cookie;
    + u32 watchdog_last_tcp_cookie;
    + struct delayed_work work;

    int pending;
    int dmacount;
    @@ -98,6 +105,7 @@ struct ioat_dma_chan {
    u32 high;
    };
    } *completion_virt;
    + unsigned long last_compl_desc_addr_hw;
    struct tasklet_struct cleanup_task;
    };


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. [PATCH v2 3/4] I/OAT: tcp_dma_copybreak default value dependent on I/OAT version

    I/OAT DMA performance tuning showed different optimal values
    of tcp_dma_copybreak for different I/OAT versions
    (4096 for 1.2 and 2048 for 2.0).
    This patch lets ioatdma driver set tcp_dma_copybreak value
    according to these results.

    Signed-off-by: Maciej Sosnowski
    ---

    drivers/dma/ioat_dma.c | 25 +++++++++++++++++++++++++
    net/core/user_dma.c | 1 +
    2 files changed, 26 insertions(+), 0 deletions(-)

    diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
    index 2fe3aac..b5ce5be 100644
    --- a/drivers/dma/ioat_dma.c
    +++ b/drivers/dma/ioat_dma.c
    @@ -37,6 +37,10 @@ #include "ioatdma.h"
    #include "ioatdma_registers.h"
    #include "ioatdma_hw.h"

    +#ifdef CONFIG_NET_DMA
    +#include
    +#endif
    +
    #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
    #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
    #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
    @@ -53,6 +57,14 @@ #define WATCHDOG_DELAY round_jiffies(ms
    static void ioat_dma_chan_reset_part2(struct work_struct *work);
    static void ioat_dma_chan_watchdog(struct work_struct *work);

    +#ifdef CONFIG_NET_DMA
    +/*
    + * default tcp_dma_copybreak values for different IOAT versions
    + */
    +#define IOAT1_DEFAULT_TCP_DMA_COPYBREAK 4096
    +#define IOAT2_DEFAULT_TCP_DMA_COPYBREAK 2048
    +#endif
    +
    /* internal functions */
    static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
    static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
    @@ -1582,6 +1594,19 @@ struct ioatdma_device *ioat_dma_probe(st
    if (err)
    goto err_self_test;

    +#ifdef CONFIG_NET_DMA
    + switch (device->version) {
    + case IOAT_VER_1_2:
    + sysctl_tcp_dma_copybreak =
    + IOAT1_DEFAULT_TCP_DMA_COPYBREAK;
    + break;
    + case IOAT_VER_2_0:
    + sysctl_tcp_dma_copybreak =
    + IOAT2_DEFAULT_TCP_DMA_COPYBREAK;
    + break;
    + }
    +#endif
    +
    dma_async_device_register(&device->common);

    INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
    diff --git a/net/core/user_dma.c b/net/core/user_dma.c
    index c77aff9..8c6b706 100644
    --- a/net/core/user_dma.c
    +++ b/net/core/user_dma.c
    @@ -34,6 +34,7 @@ #include
    #define NET_DMA_DEFAULT_COPYBREAK 4096

    int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
    +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);

    /**
    * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. [PATCH v2 4/4] I/OAT: I/OAT version 3.0 support

    This patch adds to ioatdma and dca modules
    support for Intel I/OAT DMA engine ver.3 (aka CB3 device).
    The main features of I/OAT ver.3 are:
    * 8 single channel DMA devices (8 channels total)
    * 8 DCA providers, each can accept 2 requesters
    * 8-bit TAG values and 32-bit extended APIC IDs

    Signed-off-by: Maciej Sosnowski
    ---

    drivers/dca/dca-core.c | 131 ++++++++++++++++-----
    drivers/dca/dca-sysfs.c | 3
    drivers/dma/ioat.c | 15 ++
    drivers/dma/ioat_dca.c | 244 ++++++++++++++++++++++++++++++++++++++-
    drivers/dma/ioat_dma.c | 101 ++++++++++++++--
    drivers/dma/ioatdma.h | 4 -
    drivers/dma/ioatdma_hw.h | 1
    drivers/dma/ioatdma_registers.h | 20 +++
    include/linux/dca.h | 7 +
    include/linux/pci_ids.h | 8 +
    10 files changed, 485 insertions(+), 49 deletions(-)

    diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
    index bf5b92f..ec249d2 100644
    --- a/drivers/dca/dca-core.c
    +++ b/drivers/dca/dca-core.c
    @@ -28,13 +28,29 @@ #include
    #include
    #include

    -MODULE_LICENSE("GPL");
    +#define DCA_VERSION "1.4"

    -/* For now we're assuming a single, global, DCA provider for the system. */
    +MODULE_VERSION(DCA_VERSION);
    +MODULE_LICENSE("GPL");
    +MODULE_AUTHOR("Intel Corporation");

    static DEFINE_SPINLOCK(dca_lock);

    -static struct dca_provider *global_dca = NULL;
    +static LIST_HEAD(dca_providers);
    +
    +static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
    +{
    + struct dca_provider *dca, *ret = NULL;
    +
    + list_for_each_entry(dca, &dca_providers, node) {
    + if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
    + ret = dca;
    + break;
    + }
    + }
    +
    + return ret;
    +}

    /**
    * dca_add_requester - add a dca client to the list
    @@ -42,25 +58,39 @@ static struct dca_provider *global_dca =
    */
    int dca_add_requester(struct device *dev)
    {
    - int err, slot;
    + struct dca_provider *dca;
    + int err, slot = -ENODEV;

    - if (!global_dca)
    - return -ENODEV;
    + if (!dev)
    + return -EFAULT;

    spin_lock(&dca_lock);
    - slot = global_dca->ops->add_requester(global_dca, dev);
    - spin_unlock(&dca_lock);
    - if (slot < 0)
    +
    + /* check if the requester has not been added already */
    + dca = dca_find_provider_by_dev(dev);
    + if (dca) {
    + spin_unlock(&dca_lock);
    + return -EEXIST;
    + }
    +
    + list_for_each_entry(dca, &dca_providers, node) {
    + slot = dca->ops->add_requester(dca, dev);
    + if (slot >= 0)
    + break;
    + }
    + if (slot < 0) {
    + spin_unlock(&dca_lock);
    return slot;
    + }

    - err = dca_sysfs_add_req(global_dca, dev, slot);
    + err = dca_sysfs_add_req(dca, dev, slot);
    if (err) {
    - spin_lock(&dca_lock);
    - global_dca->ops->remove_requester(global_dca, dev);
    + dca->ops->remove_requester(dca, dev);
    spin_unlock(&dca_lock);
    return err;
    }

    + spin_unlock(&dca_lock);
    return 0;
    }
    EXPORT_SYMBOL_GPL(dca_add_requester);
    @@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester);
    */
    int dca_remove_requester(struct device *dev)
    {
    + struct dca_provider *dca;
    int slot;
    - if (!global_dca)
    - return -ENODEV;
    +
    + if (!dev)
    + return -EFAULT;

    spin_lock(&dca_lock);
    - slot = global_dca->ops->remove_requester(global_dca, dev);
    - spin_unlock(&dca_lock);
    - if (slot < 0)
    + dca = dca_find_provider_by_dev(dev);
    + if (!dca) {
    + spin_unlock(&dca_lock);
    + return -ENODEV;
    + }
    + slot = dca->ops->remove_requester(dca, dev);
    + if (slot < 0) {
    + spin_unlock(&dca_lock);
    return slot;
    + }

    - dca_sysfs_remove_req(global_dca, slot);
    + dca_sysfs_remove_req(dca, slot);
    +
    + spin_unlock(&dca_lock);
    return 0;
    }
    EXPORT_SYMBOL_GPL(dca_remove_requester);

    /**
    - * dca_get_tag - return the dca tag for the given cpu
    + * dca_common_get_tag - return the dca tag (serves both new and old api)
    + * @dev - the device that wants dca service
    * @cpu - the cpuid as returned by get_cpu()
    */
    -u8 dca_get_tag(int cpu)
    +u8 dca_common_get_tag(struct device *dev, int cpu)
    {
    - if (!global_dca)
    + struct dca_provider *dca;
    + u8 tag;
    +
    + spin_lock(&dca_lock);
    +
    + dca = dca_find_provider_by_dev(dev);
    + if (!dca) {
    + spin_unlock(&dca_lock);
    return -ENODEV;
    - return global_dca->ops->get_tag(global_dca, cpu);
    + }
    + tag = dca->ops->get_tag(dca, dev, cpu);
    +
    + spin_unlock(&dca_lock);
    + return tag;
    +}
    +
    +/**
    + * dca3_get_tag - return the dca tag to the requester device
    + * for the given cpu (new api)
    + * @dev - the device that wants dca service
    + * @cpu - the cpuid as returned by get_cpu()
    + */
    +u8 dca3_get_tag(struct device *dev, int cpu)
    +{
    + if (!dev)
    + return -EFAULT;
    +
    + return dca_common_get_tag(dev, cpu);
    +}
    +EXPORT_SYMBOL_GPL(dca3_get_tag);
    +
    +/**
    + * dca_get_tag - return the dca tag for the given cpu (old api)
    + * @cpu - the cpuid as returned by get_cpu()
    + */
    +u8 dca_get_tag(int cpu)
    +{
    + struct device *dev = NULL;
    +
    + return dca_common_get_tag(dev, cpu);
    }
    EXPORT_SYMBOL_GPL(dca_get_tag);

    @@ -140,12 +218,10 @@ int register_dca_provider(struct dca_pro
    {
    int err;

    - if (global_dca)
    - return -EEXIST;
    err = dca_sysfs_add_provider(dca, dev);
    if (err)
    return err;
    - global_dca = dca;
    + list_add(&dca->node, &dca_providers);
    blocking_notifier_call_chain(&dca_provider_chain,
    DCA_PROVIDER_ADD, NULL);
    return 0;
    @@ -158,11 +234,9 @@ EXPORT_SYMBOL_GPL(register_dca_provider)
    */
    void unregister_dca_provider(struct dca_provider *dca)
    {
    - if (!global_dca)
    - return;
    blocking_notifier_call_chain(&dca_provider_chain,
    DCA_PROVIDER_REMOVE, NULL);
    - global_dca = NULL;
    + list_del(&dca->node);
    dca_sysfs_remove_provider(dca);
    }
    EXPORT_SYMBOL_GPL(unregister_dca_provider);
    @@ -187,6 +261,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify)

    static int __init dca_init(void)
    {
    + printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
    return dca_sysfs_init();
    }

    diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c
    index 011328f..3d47e9d 100644
    --- a/drivers/dca/dca-sysfs.c
    +++ b/drivers/dca/dca-sysfs.c
    @@ -13,9 +13,10 @@ static spinlock_t dca_idr_lock;
    int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
    {
    struct device *cd;
    + static int req_count;

    cd = device_create(dca_class, dca->cd, MKDEV(0, slot + 1),
    - "requester%d", slot);
    + "requester%d", req_count++);
    if (IS_ERR(cd))
    return PTR_ERR(cd);
    return 0;
    diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
    index 16e0fd8..9b16a3a 100644
    --- a/drivers/dma/ioat.c
    +++ b/drivers/dma/ioat.c
    @@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl

    /* I/OAT v2 platforms */
    { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
    +
    + /* I/OAT v3 platforms */
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
    + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
    { 0, }
    };

    @@ -83,6 +93,11 @@ static int ioat_setup_functionality(stru
    if (device->dma && ioat_dca_enabled)
    device->dca = ioat2_dca_init(pdev, iobase);
    break;
    + case IOAT_VER_3_0:
    + device->dma = ioat_dma_probe(pdev, iobase);
    + if (device->dma && ioat_dca_enabled)
    + device->dca = ioat3_dca_init(pdev, iobase);
    + break;
    default:
    err = -ENODEV;
    break;
    diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
    index 9e92276..6cf622d 100644
    --- a/drivers/dma/ioat_dca.c
    +++ b/drivers/dma/ioat_dca.c
    @@ -37,12 +37,18 @@ #include "ioatdma.h"
    #include "ioatdma_registers.h"

    /*
    - * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15
    + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
    * contain the bit number of the APIC ID to map into the DCA tag. If the valid
    * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
    */
    #define DCA_TAG_MAP_VALID 0x80

    +#define DCA3_TAG_MAP_BIT_TO_INV 0x80
    +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
    +#define DCA3_TAG_MAP_LITERAL_VAL 0x1
    +
    +#define DCA_TAG_MAP_MASK 0xDF
    +
    /*
    * "Legacy" DCA systems do not implement the DCA register set in the
    * I/OAT device. Software needs direct support for their tag mappings.
    @@ -95,6 +101,7 @@ struct ioat_dca_slot {
    };

    #define IOAT_DCA_MAX_REQ 6
    +#define IOAT3_DCA_MAX_REQ 2

    struct ioat_dca_priv {
    void __iomem *iobase;
    @@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(str
    return -ENODEV;
    }

    -static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
    +static u8 ioat_dca_get_tag(struct dca_provider *dca,
    + struct device *dev,
    + int cpu)
    {
    struct ioat_dca_priv *ioatdca = dca_priv(dca);
    int i, apic_id, bit, value;
    @@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_pr
    return tag;
    }

    +static int ioat_dca_dev_managed(struct dca_provider *dca,
    + struct device *dev)
    +{
    + struct ioat_dca_priv *ioatdca = dca_priv(dca);
    + struct pci_dev *pdev;
    + int i;
    +
    + pdev = to_pci_dev(dev);
    + for (i = 0; i < ioatdca->max_requesters; i++) {
    + if (ioatdca->req_slots[i].pdev == pdev)
    + return 1;
    + }
    + return 0;
    +}
    +
    static struct dca_ops ioat_dca_ops = {
    .add_requester = ioat_dca_add_requester,
    .remove_requester = ioat_dca_remove_requester,
    .get_tag = ioat_dca_get_tag,
    + .dev_managed = ioat_dca_dev_managed,
    };


    @@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struc
    u8 *tag_map = NULL;
    int i;
    int err;
    + u8 version;
    + u8 max_requesters;

    if (!system_has_dca_enabled(pdev))
    return NULL;
    @@ -237,15 +264,20 @@ struct dca_provider *ioat_dca_init(struc
    if (tag_map == NULL)
    return NULL;

    + version = readb(iobase + IOAT_VER_OFFSET);
    + if (version == IOAT_VER_3_0)
    + max_requesters = IOAT3_DCA_MAX_REQ;
    + else
    + max_requesters = IOAT_DCA_MAX_REQ;
    +
    dca = alloc_dca_provider(&ioat_dca_ops,
    sizeof(*ioatdca) +
    - (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ));
    + (sizeof(struct ioat_dca_slot) * max_requesters));
    if (!dca)
    return NULL;

    ioatdca = dca_priv(dca);
    - ioatdca->max_requesters = IOAT_DCA_MAX_REQ;
    -
    + ioatdca->max_requesters = max_requesters;
    ioatdca->dca_base = iobase + 0x54;

    /* copy over the APIC ID to DCA tag mapping */
    @@ -323,11 +355,13 @@ static int ioat2_dca_remove_requester(st
    return -ENODEV;
    }

    -static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu)
    +static u8 ioat2_dca_get_tag(struct dca_provider *dca,
    + struct device *dev,
    + int cpu)
    {
    u8 tag;

    - tag = ioat_dca_get_tag(dca, cpu);
    + tag = ioat_dca_get_tag(dca, dev, cpu);
    tag = (~tag) & 0x1F;
    return tag;
    }
    @@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = {
    .add_requester = ioat2_dca_add_requester,
    .remove_requester = ioat2_dca_remove_requester,
    .get_tag = ioat2_dca_get_tag,
    + .dev_managed = ioat_dca_dev_managed,
    };

    static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
    @@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(stru

    return dca;
    }
    +
    +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
    +{
    + struct ioat_dca_priv *ioatdca = dca_priv(dca);
    + struct pci_dev *pdev;
    + int i;
    + u16 id;
    + u16 global_req_table;
    +
    + /* This implementation only supports PCI-Express */
    + if (dev->bus != &pci_bus_type)
    + return -ENODEV;
    + pdev = to_pci_dev(dev);
    + id = dcaid_from_pcidev(pdev);
    +
    + if (ioatdca->requester_count == ioatdca->max_requesters)
    + return -ENODEV;
    +
    + for (i = 0; i < ioatdca->max_requesters; i++) {
    + if (ioatdca->req_slots[i].pdev == NULL) {
    + /* found an empty slot */
    + ioatdca->requester_count++;
    + ioatdca->req_slots[i].pdev = pdev;
    + ioatdca->req_slots[i].rid = id;
    + global_req_table =
    + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
    + writel(id | IOAT_DCA_GREQID_VALID,
    + ioatdca->iobase + global_req_table + (i * 4));
    + return i;
    + }
    + }
    + /* Error, ioatdma->requester_count is out of whack */
    + return -EFAULT;
    +}
    +
    +static int ioat3_dca_remove_requester(struct dca_provider *dca,
    + struct device *dev)
    +{
    + struct ioat_dca_priv *ioatdca = dca_priv(dca);
    + struct pci_dev *pdev;
    + int i;
    + u16 global_req_table;
    +
    + /* This implementation only supports PCI-Express */
    + if (dev->bus != &pci_bus_type)
    + return -ENODEV;
    + pdev = to_pci_dev(dev);
    +
    + for (i = 0; i < ioatdca->max_requesters; i++) {
    + if (ioatdca->req_slots[i].pdev == pdev) {
    + global_req_table =
    + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
    + writel(0, ioatdca->iobase + global_req_table + (i * 4));
    + ioatdca->req_slots[i].pdev = NULL;
    + ioatdca->req_slots[i].rid = 0;
    + ioatdca->requester_count--;
    + return i;
    + }
    + }
    + return -ENODEV;
    +}
    +
    +static u8 ioat3_dca_get_tag(struct dca_provider *dca,
    + struct device *dev,
    + int cpu)
    +{
    + u8 tag;
    +
    + struct ioat_dca_priv *ioatdca = dca_priv(dca);
    + int i, apic_id, bit, value;
    + u8 entry;
    +
    + tag = 0;
    + apic_id = cpu_physical_id(cpu);
    +
    + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
    + entry = ioatdca->tag_map[i];
    + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
    + bit = entry &
    + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
    + value = (apic_id & (1 << bit)) ? 1 : 0;
    + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
    + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
    + value = (apic_id & (1 << bit)) ? 0 : 1;
    + } else {
    + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
    + }
    + tag |= (value << i);
    + }
    +
    + return tag;
    +}
    +
    +static struct dca_ops ioat3_dca_ops = {
    + .add_requester = ioat3_dca_add_requester,
    + .remove_requester = ioat3_dca_remove_requester,
    + .get_tag = ioat3_dca_get_tag,
    + .dev_managed = ioat_dca_dev_managed,
    +};
    +
    +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
    +{
    + int slots = 0;
    + u32 req;
    + u16 global_req_table;
    +
    + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
    + if (global_req_table == 0)
    + return 0;
    +
    + do {
    + req = readl(iobase + global_req_table + (slots * sizeof(u32)));
    + slots++;
    + } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
    +
    + return slots;
    +}
    +
    +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
    +{
    + struct dca_provider *dca;
    + struct ioat_dca_priv *ioatdca;
    + int slots;
    + int i;
    + int err;
    + u16 dca_offset;
    + u16 csi_fsb_control;
    + u16 pcie_control;
    + u8 bit;
    +
    + union {
    + u64 full;
    + struct {
    + u32 low;
    + u32 high;
    + };
    + } tag_map;
    +
    + if (!system_has_dca_enabled(pdev))
    + return NULL;
    +
    + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
    + if (dca_offset == 0)
    + return NULL;
    +
    + slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
    + if (slots == 0)
    + return NULL;
    +
    + dca = alloc_dca_provider(&ioat3_dca_ops,
    + sizeof(*ioatdca)
    + + (sizeof(struct ioat_dca_slot) * slots));
    + if (!dca)
    + return NULL;
    +
    + ioatdca = dca_priv(dca);
    + ioatdca->iobase = iobase;
    + ioatdca->dca_base = iobase + dca_offset;
    + ioatdca->max_requesters = slots;
    +
    + /* some bios might not know to turn these on */
    + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
    + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
    + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
    + writew(csi_fsb_control,
    + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
    + }
    + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
    + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
    + pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
    + writew(pcie_control,
    + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
    + }
    +
    +
    + /* TODO version, compatibility and configuration checks */
    +
    + /* copy out the APIC to DCA tag map */
    + tag_map.low =
    + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
    + tag_map.high =
    + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
    + for (i = 0; i < 8; i++) {
    + bit = tag_map.full >> (8 * i);
    + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
    + }
    +
    + err = register_dca_provider(dca, &pdev->dev);
    + if (err) {
    + free_dca_provider(dca);
    + return NULL;
    + }
    +
    + return dca;
    +}
    diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
    index b5ce5be..890e252 100644
    --- a/drivers/dma/ioat_dma.c
    +++ b/drivers/dma/ioat_dma.c
    @@ -63,8 +63,15 @@ #ifdef CONFIG_NET_DMA
    */
    #define IOAT1_DEFAULT_TCP_DMA_COPYBREAK 4096
    #define IOAT2_DEFAULT_TCP_DMA_COPYBREAK 2048
    +#define IOAT3_DEFAULT_TCP_DMA_COPYBREAK 4096
    #endif

    +/*
    + * workaround for IOAT ver.3.0 null descriptor issue
    + * (channel returns error when size is 0)
    + */
    +#define NULL_DESC_BUFFER_SIZE 1
    +
    /* internal functions */
    static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
    static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
    @@ -141,6 +148,38 @@ static int ioat_dma_enumerate_channels(s
    int i;
    struct ioat_dma_chan *ioat_chan;

    + /*
    + * IOAT ver.3 workarounds
    + */
    + if (device->version == IOAT_VER_3_0) {
    + u32 chan_err_mask;
    + u16 dev_id;
    + u32 dmauncerrsts;
    +
    + /*
    + * Write CHANERRMSK_INT with 3E07h to mask out the errors
    + * that can cause stability issues for IOAT ver.3
    + */
    + chan_err_mask = 0x3E07;
    + pci_write_config_dword(device->pdev,
    + IOAT_PCI_CHANERRMASK_INT_OFFSET,
    + chan_err_mask);
    +
    + /*
    + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
    + * (workaround for spurious config parity error after restart)
    + */
    + pci_read_config_word(device->pdev,
    + IOAT_PCI_DEVICE_ID_OFFSET,
    + &dev_id);
    + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
    + dmauncerrsts = 0x10;
    + pci_write_config_dword(device->pdev,
    + IOAT_PCI_DMAUNCERRSTS_OFFSET,
    + dmauncerrsts);
    + }
    + }
    +
    device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
    xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
    xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
    @@ -485,6 +524,13 @@ static dma_cookie_t ioat1_tx_submit(stru
    prev = new;
    } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));

    + if (!new) {
    + dev_err(&ioat_chan->device->pdev->dev,
    + "tx submit failed\n");
    + spin_unlock_bh(&ioat_chan->desc_lock);
    + return -ENOMEM;
    + }
    +
    hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
    if (new->async_tx.callback) {
    hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
    @@ -570,7 +616,14 @@ static dma_cookie_t ioat2_tx_submit(stru
    desc_count++;
    } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));

    - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
    + if (!new) {
    + dev_err(&ioat_chan->device->pdev->dev,
    + "tx submit failed\n");
    + spin_unlock_bh(&ioat_chan->desc_lock);
    + return -ENOMEM;
    + }
    +
    + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
    if (new->async_tx.callback) {
    hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
    if (first != new) {
    @@ -641,6 +694,7 @@ static struct ioat_desc_sw *ioat_dma_all
    desc_sw->async_tx.tx_submit = ioat1_tx_submit;
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    desc_sw->async_tx.tx_submit = ioat2_tx_submit;
    break;
    }
    @@ -790,6 +844,7 @@ static void ioat_dma_free_chan_resources
    }
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    list_for_each_entry_safe(desc, _desc,
    ioat_chan->free_desc.next, node) {
    list_del(&desc->node);
    @@ -879,7 +934,8 @@ ioat2_dma_get_next_descriptor(struct ioa

    /* set up the noop descriptor */
    noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
    - noop_desc->hw->size = 0;
    + /* set size to non-zero value (channel returns error when size is 0) */
    + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
    noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
    noop_desc->hw->src_addr = 0;
    noop_desc->hw->dst_addr = 0;
    @@ -929,6 +985,7 @@ static struct ioat_desc_sw *ioat_dma_get
    return ioat1_dma_get_next_descriptor(ioat_chan);
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    return ioat2_dma_get_next_descriptor(ioat_chan);
    break;
    }
    @@ -1051,10 +1108,12 @@ #endif
    * perhaps we're stuck so hard that the watchdog can't go off?
    * try to catch it after 2 seconds
    */
    - if (time_after(jiffies,
    - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
    - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
    - ioat_chan->last_completion_time = jiffies;
    + if (ioat_chan->device->version != IOAT_VER_3_0) {
    + if (time_after(jiffies,
    + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
    + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
    + ioat_chan->last_completion_time = jiffies;
    + }
    }
    return;
    }
    @@ -1124,6 +1183,7 @@ #endif
    }
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    /* has some other thread has already cleaned up? */
    if (ioat_chan->used_desc.prev == NULL)
    break;
    @@ -1236,10 +1296,19 @@ static void ioat_dma_start_null_desc(str
    spin_lock_bh(&ioat_chan->desc_lock);

    desc = ioat_dma_get_next_descriptor(ioat_chan);
    +
    + if (!desc) {
    + dev_err(&ioat_chan->device->pdev->dev,
    + "Unable to start null desc - get next desc failed\n");
    + spin_unlock_bh(&ioat_chan->desc_lock);
    + return;
    + }
    +
    desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
    | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
    | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
    - desc->hw->size = 0;
    + /* set size to non-zero value (channel returns error when size is 0) */
    + desc->hw->size = NULL_DESC_BUFFER_SIZE;
    desc->hw->src_addr = 0;
    desc->hw->dst_addr = 0;
    async_tx_ack(&desc->async_tx);
    @@ -1257,6 +1326,7 @@ static void ioat_dma_start_null_desc(str
    + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
    ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
    writel(((u64) desc->async_tx.phys) >> 32,
    @@ -1575,6 +1645,7 @@ struct ioatdma_device *ioat_dma_probe(st
    ioat1_dma_memcpy_issue_pending;
    break;
    case IOAT_VER_2_0:
    + case IOAT_VER_3_0:
    device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
    device->common.device_issue_pending =
    ioat2_dma_memcpy_issue_pending;
    @@ -1604,14 +1675,20 @@ #ifdef CONFIG_NET_DMA
    sysctl_tcp_dma_copybreak =
    IOAT2_DEFAULT_TCP_DMA_COPYBREAK;
    break;
    + case IOAT_VER_3_0:
    + sysctl_tcp_dma_copybreak =
    + IOAT3_DEFAULT_TCP_DMA_COPYBREAK;
    + break;
    }
    #endif

    dma_async_device_register(&device->common);

    - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
    - schedule_delayed_work(&device->work,
    - WATCHDOG_DELAY);
    + if (device->version != IOAT_VER_3_0) {
    + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
    + schedule_delayed_work(&device->work,
    + WATCHDOG_DELAY);
    + }

    return device;

    @@ -1645,7 +1722,9 @@ void ioat_dma_remove(struct ioatdma_devi
    pci_release_regions(device->pdev);
    pci_disable_device(device->pdev);

    - cancel_delayed_work(&device->work);
    + if (device->version != IOAT_VER_3_0) {
    + cancel_delayed_work(&device->work);
    + }

    list_for_each_entry_safe(chan, _chan,
    &device->common.channels, device_node) {
    diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
    index c6ec933..24c127b 100644
    --- a/drivers/dma/ioatdma.h
    +++ b/drivers/dma/ioatdma.h
    @@ -28,7 +28,7 @@ #include
    #include
    #include

    -#define IOAT_DMA_VERSION "2.18"
    +#define IOAT_DMA_VERSION "3.30"

    enum ioat_interrupt {
    none = 0,
    @@ -135,11 +135,13 @@ struct ioatdma_device *ioat_dma_probe(st
    void ioat_dma_remove(struct ioatdma_device *device);
    struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
    struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
    +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
    #else
    #define ioat_dma_probe(pdev, iobase) NULL
    #define ioat_dma_remove(device) do { } while (0)
    #define ioat_dca_init(pdev, iobase) NULL
    #define ioat2_dca_init(pdev, iobase) NULL
    +#define ioat3_dca_init(pdev, iobase) NULL
    #endif

    #endif /* IOATDMA_H */
    diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
    index dd470fa..f1ae2c7 100644
    --- a/drivers/dma/ioatdma_hw.h
    +++ b/drivers/dma/ioatdma_hw.h
    @@ -35,6 +35,7 @@ #define IOAT_PCI_SVID 0x8086
    #define IOAT_PCI_SID 0x8086
    #define IOAT_VER_1_2 0x12 /* Version 1.2 */
    #define IOAT_VER_2_0 0x20 /* Version 2.0 */
    +#define IOAT_VER_3_0 0x30 /* Version 3.0 */

    struct ioat_dma_descriptor {
    uint32_t size;
    diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
    index 9832d7e..827cb50 100644
    --- a/drivers/dma/ioatdma_registers.h
    +++ b/drivers/dma/ioatdma_registers.h
    @@ -25,6 +25,10 @@ #define IOAT_PCI_DMACTRL_OFFSET 0x48
    #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
    #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002

    +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
    +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
    +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
    +
    /* MMIO Device Registers */
    #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */

    @@ -149,7 +153,23 @@ #define IOAT_DCA_GREQID_IGNOREFUN 0x10
    #define IOAT_DCA_GREQID_VALID 0x20000000
    #define IOAT_DCA_GREQID_LASTID 0x80000000

    +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
    +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
    +
    +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
    +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
    +
    +#define IOAT3_CSI_CONTROL_OFFSET 0x0C
    +#define IOAT3_CSI_CONTROL_PREFETCH 0x1
    +
    +#define IOAT3_PCI_CONTROL_OFFSET 0x0E
    +#define IOAT3_PCI_CONTROL_MEMWR 0x1
    +
    +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
    +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
    +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14

    +#define IOAT3_DCA_GREQID_OFFSET 0x02

    #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
    #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
    diff --git a/include/linux/dca.h b/include/linux/dca.h
    index af61cd1..b00a753 100644
    --- a/include/linux/dca.h
    +++ b/include/linux/dca.h
    @@ -10,6 +10,7 @@ #define DCA_PROVIDER_ADD 0x0001
    #define DCA_PROVIDER_REMOVE 0x0002

    struct dca_provider {
    + struct list_head node;
    struct dca_ops *ops;
    struct device *cd;
    int id;
    @@ -18,7 +19,9 @@ struct dca_provider {
    struct dca_ops {
    int (*add_requester) (struct dca_provider *, struct device *);
    int (*remove_requester) (struct dca_provider *, struct device *);
    - u8 (*get_tag) (struct dca_provider *, int cpu);
    + u8 (*get_tag) (struct dca_provider *, struct device *,
    + int cpu);
    + int (*dev_managed) (struct dca_provider *, struct device *);
    };

    struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
    @@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_
    }

    /* Requester API */
    +#define DCA_GET_TAG_TWO_ARGS
    int dca_add_requester(struct device *dev);
    int dca_remove_requester(struct device *dev);
    u8 dca_get_tag(int cpu);
    +u8 dca3_get_tag(struct device *dev, int cpu);

    /* internal stuff */
    int __init dca_sysfs_init(void);
    diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
    index 6be6a79..f1e1560 100644
    --- a/include/linux/pci_ids.h
    +++ b/include/linux/pci_ids.h
    @@ -2368,6 +2368,14 @@ #define PCI_DEVICE_ID_INTEL_ICH9_6 0x293
    #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916
    #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918
    #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432
    +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433
    #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
    #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577
    #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH v2 3/4] I/OAT: tcp_dma_copybreak default value dependant on I/OAT version


    On Fri, 2008-07-18 at 08:29 -0700, Sosnowski, Maciej wrote:
    > I/OAT DMA performance tuning showed different optimal values
    > of tcp_dma_copybreak for different I/OAT versions
    > (4096 for 1.2 and 2048 for 2.0).
    > This patch lets ioatdma driver set tcp_dma_copybreak value
    > according to these results.
    >
    > Signed-off-by: Maciej Sosnowski


    Let's kill some ifdef's... how about the following instead?

    diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
    index 171cad6..1c0dafc 100644
    --- a/drivers/dma/ioat_dma.c
    +++ b/drivers/dma/ioat_dma.c
    @@ -1330,6 +1330,8 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
    if (err)
    goto err_self_test;

    + ioat_set_tcp_copy_break(device);
    +
    dma_async_device_register(&device->common);

    return device;
    diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
    index f2c7fed..23e3196 100644
    --- a/drivers/dma/ioatdma.h
    +++ b/drivers/dma/ioatdma.h
    @@ -27,6 +27,7 @@
    #include
    #include
    #include
    +#include

    #define IOAT_DMA_VERSION "2.04"

    @@ -121,6 +122,20 @@ struct ioat_desc_sw {
    struct dma_async_tx_descriptor async_tx;
    };

    +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
    +{
    + #ifdef CONFIG_NET_DMA
    + switch (dev->version) {
    + case IOAT_VER_1_2:
    + sysctl_tcp_dma_copybreak = 4096;
    + break;
    + case IOAT_VER_2_0:
    + sysctl_tcp_dma_copybreak = 2048;
    + break;
    + }
    + #endif
    +}
    +
    #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
    struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
    void __iomem *iobase);
    diff --git a/net/core/user_dma.c b/net/core/user_dma.c
    index 0ad1cd5..de76050 100644
    --- a/net/core/user_dma.c
    +++ b/net/core/user_dma.c
    @@ -34,6 +34,7 @@
    #define NET_DMA_DEFAULT_COPYBREAK 4096

    int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
    +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);

    /**
    * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. RE: [PATCH v2 3/4] I/OAT: tcp_dma_copybreak default value dependant on I/OAT version

    Williams, Dan J wrote:
    > On Fri, 2008-07-18 at 08:29 -0700, Sosnowski, Maciej wrote:
    >> I/OAT DMA performance tuning showed different optimal values
    >> of tcp_dma_copybreak for different I/OAT versions
    >> (4096 for 1.2 and 2048 for 2.0).
    >> This patch lets ioatdma driver set tcp_dma_copybreak value according
    >> to these results.
    >>
    >> Signed-off-by: Maciej Sosnowski

    >
    > Let's kill some ifdef's... how about the following instead?
    >
    > diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
    > index 171cad6..1c0dafc 100644
    > --- a/drivers/dma/ioat_dma.c
    > +++ b/drivers/dma/ioat_dma.c
    > @@ -1330,6 +1330,8 @@ struct ioatdma_device *ioat_dma_probe(struct
    > pci_dev *pdev, if (err)
    > goto err_self_test;
    >
    > + ioat_set_tcp_copy_break(device);
    > +
    > dma_async_device_register(&device->common);
    >
    > return device;
    > diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
    > index f2c7fed..23e3196 100644
    > --- a/drivers/dma/ioatdma.h
    > +++ b/drivers/dma/ioatdma.h
    > @@ -27,6 +27,7 @@
    > #include
    > #include
    > #include
    > +#include
    >
    > #define IOAT_DMA_VERSION "2.04"
    >
    > @@ -121,6 +122,20 @@ struct ioat_desc_sw {
    > struct dma_async_tx_descriptor async_tx;
    > };
    >
    > +static inline void ioat_set_tcp_copy_break(struct ioatdma_device
    > *dev) +{
    > + #ifdef CONFIG_NET_DMA
    > + switch (dev->version) {
    > + case IOAT_VER_1_2:
    > + sysctl_tcp_dma_copybreak = 4096;
    > + break;
    > + case IOAT_VER_2_0:
    > + sysctl_tcp_dma_copybreak = 2048;
    > + break;
    > + }
    > + #endif
    > +}
    > +
    > #if defined(CONFIG_INTEL_IOATDMA) ||
    > defined(CONFIG_INTEL_IOATDMA_MODULE) struct ioatdma_device
    > *ioat_dma_probe(struct pci_dev

    *pdev, void __iomem
    > *iobase);
    > diff --git a/net/core/user_dma.c b/net/core/user_dma.c
    > index 0ad1cd5..de76050 100644
    > --- a/net/core/user_dma.c
    > +++ b/net/core/user_dma.c
    > @@ -34,6 +34,7 @@
    > #define NET_DMA_DEFAULT_COPYBREAK 4096
    >
    > int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
    > +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
    >
    > /**
    > * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.


    Yes, I'm fine with this version.

    Regards,
    Maciej
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread