Kernel  |  3.18

下载     查看原文件
C++程序  |  1128行  |  29 KB
/*
 * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
/*
 * QCOM BAM DMA engine driver
 *
 * QCOM BAM DMA blocks are distributed amongst a number of the on-chip
 * peripherals on the MSM 8x74.  The configuration of the channels are dependent
 * on the way they are hard wired to that specific peripheral.  The peripheral
 * device tree entries specify the configuration of each channel.
 *
 * The DMA controller requires the use of external memory for storage of the
 * hardware descriptors for each channel.  The descriptor FIFO is accessed as a
 * circular buffer and operations are managed according to the offset within the
 * FIFO.  After pipe/channel reset, all of the pipe registers and internal state
 * are back to defaults.
 *
 * During DMA operations, we write descriptors to the FIFO, being careful to
 * handle wrapping and then write the last FIFO offset to that channel's
 * P_EVNT_REG register to kick off the transaction.  The P_SW_OFSTS register
 * indicates the current FIFO offset that is being processed, so there is some
 * indication of where the hardware is currently working.
 */

#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_dma.h>
#include <linux/clk.h>
#include <linux/dmaengine.h>

#include "dmaengine.h"
#include "virt-dma.h"

struct bam_desc_hw {
	u32 addr;		/* Buffer physical address */
	u16 size;		/* Buffer size in bytes */
	u16 flags;
};

#define DESC_FLAG_INT BIT(15)
#define DESC_FLAG_EOT BIT(14)
#define DESC_FLAG_EOB BIT(13)
#define DESC_FLAG_NWD BIT(12)

struct bam_async_desc {
	struct virt_dma_desc vd;

	u32 num_desc;
	u32 xfer_len;

	/* transaction flags, EOT|EOB|NWD */
	u16 flags;

	struct bam_desc_hw *curr_desc;

	enum dma_transfer_direction dir;
	size_t length;
	struct bam_desc_hw desc[0];
};

#define BAM_CTRL			0x0000
#define BAM_REVISION			0x0004
#define BAM_SW_REVISION			0x0080
#define BAM_NUM_PIPES			0x003C
#define BAM_TIMER			0x0040
#define BAM_TIMER_CTRL			0x0044
#define BAM_DESC_CNT_TRSHLD		0x0008
#define BAM_IRQ_SRCS			0x000C
#define BAM_IRQ_SRCS_MSK		0x0010
#define BAM_IRQ_SRCS_UNMASKED		0x0030
#define BAM_IRQ_STTS			0x0014
#define BAM_IRQ_CLR			0x0018
#define BAM_IRQ_EN			0x001C
#define BAM_CNFG_BITS			0x007C
#define BAM_IRQ_SRCS_EE(ee)		(0x0800 + ((ee) * 0x80))
#define BAM_IRQ_SRCS_MSK_EE(ee)		(0x0804 + ((ee) * 0x80))
#define BAM_P_CTRL(pipe)		(0x1000 + ((pipe) * 0x1000))
#define BAM_P_RST(pipe)			(0x1004 + ((pipe) * 0x1000))
#define BAM_P_HALT(pipe)		(0x1008 + ((pipe) * 0x1000))
#define BAM_P_IRQ_STTS(pipe)		(0x1010 + ((pipe) * 0x1000))
#define BAM_P_IRQ_CLR(pipe)		(0x1014 + ((pipe) * 0x1000))
#define BAM_P_IRQ_EN(pipe)		(0x1018 + ((pipe) * 0x1000))
#define BAM_P_EVNT_DEST_ADDR(pipe)	(0x182C + ((pipe) * 0x1000))
#define BAM_P_EVNT_REG(pipe)		(0x1818 + ((pipe) * 0x1000))
#define BAM_P_SW_OFSTS(pipe)		(0x1800 + ((pipe) * 0x1000))
#define BAM_P_DATA_FIFO_ADDR(pipe)	(0x1824 + ((pipe) * 0x1000))
#define BAM_P_DESC_FIFO_ADDR(pipe)	(0x181C + ((pipe) * 0x1000))
#define BAM_P_EVNT_TRSHLD(pipe)		(0x1828 + ((pipe) * 0x1000))
#define BAM_P_FIFO_SIZES(pipe)		(0x1820 + ((pipe) * 0x1000))

/* BAM CTRL */
#define BAM_SW_RST			BIT(0)
#define BAM_EN				BIT(1)
#define BAM_EN_ACCUM			BIT(4)
#define BAM_TESTBUS_SEL_SHIFT		5
#define BAM_TESTBUS_SEL_MASK		0x3F
#define BAM_DESC_CACHE_SEL_SHIFT	13
#define BAM_DESC_CACHE_SEL_MASK		0x3
#define BAM_CACHED_DESC_STORE		BIT(15)
#define IBC_DISABLE			BIT(16)

/* BAM REVISION */
#define REVISION_SHIFT		0
#define REVISION_MASK		0xFF
#define NUM_EES_SHIFT		8
#define NUM_EES_MASK		0xF
#define CE_BUFFER_SIZE		BIT(13)
#define AXI_ACTIVE		BIT(14)
#define USE_VMIDMT		BIT(15)
#define SECURED			BIT(16)
#define BAM_HAS_NO_BYPASS	BIT(17)
#define HIGH_FREQUENCY_BAM	BIT(18)
#define INACTIV_TMRS_EXST	BIT(19)
#define NUM_INACTIV_TMRS	BIT(20)
#define DESC_CACHE_DEPTH_SHIFT	21
#define DESC_CACHE_DEPTH_1	(0 << DESC_CACHE_DEPTH_SHIFT)
#define DESC_CACHE_DEPTH_2	(1 << DESC_CACHE_DEPTH_SHIFT)
#define DESC_CACHE_DEPTH_3	(2 << DESC_CACHE_DEPTH_SHIFT)
#define DESC_CACHE_DEPTH_4	(3 << DESC_CACHE_DEPTH_SHIFT)
#define CMD_DESC_EN		BIT(23)
#define INACTIV_TMR_BASE_SHIFT	24
#define INACTIV_TMR_BASE_MASK	0xFF

/* BAM NUM PIPES */
#define BAM_NUM_PIPES_SHIFT		0
#define BAM_NUM_PIPES_MASK		0xFF
#define PERIPH_NON_PIPE_GRP_SHIFT	16
#define PERIPH_NON_PIP_GRP_MASK		0xFF
#define BAM_NON_PIPE_GRP_SHIFT		24
#define BAM_NON_PIPE_GRP_MASK		0xFF

/* BAM CNFG BITS */
#define BAM_PIPE_CNFG		BIT(2)
#define BAM_FULL_PIPE		BIT(11)
#define BAM_NO_EXT_P_RST	BIT(12)
#define BAM_IBC_DISABLE		BIT(13)
#define BAM_SB_CLK_REQ		BIT(14)
#define BAM_PSM_CSW_REQ		BIT(15)
#define BAM_PSM_P_RES		BIT(16)
#define BAM_AU_P_RES		BIT(17)
#define BAM_SI_P_RES		BIT(18)
#define BAM_WB_P_RES		BIT(19)
#define BAM_WB_BLK_CSW		BIT(20)
#define BAM_WB_CSW_ACK_IDL	BIT(21)
#define BAM_WB_RETR_SVPNT	BIT(22)
#define BAM_WB_DSC_AVL_P_RST	BIT(23)
#define BAM_REG_P_EN		BIT(24)
#define BAM_PSM_P_HD_DATA	BIT(25)
#define BAM_AU_ACCUMED		BIT(26)
#define BAM_CMD_ENABLE		BIT(27)

#define BAM_CNFG_BITS_DEFAULT	(BAM_PIPE_CNFG |	\
				 BAM_NO_EXT_P_RST |	\
				 BAM_IBC_DISABLE |	\
				 BAM_SB_CLK_REQ |	\
				 BAM_PSM_CSW_REQ |	\
				 BAM_PSM_P_RES |	\
				 BAM_AU_P_RES |		\
				 BAM_SI_P_RES |		\
				 BAM_WB_P_RES |		\
				 BAM_WB_BLK_CSW |	\
				 BAM_WB_CSW_ACK_IDL |	\
				 BAM_WB_RETR_SVPNT |	\
				 BAM_WB_DSC_AVL_P_RST |	\
				 BAM_REG_P_EN |		\
				 BAM_PSM_P_HD_DATA |	\
				 BAM_AU_ACCUMED |	\
				 BAM_CMD_ENABLE)

/* PIPE CTRL */
#define P_EN			BIT(1)
#define P_DIRECTION		BIT(3)
#define P_SYS_STRM		BIT(4)
#define P_SYS_MODE		BIT(5)
#define P_AUTO_EOB		BIT(6)
#define P_AUTO_EOB_SEL_SHIFT	7
#define P_AUTO_EOB_SEL_512	(0 << P_AUTO_EOB_SEL_SHIFT)
#define P_AUTO_EOB_SEL_256	(1 << P_AUTO_EOB_SEL_SHIFT)
#define P_AUTO_EOB_SEL_128	(2 << P_AUTO_EOB_SEL_SHIFT)
#define P_AUTO_EOB_SEL_64	(3 << P_AUTO_EOB_SEL_SHIFT)
#define P_PREFETCH_LIMIT_SHIFT	9
#define P_PREFETCH_LIMIT_32	(0 << P_PREFETCH_LIMIT_SHIFT)
#define P_PREFETCH_LIMIT_16	(1 << P_PREFETCH_LIMIT_SHIFT)
#define P_PREFETCH_LIMIT_4	(2 << P_PREFETCH_LIMIT_SHIFT)
#define P_WRITE_NWD		BIT(11)
#define P_LOCK_GROUP_SHIFT	16
#define P_LOCK_GROUP_MASK	0x1F

/* BAM_DESC_CNT_TRSHLD */
#define CNT_TRSHLD		0xffff
#define DEFAULT_CNT_THRSHLD	0x4

/* BAM_IRQ_SRCS */
#define BAM_IRQ			BIT(31)
#define P_IRQ			0x7fffffff

/* BAM_IRQ_SRCS_MSK */
#define BAM_IRQ_MSK		BAM_IRQ
#define P_IRQ_MSK		P_IRQ

/* BAM_IRQ_STTS */
#define BAM_TIMER_IRQ		BIT(4)
#define BAM_EMPTY_IRQ		BIT(3)
#define BAM_ERROR_IRQ		BIT(2)
#define BAM_HRESP_ERR_IRQ	BIT(1)

/* BAM_IRQ_CLR */
#define BAM_TIMER_CLR		BIT(4)
#define BAM_EMPTY_CLR		BIT(3)
#define BAM_ERROR_CLR		BIT(2)
#define BAM_HRESP_ERR_CLR	BIT(1)

/* BAM_IRQ_EN */
#define BAM_TIMER_EN		BIT(4)
#define BAM_EMPTY_EN		BIT(3)
#define BAM_ERROR_EN		BIT(2)
#define BAM_HRESP_ERR_EN	BIT(1)

/* BAM_P_IRQ_EN */
#define P_PRCSD_DESC_EN		BIT(0)
#define P_TIMER_EN		BIT(1)
#define P_WAKE_EN		BIT(2)
#define P_OUT_OF_DESC_EN	BIT(3)
#define P_ERR_EN		BIT(4)
#define P_TRNSFR_END_EN		BIT(5)
#define P_DEFAULT_IRQS_EN	(P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)

/* BAM_P_SW_OFSTS */
#define P_SW_OFSTS_MASK		0xffff

#define BAM_DESC_FIFO_SIZE	SZ_32K
#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
#define BAM_MAX_DATA_SIZE	(SZ_32K - 8)

struct bam_chan {
	struct virt_dma_chan vc;

	struct bam_device *bdev;

	/* configuration from device tree */
	u32 id;

	struct bam_async_desc *curr_txd;	/* current running dma */

	/* runtime configuration */
	struct dma_slave_config slave;

	/* fifo storage */
	struct bam_desc_hw *fifo_virt;
	dma_addr_t fifo_phys;

	/* fifo markers */
	unsigned short head;		/* start of active descriptor entries */
	unsigned short tail;		/* end of active descriptor entries */

	unsigned int initialized;	/* is the channel hw initialized? */
	unsigned int paused;		/* is the channel paused? */
	unsigned int reconfigure;	/* new slave config? */

	struct list_head node;
};

static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
{
	return container_of(common, struct bam_chan, vc.chan);
}

struct bam_device {
	void __iomem *regs;
	struct device *dev;
	struct dma_device common;
	struct device_dma_parameters dma_parms;
	struct bam_chan *channels;
	u32 num_channels;

	/* execution environment ID, from DT */
	u32 ee;

	struct clk *bamclk;
	int irq;

	/* dma start transaction tasklet */
	struct tasklet_struct task;
};

/**
 * bam_reset_channel - Reset individual BAM DMA channel
 * @bchan: bam channel
 *
 * This function resets a specific BAM channel
 */
static void bam_reset_channel(struct bam_chan *bchan)
{
	struct bam_device *bdev = bchan->bdev;

	lockdep_assert_held(&bchan->vc.lock);

	/* reset channel */
	writel_relaxed(1, bdev->regs + BAM_P_RST(bchan->id));
	writel_relaxed(0, bdev->regs + BAM_P_RST(bchan->id));

	/* don't allow cpu to reorder BAM register accesses done after this */
	wmb();

	/* make sure hw is initialized when channel is used the first time  */
	bchan->initialized = 0;
}

/**
 * bam_chan_init_hw - Initialize channel hardware
 * @bchan: bam channel
 *
 * This function resets and initializes the BAM channel
 */
static void bam_chan_init_hw(struct bam_chan *bchan,
	enum dma_transfer_direction dir)
{
	struct bam_device *bdev = bchan->bdev;
	u32 val;

	/* Reset the channel to clear internal state of the FIFO */
	bam_reset_channel(bchan);

	/*
	 * write out 8 byte aligned address.  We have enough space for this
	 * because we allocated 1 more descriptor (8 bytes) than we can use
	 */
	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
			bdev->regs + BAM_P_DESC_FIFO_ADDR(bchan->id));
	writel_relaxed(BAM_DESC_FIFO_SIZE, bdev->regs +
			BAM_P_FIFO_SIZES(bchan->id));

	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
	writel_relaxed(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));

	/* unmask the specific pipe and EE combo */
	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
	val |= BIT(bchan->id);
	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));

	/* don't allow cpu to reorder the channel enable done below */
	wmb();

	/* set fixed direction and mode, then enable channel */
	val = P_EN | P_SYS_MODE;
	if (dir == DMA_DEV_TO_MEM)
		val |= P_DIRECTION;

	writel_relaxed(val, bdev->regs + BAM_P_CTRL(bchan->id));

	bchan->initialized = 1;

	/* init FIFO pointers */
	bchan->head = 0;
	bchan->tail = 0;
}

/**
 * bam_alloc_chan - Allocate channel resources for DMA channel.
 * @chan: specified channel
 *
 * This function allocates the FIFO descriptor memory
 */
static int bam_alloc_chan(struct dma_chan *chan)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	struct bam_device *bdev = bchan->bdev;

	if (bchan->fifo_virt)
		return 0;

	/* allocate FIFO descriptor space, but only if necessary */
	bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
				&bchan->fifo_phys, GFP_KERNEL);

	if (!bchan->fifo_virt) {
		dev_err(bdev->dev, "Failed to allocate desc fifo\n");
		return -ENOMEM;
	}

	return 0;
}

/**
 * bam_free_chan - Frees dma resources associated with specific channel
 * @chan: specified channel
 *
 * Free the allocated fifo descriptor memory and channel resources
 *
 */
static void bam_free_chan(struct dma_chan *chan)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	struct bam_device *bdev = bchan->bdev;
	u32 val;
	unsigned long flags;

	vchan_free_chan_resources(to_virt_chan(chan));

	if (bchan->curr_txd) {
		dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
		return;
	}

	spin_lock_irqsave(&bchan->vc.lock, flags);
	bam_reset_channel(bchan);
	spin_unlock_irqrestore(&bchan->vc.lock, flags);

	dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
				bchan->fifo_phys);
	bchan->fifo_virt = NULL;

	/* mask irq for pipe/channel */
	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
	val &= ~BIT(bchan->id);
	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));

	/* disable irq */
	writel_relaxed(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
}

/**
 * bam_slave_config - set slave configuration for channel
 * @chan: dma channel
 * @cfg: slave configuration
 *
 * Sets slave configuration for channel
 *
 */
static void bam_slave_config(struct bam_chan *bchan,
		struct dma_slave_config *cfg)
{
	memcpy(&bchan->slave, cfg, sizeof(*cfg));
	bchan->reconfigure = 1;
}

/**
 * bam_prep_slave_sg - Prep slave sg transaction
 *
 * @chan: dma channel
 * @sgl: scatter gather list
 * @sg_len: length of sg
 * @direction: DMA transfer direction
 * @flags: DMA flags
 * @context: transfer context (unused)
 */
static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
	struct scatterlist *sgl, unsigned int sg_len,
	enum dma_transfer_direction direction, unsigned long flags,
	void *context)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	struct bam_device *bdev = bchan->bdev;
	struct bam_async_desc *async_desc;
	struct scatterlist *sg;
	u32 i;
	struct bam_desc_hw *desc;
	unsigned int num_alloc = 0;


	if (!is_slave_direction(direction)) {
		dev_err(bdev->dev, "invalid dma direction\n");
		return NULL;
	}

	/* calculate number of required entries */
	for_each_sg(sgl, sg, sg_len, i)
		num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE);

	/* allocate enough room to accomodate the number of entries */
	async_desc = kzalloc(sizeof(*async_desc) +
			(num_alloc * sizeof(struct bam_desc_hw)), GFP_NOWAIT);

	if (!async_desc)
		goto err_out;

	if (flags & DMA_PREP_FENCE)
		async_desc->flags |= DESC_FLAG_NWD;

	if (flags & DMA_PREP_INTERRUPT)
		async_desc->flags |= DESC_FLAG_EOT;
	else
		async_desc->flags |= DESC_FLAG_INT;

	async_desc->num_desc = num_alloc;
	async_desc->curr_desc = async_desc->desc;
	async_desc->dir = direction;

	/* fill in temporary descriptors */
	desc = async_desc->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		unsigned int remainder = sg_dma_len(sg);
		unsigned int curr_offset = 0;

		do {
			desc->addr = sg_dma_address(sg) + curr_offset;

			if (remainder > BAM_MAX_DATA_SIZE) {
				desc->size = BAM_MAX_DATA_SIZE;
				remainder -= BAM_MAX_DATA_SIZE;
				curr_offset += BAM_MAX_DATA_SIZE;
			} else {
				desc->size = remainder;
				remainder = 0;
			}

			async_desc->length += desc->size;
			desc++;
		} while (remainder > 0);
	}

	return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);

err_out:
	kfree(async_desc);
	return NULL;
}

/**
 * bam_dma_terminate_all - terminate all transactions on a channel
 * @bchan: bam dma channel
 *
 * Dequeues and frees all transactions
 * No callbacks are done
 *
 */
static void bam_dma_terminate_all(struct bam_chan *bchan)
{
	unsigned long flag;
	LIST_HEAD(head);

	/* remove all transactions, including active transaction */
	spin_lock_irqsave(&bchan->vc.lock, flag);
	if (bchan->curr_txd) {
		list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
		bchan->curr_txd = NULL;
	}

	vchan_get_all_descriptors(&bchan->vc, &head);
	spin_unlock_irqrestore(&bchan->vc.lock, flag);

	vchan_dma_desc_free_list(&bchan->vc, &head);
}

/**
 * bam_control - DMA device control
 * @chan: dma channel
 * @cmd: control cmd
 * @arg: cmd argument
 *
 * Perform DMA control command
 *
 */
static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
	unsigned long arg)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	struct bam_device *bdev = bchan->bdev;
	int ret = 0;
	unsigned long flag;

	switch (cmd) {
	case DMA_PAUSE:
		spin_lock_irqsave(&bchan->vc.lock, flag);
		writel_relaxed(1, bdev->regs + BAM_P_HALT(bchan->id));
		bchan->paused = 1;
		spin_unlock_irqrestore(&bchan->vc.lock, flag);
		break;

	case DMA_RESUME:
		spin_lock_irqsave(&bchan->vc.lock, flag);
		writel_relaxed(0, bdev->regs + BAM_P_HALT(bchan->id));
		bchan->paused = 0;
		spin_unlock_irqrestore(&bchan->vc.lock, flag);
		break;

	case DMA_TERMINATE_ALL:
		bam_dma_terminate_all(bchan);
		break;

	case DMA_SLAVE_CONFIG:
		spin_lock_irqsave(&bchan->vc.lock, flag);
		bam_slave_config(bchan, (struct dma_slave_config *)arg);
		spin_unlock_irqrestore(&bchan->vc.lock, flag);
		break;

	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

/**
 * process_channel_irqs - processes the channel interrupts
 * @bdev: bam controller
 *
 * This function processes the channel interrupts
 *
 */
static u32 process_channel_irqs(struct bam_device *bdev)
{
	u32 i, srcs, pipe_stts;
	unsigned long flags;
	struct bam_async_desc *async_desc;

	srcs = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_EE(bdev->ee));

	/* return early if no pipe/channel interrupts are present */
	if (!(srcs & P_IRQ))
		return srcs;

	for (i = 0; i < bdev->num_channels; i++) {
		struct bam_chan *bchan = &bdev->channels[i];

		if (!(srcs & BIT(i)))
			continue;

		/* clear pipe irq */
		pipe_stts = readl_relaxed(bdev->regs +
			BAM_P_IRQ_STTS(i));

		writel_relaxed(pipe_stts, bdev->regs +
				BAM_P_IRQ_CLR(i));

		spin_lock_irqsave(&bchan->vc.lock, flags);
		async_desc = bchan->curr_txd;

		if (async_desc) {
			async_desc->num_desc -= async_desc->xfer_len;
			async_desc->curr_desc += async_desc->xfer_len;
			bchan->curr_txd = NULL;

			/* manage FIFO */
			bchan->head += async_desc->xfer_len;
			bchan->head %= MAX_DESCRIPTORS;

			/*
			 * if complete, process cookie.  Otherwise
			 * push back to front of desc_issued so that
			 * it gets restarted by the tasklet
			 */
			if (!async_desc->num_desc)
				vchan_cookie_complete(&async_desc->vd);
			else
				list_add(&async_desc->vd.node,
					&bchan->vc.desc_issued);
		}

		spin_unlock_irqrestore(&bchan->vc.lock, flags);
	}

	return srcs;
}

/**
 * bam_dma_irq - irq handler for bam controller
 * @irq: IRQ of interrupt
 * @data: callback data
 *
 * IRQ handler for the bam controller
 */
static irqreturn_t bam_dma_irq(int irq, void *data)
{
	struct bam_device *bdev = data;
	u32 clr_mask = 0, srcs = 0;

	srcs |= process_channel_irqs(bdev);

	/* kick off tasklet to start next dma transfer */
	if (srcs & P_IRQ)
		tasklet_schedule(&bdev->task);

	if (srcs & BAM_IRQ)
		clr_mask = readl_relaxed(bdev->regs + BAM_IRQ_STTS);

	/* don't allow reorder of the various accesses to the BAM registers */
	mb();

	writel_relaxed(clr_mask, bdev->regs + BAM_IRQ_CLR);

	return IRQ_HANDLED;
}

/**
 * bam_tx_status - returns status of transaction
 * @chan: dma channel
 * @cookie: transaction cookie
 * @txstate: DMA transaction state
 *
 * Return status of dma transaction
 */
static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
		struct dma_tx_state *txstate)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	struct virt_dma_desc *vd;
	int ret;
	size_t residue = 0;
	unsigned int i;
	unsigned long flags;

	ret = dma_cookie_status(chan, cookie, txstate);
	if (ret == DMA_COMPLETE)
		return ret;

	if (!txstate)
		return bchan->paused ? DMA_PAUSED : ret;

	spin_lock_irqsave(&bchan->vc.lock, flags);
	vd = vchan_find_desc(&bchan->vc, cookie);
	if (vd)
		residue = container_of(vd, struct bam_async_desc, vd)->length;
	else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
		for (i = 0; i < bchan->curr_txd->num_desc; i++)
			residue += bchan->curr_txd->curr_desc[i].size;

	spin_unlock_irqrestore(&bchan->vc.lock, flags);

	dma_set_residue(txstate, residue);

	if (ret == DMA_IN_PROGRESS && bchan->paused)
		ret = DMA_PAUSED;

	return ret;
}

/**
 * bam_apply_new_config
 * @bchan: bam dma channel
 * @dir: DMA direction
 */
static void bam_apply_new_config(struct bam_chan *bchan,
	enum dma_transfer_direction dir)
{
	struct bam_device *bdev = bchan->bdev;
	u32 maxburst;

	if (dir == DMA_DEV_TO_MEM)
		maxburst = bchan->slave.src_maxburst;
	else
		maxburst = bchan->slave.dst_maxburst;

	writel_relaxed(maxburst, bdev->regs + BAM_DESC_CNT_TRSHLD);

	bchan->reconfigure = 0;
}

/**
 * bam_start_dma - start next transaction
 * @bchan - bam dma channel
 */
static void bam_start_dma(struct bam_chan *bchan)
{
	struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
	struct bam_device *bdev = bchan->bdev;
	struct bam_async_desc *async_desc;
	struct bam_desc_hw *desc;
	struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
					sizeof(struct bam_desc_hw));

	lockdep_assert_held(&bchan->vc.lock);

	if (!vd)
		return;

	list_del(&vd->node);

	async_desc = container_of(vd, struct bam_async_desc, vd);
	bchan->curr_txd = async_desc;

	/* on first use, initialize the channel hardware */
	if (!bchan->initialized)
		bam_chan_init_hw(bchan, async_desc->dir);

	/* apply new slave config changes, if necessary */
	if (bchan->reconfigure)
		bam_apply_new_config(bchan, async_desc->dir);

	desc = bchan->curr_txd->curr_desc;

	if (async_desc->num_desc > MAX_DESCRIPTORS)
		async_desc->xfer_len = MAX_DESCRIPTORS;
	else
		async_desc->xfer_len = async_desc->num_desc;

	/* set any special flags on the last descriptor */
	if (async_desc->num_desc == async_desc->xfer_len)
		desc[async_desc->xfer_len - 1].flags = async_desc->flags;
	else
		desc[async_desc->xfer_len - 1].flags |= DESC_FLAG_INT;

	if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
		u32 partial = MAX_DESCRIPTORS - bchan->tail;

		memcpy(&fifo[bchan->tail], desc,
				partial * sizeof(struct bam_desc_hw));
		memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
				sizeof(struct bam_desc_hw));
	} else {
		memcpy(&fifo[bchan->tail], desc,
			async_desc->xfer_len * sizeof(struct bam_desc_hw));
	}

	bchan->tail += async_desc->xfer_len;
	bchan->tail %= MAX_DESCRIPTORS;

	/* ensure descriptor writes and dma start not reordered */
	wmb();
	writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
			bdev->regs + BAM_P_EVNT_REG(bchan->id));
}

/**
 * dma_tasklet - DMA IRQ tasklet
 * @data: tasklet argument (bam controller structure)
 *
 * Sets up next DMA operation and then processes all completed transactions
 */
static void dma_tasklet(unsigned long data)
{
	struct bam_device *bdev = (struct bam_device *)data;
	struct bam_chan *bchan;
	unsigned long flags;
	unsigned int i;

	/* go through the channels and kick off transactions */
	for (i = 0; i < bdev->num_channels; i++) {
		bchan = &bdev->channels[i];
		spin_lock_irqsave(&bchan->vc.lock, flags);

		if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
			bam_start_dma(bchan);
		spin_unlock_irqrestore(&bchan->vc.lock, flags);
	}
}

/**
 * bam_issue_pending - starts pending transactions
 * @chan: dma channel
 *
 * Calls tasklet directly which in turn starts any pending transactions
 */
static void bam_issue_pending(struct dma_chan *chan)
{
	struct bam_chan *bchan = to_bam_chan(chan);
	unsigned long flags;

	spin_lock_irqsave(&bchan->vc.lock, flags);

	/* if work pending and idle, start a transaction */
	if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
		bam_start_dma(bchan);

	spin_unlock_irqrestore(&bchan->vc.lock, flags);
}

/**
 * bam_dma_free_desc - free descriptor memory
 * @vd: virtual descriptor
 *
 */
static void bam_dma_free_desc(struct virt_dma_desc *vd)
{
	struct bam_async_desc *async_desc = container_of(vd,
			struct bam_async_desc, vd);

	kfree(async_desc);
}

static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *of)
{
	struct bam_device *bdev = container_of(of->of_dma_data,
					struct bam_device, common);
	unsigned int request;

	if (dma_spec->args_count != 1)
		return NULL;

	request = dma_spec->args[0];
	if (request >= bdev->num_channels)
		return NULL;

	return dma_get_slave_channel(&(bdev->channels[request].vc.chan));
}

/**
 * bam_init
 * @bdev: bam device
 *
 * Initialization helper for global bam registers
 */
static int bam_init(struct bam_device *bdev)
{
	u32 val;

	/* read revision and configuration information */
	val = readl_relaxed(bdev->regs + BAM_REVISION) >> NUM_EES_SHIFT;
	val &= NUM_EES_MASK;

	/* check that configured EE is within range */
	if (bdev->ee >= val)
		return -EINVAL;

	val = readl_relaxed(bdev->regs + BAM_NUM_PIPES);
	bdev->num_channels = val & BAM_NUM_PIPES_MASK;

	/* s/w reset bam */
	/* after reset all pipes are disabled and idle */
	val = readl_relaxed(bdev->regs + BAM_CTRL);
	val |= BAM_SW_RST;
	writel_relaxed(val, bdev->regs + BAM_CTRL);
	val &= ~BAM_SW_RST;
	writel_relaxed(val, bdev->regs + BAM_CTRL);

	/* make sure previous stores are visible before enabling BAM */
	wmb();

	/* enable bam */
	val |= BAM_EN;
	writel_relaxed(val, bdev->regs + BAM_CTRL);

	/* set descriptor threshhold, start with 4 bytes */
	writel_relaxed(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);

	/* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bdev->regs + BAM_CNFG_BITS);

	/* enable irqs for errors */
	writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
				bdev->regs + BAM_IRQ_EN);

	/* unmask global bam interrupt */
	writel_relaxed(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));

	return 0;
}

static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
	u32 index)
{
	bchan->id = index;
	bchan->bdev = bdev;

	vchan_init(&bchan->vc, &bdev->common);
	bchan->vc.desc_free = bam_dma_free_desc;
}

static int bam_dma_probe(struct platform_device *pdev)
{
	struct bam_device *bdev;
	struct resource *iores;
	int ret, i;

	bdev = devm_kzalloc(&pdev->dev, sizeof(*bdev), GFP_KERNEL);
	if (!bdev)
		return -ENOMEM;

	bdev->dev = &pdev->dev;

	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
	if (IS_ERR(bdev->regs))
		return PTR_ERR(bdev->regs);

	bdev->irq = platform_get_irq(pdev, 0);
	if (bdev->irq < 0)
		return bdev->irq;

	ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &bdev->ee);
	if (ret) {
		dev_err(bdev->dev, "Execution environment unspecified\n");
		return ret;
	}

	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
	if (IS_ERR(bdev->bamclk))
		return PTR_ERR(bdev->bamclk);

	ret = clk_prepare_enable(bdev->bamclk);
	if (ret) {
		dev_err(bdev->dev, "failed to prepare/enable clock\n");
		return ret;
	}

	ret = bam_init(bdev);
	if (ret)
		goto err_disable_clk;

	tasklet_init(&bdev->task, dma_tasklet, (unsigned long)bdev);

	bdev->channels = devm_kcalloc(bdev->dev, bdev->num_channels,
				sizeof(*bdev->channels), GFP_KERNEL);

	if (!bdev->channels) {
		ret = -ENOMEM;
		goto err_disable_clk;
	}

	/* allocate and initialize channels */
	INIT_LIST_HEAD(&bdev->common.channels);

	for (i = 0; i < bdev->num_channels; i++)
		bam_channel_init(bdev, &bdev->channels[i], i);

	ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
			IRQF_TRIGGER_HIGH, "bam_dma", bdev);
	if (ret)
		goto err_disable_clk;

	/* set max dma segment size */
	bdev->common.dev = bdev->dev;
	bdev->common.dev->dma_parms = &bdev->dma_parms;
	ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE);
	if (ret) {
		dev_err(bdev->dev, "cannot set maximum segment size\n");
		goto err_disable_clk;
	}

	platform_set_drvdata(pdev, bdev);

	/* set capabilities */
	dma_cap_zero(bdev->common.cap_mask);
	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);

	/* initialize dmaengine apis */
	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
	bdev->common.device_free_chan_resources = bam_free_chan;
	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
	bdev->common.device_control = bam_control;
	bdev->common.device_issue_pending = bam_issue_pending;
	bdev->common.device_tx_status = bam_tx_status;
	bdev->common.dev = bdev->dev;

	ret = dma_async_device_register(&bdev->common);
	if (ret) {
		dev_err(bdev->dev, "failed to register dma async device\n");
		goto err_disable_clk;
	}

	ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
					&bdev->common);
	if (ret)
		goto err_unregister_dma;

	return 0;

err_unregister_dma:
	dma_async_device_unregister(&bdev->common);
err_disable_clk:
	clk_disable_unprepare(bdev->bamclk);
	return ret;
}

static int bam_dma_remove(struct platform_device *pdev)
{
	struct bam_device *bdev = platform_get_drvdata(pdev);
	u32 i;

	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&bdev->common);

	/* mask all interrupts for this execution environment */
	writel_relaxed(0, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));

	devm_free_irq(bdev->dev, bdev->irq, bdev);

	for (i = 0; i < bdev->num_channels; i++) {
		bam_dma_terminate_all(&bdev->channels[i]);
		tasklet_kill(&bdev->channels[i].vc.task);

		dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
			bdev->channels[i].fifo_virt,
			bdev->channels[i].fifo_phys);
	}

	tasklet_kill(&bdev->task);

	clk_disable_unprepare(bdev->bamclk);

	return 0;
}

static const struct of_device_id bam_of_match[] = {
	{ .compatible = "qcom,bam-v1.4.0", },
	{}
};
MODULE_DEVICE_TABLE(of, bam_of_match);

static struct platform_driver bam_dma_driver = {
	.probe = bam_dma_probe,
	.remove = bam_dma_remove,
	.driver = {
		.name = "bam-dma-engine",
		.owner = THIS_MODULE,
		.of_match_table = bam_of_match,
	},
};

module_platform_driver(bam_dma_driver);

MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
MODULE_DESCRIPTION("QCOM BAM DMA engine driver");
MODULE_LICENSE("GPL v2");