/***********************license start***************
 * Author: Cavium Networks
 *
 * Contact: support@caviumnetworks.com
 * This file is part of the OCTEON SDK
 *
 * Copyright (c) 2003-2008 Cavium Networks
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, Version 2, as
 * published by the Free Software Foundation.
 *
 * This file is distributed in the hope that it will be useful, but
 * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
 * NONINFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this file; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 * or visit http://www.gnu.org/licenses/.
 *
 * This file may also be available under a different license from Cavium.
 * Contact Cavium Networks for more information
 ***********************license end**************************************/

/**
 *
 * Interface to the hardware Packet Output unit.
 *
 * Starting with SDK 1.7.0, the PKO output functions now support
 * two types of locking. CVMX_PKO_LOCK_ATOMIC_TAG continues to
 * function similarly to previous SDKs by using POW atomic tags
 * to preserve ordering and exclusivity. As a new option, you
 * can now pass CVMX_PKO_LOCK_CMD_QUEUE which uses a ll/sc
 * memory based locking instead. This locking has the advantage
 * of not affecting the tag state but doesn't preserve packet
 * ordering. CVMX_PKO_LOCK_CMD_QUEUE is appropriate in most
 * generic code while CVMX_PKO_LOCK_CMD_QUEUE should be used
 * with hand tuned fast path code.
 *
 * Some of other SDK differences visible to the command command
 * queuing:
 * - PKO indexes are no longer stored in the FAU. A large
 *   percentage of the FAU register block used to be tied up
 *   maintaining PKO queue pointers. These are now stored in a
 *   global named block.
 * - The PKO <b>use_locking</b> parameter can now have a global
 *   effect. Since all application use the same named block,
 *   queue locking correctly applies across all operating
 *   systems when using CVMX_PKO_LOCK_CMD_QUEUE.
 * - PKO 3 word commands are now supported. Use
 *   cvmx_pko_send_packet_finish3().
 *
 */

#ifndef __CVMX_PKO_H__
#define __CVMX_PKO_H__

#include "cvmx-fpa.h"
#include "cvmx-pow.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-pko-defs.h"

/* Adjust the command buffer size by 1 word so that in the case of using only
 * two word PKO commands no command words stradle buffers.  The useful values
 * for this are 0 and 1. */
#define CVMX_PKO_COMMAND_BUFFER_SIZE_ADJUST (1)

#define CVMX_PKO_MAX_OUTPUT_QUEUES_STATIC 256
#define CVMX_PKO_MAX_OUTPUT_QUEUES      ((OCTEON_IS_MODEL(OCTEON_CN31XX) || \
	OCTEON_IS_MODEL(OCTEON_CN3010) || OCTEON_IS_MODEL(OCTEON_CN3005) || \
	OCTEON_IS_MODEL(OCTEON_CN50XX)) ? 32 : \
		(OCTEON_IS_MODEL(OCTEON_CN58XX) || \
		OCTEON_IS_MODEL(OCTEON_CN56XX)) ? 256 : 128)
#define CVMX_PKO_NUM_OUTPUT_PORTS       40
/* use this for queues that are not used */
#define CVMX_PKO_MEM_QUEUE_PTRS_ILLEGAL_PID 63
#define CVMX_PKO_QUEUE_STATIC_PRIORITY  9
#define CVMX_PKO_ILLEGAL_QUEUE  0xFFFF
#define CVMX_PKO_MAX_QUEUE_DEPTH 0

typedef enum {
	CVMX_PKO_SUCCESS,
	CVMX_PKO_INVALID_PORT,
	CVMX_PKO_INVALID_QUEUE,
	CVMX_PKO_INVALID_PRIORITY,
	CVMX_PKO_NO_MEMORY,
	CVMX_PKO_PORT_ALREADY_SETUP,
	CVMX_PKO_CMD_QUEUE_INIT_ERROR
} cvmx_pko_status_t;

/**
 * This enumeration represents the differnet locking modes supported by PKO.
 */
typedef enum {
	/*
	 * PKO doesn't do any locking. It is the responsibility of the
	 * application to make sure that no other core is accessing
	 * the same queue at the same time
	 */
	CVMX_PKO_LOCK_NONE = 0,
	/*
	 * PKO performs an atomic tagswitch to insure exclusive access
	 * to the output queue. This will maintain packet ordering on
	 * output.
	 */
	CVMX_PKO_LOCK_ATOMIC_TAG = 1,
	/*
	 * PKO uses the common command queue locks to insure exclusive
	 * access to the output queue. This is a memory based
	 * ll/sc. This is the most portable locking mechanism.
	 */
	CVMX_PKO_LOCK_CMD_QUEUE = 2,
} cvmx_pko_lock_t;

typedef struct {
	uint32_t packets;
	uint64_t octets;
	uint64_t doorbell;
} cvmx_pko_port_status_t;

/**
 * This structure defines the address to use on a packet enqueue
 */
typedef union {
	uint64_t u64;
	struct {
		/* Must CVMX_IO_SEG */
		uint64_t mem_space:2;
		/* Must be zero */
		uint64_t reserved:13;
		/* Must be one */
		uint64_t is_io:1;
		/* The ID of the device on the non-coherent bus */
		uint64_t did:8;
		/* Must be zero */
		uint64_t reserved2:4;
		/* Must be zero */
		uint64_t reserved3:18;
		/*
		 * The hardware likes to have the output port in
		 * addition to the output queue,
		 */
		uint64_t port:6;
		/*
		 * The output queue to send the packet to (0-127 are
		 * legal)
		 */
		uint64_t queue:9;
		/* Must be zero */
		uint64_t reserved4:3;
	} s;
} cvmx_pko_doorbell_address_t;

/**
 * Structure of the first packet output command word.
 */
typedef union {
	uint64_t u64;
	struct {
		/*
		 * The size of the reg1 operation - could be 8, 16,
		 * 32, or 64 bits.
		 */
		uint64_t size1:2;
		/*
		 * The size of the reg0 operation - could be 8, 16,
		 * 32, or 64 bits.
		 */
		uint64_t size0:2;
		/*
		 * If set, subtract 1, if clear, subtract packet
		 * size.
		 */
		uint64_t subone1:1;
		/*
		 * The register, subtract will be done if reg1 is
		 * non-zero.
		 */
		uint64_t reg1:11;
		/* If set, subtract 1, if clear, subtract packet size */
		uint64_t subone0:1;
		/* The register, subtract will be done if reg0 is non-zero */
		uint64_t reg0:11;
		/*
		 * When set, interpret segment pointer and segment
		 * bytes in little endian order.
		 */
		uint64_t le:1;
		/*
		 * When set, packet data not allocated in L2 cache by
		 * PKO.
		 */
		uint64_t n2:1;
		/*
		 * If set and rsp is set, word3 contains a pointer to
		 * a work queue entry.
		 */
		uint64_t wqp:1;
		/* If set, the hardware will send a response when done */
		uint64_t rsp:1;
		/*
		 * If set, the supplied pkt_ptr is really a pointer to
		 * a list of pkt_ptr's.
		 */
		uint64_t gather:1;
		/*
		 * If ipoffp1 is non zero, (ipoffp1-1) is the number
		 * of bytes to IP header, and the hardware will
		 * calculate and insert the UDP/TCP checksum.
		 */
		uint64_t ipoffp1:7;
		/*
		 * If set, ignore the I bit (force to zero) from all
		 * pointer structures.
		 */
		uint64_t ignore_i:1;
		/*
		 * If clear, the hardware will attempt to free the
		 * buffers containing the packet.
		 */
		uint64_t dontfree:1;
		/*
		 * The total number of segs in the packet, if gather
		 * set, also gather list length.
		 */
		uint64_t segs:6;
		/* Including L2, but no trailing CRC */
		uint64_t total_bytes:16;
	} s;
} cvmx_pko_command_word0_t;

/* CSR typedefs have been moved to cvmx-csr-*.h */

/**
 * Definition of internal state for Packet output processing
 */
typedef struct {
	/* ptr to start of buffer, offset kept in FAU reg */
	uint64_t *start_ptr;
} cvmx_pko_state_elem_t;

/**
 * Call before any other calls to initialize the packet
 * output system.
 */
extern void cvmx_pko_initialize_global(void);
extern int cvmx_pko_initialize_local(void);

/**
 * Enables the packet output hardware. It must already be
 * configured.
 */
extern void cvmx_pko_enable(void);

/**
 * Disables the packet output. Does not affect any configuration.
 */
extern void cvmx_pko_disable(void);

/**
 * Shutdown and free resources required by packet output.
 */

extern void cvmx_pko_shutdown(void);

/**
 * Configure a output port and the associated queues for use.
 *
 * @port:       Port to configure.
 * @base_queue: First queue number to associate with this port.
 * @num_queues: Number of queues t oassociate with this port
 * @priority:   Array of priority levels for each queue. Values are
 *                   allowed to be 1-8. A value of 8 get 8 times the traffic
 *                   of a value of 1. There must be num_queues elements in the
 *                   array.
 */
extern cvmx_pko_status_t cvmx_pko_config_port(uint64_t port,
					      uint64_t base_queue,
					      uint64_t num_queues,
					      const uint64_t priority[]);

/**
 * Ring the packet output doorbell. This tells the packet
 * output hardware that "len" command words have been added
 * to its pending list.  This command includes the required
 * CVMX_SYNCWS before the doorbell ring.
 *
 * @port:   Port the packet is for
 * @queue:  Queue the packet is for
 * @len:    Length of the command in 64 bit words
 */
static inline void cvmx_pko_doorbell(uint64_t port, uint64_t queue,
				     uint64_t len)
{
	cvmx_pko_doorbell_address_t ptr;

	ptr.u64 = 0;
	ptr.s.mem_space = CVMX_IO_SEG;
	ptr.s.did = CVMX_OCT_DID_PKT_SEND;
	ptr.s.is_io = 1;
	ptr.s.port = port;
	ptr.s.queue = queue;
	/*
	 * Need to make sure output queue data is in DRAM before
	 * doorbell write.
	 */
	CVMX_SYNCWS;
	cvmx_write_io(ptr.u64, len);
}

/**
 * Prepare to send a packet.  This may initiate a tag switch to
 * get exclusive access to the output queue structure, and
 * performs other prep work for the packet send operation.
 *
 * cvmx_pko_send_packet_finish() MUST be called after this function is called,
 * and must be called with the same port/queue/use_locking arguments.
 *
 * The use_locking parameter allows the caller to use three
 * possible locking modes.
 * - CVMX_PKO_LOCK_NONE
 *      - PKO doesn't do any locking. It is the responsibility
 *          of the application to make sure that no other core
 *          is accessing the same queue at the same time.
 * - CVMX_PKO_LOCK_ATOMIC_TAG
 *      - PKO performs an atomic tagswitch to insure exclusive
 *          access to the output queue. This will maintain
 *          packet ordering on output.
 * - CVMX_PKO_LOCK_CMD_QUEUE
 *      - PKO uses the common command queue locks to insure
 *          exclusive access to the output queue. This is a
 *          memory based ll/sc. This is the most portable
 *          locking mechanism.
 *
 * NOTE: If atomic locking is used, the POW entry CANNOT be
 * descheduled, as it does not contain a valid WQE pointer.
 *
 * @port:   Port to send it on
 * @queue:  Queue to use
 * @use_locking: CVMX_PKO_LOCK_NONE, CVMX_PKO_LOCK_ATOMIC_TAG, or
 *               CVMX_PKO_LOCK_CMD_QUEUE
 */

static inline void cvmx_pko_send_packet_prepare(uint64_t port, uint64_t queue,
						cvmx_pko_lock_t use_locking)
{
	if (use_locking == CVMX_PKO_LOCK_ATOMIC_TAG) {
		/*
		 * Must do a full switch here to handle all cases.  We
		 * use a fake WQE pointer, as the POW does not access
		 * this memory.  The WQE pointer and group are only
		 * used if this work is descheduled, which is not
		 * supported by the
		 * cvmx_pko_send_packet_prepare/cvmx_pko_send_packet_finish
		 * combination.  Note that this is a special case in
		 * which these fake values can be used - this is not a
		 * general technique.
		 */
		uint32_t tag =
		    CVMX_TAG_SW_BITS_INTERNAL << CVMX_TAG_SW_SHIFT |
		    CVMX_TAG_SUBGROUP_PKO << CVMX_TAG_SUBGROUP_SHIFT |
		    (CVMX_TAG_SUBGROUP_MASK & queue);
		cvmx_pow_tag_sw_full((cvmx_wqe_t *) cvmx_phys_to_ptr(0x80), tag,
				     CVMX_POW_TAG_TYPE_ATOMIC, 0);
	}
}

/**
 * Complete packet output. cvmx_pko_send_packet_prepare() must be
 * called exactly once before this, and the same parameters must be
 * passed to both cvmx_pko_send_packet_prepare() and
 * cvmx_pko_send_packet_finish().
 *
 * @port:   Port to send it on
 * @queue:  Queue to use
 * @pko_command:
 *               PKO HW command word
 * @packet: Packet to send
 * @use_locking: CVMX_PKO_LOCK_NONE, CVMX_PKO_LOCK_ATOMIC_TAG, or
 *               CVMX_PKO_LOCK_CMD_QUEUE
 *
 * Returns returns CVMX_PKO_SUCCESS on success, or error code on
 * failure of output
 */
static inline cvmx_pko_status_t cvmx_pko_send_packet_finish(
	uint64_t port,
	uint64_t queue,
	cvmx_pko_command_word0_t pko_command,
	union cvmx_buf_ptr packet,
	cvmx_pko_lock_t use_locking)
{
	cvmx_cmd_queue_result_t result;
	if (use_locking == CVMX_PKO_LOCK_ATOMIC_TAG)
		cvmx_pow_tag_sw_wait();
	result = cvmx_cmd_queue_write2(CVMX_CMD_QUEUE_PKO(queue),
				       (use_locking == CVMX_PKO_LOCK_CMD_QUEUE),
				       pko_command.u64, packet.u64);
	if (likely(result == CVMX_CMD_QUEUE_SUCCESS)) {
		cvmx_pko_doorbell(port, queue, 2);
		return CVMX_PKO_SUCCESS;
	} else if ((result == CVMX_CMD_QUEUE_NO_MEMORY)
		   || (result == CVMX_CMD_QUEUE_FULL)) {
		return CVMX_PKO_NO_MEMORY;
	} else {
		return CVMX_PKO_INVALID_QUEUE;
	}
}

/**
 * Complete packet output. cvmx_pko_send_packet_prepare() must be
 * called exactly once before this, and the same parameters must be
 * passed to both cvmx_pko_send_packet_prepare() and
 * cvmx_pko_send_packet_finish().
 *
 * @port:   Port to send it on
 * @queue:  Queue to use
 * @pko_command:
 *               PKO HW command word
 * @packet: Packet to send
 * @addr: Plysical address of a work queue entry or physical address
 *        to zero on complete.
 * @use_locking: CVMX_PKO_LOCK_NONE, CVMX_PKO_LOCK_ATOMIC_TAG, or
 *               CVMX_PKO_LOCK_CMD_QUEUE
 *
 * Returns returns CVMX_PKO_SUCCESS on success, or error code on
 * failure of output
 */
static inline cvmx_pko_status_t cvmx_pko_send_packet_finish3(
	uint64_t port,
	uint64_t queue,
	cvmx_pko_command_word0_t pko_command,
	union cvmx_buf_ptr packet,
	uint64_t addr,
	cvmx_pko_lock_t use_locking)
{
	cvmx_cmd_queue_result_t result;
	if (use_locking == CVMX_PKO_LOCK_ATOMIC_TAG)
		cvmx_pow_tag_sw_wait();
	result = cvmx_cmd_queue_write3(CVMX_CMD_QUEUE_PKO(queue),
				       (use_locking == CVMX_PKO_LOCK_CMD_QUEUE),
				       pko_command.u64, packet.u64, addr);
	if (likely(result == CVMX_CMD_QUEUE_SUCCESS)) {
		cvmx_pko_doorbell(port, queue, 3);
		return CVMX_PKO_SUCCESS;
	} else if ((result == CVMX_CMD_QUEUE_NO_MEMORY)
		   || (result == CVMX_CMD_QUEUE_FULL)) {
		return CVMX_PKO_NO_MEMORY;
	} else {
		return CVMX_PKO_INVALID_QUEUE;
	}
}

/**
 * Return the pko output queue associated with a port and a specific core.
 * In normal mode (PKO lockless operation is disabled), the value returned
 * is the base queue.
 *
 * @port:   Port number
 * @core:   Core to get queue for
 *
 * Returns Core-specific output queue
 */
static inline int cvmx_pko_get_base_queue_per_core(int port, int core)
{
#ifndef CVMX_HELPER_PKO_MAX_PORTS_INTERFACE0
#define CVMX_HELPER_PKO_MAX_PORTS_INTERFACE0 16
#endif
#ifndef CVMX_HELPER_PKO_MAX_PORTS_INTERFACE1
#define CVMX_HELPER_PKO_MAX_PORTS_INTERFACE1 16
#endif

	if (port < CVMX_PKO_MAX_PORTS_INTERFACE0)
		return port * CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 + core;
	else if (port >= 16 && port < 16 + CVMX_PKO_MAX_PORTS_INTERFACE1)
		return CVMX_PKO_MAX_PORTS_INTERFACE0 *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 + (port -
							   16) *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 + core;
	else if ((port >= 32) && (port < 36))
		return CVMX_PKO_MAX_PORTS_INTERFACE0 *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 +
		    CVMX_PKO_MAX_PORTS_INTERFACE1 *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 + (port -
							   32) *
		    CVMX_PKO_QUEUES_PER_PORT_PCI;
	else if ((port >= 36) && (port < 40))
		return CVMX_PKO_MAX_PORTS_INTERFACE0 *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 +
		    CVMX_PKO_MAX_PORTS_INTERFACE1 *
		    CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 +
		    4 * CVMX_PKO_QUEUES_PER_PORT_PCI + (port -
							36) *
		    CVMX_PKO_QUEUES_PER_PORT_LOOP;
	else
		/* Given the limit on the number of ports we can map to
		 * CVMX_MAX_OUTPUT_QUEUES_STATIC queues (currently 256,
		 * divided among all cores), the remaining unmapped ports
		 * are assigned an illegal queue number */
		return CVMX_PKO_ILLEGAL_QUEUE;
}

/**
 * For a given port number, return the base pko output queue
 * for the port.
 *
 * @port:   Port number
 * Returns Base output queue
 */
static inline int cvmx_pko_get_base_queue(int port)
{
	return cvmx_pko_get_base_queue_per_core(port, 0);
}

/**
 * For a given port number, return the number of pko output queues.
 *
 * @port:   Port number
 * Returns Number of output queues
 */
static inline int cvmx_pko_get_num_queues(int port)
{
	if (port < 16)
		return CVMX_PKO_QUEUES_PER_PORT_INTERFACE0;
	else if (port < 32)
		return CVMX_PKO_QUEUES_PER_PORT_INTERFACE1;
	else if (port < 36)
		return CVMX_PKO_QUEUES_PER_PORT_PCI;
	else if (port < 40)
		return CVMX_PKO_QUEUES_PER_PORT_LOOP;
	else
		return 0;
}

/**
 * Get the status counters for a port.
 *
 * @port_num: Port number to get statistics for.
 * @clear:    Set to 1 to clear the counters after they are read
 * @status:   Where to put the results.
 */
static inline void cvmx_pko_get_port_status(uint64_t port_num, uint64_t clear,
					    cvmx_pko_port_status_t *status)
{
	union cvmx_pko_reg_read_idx pko_reg_read_idx;
	union cvmx_pko_mem_count0 pko_mem_count0;
	union cvmx_pko_mem_count1 pko_mem_count1;

	pko_reg_read_idx.u64 = 0;
	pko_reg_read_idx.s.index = port_num;
	cvmx_write_csr(CVMX_PKO_REG_READ_IDX, pko_reg_read_idx.u64);

	pko_mem_count0.u64 = cvmx_read_csr(CVMX_PKO_MEM_COUNT0);
	status->packets = pko_mem_count0.s.count;
	if (clear) {
		pko_mem_count0.s.count = port_num;
		cvmx_write_csr(CVMX_PKO_MEM_COUNT0, pko_mem_count0.u64);
	}

	pko_mem_count1.u64 = cvmx_read_csr(CVMX_PKO_MEM_COUNT1);
	status->octets = pko_mem_count1.s.count;
	if (clear) {
		pko_mem_count1.s.count = port_num;
		cvmx_write_csr(CVMX_PKO_MEM_COUNT1, pko_mem_count1.u64);
	}

	if (OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
		union cvmx_pko_mem_debug9 debug9;
		pko_reg_read_idx.s.index = cvmx_pko_get_base_queue(port_num);
		cvmx_write_csr(CVMX_PKO_REG_READ_IDX, pko_reg_read_idx.u64);
		debug9.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG9);
		status->doorbell = debug9.cn38xx.doorbell;
	} else {
		union cvmx_pko_mem_debug8 debug8;
		pko_reg_read_idx.s.index = cvmx_pko_get_base_queue(port_num);
		cvmx_write_csr(CVMX_PKO_REG_READ_IDX, pko_reg_read_idx.u64);
		debug8.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG8);
		status->doorbell = debug8.cn58xx.doorbell;
	}
}

/**
 * Rate limit a PKO port to a max packets/sec. This function is only
 * supported on CN57XX, CN56XX, CN55XX, and CN54XX.
 *
 * @port:      Port to rate limit
 * @packets_s: Maximum packet/sec
 * @burst:     Maximum number of packets to burst in a row before rate
 *                  limiting cuts in.
 *
 * Returns Zero on success, negative on failure
 */
extern int cvmx_pko_rate_limit_packets(int port, int packets_s, int burst);

/**
 * Rate limit a PKO port to a max bits/sec. This function is only
 * supported on CN57XX, CN56XX, CN55XX, and CN54XX.
 *
 * @port:   Port to rate limit
 * @bits_s: PKO rate limit in bits/sec
 * @burst:  Maximum number of bits to burst before rate
 *               limiting cuts in.
 *
 * Returns Zero on success, negative on failure
 */
extern int cvmx_pko_rate_limit_bits(int port, uint64_t bits_s, int burst);

#endif /* __CVMX_PKO_H__ */