/*
 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/fs.h>
#include <linux/jhash.h>
#include <linux/hash.h>
#include <linux/ktime.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/poll.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/writeback.h>

#include "netfs.h"

/*
 * Slab cache for struct netfs_trans_dst objects and the mempool layered on
 * top of it. Both are created in netfs_trans_init() and torn down in
 * netfs_trans_exit(); destination entries are allocated from the pool in
 * netfs_trans_push_dst().
 */
static struct kmem_cache *netfs_trans_dst;
static mempool_t *netfs_trans_dst_pool;

/*
 * Initialize the payload-independent part of a transaction.
 *
 * @t:    transaction to set up
 * @num:  number of page slots, stored in t->page_num
 * @size: capacity of the contiguous buffer, stored in t->total_size
 *
 * The reference counter starts at one and the destination list starts
 * out empty.
 */
static void netfs_trans_init_static(struct netfs_trans *t, int num, int size)
{
	/* Destination bookkeeping: empty list plus the lock guarding it. */
	INIT_LIST_HEAD(&t->dst_list);
	spin_lock_init(&t->dst_lock);

	/* Start with a single reference. */
	atomic_set(&t->refcnt, 1);

	t->total_size = size;
	t->page_num = num;
}

/*
 * Send the pages attached to transaction @t over @st->socket.
 *
 * Every non-NULL slot of t->pages is sent as a NETFS_WRITE_PAGE command
 * header followed by the page data. The loop stops once t->attached_pages
 * pages have been sent or all t->page_num slots have been visited.
 *
 * Returns 0 on success or a negative error code. A send that returns 0
 * bytes is reported as -ECONNRESET. On any failure the network state is
 * torn down with netfs_state_exit().
 */
static int netfs_trans_send_pages(struct netfs_trans *t, struct netfs_state *st)
{
	int err = 0;
	unsigned int i, attached_pages = t->attached_pages, ci;
	struct msghdr msg;
	/*
	 * Page data is taken from t->eng->pages when an engine is attached,
	 * otherwise from t->pages itself.
	 * NOTE(review): presumably the engine array holds crypto-processed
	 * copies of the original pages - confirm against the crypto code.
	 */
	struct page **pages = (t->eng)?t->eng->pages:t->pages;
	struct page *p;
	unsigned int size;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_WAITALL | MSG_MORE;

	/*
	 * 'i' walks the slots of t->pages, 'ci' walks the data page array and
	 * only advances after a page was actually sent.
	 */
	ci = 0;
	for (i=0; i<t->page_num; ++i) {
		struct page *page = pages[ci];
		struct netfs_cmd cmd;
		struct iovec io;

		/* Size and file offset always come from the original page. */
		p = t->pages[i];

		/* Empty slot: nothing to send, 'ci' is left untouched. */
		if (!p)
			continue;

		/* Number of bytes to send is kept in the page's private field. */
		size = page_private(p);

		io.iov_base = &cmd;
		io.iov_len = sizeof(struct netfs_cmd);

		cmd.cmd = NETFS_WRITE_PAGE;
		cmd.ext = 0;
		cmd.id = 0;
		cmd.size = size;
		/* Byte offset: page index shifted by PAGE_CACHE_SHIFT. */
		cmd.start = p->index;
		cmd.start <<= PAGE_CACHE_SHIFT;
		cmd.csize = 0;
		cmd.cpad = 0;
		cmd.iv = pohmelfs_gen_iv(t);

		netfs_convert_cmd(&cmd);

		msg.msg_iov = &io;
		msg.msg_iovlen = 1;
		/* The header is always followed by page data, hence MSG_MORE. */
		msg.msg_flags = MSG_WAITALL | MSG_MORE;

		err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, sizeof(struct netfs_cmd));
		if (err <= 0) {
			printk("%s: %d/%d failed to send transaction header: t: %p, gen: %u, err: %d.\n",
					__func__, i, t->page_num, t, t->gen, err);
			if (err == 0)
				err = -ECONNRESET;
			goto err_out;
		}

		/* MSG_MORE is dropped only for the last attached page. */
		msg.msg_flags = MSG_WAITALL | (attached_pages == 1 ? 0 :
				MSG_MORE);

		err = kernel_sendpage(st->socket, page, 0, size, msg.msg_flags);
		if (err <= 0) {
			printk("%s: %d/%d failed to send transaction page: t: %p, gen: %u, size: %u, err: %d.\n",
					__func__, i, t->page_num, t, t->gen, size, err);
			if (err == 0)
				err = -ECONNRESET;
			goto err_out;
		}

		dprintk("%s: %d/%d sent t: %p, gen: %u, page: %p/%p, size: %u.\n",
			__func__, i, t->page_num, t, t->gen, page, p, size);

		/* Positive byte counts are folded into plain success. */
		err = 0;
		attached_pages--;
		if (!attached_pages)
			break;
		ci++;

		continue;

		/*
		 * Error exit lives inside the loop body and is reachable only
		 * through the gotos above; 'err' holds a negative code here.
		 */
err_out:
		printk("%s: t: %p, gen: %u, err: %d.\n", __func__, t, t->gen, err);
		netfs_state_exit(st);
		break;
	}

	return err;
}

int netfs_trans_send(struct netfs_trans *t, struct netfs_state *st)
{
	int err;
	struct msghdr msg;

	BUG_ON(!t->iovec.iov_len);
	BUG_ON(t->iovec.iov_len > 1024*1024*1024);

	netfs_state_lock_send(st);
	if (!st->socket) {
		err = netfs_state_init(st);
		if (err)
			goto err_out_unlock_return;
	}

	msg.msg_iov = &t->iovec;
	msg.msg_iovlen = 1;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_WAITALL;

	if (t->attached_pages)
		msg.msg_flags |= MSG_MORE;

	err = kernel_sendmsg(st->socket, &msg, (struct kvec *)msg.msg_iov, 1, t->iovec.iov_len);
	if (err <= 0) {
		printk("%s: failed to send contig transaction: t: %p, gen: %u, size: %zu, err: %d.\n",
				__func__, t, t->gen, t->iovec.iov_len, err);
		if (err == 0)
			err = -ECONNRESET;
		goto err_out_unlock_return;
	}

	dprintk("%s: sent %s transaction: t: %p, gen: %u, size: %zu, page_num: %u.\n",
			__func__, (t->page_num)?"partial":"full",
			t, t->gen, t->iovec.iov_len, t->page_num);

	err = 0;
	if (t->attached_pages)
		err = netfs_trans_send_pages(t, st);

err_out_unlock_return:

	if (st->need_reset)
		netfs_state_exit(st);

	netfs_state_unlock_send(st);

	dprintk("%s: t: %p, gen: %u, err: %d.\n",
		__func__, t, t->gen, err);

	t->result = err;
	return err;
}

/*
 * Three-way comparison of transaction generation numbers.
 *
 * Returns 1 when @gen is smaller than @new, -1 when it is larger and 0
 * when they are equal. Note that the sign is the opposite of the usual
 * "a - b" convention.
 */
static inline int netfs_trans_cmp(unsigned int gen, unsigned int new)
{
	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return (gen < new) - (gen > new);
}

/*
 * Look up the destination entry for generation @gen in the transaction
 * tree of network state @st.
 *
 * Returns the matching entry or NULL when there is none. No locking is
 * done here.
 * NOTE(review): presumably the caller holds st->trans_lock - confirm.
 */
struct netfs_trans_dst *netfs_trans_search(struct netfs_state *st, unsigned int gen)
{
	struct rb_node *node = st->trans_root.rb_node;

	while (node) {
		struct netfs_trans_dst *cur =
			rb_entry(node, struct netfs_trans_dst, state_entry);
		int order = netfs_trans_cmp(cur->trans->gen, gen);

		if (!order)
			return cur;

		/* Negative means the stored generation is larger: go left. */
		node = (order < 0) ? node->rb_left : node->rb_right;
	}

	return NULL;
}

/*
 * Link destination entry @ndst into the transaction tree of @st, keyed by
 * the generation number of its transaction.
 *
 * Returns 0 on success (ndst->send_time is then stamped with jiffies) or
 * -EEXIST when an entry with the same generation is already in the tree.
 * No locking is done here; netfs_trans_push_dst() calls this with
 * st->trans_lock held.
 */
static int netfs_trans_insert(struct netfs_trans_dst *ndst, struct netfs_state *st)
{
	struct rb_root *root = &st->trans_root;
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct netfs_trans *new = ndst->trans;

	while (*link) {
		struct netfs_trans_dst *cur;
		struct netfs_trans *old;
		int order;

		parent = *link;
		cur = rb_entry(parent, struct netfs_trans_dst, state_entry);
		old = cur->trans;

		order = netfs_trans_cmp(old->gen, new->gen);
		if (!order) {
			/* Duplicate generation: report both entries and refuse. */
			printk("%s: exist: old: gen: %u, flags: %x, send_time: %lu, "
					"new: gen: %u, flags: %x, send_time: %lu.\n",
				__func__, old->gen, old->flags, cur->send_time,
				new->gen, new->flags, ndst->send_time);
			return -EEXIST;
		}

		link = (order < 0) ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&ndst->state_entry, parent, link);
	rb_insert_color(&ndst->state_entry, root);
	ndst->send_time = jiffies;

	return 0;
}

/*
 * Unlink @dst from the transaction tree of @st without taking any lock
 * (netfs_trans_remove_state() is the wrapper that takes st->trans_lock).
 *
 * A zero rb_parent_color field is treated as "not in the tree"; the field
 * is cleared after erasing so a second call becomes a no-op.
 *
 * Returns 1 when the entry was removed, 0 when @dst is NULL or was not
 * linked.
 */
int netfs_trans_remove_nolock(struct netfs_trans_dst *dst, struct netfs_state *st)
{
	if (!dst || !dst->state_entry.rb_parent_color)
		return 0;

	rb_erase(&dst->state_entry, &st->trans_root);
	dst->state_entry.rb_parent_color = 0;

	return 1;
}

/*
 * Remove @dst from the transaction tree of the state it belongs to,
 * taking that state's trans_lock around the operation.
 *
 * Returns 1 when the entry was removed, 0 when it was not in the tree.
 */
static int netfs_trans_remove_state(struct netfs_trans_dst *dst)
{
	struct netfs_state *owner = dst->state;
	int removed;

	mutex_lock(&owner->trans_lock);
	removed = netfs_trans_remove_nolock(dst, owner);
	mutex_unlock(&owner->trans_lock);

	return removed;
}

/*
 * Create a destination entry binding transaction @t to network state @st.
 *
 * The entry takes a reference on the transaction, is inserted into the
 * state's transaction tree under st->trans_lock and is then appended to
 * the transaction's destination list under t->dst_lock. The reference is
 * dropped again when the entry is freed (see netfs_trans_free_dst()).
 *
 * Returns 0 on success, -ENOMEM when no entry could be allocated, or the
 * error from netfs_trans_insert(). On insert failure the error is also
 * stored in t->result and the reference taken here is released.
 */
static int netfs_trans_push_dst(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *entry;
	int rc;

	entry = mempool_alloc(netfs_trans_dst_pool, GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->trans = t;
	entry->state = st;
	entry->send_time = 0;
	entry->retries = 0;
	netfs_trans_get(t);

	mutex_lock(&st->trans_lock);
	rc = netfs_trans_insert(entry, st);
	mutex_unlock(&st->trans_lock);

	if (rc) {
		/* Insert failed: undo the reference and give the entry back. */
		t->result = rc;
		netfs_trans_put(t);
		mempool_free(entry, netfs_trans_dst_pool);
		return rc;
	}

	spin_lock(&t->dst_lock);
	list_add_tail(&entry->trans_entry, &t->dst_list);
	spin_unlock(&t->dst_lock);

	return 0;
}

/*
 * Release a destination entry: drop the transaction reference it holds
 * and return the entry to the mempool.
 */
static void netfs_trans_free_dst(struct netfs_trans_dst *dst)
{
	struct netfs_trans *owner = dst->trans;

	netfs_trans_put(owner);
	mempool_free(dst, netfs_trans_dst_pool);
}

/*
 * Take @dst out of its state's transaction tree and free it.
 * The entry is freed only if this call actually removed it from the
 * tree; otherwise it is left untouched.
 */
static void netfs_trans_remove_dst(struct netfs_trans_dst *dst)
{
	if (!netfs_trans_remove_state(dst))
		return;

	netfs_trans_free_dst(dst);
}

/*
 * Drop a destination entry the caller already holds a pointer to:
 * unlink it from its transaction's destination list, then remove it from
 * the state tree and free it via netfs_trans_remove_dst().
 */
void netfs_trans_drop_dst(struct netfs_trans_dst *dst)
{
	struct netfs_trans *owner = dst->trans;

	spin_lock(&owner->dst_lock);
	list_del_init(&dst->trans_entry);
	spin_unlock(&owner->dst_lock);

	netfs_trans_remove_dst(dst);
}

/*
 * Drop a destination entry that the caller has already removed from the
 * state tree: unlink it from its transaction's destination list and free
 * it unconditionally.
 */
void netfs_trans_drop_dst_nostate(struct netfs_trans_dst *dst)
{
	struct netfs_trans *owner = dst->trans;

	spin_lock(&owner->dst_lock);
	list_del_init(&dst->trans_entry);
	spin_unlock(&owner->dst_lock);

	netfs_trans_free_dst(dst);
}

/*
 * Drop the destination entry that binds transaction @t to network state
 * @st, if there is one.
 *
 * The first entry on t->dst_list whose state is @st is unlinked from the
 * list and handed to netfs_trans_remove_dst(), which removes it from the
 * state tree and, if it was still in the tree, frees it and drops the
 * transaction reference it held. The transaction itself may be freed
 * there if that was the last reference.
 */
void netfs_trans_drop_trans(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *dst, *tmp, *ret = NULL;

	spin_lock(&t->dst_lock);
	list_for_each_entry_safe(dst, tmp, &t->dst_list, trans_entry) {
		if (dst->state == st) {
			ret = dst;
			/*
			 * Use list_del_init() like the sibling drop helpers.
			 * netfs_trans_remove_dst() leaves the entry alive
			 * when it is no longer in the state tree, so its
			 * list links must stay valid for a later
			 * list_del_init() on the same entry.
			 */
			list_del_init(&dst->trans_entry);
			break;
		}
	}
	spin_unlock(&t->dst_lock);

	if (ret)
		netfs_trans_remove_dst(ret);
}

/*
 * Drop the destination entry that binds transaction @t to network state
 * @st, checking the most recently added entry first.
 *
 * The tail of t->dst_list is tried first because netfs_trans_push() calls
 * this right after netfs_trans_push_dst() appended an entry there. If the
 * tail belongs to another state, the list is scanned from the head. A
 * matching entry is unlinked and handed to netfs_trans_remove_dst(), which
 * frees it (dropping its transaction reference) if it was still in the
 * state tree.
 *
 * Nothing happens when the list is empty or holds no entry for @st.
 */
void netfs_trans_drop_last(struct netfs_trans *t, struct netfs_state *st)
{
	struct netfs_trans_dst *dst, *ret = NULL;

	spin_lock(&t->dst_lock);
	/*
	 * On an empty list, dst_list.prev points back at the list head, so
	 * list_entry() would produce a pointer that is not a real entry.
	 */
	if (!list_empty(&t->dst_list)) {
		dst = list_entry(t->dst_list.prev, struct netfs_trans_dst, trans_entry);
		if (dst->state == st) {
			ret = dst;
		} else {
			list_for_each_entry(dst, &t->dst_list, trans_entry) {
				if (dst->state == st) {
					ret = dst;
					break;
				}
			}
		}

		/*
		 * list_del_init() on both paths: the entry may survive
		 * netfs_trans_remove_dst() when it is already out of the
		 * state tree, so its links must remain valid.
		 */
		if (ret)
			list_del_init(&ret->trans_entry);
	}
	spin_unlock(&t->dst_lock);

	if (ret)
		netfs_trans_remove_dst(ret);
}

/*
 * Register transaction @t with network state @st and send it.
 *
 * Returns 0 on success. If registration fails its error is returned
 * unchanged. If sending fails the error is stored in t->result, the
 * destination entry that was just added is dropped again and the error
 * is returned. After a successful send of a single-destination
 * transaction, pohmelfs_switch_active() is called for the state's
 * superblock.
 */
static int netfs_trans_push(struct netfs_trans *t, struct netfs_state *st)
{
	int rc;

	rc = netfs_trans_push_dst(t, st);
	if (rc)
		return rc;

	rc = netfs_trans_send(t, st);
	if (rc) {
		t->result = rc;
		netfs_trans_drop_last(t, st);
		return rc;
	}

	if (t->flags & NETFS_TRANS_SINGLE_DST)
		pohmelfs_switch_active(st->psb);

	return 0;
}

/*
 * Push transaction @t to the configured network states of @psb.
 *
 * The state list is walked under psb->state_lock. States lacking the
 * required permission are skipped: read permission for single-destination
 * transactions, write permission otherwise. For a single-destination
 * transaction the currently active state is used instead of the list
 * entry when its priority is not lower, and the walk stops after the
 * first successful push.
 *
 * Returns the result of the last push attempt, or -ENODEV when no state
 * was tried. A non-zero result is also stored in t->result.
 */
int netfs_trans_finish_send(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	struct pohmelfs_config *c;
	struct netfs_state *st;
	int err = -ENODEV;
#if 0
	dprintk("%s: t: %p, gen: %u, size: %u, page_num: %u, active: %p.\n",
		__func__, t, t->gen, t->iovec.iov_len, t->page_num, psb->active_state);
#endif
	mutex_lock(&psb->state_lock);
	list_for_each_entry(c, &psb->state_list, config_entry) {
		st = &c->state;

		/* Single-destination needs read access, everything else write. */
		if (!(st->ctl.perm & ((t->flags & NETFS_TRANS_SINGLE_DST) ?
				POHMELFS_IO_PERM_READ : POHMELFS_IO_PERM_WRITE)))
			continue;

		/* Prefer the active state when its priority is at least as high. */
		if (psb->active_state &&
		    (psb->active_state->state.ctl.prio >= st->ctl.prio) &&
		    (t->flags & NETFS_TRANS_SINGLE_DST))
			st = &psb->active_state->state;

		err = netfs_trans_push(t, st);
		if (err)
			continue;

		/* One successful destination is enough in single-dst mode. */
		if (t->flags & NETFS_TRANS_SINGLE_DST)
			break;
	}

	mutex_unlock(&psb->state_lock);
#if 0
	dprintk("%s: fully sent t: %p, gen: %u, size: %u, page_num: %u, err: %d.\n",
		__func__, t, t->gen, t->iovec.iov_len, t->page_num, err);
#endif
	if (err)
		t->result = err;
	return err;
}

/*
 * Finalize transaction @t and hand it to pohmelfs_trans_crypt().
 *
 * A new generation number is taken from psb->trans_gen and the
 * transaction header at the start of the contiguous buffer is filled in:
 * command NETFS_TRANS, the generation in ->start, and ->size covering the
 * buffer past the header plus the attached data and one command header
 * per attached page. With crypto enabled, ->ext and ->csize carry the
 * crypto attached size.
 *
 * The caller's reference on @t is always dropped before returning.
 * Returns the result of pohmelfs_trans_crypt(); on error it is also
 * stored in t->result.
 * NOTE(review): the actual send presumably happens inside
 * pohmelfs_trans_crypt() - confirm against the crypto code.
 */
int netfs_trans_finish(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	struct netfs_cmd *cmd = t->iovec.iov_base;
	int rc;

	t->gen = atomic_inc_return(&psb->trans_gen);

	cmd->cmd = NETFS_TRANS;
	cmd->id = 0;
	cmd->start = t->gen;
	cmd->size = t->iovec.iov_len - sizeof(struct netfs_cmd) +
		t->attached_size + t->attached_pages * sizeof(struct netfs_cmd);

	if (psb->perform_crypto) {
		cmd->csize = psb->crypto_attached_size;
		cmd->ext = psb->crypto_attached_size;
	}

	dprintk("%s: t: %u, size: %u, iov_len: %zu, attached_size: %u, attached_pages: %u.\n",
			__func__, t->gen, cmd->size, t->iovec.iov_len, t->attached_size, t->attached_pages);

	rc = pohmelfs_trans_crypt(t, psb);
	if (rc) {
		t->result = rc;
		netfs_convert_cmd(cmd);
		dprintk("%s: trans: %llu, crypto_attached_size: %u, attached_size: %u, attached_pages: %d, trans_size: %u, err: %d.\n",
			__func__, cmd->start, psb->crypto_attached_size, t->attached_size, t->attached_pages, cmd->size, rc);
	}

	netfs_trans_put(t);
	return rc;
}

/*
 * Resend transaction to remote server(s).
 * If new servers were added into superblock, we can try to send data
 * to them too.
 *
 * It is called under superblock's state_lock, so we can safely
 * dereference psb->state_list. Also, transaction's reference counter is
 * bumped, so it can not go away under us, thus we can safely access all
 * its members. State is locked.
 *
 * This function returns 0 if transaction was successfully sent to at
 * least one destination target, and -ENODEV otherwise. The same value
 * is stored in t->result.
 */
int netfs_trans_resend(struct netfs_trans *t, struct pohmelfs_sb *psb)
{
	struct netfs_trans_dst *dst;
	struct netfs_state *st;
	struct pohmelfs_config *c;
	int err, exist, error = -ENODEV;

	list_for_each_entry(c, &psb->state_list, config_entry) {
		st = &c->state;

		/* Does the transaction already have a destination on this state? */
		exist = 0;
		spin_lock(&t->dst_lock);
		list_for_each_entry(dst, &t->dst_list, trans_entry) {
			if (st == dst->state) {
				exist = 1;
				break;
			}
		}
		spin_unlock(&t->dst_lock);

		if (exist) {
			/*
			 * Already registered here: plain resend. A
			 * single-destination transaction is resent to its
			 * existing state only when that state is the last
			 * entry of the state list; otherwise the loop moves
			 * on to the remaining states.
			 */
			if (!(t->flags & NETFS_TRANS_SINGLE_DST) ||
					(c->config_entry.next == &psb->state_list)) {
				dprintk("%s: resending st: %p, t: %p, gen: %u.\n",
						__func__, st, t, t->gen);
				err = netfs_trans_send(t, st);
				if (!err)
					error = 0;
			}
			continue;
		}

		/* Not registered on this state yet: register and send. */
		dprintk("%s: pushing/resending st: %p, t: %p, gen: %u.\n",
				__func__, st, t, t->gen);
		err = netfs_trans_push(t, st);
		if (err)
			continue;
		error = 0;
		/* One successful destination is enough in single-dst mode. */
		if (t->flags & NETFS_TRANS_SINGLE_DST)
			break;
	}

	t->result = error;
	return error;
}

/*
 * Reserve @size bytes at the end of the transaction's contiguous buffer.
 *
 * Returns a pointer to the reserved area and advances t->iovec.iov_len,
 * or ERR_PTR(-EINVAL) when @size alone exceeds the buffer capacity, or
 * ERR_PTR(-E2BIG) when it does not fit into the remaining space. The
 * buffer is left unchanged on failure.
 */
void *netfs_trans_add(struct netfs_trans *t, unsigned int size)
{
	struct iovec *vec = &t->iovec;
	void *area;

	if (size > t->total_size) {
		area = ERR_PTR(-EINVAL);
	} else if (vec->iov_len + size > t->total_size) {
		dprintk("%s: too big size t: %p, gen: %u, iov_len: %zu, size: %u, total: %u.\n",
				__func__, t, t->gen, vec->iov_len, size, t->total_size);
		area = ERR_PTR(-E2BIG);
	} else {
		area = vec->iov_base + vec->iov_len;
		vec->iov_len += size;
	}

	dprintk("%s: t: %p, gen: %u, size: %u, total: %zu.\n",
		__func__, t, t->gen, size, vec->iov_len);
	return area;
}

void netfs_trans_free(struct netfs_trans *t)
{
	if (t->eng)
		pohmelfs_crypto_thread_make_ready(t->eng->thread);
	kfree(t);
}

/*
 * Allocate a transaction with room for @size bytes of commands and up to
 * @nr attached page pointers.
 *
 * Everything lives in one zeroed allocation, laid out as:
 *
 *   | struct netfs_trans                                    |
 *   | struct netfs_cmd         - transaction header         |
 *   | crypto data              - only with crypto enabled   |
 *   | size (aligned)           - caller's buffer + padding  |
 *   | struct netfs_cmd         - second command slot        |
 *   | nr * sizeof(struct page *) - page pointer array       |
 *
 * @size is rounded up to psb->crypto_align_size. @nr is clamped so that
 * the allocation is meant to stay within one page; t->page_num holds the
 * resulting slot count. t->iovec.iov_len starts past the transaction
 * header (and crypto data), so netfs_trans_add() appends after it.
 *
 * Returns the new transaction or NULL on allocation failure.
 */
struct netfs_trans *netfs_trans_alloc(struct pohmelfs_sb *psb, unsigned int size,
		unsigned int flags, unsigned int nr)
{
	struct netfs_trans *t;
	struct netfs_cmd *cmd;
	unsigned int requested = size;
	unsigned int crypto_added = psb->perform_crypto ? psb->crypto_attached_size : 0;
	unsigned int pad, size_no_trans, fixed, num;

	/* Round the payload up for crypto and remember the padding added. */
	size = ALIGN(size, psb->crypto_align_size);
	pad = size - requested;

	/* Everything that follows struct netfs_trans except the page array. */
	size_no_trans = size + sizeof(struct netfs_cmd) * 2 + crypto_added;
	fixed = sizeof(struct netfs_trans) + size_no_trans;

	/*
	 * Page slots that still fit into a single page.
	 * NOTE(review): when 'fixed' exceeds PAGE_SIZE this subtraction wraps
	 * around and 'nr' is effectively not clamped - confirm whether
	 * callers can request that much.
	 */
	num = (PAGE_SIZE - fixed)/sizeof(struct page *);
	if (nr > num)
		nr = num;

	t = kzalloc(fixed + nr*sizeof(struct page *), GFP_NOIO);
	if (!t)
		return NULL;

	t->iovec.iov_base = (void *)(t + 1);
	t->pages = (struct page **)(t->iovec.iov_base + size_no_trans);

	/* Reserve room for the transaction header (and crypto data). */
	t->iovec.iov_len = sizeof(struct netfs_cmd) + crypto_added;

	netfs_trans_init_static(t, nr, size_no_trans);

	t->psb = psb;
	t->flags = flags;

	cmd = (struct netfs_cmd *)t->iovec.iov_base;
	cmd->csize = crypto_added;
	cmd->cpad = pad;
	cmd->size = size;

	dprintk("%s: t: %p, gen: %u, size: %u, padding: %u, align_size: %u, flags: %x, "
			"page_num: %u, base: %p, pages: %p.\n",
			__func__, t, t->gen, size, pad, psb->crypto_align_size, flags, nr,
			t->iovec.iov_base, t->pages);

	return t;
}

/*
 * Set up the allocators for destination entries: a slab cache for
 * struct netfs_trans_dst and a mempool of 256 reserved objects on top of
 * it.
 *
 * Returns 0 on success or -ENOMEM; on failure nothing stays allocated.
 */
int netfs_trans_init(void)
{
	netfs_trans_dst = kmem_cache_create("netfs_trans_dst", sizeof(struct netfs_trans_dst),
			0, 0, NULL);
	if (!netfs_trans_dst)
		return -ENOMEM;

	netfs_trans_dst_pool = mempool_create_slab_pool(256, netfs_trans_dst);
	if (!netfs_trans_dst_pool) {
		/* Roll back the cache created above. */
		kmem_cache_destroy(netfs_trans_dst);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Tear down the allocators created by netfs_trans_init(). The mempool is
 * destroyed before the slab cache it was created on.
 */
void netfs_trans_exit(void)
{
	mempool_destroy(netfs_trans_dst_pool);
	kmem_cache_destroy(netfs_trans_dst);
}