/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include "radeon_emulate_branches.h"

#include <stdio.h>

#include "radeon_compiler.h"
#include "radeon_dataflow.h"

/* Compile-time switch for the debug output of this file: 0 disables it,
 * any non-zero value enables it. */
#define VERBOSE 0

/* printf-style trace to stderr; prints nothing while VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)


/**
 * Proxy state of a single temporary register.
 */
struct proxy_info {
	/** Set once a proxy temporary has been allocated for the register. */
	unsigned int Proxied:1;
	/** Index of the proxy temporary; only assigned together with Proxied. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
};

/**
 * Table of proxy assignments, indexed by the index of the original
 * temporary register.
 */
struct register_proxies {
	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
};

/**
 * Bookkeeping for one IF that has been seen but not yet closed by an ENDIF.
 */
struct branch_info {
	/** The IF instruction that opened the branch. */
	struct rc_instruction * If;
	/** The ELSE instruction of the branch; stays null while none was seen. */
	struct rc_instruction * Else;
};

/**
 * State carried through one run of the branch emulation pass.
 */
struct emulate_branch_state {
	/** Compiler whose program is being rewritten. */
	struct radeon_compiler * C;

	/** Stack of the branches that are currently open; innermost is last. */
	struct branch_info * Branches;
	/** Number of entries of Branches that are in use. */
	unsigned int BranchCount;
	/** Handed to memory_pool_array_reserve() next to BranchCount;
	 * presumably the allocated capacity of Branches (confirm against
	 * the memory pool helpers). */
	unsigned int BranchReserved;
};


/**
 * Start tracking a new branch for the IF instruction \p inst.
 *
 * A fresh branch record is pushed, the condition of the IF is copied into
 * a newly allocated temporary by a MOV inserted relative to inst->Prev,
 * and the IF is rewritten to read that temporary instead.
 */
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct rc_instruction * cond_copy;
	struct branch_info * top;

	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
			s->Branches, s->BranchCount, s->BranchReserved, 1);

	DBG("%s\n", __FUNCTION__);

	/* Push a zeroed record; Else stays null until an ELSE shows up. */
	top = s->Branches + s->BranchCount;
	s->BranchCount += 1;
	memset(top, 0, sizeof(*top));
	top->If = inst;

	/* The condition is needed again when the ENDIF is handled, and both
	 * branches are free to overwrite it, so keep a private copy.
	 * The copy is set up as a MOV to a temporary before the free
	 * temporary is looked up, exactly in this order. */
	cond_copy = rc_insert_new_instruction(s->C, inst->Prev);
	cond_copy->U.I.Opcode = RC_OPCODE_MOV;
	cond_copy->U.I.DstReg.File = RC_FILE_TEMPORARY;
	cond_copy->U.I.DstReg.Index = rc_find_free_temporary(s->C);
	cond_copy->U.I.DstReg.WriteMask = RC_MASK_X;
	cond_copy->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

	/* Point the IF at the copy, with all source modifiers cleared. */
	inst->U.I.SrcReg[0].Negate = 0;
	inst->U.I.SrcReg[0].Abs = 0;
	inst->U.I.SrcReg[0].Swizzle = 0;
	inst->U.I.SrcReg[0].Index = cond_copy->U.I.DstReg.Index;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
}

/**
 * Remember the ELSE instruction \p inst in the innermost open branch.
 *
 * Reports an error and does nothing else when no branch is open.
 */
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	if (s->BranchCount == 0) {
		rc_error(s->C, "Encountered ELSE outside of branches");
		return;
	}

	DBG("%s\n", __FUNCTION__);

	s->Branches[s->BranchCount - 1].Else = inst;
}


/**
 * Bundle handed as userdata to the scan_write() and remap_proxy_function()
 * callbacks.
 */
struct state_and_proxies {
	/** Pass state; gives the callbacks access to the compiler. */
	struct emulate_branch_state * S;
	/** Proxy table that the callbacks fill in and consult. */
	struct register_proxies * Proxies;
};

/**
 * Look up the proxy table entry of a register.
 *
 * \return the entry for temporary register \p index, or a null pointer
 * for registers of any other file, which are never proxied.
 */
static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
			rc_register_file file, unsigned int index)
{
	if (file != RC_FILE_TEMPORARY)
		return NULL;

	return sap->Proxies->Temporary + index;
}

/**
 * Write callback: the first time a temporary register is seen being
 * written, allocate a free temporary to act as its proxy.
 *
 * \p userdata is a struct state_and_proxies; \p inst and \p comp are
 * not used.
 */
static void scan_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int comp)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, file, index);

	/* Nothing to do for non-temporaries and already proxied registers. */
	if (!entry || entry->Proxied)
		return;

	entry->Proxied = 1;
	entry->Index = rc_find_free_temporary(ctx->S->C);
}

/**
 * Remap callback: replace a register that has a proxy by that proxy.
 *
 * \p userdata is a struct state_and_proxies; \p inst is not used.
 * Registers without a proxy are left untouched.
 */
static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, *pfile, *pindex);

	if (!entry || !entry->Proxied)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = entry->Index;
}

/**
 * Redirect all writes in the instruction range [begin, end) to proxy
 * temporary registers.
 */
static void allocate_and_insert_proxies(struct emulate_branch_state * s,
		struct register_proxies * proxies,
		struct rc_instruction * begin,
		struct rc_instruction * end)
{
	struct state_and_proxies sap;

	sap.S = s;
	sap.Proxies = proxies;

	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
		rc_for_all_writes_mask(inst, scan_write, &sap);
		rc_remap_registers(inst, remap_proxy_function, &sap);
	}

	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
		if (proxies->Temporary[index].Proxied) {
			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_mov->U.I.SrcReg[0].Index = index;
		}
	}
}


/**
 * Insert, relative to \p inst_endif, a CMP that writes register
 * \p file / \p index from the IF-side and the ELSE-side value.
 *
 * The first source is the condition of \p inst_if with Abs and a full
 * Negate mask applied. The second and third sources are the proxy
 * temporaries of the IF and ELSE side; a side without a proxy reads the
 * original temporary \p index instead.
 *
 * NOTE(review): this relies on CMP picking its second source when the
 * first one is negative - confirm against the opcode definition.
 */
static void inject_cmp(struct emulate_branch_state * s,
		struct rc_instruction * inst_if,
		struct rc_instruction * inst_endif,
		rc_register_file file, unsigned int index,
		struct proxy_info ifproxy,
		struct proxy_info elseproxy)
{
	struct rc_instruction * select;
	unsigned int if_value = index;
	unsigned int else_value = index;

	if (ifproxy.Proxied)
		if_value = ifproxy.Index;
	if (elseproxy.Proxied)
		else_value = elseproxy.Index;

	select = rc_insert_new_instruction(s->C, inst_endif);
	select->U.I.Opcode = RC_OPCODE_CMP;

	select->U.I.DstReg.File = file;
	select->U.I.DstReg.Index = index;
	select->U.I.DstReg.WriteMask = RC_MASK_XYZW;

	select->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
	select->U.I.SrcReg[0].Abs = 1;
	select->U.I.SrcReg[0].Negate = RC_MASK_XYZW;

	select->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	select->U.I.SrcReg[1].Index = if_value;

	select->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
	select->U.I.SrcReg[2].Index = else_value;
}

static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct branch_info * branch;
	struct register_proxies IfProxies;
	struct register_proxies ElseProxies;

	if (!s->BranchCount) {
		rc_error(s->C, "Encountered ENDIF outside of branches");
		return;
	}

	DBG("%s\n", __FUNCTION__);

	branch = &s->Branches[s->BranchCount - 1];

	memset(&IfProxies, 0, sizeof(IfProxies));
	memset(&ElseProxies, 0, sizeof(ElseProxies));

	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);

	if (branch->Else)
		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);

	/* Insert the CMP instructions at the end. */
	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
		}
	}

	/* Remove all traces of the branch instructions */
	rc_remove_instruction(branch->If);
	if (branch->Else)
		rc_remove_instruction(branch->Else);
	rc_remove_instruction(inst);

	s->BranchCount--;

	if (VERBOSE) {
		DBG("Program after ENDIF handling:\n");
		rc_print_program(&s->C->Program);
	}
}


/**
 * Userdata of remap_output_function(): one output register and the
 * temporary register that replaces it.
 */
struct remap_output_data {
	/** Index of the output register to be replaced. */
	unsigned int Output:RC_REGISTER_INDEX_BITS;
	/** Index of the temporary register used in its place. */
	unsigned int Temporary:RC_REGISTER_INDEX_BITS;
};

/**
 * Remap callback: replace the output register named in the
 * struct remap_output_data passed as \p userdata by its temporary.
 *
 * Every other register is left untouched; \p inst is not used.
 */
static void remap_output_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct remap_output_data * map = userdata;

	if (*pfile != RC_FILE_OUTPUT)
		return;
	if (*pindex != map->Output)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = map->Temporary;
}


/**
 * Output registers cannot be read from and so cannot be dealt with like
 * temporary registers.
 *
 * We do the simplest thing: if an output register is written within
 * a branch, then *all* accesses to this register in the whole program are
 * redirected to a temporary register, and a final MOV from that temporary
 * to the output register is inserted relative to the last instruction of
 * the program.
 *
 * Does nothing when no branch is open, or when \p inst has no destination
 * register or does not write an output register.
 */
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	const struct rc_opcode_info * info;
	struct remap_output_data remap;
	struct rc_instruction * cur;
	struct rc_instruction * final_mov;

	if (s->BranchCount == 0)
		return;

	info = rc_get_opcode_info(inst->U.I.Opcode);
	if (!info->HasDstReg)
		return;
	if (inst->U.I.DstReg.File != RC_FILE_OUTPUT)
		return;

	remap.Output = inst->U.I.DstReg.Index;
	remap.Temporary = rc_find_free_temporary(s->C);

	/* Walk the complete program, not just the current branch. */
	cur = s->C->Program.Instructions.Next;
	while (cur != &s->C->Program.Instructions) {
		rc_remap_registers(cur, remap_output_function, &remap);
		cur = cur->Next;
	}

	final_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
	final_mov->U.I.Opcode = RC_OPCODE_MOV;
	final_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
	final_mov->U.I.DstReg.Index = remap.Output;
	final_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	final_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	final_mov->U.I.SrcReg[0].Index = remap.Temporary;
}

/**
 * Remove branch instructions; instead, execute both branches
 * on different register sets and choose between their results
 * using CMP instructions in place of the original ENDIF.
 *
 * \param c     compiler whose program is rewritten in place
 * \param user  not used by this pass
 *
 * Problems are reported through rc_error(): an ELSE or ENDIF outside of
 * any branch, an instruction whose type is not RC_INSTRUCTION_NORMAL, and
 * an IF that is still open when the end of the program is reached.
 */
void rc_emulate_branches(struct radeon_compiler *c, void *user)
{
	struct emulate_branch_state s;
	struct rc_instruction * ptr;

	(void)user;

	memset(&s, 0, sizeof(s));
	s.C = c;

	/* Untypical loop because we may remove the current instruction */
	ptr = c->Program.Instructions.Next;
	while(ptr != &c->Program.Instructions) {
		struct rc_instruction * inst = ptr;
		ptr = ptr->Next;

		if (inst->Type == RC_INSTRUCTION_NORMAL) {
			switch(inst->U.I.Opcode) {
			case RC_OPCODE_IF:
				handle_if(&s, inst);
				break;
			case RC_OPCODE_ELSE:
				handle_else(&s, inst);
				break;
			case RC_OPCODE_ENDIF:
				handle_endif(&s, inst);
				break;
			default:
				fix_output_writes(&s, inst);
				break;
			}
		} else {
			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
		}
	}

	/* A branch that is still open here never reached its ENDIF, so its
	 * IF (and ELSE) are still in the program. Report that instead of
	 * silently handing back a program that still contains branches. */
	if (s.BranchCount)
		rc_error(c, "Encountered IF without matching ENDIF");
}