/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "radeon_dataflow.h"

#include "radeon_compiler.h"


struct updatemask_state {
	unsigned char Output[RC_REGISTER_MAX_INDEX];
	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
	unsigned char Address;
	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
};

struct instruction_state {
	unsigned char WriteMask:4;
	unsigned char WriteALUResult:1;
	unsigned char SrcReg[3];
};

struct loopinfo {
	struct updatemask_state * Breaks;
	unsigned int BreakCount;
	unsigned int BreaksReserved;
};

struct branchinfo {
	unsigned int HaveElse:1;

	struct updatemask_state StoreEndif;
	struct updatemask_state StoreElse;
};

struct deadcode_state {
	struct radeon_compiler * C;
	struct instruction_state * Instructions;

	struct updatemask_state R;

	struct branchinfo * BranchStack;
	unsigned int BranchStackSize;
	unsigned int BranchStackReserved;

	struct loopinfo * LoopStack;
	unsigned int LoopStackSize;
	unsigned int LoopStackReserved;
};


static void or_updatemasks(
	struct updatemask_state * dst,
	struct updatemask_state * a,
	struct updatemask_state * b)
{
	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
		dst->Output[i] = a->Output[i] | b->Output[i];
		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
	}

	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
		dst->Special[i] = a->Special[i] | b->Special[i];

	dst->Address = a->Address | b->Address;
}

static void push_break(struct deadcode_state *s)
{
	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);

	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
}

static void push_loop(struct deadcode_state * s)
{
	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
			s->LoopStackSize, s->LoopStackReserved, 1);
	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
}

static void push_branch(struct deadcode_state * s)
{
	struct branchinfo * branch;

	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
			s->BranchStackSize, s->BranchStackReserved, 1);

	branch = &s->BranchStack[s->BranchStackSize++];
	branch->HaveElse = 0;
	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
}

static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return 0;
		}

		if (file == RC_FILE_OUTPUT)
			return &s->R.Output[index];
		else
			return &s->R.Temporary[index];
	} else if (file == RC_FILE_ADDRESS) {
		return &s->R.Address;
	} else if (file == RC_FILE_SPECIAL) {
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return 0;
		}

		return &s->R.Special[index];
	}

	return 0;
}

static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * pused = get_used_ptr(s, file, index);
	if (pused)
		*pused |= mask;
}

static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
	struct instruction_state * insts = &s->Instructions[inst->IP];
	unsigned int usedmask = 0;
	unsigned int srcmasks[3];

	if (opcode->HasDstReg) {
		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
		if (pused) {
			usedmask = *pused & inst->U.I.DstReg.WriteMask;
			*pused &= ~usedmask;
		}
	}

	insts->WriteMask |= usedmask;

	if (inst->U.I.WriteALUResult) {
		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
		if (pused && *pused) {
			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
				usedmask |= RC_MASK_X;
			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
				usedmask |= RC_MASK_W;

			*pused = 0;
			insts->WriteALUResult = 1;
		}
	}

	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);

	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
		unsigned int refmask = 0;
		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
		insts->SrcReg[src] |= newsrcmask;

		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (GET_BIT(newsrcmask, chan))
				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
		}

		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
		refmask &= RC_MASK_XYZW;

		if (!refmask)
			continue;

		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);

		if (inst->U.I.SrcReg[src].RelAddr)
			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
	}
}

static void mark_output_use(void * data, unsigned int index, unsigned int mask)
{
	struct deadcode_state * s = data;

	mark_used(s, RC_FILE_OUTPUT, index, mask);
}

void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
{
	struct deadcode_state s;
	unsigned int nr_instructions;
	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
	unsigned int ip;

	memset(&s, 0, sizeof(s));
	s.C = c;

	nr_instructions = rc_recompute_ips(c);
	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);

	dce(c, &s, &mark_output_use);

	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
	    inst != &c->Program.Instructions;
	    inst = inst->Prev) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		switch(opcode->Opcode){
		/* Mark all sources in the loop body as used before doing
		 * normal deadcode analysis.  This is probably not optimal.
		 */
		case RC_OPCODE_ENDLOOP:
		{
			int endloops = 1;
			struct rc_instruction *ptr;
			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
					endloops--;
					continue;
				}
				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
					endloops++;
					continue;
				}
				if(opcode->HasDstReg){
					int src = 0;
					unsigned int srcmasks[3];
					rc_compute_sources_for_writemask(ptr,
						ptr->U.I.DstReg.WriteMask, srcmasks);
					for(src=0; src < opcode->NumSrcRegs; src++){
						mark_used(&s,
							ptr->U.I.SrcReg[src].File,
							ptr->U.I.SrcReg[src].Index,
							srcmasks[src]);
					}
				}
			}
			push_loop(&s);
			break;
		}
		case RC_OPCODE_BRK:
			push_break(&s);
			break;
		case RC_OPCODE_BGNLOOP:
		{
			unsigned int i;
			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
			for(i = 0; i < loop->BreakCount; i++) {
				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
			}
			break;
		}
		case RC_OPCODE_CONT:
			break;
		case RC_OPCODE_ENDIF:
			push_branch(&s);
			break;
		default:
			if (opcode->IsFlowControl && s.BranchStackSize) {
				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
				if (opcode->Opcode == RC_OPCODE_IF) {
					or_updatemasks(&s.R,
							&s.R,
							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);

					s.BranchStackSize--;
				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
					if (branch->HaveElse) {
						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
					} else {
						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
						branch->HaveElse = 1;
					}
				} else {
					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
				}
			}
		}

		update_instruction(&s, inst);
	}

	ip = 0;
	for(struct rc_instruction * inst = c->Program.Instructions.Next;
	    inst != &c->Program.Instructions;
	    inst = inst->Next, ++ip) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		int dead = 1;
		unsigned int srcmasks[3];
		unsigned int usemask;

		if (!opcode->HasDstReg) {
			dead = 0;
		} else {
			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
			if (s.Instructions[ip].WriteMask)
				dead = 0;

			if (s.Instructions[ip].WriteALUResult)
				dead = 0;
			else
				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
		}

		if (dead) {
			struct rc_instruction * todelete = inst;
			inst = inst->Prev;
			rc_remove_instruction(todelete);
			continue;
		}

		usemask = s.Instructions[ip].WriteMask;

		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
			usemask |= RC_MASK_X;
		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
			usemask |= RC_MASK_W;

		rc_compute_sources_for_writemask(inst, usemask, srcmasks);

		for(unsigned int src = 0; src < 3; ++src) {
			for(unsigned int chan = 0; chan < 4; ++chan) {
				if (!GET_BIT(srcmasks[src], chan))
					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
			}
		}
	}

	rc_calculate_inputs_outputs(c);
}