/*
 * Copyright (C) 2010 Corbin Simpson
 * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "radeon_program_tex.h"

#include "radeon_compiler_util.h"

/* Series of transformations to be done on textures. */

static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
						int tmu)
{
	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };

	reg.File = RC_FILE_NONE;
	reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000,
				compiler->state.unit[tmu].texture_swizzle);
	return reg;
}

static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
						int tmu)
{
	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };

	reg.File = RC_FILE_NONE;
	reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
				compiler->state.unit[tmu].texture_swizzle);
	return reg;
}

static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
			    struct rc_instruction *inst,
			    unsigned state_constant)
{
	struct rc_instruction *inst_mov;

	unsigned temp = rc_find_free_temporary(&compiler->Base);

	inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);

	inst_mov->U.I.Opcode = RC_OPCODE_MUL;
	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mov->U.I.DstReg.Index = temp;
	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	inst_mov->U.I.SrcReg[1].Index =
			rc_constants_add_state(&compiler->Base.Program.Constants,
					       state_constant, inst->U.I.TexSrcUnit);

	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = temp;
}

static void projective_divide(struct r300_fragment_program_compiler *compiler,
			      struct rc_instruction *inst)
{
	struct rc_instruction *inst_mul, *inst_rcp;

	unsigned temp = rc_find_free_temporary(&compiler->Base);

	inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_rcp->U.I.DstReg.Index = temp;
	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
	inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	/* Because the input can be arbitrarily swizzled,
	 * read the component mapped to W. */
	inst_rcp->U.I.SrcReg[0].Swizzle =
		RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));

	inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mul->U.I.DstReg.Index = temp;
	inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	inst_mul->U.I.SrcReg[1].Index = temp;
	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.Opcode = RC_OPCODE_TEX;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = temp;
}

/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;
	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;

	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
		inst->U.I.Opcode != RC_OPCODE_TXB &&
		inst->U.I.Opcode != RC_OPCODE_TXP &&
		inst->U.I.Opcode != RC_OPCODE_TXD &&
		inst->U.I.Opcode != RC_OPCODE_TXL &&
		inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			} else {
				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			int pass, fail;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;
			unsigned saturate_mode = inst->U.I.SaturateMode;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.SaturateMode = 0;
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			tmp_sum = rc_find_free_temporary(c);

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_sum;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Divide Z by W (if it's TXP) and saturate. */
			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = tmp_sum;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
			}

			/* Add the depth texture value. */
			inst_add = rc_insert_new_instruction(c, inst_mul);
			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = tmp_sum;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[0].Index = tmp_sum;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;

			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *   EQUAL:   GEQUAL
			 *   NOTEQUAL:LESS
			 */

			/* This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
			else
				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expresion: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			inst_cmp = rc_insert_new_instruction(c, inst_add);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.SaturateMode = saturate_mode;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[0].Swizzle =
					combine_swizzles(RC_SWIZZLE_WWWW,
							 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum);
		}
	}

	/* R300 cannot sample from rectangles and the wrap mode fallback needs
	 * normalized coordinates anyway. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                            ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *				; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    wrapmode != RC_WRAP_NONE) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		if (wrapmode == RC_WRAP_REPEAT) {
			/* Both instructions will be paired up. */
			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
			/*
			 * Function:
			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
			 *
			 * Code:
			 *   MUL temp, src0, 0.5
			 *   FRC temp, temp
			 *   MAD temp, temp, 2, -1
			 *   ADD temp, 1, -abs(temp)
			 */

			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
			unsigned two, two_swizzle;

			inst_mul = rc_insert_new_instruction(c, inst->Prev);

			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = temp;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

			inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_frc->U.I.SrcReg[0].Index = temp;
			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
			inst_mad = rc_insert_new_instruction(c, inst->Prev);

			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = temp;
			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[0].Index = temp;
			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
			inst_mad->U.I.SrcReg[1].Index = two;
			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

			inst_add = rc_insert_new_instruction(c, inst->Prev);

			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = temp;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = temp;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
			inst_add->U.I.SrcReg[1].Abs = 1;
			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
			/*
			 * Mirrored clamp modes are bloody simple, we just use abs
			 * to mirror [0, 1] into [-1, 0]. This works for
			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
			 */
			struct rc_instruction *inst_mov;

			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mov->U.I.SrcReg[0].Abs = 1;
		}

		/* Preserve W for TXP/TXB. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;
	}

	/* NPOT -> POT conversion for 3D textures. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		/* Saturate XYZ. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		/* Copy W. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;

		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
	}

	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
		unsigned two, two_swizzle;
		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;

		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);

		inst_mul = rc_insert_new_instruction(c, inst);
		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
		inst_mul->U.I.SrcReg[1].Index = two;
		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;

		inst_mad = rc_insert_new_instruction(c, inst_mul);
		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;

		inst_cnd = rc_insert_new_instruction(c, inst_mad);
		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot write texture to output registers or with saturate (all chips),
	 * or with masks (non-r500). */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
		 inst->U.I.SaturateMode ||
		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}