/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* Copyright 2007-2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* TGSI to LLVM IR translation -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*
* Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
* Brian Paul, and others.
*/
#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"
/*
 * Capacity of the function (call) stack kept in struct lp_exec_mask.
 * SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function.
 */
#define LP_MAX_NUM_FUNCS 33
/*
 * Debug switch, disabled by default.
 * NOTE(review): presumably enables dumping of geometry shader vertex
 * emits; its use is not in this part of the file -- confirm.
 */
#define DUMP_GS_EMITS 0
/*
 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
 * instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0
/*
 * Emit code that prints the value of one register channel.
 *
 * The printed value is prefixed with a label of the form
 * " FILE[index].c = ", where FILE is the TGSI file name and c is one of
 * x/y/z/w selected by 'chan'.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   static const char chan_names[] = "xyzw";
   char label[32];

   util_snprintf(label, sizeof label, " %s[%u].%c = ",
                 tgsi_file_name(file), index, chan_names[chan]);
   lp_build_print_value(gallivm, label, value);
}
/*
 * Fetch the context of the function currently being translated, i.e. the
 * top entry of the function stack.
 * (This is always 'main' if the shader doesn't do any function calls.)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   struct function_ctx *top;

   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   top = mask->function_stack + (mask->function_stack_size - 1);
   return top;
}
/*
 * Tell whether a loop is open anywhere on the call chain.
 * The check is global: a loop opened in any function on the function
 * stack counts, even when the current function has opened no loop itself.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int depth = mask->function_stack_size;

   /* Walk from the innermost function down to main (index 0). */
   while (depth-- > 0) {
      if (mask->function_stack[depth].loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}
/*
 * Tell whether a switch statement is open anywhere on the call chain.
 * The check is global: a switch opened in any function on the function
 * stack counts, even when the current function has opened none itself.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int depth = mask->function_stack_size;

   /* Walk from the innermost function down to main (index 0). */
   while (depth-- > 0) {
      if (mask->function_stack[depth].switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}
/*
 * Tell whether a conditional is open anywhere on the call chain.
 * The check is global: a conditional opened in any function on the
 * function stack counts, even when the current function has opened none.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int depth = mask->function_stack_size;

   /* Walk from the innermost function down to main (index 0). */
   while (depth-- > 0) {
      if (mask->function_stack[depth].cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}
/*
* Initialize a function context at the specified index.
*/
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
LLVMBuilderRef builder = mask->bld->gallivm->builder;
struct function_ctx *ctx = &mask->function_stack[function_idx];
ctx->cond_stack_size = 0;
ctx->loop_stack_size = 0;
ctx->switch_stack_size = 0;
if (function_idx == 0) {
ctx->ret_mask = mask->ret_mask;
}
ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
int_type, "looplimiter");
LLVMBuildStore(
builder,
LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
ctx->loop_limiter);
}
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
mask->bld = bld;
mask->has_mask = FALSE;
mask->ret_in_main = FALSE;
/* For the main function */
mask->function_stack_size = 1;
mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
mask->cond_mask = mask->switch_mask =
LLVMConstAllOnes(mask->int_vec_type);
mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
sizeof(mask->function_stack[0]));
lp_exec_mask_function_init(mask, 0);
}
/*
 * Release the function stack allocated by lp_exec_mask_init().
 */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
FREE(mask->function_stack);
}
/*
 * Recompute mask->exec_mask and mask->has_mask from the component masks.
 *
 * The execution mask starts from cond_mask and is ANDed with:
 *  - cont_mask & break_mask, when inside any loop,
 *  - switch_mask, when inside any switch,
 *  - ret_mask, when inside a subroutine or after a RET seen in main.
 * has_mask is set when any of those (or a conditional) is active.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   const boolean in_loop = mask_has_loop(mask);
   const boolean in_cond = mask_has_cond(mask);
   const boolean in_switch = mask_has_switch(mask);
   const boolean need_ret = mask->function_stack_size > 1 ||
                            mask->ret_in_main;
   LLVMValueRef combined = mask->cond_mask;

   if (in_loop) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef loop_bits;

      assert(mask->break_mask);
      loop_bits = LLVMBuildAnd(builder, mask->cont_mask,
                               mask->break_mask, "maskcb");
      combined = LLVMBuildAnd(builder, mask->cond_mask,
                              loop_bits, "maskfull");
   }

   if (in_switch) {
      combined = LLVMBuildAnd(builder, combined,
                              mask->switch_mask, "switchmask");
   }

   if (need_ret) {
      combined = LLVMBuildAnd(builder, combined,
                              mask->ret_mask, "callmask");
   }

   mask->exec_mask = combined;
   mask->has_mask = (in_cond || in_loop || in_switch || need_ret);
}
/*
 * Enter a conditional: save the current cond_mask on the conditional stack
 * and AND 'val' into it.
 *
 * Once the stack is full (LP_MAX_TGSI_NESTING entries) further levels are
 * only counted; the mask is left untouched for them.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   struct function_ctx *fctx = func_ctx(mask);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (fctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      /* No room left: just keep the nesting count balanced. */
      fctx->cond_stack_size += 1;
      return;
   }

   if (mask->function_stack_size == 1 && fctx->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   fctx->cond_stack[fctx->cond_stack_size] = mask->cond_mask;
   fctx->cond_stack_size += 1;

   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder, mask->cond_mask, val, "");
   lp_exec_mask_update(mask);
}
/*
 * Flip the current conditional mask (the 'else' part of an if/else).
 *
 * The new cond_mask is ~cond_mask & prev_mask, where prev_mask is the mask
 * saved by the matching lp_exec_mask_cond_push().
 *
 * Levels beyond LP_MAX_TGSI_NESTING were only counted by the push, never
 * stored, so they are skipped here.  The bound is '>' and not '>=': a stack
 * size of exactly LP_MAX_TGSI_NESTING means the last slot (index
 * LP_MAX_TGSI_NESTING - 1) was really pushed, so that level must still be
 * inverted.  lp_exec_mask_cond_pop() restores that very slot, and the other
 * post-push checks (endloop, endswitch, case) use '>' as well.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size > LP_MAX_TGSI_NESTING)
      return;

   /* Mask that was active before the matching push. */
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
   mask->cond_mask = LLVMBuildAnd(builder, inv_mask, prev_mask, "");
   lp_exec_mask_update(mask);
}
/*
 * Leave a conditional: drop one level from the conditional stack and, if
 * that level had actually been stored, restore the saved cond_mask.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *fctx = func_ctx(mask);

   assert(fctx->cond_stack_size);
   fctx->cond_stack_size -= 1;

   /* Levels at or above the limit were only counted, nothing to restore. */
   if (fctx->cond_stack_size < LP_MAX_TGSI_NESTING) {
      mask->cond_mask = fctx->cond_stack[fctx->cond_stack_size];
      lp_exec_mask_update(mask);
   }
}
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
struct function_ctx *ctx = func_ctx(mask);
if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
++ctx->loop_stack_size;
return;
}
ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
ctx->break_type;
ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
++ctx->loop_stack_size;
ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
LLVMBuildBr(builder, ctx->loop_block);
LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
lp_exec_mask_update(mask);
}
/*
 * Handle a break.
 *
 * Inside a loop the currently active lanes are removed from break_mask.
 * Inside a switch, the instruction following the break decides: if it is
 * ENDSWITCH or CASE the break is treated as unconditional and the whole
 * switch_mask is cleared, otherwise only the active lanes are removed.
 * While executing a deferred default (switch_in_default with a recorded
 * switch_pc) an unconditional break jumps back to switch_pc instead.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fctx = func_ctx(mask);
   LLVMValueRef inactive;
   unsigned next_opcode;
   boolean ends_clause;

   if (fctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      inactive = LLVMBuildNot(builder, mask->exec_mask, "break");
      mask->break_mask = LLVMBuildAnd(builder, mask->break_mask,
                                      inactive, "break_full");
      lp_exec_mask_update(mask);
      return;
   }

   next_opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
   ends_clause = (next_opcode == TGSI_OPCODE_ENDSWITCH ||
                  next_opcode == TGSI_OPCODE_CASE);

   /*
    * stop default execution but only if this is an unconditional switch.
    * (The condition here is not perfect since dead code after break is
    * allowed but should be sufficient since false negatives are just
    * unoptimized - so we don't have to pre-evaluate that).
    */
   if (fctx->switch_in_default && ends_clause && fctx->switch_pc) {
      bld_base->pc = fctx->switch_pc;
      return;
   }

   if (ends_clause) {
      mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
   }
   else {
      inactive = LLVMBuildNot(builder, mask->exec_mask, "break");
      mask->switch_mask = LLVMBuildAnd(builder, mask->switch_mask,
                                       inactive, "break_switch");
   }

   lp_exec_mask_update(mask);
}
/*
 * Handle a continue: remove the currently active lanes from cont_mask and
 * recompute the execution mask.
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef inactive;

   inactive = LLVMBuildNot(builder, mask->exec_mask, "");
   mask->cont_mask = LLVMBuildAnd(builder, mask->cont_mask, inactive, "");
   lp_exec_mask_update(mask);
}
/*
 * End a loop.
 *
 * Restores the continue mask, saves the break mask into the break variable
 * so it survives into the next iteration, decrements the loop limiter and
 * emits the back edge: the loop repeats while any lane is still active and
 * the limiter is still positive.  After the branch the builder is
 * positioned in a new "endloop" block and the enclosing loop state is
 * popped.  Loops that were only counted (beyond LP_MAX_TGSI_NESTING) are
 * just uncounted.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fctx = func_ctx(mask);
   LLVMTypeRef i32_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* One integer wide enough to hold the whole mask vector. */
   LLVMTypeRef flat_type = LLVMIntTypeInContext(gallivm->context,
                                                mask->bld->type.width *
                                                mask->bld->type.length);
   LLVMValueRef budget, flat_mask, lanes_alive, budget_left, keep_looping;
   LLVMBasicBlockRef exit_block;
   int slot;

   assert(mask->break_mask);
   assert(fctx->loop_stack_size);

   if (fctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      fctx->loop_stack_size -= 1;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = fctx->loop_stack[fctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, fctx->break_var);

   /* Decrement the loop limiter */
   budget = LLVMBuildLoad(builder, fctx->loop_limiter, "");
   budget = LLVMBuildSub(builder, budget,
                         LLVMConstInt(i32_type, 1, false), "");
   LLVMBuildStore(builder, budget, fctx->loop_limiter);

   /* lanes_alive = (mask != 0) */
   flat_mask = LLVMBuildBitCast(builder, mask->exec_mask, flat_type, "");
   lanes_alive = LLVMBuildICmp(builder, LLVMIntNE, flat_mask,
                               LLVMConstNull(flat_type), "i1cond");

   /* budget_left = (looplimiter > 0) */
   budget_left = LLVMBuildICmp(builder, LLVMIntSGT, budget,
                               LLVMConstNull(i32_type), "i2cond");

   /* loop again only if both hold */
   keep_looping = LLVMBuildAnd(builder, lanes_alive, budget_left, "");

   exit_block = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
   LLVMBuildCondBr(builder, keep_looping, fctx->loop_block, exit_block);
   LLVMPositionBuilderAtEnd(builder, exit_block);

   /* Pop the state of the enclosing loop. */
   assert(fctx->loop_stack_size);
   fctx->loop_stack_size -= 1;
   slot = fctx->loop_stack_size;

   mask->cont_mask = fctx->loop_stack[slot].cont_mask;
   mask->break_mask = fctx->loop_stack[slot].break_mask;
   fctx->loop_block = fctx->loop_stack[slot].loop_block;
   fctx->break_var = fctx->loop_stack[slot].break_var;
   fctx->break_type = fctx->break_type_stack[slot + fctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
/*
 * Begin a switch statement on 'switchval'.
 *
 * Saves the enclosing switch state and break type, then starts with an
 * empty switch_mask and an empty "some case matched" mask; lanes are
 * enabled later by the individual case/default handlers.
 * Switches beyond the nesting limit are only counted.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *fctx = func_ctx(mask);
   int slot;

   if (fctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       fctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      fctx->switch_stack_size += 1;
      return;
   }

   slot = fctx->switch_stack_size;

   /* Remember what kind of construct a BRK applied to so far. */
   fctx->break_type_stack[fctx->loop_stack_size + slot] = fctx->break_type;
   fctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Save the state of the enclosing switch. */
   fctx->switch_stack[slot].switch_mask = mask->switch_mask;
   fctx->switch_stack[slot].switch_val = fctx->switch_val;
   fctx->switch_stack[slot].switch_mask_default = fctx->switch_mask_default;
   fctx->switch_stack[slot].switch_in_default = fctx->switch_in_default;
   fctx->switch_stack[slot].switch_pc = fctx->switch_pc;
   fctx->switch_stack_size = slot + 1;

   /* Fresh state for the new switch. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   fctx->switch_val = switchval;
   fctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   fctx->switch_in_default = false;
   fctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
/*
 * End a switch statement.
 *
 * If a default clause was deferred (switch_pc recorded but the default has
 * not been executed yet), the switch mask is set to the lanes no case
 * matched and translation jumps back to the recorded default; switch_pc is
 * re-pointed so that the next break returns here.  Otherwise the enclosing
 * switch state and break type are popped.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fctx = func_ctx(mask);
   int slot;

   if (fctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      fctx->switch_stack_size -= 1;
      return;
   }

   if (fctx->switch_pc) {
      if (!fctx->switch_in_default) {
         /* there's a deferred default, do it now */
         LLVMValueRef outer_mask, unmatched;
         unsigned resume_pc;

         outer_mask = fctx->switch_stack[fctx->switch_stack_size - 1].switch_mask;
         unmatched = LLVMBuildNot(builder, fctx->switch_mask_default,
                                  "sw_default_mask");
         mask->switch_mask = LLVMBuildAnd(builder, outer_mask, unmatched,
                                          "sw_mask");
         fctx->switch_in_default = true;

         lp_exec_mask_update(mask);

         assert(bld_base->instructions[fctx->switch_pc - 1].Instruction.Opcode ==
                TGSI_OPCODE_DEFAULT);

         resume_pc = bld_base->pc;
         bld_base->pc = fctx->switch_pc;
         /*
          * re-purpose switch_pc to point to here again, since we stop
          * execution of the deferred default after next break.
          */
         fctx->switch_pc = resume_pc - 1;
         return;
      }

      /* Back from the deferred default. */
      assert(bld_base->pc == fctx->switch_pc + 1);
   }

   /* Pop the state of the enclosing switch. */
   fctx->switch_stack_size -= 1;
   slot = fctx->switch_stack_size;

   mask->switch_mask = fctx->switch_stack[slot].switch_mask;
   fctx->switch_val = fctx->switch_stack[slot].switch_val;
   fctx->switch_mask_default = fctx->switch_stack[slot].switch_mask_default;
   fctx->switch_in_default = fctx->switch_stack[slot].switch_in_default;
   fctx->switch_pc = fctx->switch_stack[slot].switch_pc;
   fctx->break_type = fctx->break_type_stack[fctx->loop_stack_size + slot];

   lp_exec_mask_update(mask);
}
/*
 * Handle a case label with value 'caseval'.
 *
 * Lanes whose switch value equals caseval are enabled in switch_mask (in
 * addition to lanes already enabled, i.e. fallthrough), restricted to the
 * mask of the enclosing switch.  The matches are also accumulated in
 * switch_mask_default so that the default clause can later select the
 * lanes no case matched.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fctx = func_ctx(mask);
   LLVMValueRef outer_mask, hit, enabled;

   if (fctx->switch_stack_size > LP_MAX_TGSI_NESTING)
      return;

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (fctx->switch_in_default)
      return;

   outer_mask = fctx->switch_stack[fctx->switch_stack_size - 1].switch_mask;
   hit = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, fctx->switch_val);
   fctx->switch_mask_default = LLVMBuildOr(builder, hit,
                                           fctx->switch_mask_default,
                                           "sw_default_mask");
   enabled = LLVMBuildOr(builder, hit, mask->switch_mask, "");
   mask->switch_mask = LLVMBuildAnd(builder, enabled, outer_mask, "sw_mask");

   lp_exec_mask_update(mask);
}
/*
 * Analyse a default statement in a switch by scanning forward from the
 * current pc.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start  receives the pc just before the instruction that
 *                          terminated the scan (the next case of this
 *                          switch, or its endswitch); used as jump target
 *                          if default wasn't last but there's no
 *                          fallthrough into default.  Left untouched when
 *                          the switch is beyond the nesting limit.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned scan = bld_base->pc;
   struct function_ctx *fctx = func_ctx(mask);
   int depth = fctx->switch_stack_size;

   if (fctx->switch_stack_size > LP_MAX_TGSI_NESTING)
      return false;

   /* skip over case statements which are together with default */
   while (bld_base->instructions[scan].Instruction.Opcode == TGSI_OPCODE_CASE)
      scan++;

   for (; scan != ~0u && scan < bld_base->num_instructions; scan++) {
      unsigned opcode = bld_base->instructions[scan].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_SWITCH) {
         /* entering a nested switch */
         depth++;
      }
      else if (opcode == TGSI_OPCODE_CASE) {
         if (depth == fctx->switch_stack_size) {
            /* another case of our own switch follows the default */
            *default_pc_start = scan - 1;
            return false;
         }
      }
      else if (opcode == TGSI_OPCODE_ENDSWITCH) {
         if (depth == fctx->switch_stack_size) {
            /* reached the end of our own switch first */
            *default_pc_start = scan - 1;
            return true;
         }
         depth--;
      }
   }

   /* should never arrive here */
   assert(0);
   return true;
}
/*
 * Handle the default label of a switch.
 *
 * If default is the last clause, the switch mask is updated right away to
 * the lanes no case matched (plus lanes falling through into it).
 * Otherwise the pc of the default is recorded in ctx->switch_pc so that
 * lp_exec_endswitch() can come back to it, and, unless there is fallthrough
 * into the default, translation skips ahead to the pc reported by
 * default_analyse_is_last().
 * Does nothing for switches beyond the nesting limit.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
struct lp_build_tgsi_context * bld_base)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
struct function_ctx *ctx = func_ctx(mask);
int default_exec_pc;
boolean default_is_last;
if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
return;
}
/*
* This is a messy opcode, because it may not be always at the end and
* there can be fallthrough in and out of it.
*/
default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
/*
* If it is last statement in switch (note that case statements appearing
* "at the same time" as default don't change that) everything is just fine,
* update switch mask and go on. This means we can handle default with
* fallthrough INTO it without overhead, if it is last.
*/
if (default_is_last) {
LLVMValueRef prevmask, defaultmask;
/* mask of the enclosing switch, saved when this switch was entered */
prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
/* lanes no case matched, plus lanes already enabled (fallthrough) */
defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
ctx->switch_in_default = true;
lp_exec_mask_update(mask);
}
else {
/*
* Technically, "case" immediately before default isn't really a
* fallthrough, however we still have to count them as such as we
* already have updated the masks.
* If that happens in practice could add a switch optimizer pass
* which just gets rid of all case statements appearing together with
* default (or could do switch analysis at switch start time instead).
*/
unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
/* anything other than BRK or SWITCH before default counts as fallthrough */
boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
opcode != TGSI_OPCODE_SWITCH);
/*
* If it is not last statement and there was no fallthrough into it,
* we record the PC and continue execution at next case (again, those
* case encountered at the same time don't count). At endswitch
* time, we update switchmask, and go back executing the code we skipped
* until the next break (possibly re-executing some code with changed mask
* if there was a fallthrough out of default).
* Finally, if it is not last statement and there was a fallthrough into it,
* do the same as with the former case, except instead of skipping the code
* just execute it without updating the mask, then go back and re-execute.
*/
ctx->switch_pc = bld_base->pc;
if (!ft_into) {
bld_base->pc = default_exec_pc;
}
}
}
/*
 * Store 'val' to the address 'dst_ptr', honouring the execution mask.
 *
 * When a mask is in effect (mask->has_mask), the destination is loaded
 * first and only the elements selected by mask->exec_mask are replaced by
 * 'val'; the remaining elements keep their old contents.  Without a mask
 * the value is stored directly.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef active = mask->has_mask ? mask->exec_mask : NULL;
   LLVMValueRef to_store = val;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   if (active) {
      /* read-modify-write: merge with what is already there */
      LLVMValueRef old = LLVMBuildLoad(builder, dst_ptr, "");
      to_store = lp_build_select(bld_store, active, val, old);
   }

   LLVMBuildStore(builder, to_store, dst_ptr);
}
static void lp_exec_mask_call(struct lp_exec_mask *mask,
int func,
int *pc)
{
if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
return;
}
lp_exec_mask_function_init(mask, mask->function_stack_size);
mask->function_stack[mask->function_stack_size].pc = *pc;
mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
mask->function_stack_size++;
*pc = func;
}
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
struct function_ctx *ctx = func_ctx(mask);
LLVMValueRef exec_mask;
if (ctx->cond_stack_size == 0 &&
ctx->loop_stack_size == 0 &&
ctx->switch_stack_size == 0 &&
mask->function_stack_size == 1) {
/* returning from main() */
*pc = -1;
return;
}
if (mask->function_stack_size == 1) {
/*
* This requires special handling since we need to ensure
* we don't drop the mask even if we have no call stack
* (e.g. after a ret in a if clause after the endif)
*/
mask->ret_in_main = TRUE;
}
exec_mask = LLVMBuildNot(builder,
mask->exec_mask,
"ret");
mask->ret_mask = LLVMBuildAnd(builder,
mask->ret_mask,
exec_mask, "ret_full");
lp_exec_mask_update(mask);
}
/*
 * Counterpart of lp_exec_mask_endsub(); intentionally does nothing.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
/*
 * Leave a subroutine: pop the top function context, restore the pc and the
 * return mask that were saved by lp_exec_mask_call(), and recompute the
 * execution mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *callee;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   /* The entry stays readable after the pop; only the size changes. */
   callee = func_ctx(mask);
   mask->function_stack_size -= 1;

   *pc = callee->pc;
   mask->ret_mask = callee->ret_mask;

   lp_exec_mask_update(mask);
}
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned file,
int index,
unsigned chan)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
LLVMValueRef var_of_array;
switch (file) {
case TGSI_FILE_TEMPORARY:
array_of_vars = bld->temps;
var_of_array = bld->temps_array;
break;
case TGSI_FILE_OUTPUT:
array_of_vars = bld->outputs;
var_of_array = bld->outputs_array;
break;
default:
assert(0);
return NULL;
}
assert(chan < 4);
if (bld->indirect_files & (1 << file)) {
LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
}
else {
assert(index <= bld->bld_base.info->file_max[file]);
return array_of_vars[index][chan];
}
}
/**
 * Look up the pointer to one channel of a temporary register (usable as
 * source or destination).
 * The register is selected by a constant index; indirect addressing
 * cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   LLVMValueRef temp_ptr = get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);

   return temp_ptr;
}
/**
 * Look up the pointer to one channel of an output register (usable as
 * source or destination).
 * The register is selected by a constant index; indirect addressing
 * cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   LLVMValueRef output_ptr = get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);

   return output_ptr;
}
/*
 * When outputs are indirectly addressed they live in our alloca array.
 * In that case fill the caller-visible bld->outputs slots with pointers
 * into that array, so that outputs are always delivered through the same
 * interface.  Nothing is done when outputs are not indirectly addressed.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   unsigned reg, chan;

   if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT)))
      return;

   assert(bld->bld_base.info->num_outputs <=
          bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);

   for (reg = 0; reg < bld->bld_base.info->num_outputs; ++reg) {
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         bld->outputs[reg][chan] = lp_get_output_ptr(bld, reg, chan);
      }
   }
}
/**
 * Gather a vector, one scalar load per element.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 *
 * \param base_ptr  base pointer the per-element indexes are applied to
 * \param indexes  vector of element indexes
 * \param overflow_mask  optional vector marking out-of-bounds elements;
 *                       those elements read index 0 and return zero
 * \param indexes2  optional second index vector.  When given, twice as
 *                  many scalars are gathered: even result elements come
 *                  from 'indexes' and odd ones from 'indexes2'.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   const boolean paired = (indexes2 != NULL);
   LLVMValueRef gathered;
   unsigned count;
   unsigned n;

   if (paired) {
      LLVMTypeRef f32_type = LLVMFloatTypeInContext(gallivm->context);
      gathered = LLVMGetUndef(LLVMVectorType(f32_type,
                                             bld_base->base.type.length * 2));
   }
   else {
      gathered = bld->undef;
   }

   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */
   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (paired)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Load one scalar per destination element and insert it into the result.
    */
   count = bld->type.length * (paired ? 2 : 1);
   for (n = 0; n < count; n++) {
      LLVMValueRef dst_lane = lp_build_const_int32(bld->gallivm, n);
      LLVMValueRef src_lane = paired ?
         lp_build_const_int32(bld->gallivm, n >> 1) : dst_lane;
      LLVMValueRef src_vec = (paired && (n & 1)) ? indexes2 : indexes;
      LLVMValueRef offset, elem_ptr, elem;

      offset = LLVMBuildExtractElement(builder, src_vec, src_lane, "");
      elem_ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "gather_ptr");
      elem = LLVMBuildLoad(builder, elem_ptr, "");
      gathered = LLVMBuildInsertElement(builder, gathered, elem, dst_lane, "");
   }

   if (!overflow_mask)
      return gathered;

   /* Zero out the elements that were out of bounds. */
   if (paired) {
      struct lp_build_context *dbl_bld = &bld_base->dbl_bld;

      gathered = LLVMBuildBitCast(builder, gathered, dbl_bld->vec_type, "");
      overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                    dbl_bld->int_vec_type, "");
      gathered = lp_build_select(dbl_bld, overflow_mask,
                                 dbl_bld->zero, gathered);
   }
   else {
      gathered = lp_build_select(bld, overflow_mask, bld->zero, gathered);
   }

   return gathered;
}
/**
 * Scatter/store a vector, one scalar store per element.
 *
 * Each element of 'values' is written to base_ptr[indexes[i]].  When an
 * execution mask is in effect, the old memory contents are loaded and kept
 * for elements whose mask element is not set.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef active = mask->has_mask ? mask->exec_mask : NULL;
   unsigned lane;

   for (lane = 0; lane < bld->bld_base.base.type.length; lane++) {
      LLVMValueRef lane_idx = lp_build_const_int32(gallivm, lane);
      LLVMValueRef offset = LLVMBuildExtractElement(builder, indexes, lane_idx, "");
      LLVMValueRef dst = LLVMBuildGEP(builder, base_ptr, &offset, 1, "scatter_ptr");
      LLVMValueRef src = LLVMBuildExtractElement(builder, values, lane_idx, "scatter_val");
      LLVMValueRef lane_on = NULL;

      if (active)
         lane_on = LLVMBuildExtractElement(builder, active, lane_idx, "scatter_pred");

      /* debug aid, compiled out */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         lane_idx, src, offset, dst);

      if (lane_on) {
         /* masked: keep the old value where the lane is off */
         LLVMValueRef old = LLVMBuildLoad(builder, dst, "");
         src = lp_build_select(&bld->elem_bld, lane_on, src, old);
      }

      LLVMBuildStore(builder, src, dst);
   }
}
/**
* Read the current value of the ADDR register, convert the floats to
* ints, add the base index and return the vector of offsets.
* The offsets will be used to index into the constant buffer or
* temporary register file.
*/
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
unsigned reg_file, unsigned reg_index,
const struct tgsi_ind_register *indirect_reg)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
/* always use X component of address register */
unsigned swizzle = indirect_reg->Swizzle;
LLVMValueRef base;
LLVMValueRef rel;
LLVMValueRef max_index;
LLVMValueRef index;
assert(bld->indirect_files & (1 << reg_file));
base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
assert(swizzle < 4);
switch (indirect_reg->File) {
case TGSI_FILE_ADDRESS:
rel = LLVMBuildLoad(builder,
bld->addr[indirect_reg->Index][swizzle],
"load addr reg");
/* ADDR LLVM values already have LLVM integer type. */
break;
case TGSI_FILE_TEMPORARY:
rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
rel = LLVMBuildLoad(builder, rel, "load temp reg");
/* TEMP LLVM values always have LLVM float type, but for indirection, the
* value actually stored is expected to be an integer */
rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
break;
default:
assert(0);
rel = uint_bld->zero;
}
index = lp_build_add(uint_bld, base, rel);
/*
* emit_fetch_constant handles constant buffer overflow so this code
* is pointless for them.
* Furthermore the D3D10 spec in section 6.5 says:
* If the constant buffer bound to a slot is larger than the size
* declared in the shader for that slot, implementations are allowed
* to return incorrect data (not necessarily 0) for indices that are
* larger than the declared size but smaller than the buffer size.
*/
if (reg_file != TGSI_FILE_CONSTANT) {
max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
uint_bld->type,
bld->bld_base.info->file_max[reg_file]);
assert(!uint_bld->type.sign);
index = lp_build_min(uint_bld, index, max_index);
}
return index;
}
/*
 * Map a TGSI opcode source type to the build context used for fetching
 * values of that type.  FLOAT and UNTYPED share the base (float) context.
 * VOID and unknown types assert and yield NULL.
 */
static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      return &bld_base->base;
   case TGSI_TYPE_UNSIGNED:
      return &bld_base->uint_bld;
   case TGSI_TYPE_SIGNED:
      return &bld_base->int_bld;
   case TGSI_TYPE_DOUBLE:
      return &bld_base->dbl_bld;
   case TGSI_TYPE_UNSIGNED64:
      return &bld_base->uint64_bld;
   case TGSI_TYPE_SIGNED64:
      return &bld_base->int64_bld;
   case TGSI_TYPE_VOID:
   default:
      break;
   }

   assert(0);
   return NULL;
}
/*
 * Turn a vector of register indices into element offsets for an SoA
 * register array:
 *
 *    offset = (indirect_index * 4 + chan_index) * length [+ element number]
 *
 * where length is the vector length of uint_bld.  The per-element term
 * {0, 1, 2, ...} is only added when need_perelement_offset is set.
 */
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_splat =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_splat =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef offsets;
   LLVMValueRef lane_ids;
   unsigned lane;

   offsets = lp_build_shl_imm(uint_bld, indirect_index, 2);
   offsets = lp_build_add(uint_bld, offsets, chan_splat);
   offsets = lp_build_mul(uint_bld, offsets, length_splat);

   if (!need_perelement_offset)
      return offsets;

   /* build the element number vector: {0, 1, 2, 3, ...} */
   lane_ids = uint_bld->undef;
   for (lane = 0; lane < uint_bld->type.length; lane++) {
      LLVMValueRef lane_const = lp_build_const_int32(gallivm, lane);

      lane_ids = LLVMBuildInsertElement(gallivm->builder, lane_ids,
                                        lane_const, lane_const, "");
   }

   return lp_build_add(uint_bld, offsets, lane_ids);
}
/**
 * Fetch one channel of a CONSTANT-file source register.
 *
 * The constant buffer is selected by reg->Dimension.Index when the
 * register is two-dimensional, otherwise buffer 0 is used.
 *
 * - Direct access loads the scalar at element Register.Index * 4 + swizzle
 *   (reinterpreting the pointer for 64-bit types) and broadcasts it.
 * - Indirect access builds per-lane element indices and gathers them,
 *   handing build_gather a mask of the lanes whose register index is
 *   greater than or equal to the size of the bound buffer.
 *
 * @param bld_base  TGSI build context
 * @param reg       source register to fetch from
 * @param stype     type the caller wants the value as
 * @param swizzle   channel to fetch; must not be ~0u
 * @return the fetched vector, bitcast to stype's vector type for integer
 *         and 64-bit types
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* The second half of a 64-bit value lives in the next channel. */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }

      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;

      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      /* For 64-bit types, load through a pointer of the wider type and
       * broadcast with the matching context. */
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }

      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
/**
 * Combine two per-channel vectors into one vector of 64-bit values.
 *
 * A 64-bit value is stored split across two channels (xy or zw).  The
 * two inputs are interleaved element by element -- input[0], input2[0],
 * input[1], input2[1], ... -- giving a vector of twice the base length,
 * which is then bitcast to the vector type matching stype.
 *
 * @param bld_base  TGSI build context
 * @param stype     64-bit type that selects the result vector type
 * @param input     vector holding the first channel
 * @param input2    vector holding the second channel
 * @return the interleaved data viewed as stype's vector type
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   LLVMValueRef interleaved;
   int lanes = bld_base->base.type.length;
   int len = lanes * 2;
   int lane;

   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   /*
    * Shuffle indices below `lanes` select from input, the rest select
    * from input2.
    */
   for (lane = 0; lane < lanes; lane++) {
      shuffles[2 * lane] = lp_build_const_int32(gallivm, lane);
      shuffles[2 * lane + 1] = lp_build_const_int32(gallivm, lane + lanes);
   }

   interleaved = LLVMBuildShuffleVector(builder, input, input2,
                                        LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, interleaved, bld_fetch->vec_type, "");
}
/**
 * Fetch one channel of an IMMEDIATE-file source register.
 *
 * Three paths exist:
 * - indirect addressing: gather from the immediates array, without
 *   per-lane offsets;
 * - direct addressing with bld->use_immediates_array set: load the
 *   vector stored at element Register.Index * 4 + swizzle of the array;
 * - otherwise: use the value kept in bld->immediates[][].
 *
 * For 64-bit types the channel at swizzle + 1 is fetched as well and
 * merged in (through build_gather's second index vector in the indirect
 * case, through emit_fetch_64bit otherwise).
 *
 * @param bld_base  TGSI build context
 * @param reg       source register to fetch from
 * @param stype     type the caller wants the value as
 * @param swizzle   channel to fetch
 * @return the fetched vector, bitcast to stype's vector type for integer
 *         and 64-bit types
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;

         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle + 1,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
static LLVMValueRef
emit_fetch_input(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
unsigned swizzle)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res;
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
LLVMValueRef index_vec; /* index into the input reg array */
LLVMValueRef index_vec2 = NULL;
LLVMValueRef inputs_array;
LLVMTypeRef fptr_type;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
®->Indirect);
index_vec = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle,
TRUE);
if (tgsi_type_is_64bit(stype)) {
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle + 1,
TRUE);
}
/* cast inputs_array pointer to float* */
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
/* Gather values from the input register array */
res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
} else {
if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle);
LLVMValueRef input_ptr = LLVMBuildGEP(builder,
bld->inputs_array, &lindex, 1, "");
res = LLVMBuildLoad(builder, input_ptr, "");
if (tgsi_type_is_64bit(stype)) {
LLVMValueRef lindex1;
LLVMValueRef input_ptr2;
LLVMValueRef res2;
lindex1 = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle + 1);
input_ptr2 = LLVMBuildGEP(builder,
bld->inputs_array, &lindex1, 1, "");
res2 = LLVMBuildLoad(builder, input_ptr2, "");
res = emit_fetch_64bit(bld_base, stype, res, res2);
}
}
else {
res = bld->inputs[reg->Register.Index][swizzle];
if (tgsi_type_is_64bit(stype))
res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
}
}
assert(res);
if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
return res;
}
/**
 * Fetch one channel of a geometry shader input through bld->gs_iface.
 *
 * An input whose semantic is TGSI_SEMANTIC_PRIMID is served straight from
 * bld->system_values.prim_id.  Every other input is described by a vertex
 * index (from the register's dimension) and an attribute index (from the
 * register index); each may be direct or indirect.
 *
 * For 64-bit types the channel at swizzle + 1 is fetched too and merged
 * in with emit_fetch_64bit.
 *
 * @param bld_base  TGSI build context
 * @param reg       source register to fetch from
 * @param stype     type the caller wants the value as
 * @param swizzle   channel to fetch
 * @return the fetched vector, cast according to stype
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Named distinctly so it does not shadow the outer swizzle_index. */
      LLVMValueRef swizzle_index2 = lp_build_const_int32(gallivm, swizzle + 1);
      LLVMValueRef res2;

      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index2);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
/**
 * Fetch one channel of a TEMPORARY-file source register.
 *
 * Indirect access gathers from bld->temps_array using per-lane element
 * offsets.  Direct access loads through the pointer returned by
 * lp_get_temp_ptr_soa.  For 64-bit types the channel at swizzle + 1 is
 * fetched as well and merged in.
 *
 * @param bld_base  TGSI build context
 * @param reg       source register to fetch from
 * @param stype     type the caller wants the value as
 * @param swizzle   channel to fetch
 * @return the fetched vector, bitcast to stype's vector type for integer
 *         and 64-bit types
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;

      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
static LLVMValueRef
emit_fetch_system_value(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
unsigned swizzle)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
const struct tgsi_shader_info *info = bld->bld_base.info;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res;
enum tgsi_opcode_type atype; // Actual type of the value
assert(!reg->Register.Indirect);
switch (info->system_value_semantic_name[reg->Register.Index]) {
case TGSI_SEMANTIC_INSTANCEID:
res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
atype = TGSI_TYPE_UNSIGNED;
break;
case TGSI_SEMANTIC_VERTEXID:
res = bld->system_values.vertex_id;
atype = TGSI_TYPE_UNSIGNED;
break;
case TGSI_SEMANTIC_VERTEXID_NOBASE:
res = bld->system_values.vertex_id_nobase;
atype = TGSI_TYPE_UNSIGNED;
break;
case TGSI_SEMANTIC_BASEVERTEX:
res = bld->system_values.basevertex;
atype = TGSI_TYPE_UNSIGNED;
break;
case TGSI_SEMANTIC_PRIMID:
res = bld->system_values.prim_id;
atype = TGSI_TYPE_UNSIGNED;
break;
case TGSI_SEMANTIC_INVOCATIONID:
res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
atype = TGSI_TYPE_UNSIGNED;
break;
default:
assert(!"unexpected semantic in emit_fetch_system_value");
res = bld_base->base.zero;
atype = TGSI_TYPE_FLOAT;
break;
}
if (atype != stype) {
if (stype == TGSI_TYPE_FLOAT) {
res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
} else if (stype == TGSI_TYPE_UNSIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
} else if (stype == TGSI_TYPE_SIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
}
}
return res;
}
/**
 * Register fetch with derivatives.
 *
 * Each output pointer is optional; NULL skips that output.
 *
 * @param bld  SoA build context
 * @param src  already fetched source value
 * @param res  receives src itself
 * @param ddx  receives lp_build_ddx() of src
 * @param ddy  receives lp_build_ddy() of src
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   struct lp_build_context *float_bld = &bld->bld_base.base;

   if (res != NULL) {
      *res = src;
   }

   /* TODO: use interpolation coeffs for inputs */

   if (ddx != NULL) {
      *ddx = lp_build_ddx(float_bld, src);
   }

   if (ddy != NULL) {
      *ddy = lp_build_ddy(float_bld, src);
   }
}
/**
 * Split a vector of 64-bit values across two float channels and store
 * both halves under the execution mask.
 *
 * With value holding d0, d1, d2, d3, ... and each 64-bit element made of
 * two 32-bit pieces x and y, the channels receive:
 *    chan_ptr  = d0.x, d1.x, d2.x, d3.x
 *    chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 *
 * The halves are picked with shuffles using the even (x) and odd (y)
 * element indices of value, so value is presumably expected to arrive
 * already viewed as 2 * length 32-bit elements -- confirm against callers.
 *
 * @param bld_base   TGSI build context
 * @param chan_ptr   destination for the even-indexed elements
 * @param chan_ptr2  destination for the odd-indexed elements
 * @param value      data to split and store
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   const unsigned lanes = bld_base->base.type.length;
   LLVMValueRef even_idx[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef odd_idx[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef undef_src = LLVMGetUndef(LLVMTypeOf(value));
   LLVMValueRef first_half, second_half;
   unsigned lane;

   for (lane = 0; lane < lanes; lane++) {
      even_idx[lane] = lp_build_const_int32(gallivm, lane * 2);
      odd_idx[lane] = lp_build_const_int32(gallivm, (lane * 2) + 1);
   }

   first_half = LLVMBuildShuffleVector(builder, value, undef_src,
                                       LLVMConstVector(even_idx, lanes),
                                       "");
   second_half = LLVMBuildShuffleVector(builder, value, undef_src,
                                        LLVMConstVector(odd_idx, lanes),
                                        "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, first_half, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, second_half, chan_ptr2);
}
/**
 * Register store.
 *
 * Stores one channel of an instruction result into destination register
 * inst->Dst[index], honouring the execution mask.
 *
 * - Saturation, when requested, clamps the value to [0, 1] as float.
 * - OUTPUT and TEMPORARY destinations are stored as floats.  Indirect
 *   destinations are scattered into the register array; direct ones are
 *   stored through the register's channel pointer.  A 64-bit result is
 *   viewed as 2 * length floats and split across chan_index and
 *   chan_index + 1.
 * - ADDRESS destinations are stored as signed integers.
 *
 * @param bld_base    TGSI build context
 * @param inst        instruction whose result is being stored
 * @param index       index of the destination operand in inst->Dst
 * @param chan_index  destination channel
 * @param value       value to store
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values, it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /*
       * Outputs are always stored as floats.  A 64-bit value has twice
       * the bit width of the float vector, so it must be viewed as
       * 2 * length floats (as for temporaries) before being split.
       */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   /* dtype is otherwise only read by asserts on some paths. */
   (void)dtype;
}
/*
 * Called at the beginning of the translation of each TGSI instruction, to
 * emit some debug code.
 *
 * Does nothing unless DEBUG_EXECUTION is set.  Otherwise it emits a
 * printf of the instruction text prefixed with "$ ", followed by the
 * execution mask when one is in use.  The info argument is unused.
 */
static void
emit_debug(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm;
   char text[512];

   if (!DEBUG_EXECUTION) {
      return;
   }

   gallivm = bld_base->base.gallivm;

   /* Dump the TGSI instruction; the first two bytes hold the prefix. */
   text[0] = '$';
   text[1] = ' ';
   tgsi_dump_instruction_str(inst, bld_base->pc, text + 2, sizeof text - 2);
   lp_build_printf(gallivm, text);

   /* Dump the execution mask. */
   if (bld->exec_mask.has_mask) {
      lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
   }
}
/**
 * Store the results of an instruction into destination operand `index`.
 *
 * Walks the channels enabled in the destination write mask and stores
 * dst[channel] for each.  For 64-bit destination types channels 1 and 3
 * are skipped.  The info argument is unused.
 *
 * @param bld_base  TGSI build context
 * @param inst      instruction being translated
 * @param info      opcode information
 * @param index     index of the destination operand
 * @param dst       per-channel result values
 */
static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   unsigned index,
   LLVMValueRef dst[4])
{
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
   unsigned pending = inst->Dst[index].Register.WriteMask;

   while (pending != 0) {
      const unsigned chan = u_bit_scan(&pending);
      const boolean odd_chan = (chan == 1 || chan == 3);

      if (!(odd_chan && tgsi_type_is_64bit(dtype))) {
         emit_store_chan(bld_base, inst, index, chan, dst[chan]);
      }
   }
}
/**
 * Map a TGSI texture target to the corresponding PIPE texture target.
 *
 * Shadow and MSAA variants map to the same pipe target as their plain
 * counterpart.  An unknown target asserts and yields PIPE_BUFFER.
 *
 * @param tgsi_target  one of the TGSI_TEXTURE_* values
 * @return the matching PIPE_* texture target
 */
static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   unsigned pipe_target;

   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      pipe_target = PIPE_BUFFER;
      break;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      pipe_target = PIPE_TEXTURE_1D;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      pipe_target = PIPE_TEXTURE_2D;
      break;
   case TGSI_TEXTURE_3D:
      pipe_target = PIPE_TEXTURE_3D;
      break;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      pipe_target = PIPE_TEXTURE_CUBE;
      break;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      pipe_target = PIPE_TEXTURE_RECT;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      pipe_target = PIPE_TEXTURE_1D_ARRAY;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      pipe_target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      pipe_target = PIPE_TEXTURE_CUBE_ARRAY;
      break;
   default:
      assert(0);
      pipe_target = PIPE_BUFFER;
      break;
   }

   return pipe_target;
}
/**
 * Decide how finely the lod has to be evaluated for a texture
 * instruction, based on the register file of source operand src_op.
 *
 * - CONSTANT or IMMEDIATE source: the lod is scalar.
 * - Fragment shader: per quad, or per element when the
 *   GALLIVM_DEBUG_NO_QUAD_LOD debug flag is set.
 * - Any other shader stage: per element.
 *
 * @param bld_base  TGSI build context
 * @param inst      texture instruction
 * @param src_op    index of the source operand to inspect
 * @return the lod property to encode in the sample key
 */
static enum lp_sampler_lod_property
lp_build_lod_property(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned file = reg->Register.File;

   /*
    * Not much we can do here. Inputs declared with constant interpolation
    * could be caught too, but that is probably not worth it: for TEX
    * opcodes as well as FETCH/LD the lod comes from the same reg as the
    * coords, so it could only help SAMPLE/TXQ/SVIEWINFO, just like the
    * constant/immediate recognition below.
    *
    * Recognizing temps that hold broadcasted scalars would be more
    * valuable, but there is no way to do it. Asking llvm (LLVMIsConstant,
    * even though it is not exactly what is needed) did not work even for
    * something as simple as
    *    IMM[0] UINT32 (0,-1,0,0)
    *    MOV TEMP[0] IMM[0].yyyy
    *    SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
    *
    * So there is ZERO chance of catching a scalar lod with traditional
    * tex opcodes or texel fetches (except maybe test shaders using
    * constant coords). There is at least hope for sample opcodes and
    * size queries.
    */
   if (file == TGSI_FILE_CONSTANT || file == TGSI_FILE_IMMEDIATE) {
      return LP_SAMPLER_LOD_SCALAR;
   }

   if (bld_base->info->processor != PIPE_SHADER_FRAGMENT) {
      /* never use scalar (per-quad) lod the results are just too wrong. */
      return LP_SAMPLER_LOD_PER_ELEMENT;
   }

   if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
      return LP_SAMPLER_LOD_PER_ELEMENT;
   }

   return LP_SAMPLER_LOD_PER_QUAD;
}
/**
* High-level instruction translators.
*/
/**
 * Translate an old-style TGSI texture instruction (target taken from
 * inst->Texture.Texture) into a call to the sampler code generator.
 *
 * The function works out, per texture target, how many coordinates and
 * offsets are used and where the layer and shadow reference values live,
 * fetches them, builds the sample key and fills a lp_sampler_params
 * structure that is handed to bld->sampler->emit_tex_sample.
 *
 * When no sampler generator was supplied a warning is printed and the
 * four texel outputs are set to undef.  MSAA and unknown targets assert
 * and return without writing texel.
 *
 * @param bld          SoA build context
 * @param inst         texture instruction
 * @param modifier     lod bias / explicit lod / projection / explicit
 *                     derivative handling
 * @param texel        receives the sampled result
 * @param sampler_reg  index of the source operand whose register index
 *                     is used as both texture and sampler unit
 * @param sampler_op   sampler operation, encoded into the sample key
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target layout: number of offsets and derivatives, plus the
    * source channel holding the layer and the shadow reference value
    * (0 meaning "none").
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* oow = 1 / w, used to scale every coordinate below. */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      /*
       * NOTE(review): this scales coords[2] even when the layer was
       * placed in coords[3] -- confirm this is intended.
       */
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }

   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
/**
 * Emit a SAMPLE-style texture instruction, where the texture unit and the
 * sampler unit are separate operands (src1 and src2 respectively).
 *
 * The texture target is not taken from the instruction; it comes from the
 * sampler view declared for the texture unit (bld->sv[]).
 *
 * @param bld          SoA translation context
 * @param inst         TGSI instruction being translated
 * @param modifier     how the lod is determined (bias, explicit lod,
 *                     lod zero, explicit derivatives, or none)
 * @param compare      if TRUE, request a shadow comparison; the reference
 *                     value is fetched from src3.x into coords[4]
 * @param sample_type  sampler operation, encoded into the sample key
 * @param texel        receives the result channels (four values are
 *                     written when no sampler generator is available)
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      /*
       * NOTE(review): the lod value is fetched from src3 above, but the
       * lod property is derived from src0 here -- confirm whether this
       * should query src3 instead.
       */
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   /* Fetch the used coordinates from src0, pad the rest with undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }

   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* ddx comes from src3, ddy from src4 */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the swizzle of the resource operand (src1) to the result. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
/**
 * Emit an unfiltered texel fetch with integer coordinates.
 *
 * @param bld         SoA translation context
 * @param inst        TGSI instruction being translated
 * @param texel       receives the result channels (four values are
 *                    written when no sampler generator is available)
 * @param is_samplei  if TRUE the texture target is taken from the declared
 *                    sampler view of the unit and the src1 swizzle is
 *                    applied to the result; if FALSE the target comes from
 *                    inst->Texture.Texture
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* the lod lives in the w component of src0 */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   /* the layer index, when present, is always passed in the 3rd slot */
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Only the sample_i path honours the swizzle of the resource operand. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
/**
 * Emit a texture size query.
 *
 * @param bld           SoA translation context
 * @param inst          TGSI instruction being translated
 * @param sizes_out     receives the query results (four undef values are
 *                      written when no sampler generator is available)
 * @param is_sviewinfo  if TRUE the texture target is taken from the
 *                      declared sampler view of the unit, otherwise from
 *                      inst->Texture.Texture
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Buffer and rect targets are queried without a lod operand. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }

   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   /*
    * NOTE(review): this is unconditionally TRUE rather than the
    * is_sviewinfo argument -- confirm that this is intentional.
    */
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
/**
 * Look at the instruction at pc and the four following it.
 *
 * Returns TRUE when the instruction list runs out or an END opcode is
 * reached within that window, or when none of the instructions in the
 * window is a texture, texture query, call, branch, loop or switch opcode.
 * Returns FALSE as soon as one of those opcodes is found.
 */
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   unsigned ahead;

   for (ahead = 0; ahead < 5; ahead++) {
      unsigned opcode;

      if (pc + ahead >= bld->bld_base.info->num_instructions) {
         return TRUE;
      }

      opcode = bld->bld_base.instructions[pc + ahead].Instruction.Opcode;

      switch (opcode) {
      case TGSI_OPCODE_END:
         return TRUE;

      case TGSI_OPCODE_TEX:
      case TGSI_OPCODE_TXP:
      case TGSI_OPCODE_TXD:
      case TGSI_OPCODE_TXB:
      case TGSI_OPCODE_TXL:
      case TGSI_OPCODE_TXF:
      case TGSI_OPCODE_TXQ:
      case TGSI_OPCODE_TEX2:
      case TGSI_OPCODE_TXB2:
      case TGSI_OPCODE_TXL2:
      case TGSI_OPCODE_SAMPLE:
      case TGSI_OPCODE_SAMPLE_B:
      case TGSI_OPCODE_SAMPLE_C:
      case TGSI_OPCODE_SAMPLE_C_LZ:
      case TGSI_OPCODE_SAMPLE_D:
      case TGSI_OPCODE_SAMPLE_I:
      case TGSI_OPCODE_SAMPLE_I_MS:
      case TGSI_OPCODE_SAMPLE_L:
      case TGSI_OPCODE_SVIEWINFO:
      case TGSI_OPCODE_CAL:
      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_SWITCH:
         return FALSE;

      default:
         /* anything else: keep scanning */
         break;
      }
   }

   return TRUE;
}
/**
 * Conditional kill: a fragment is killed if any of the channels of the
 * first source register is negative.
 *
 * Each distinct source component is fetched and tested only once, however
 * often the swizzle repeats it.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *src = &inst->Src[0];
   LLVMValueRef fetched[TGSI_NUM_CHANNELS];
   LLVMValueRef alive = NULL;
   unsigned chan;

   memset(fetched, 0, sizeof(fetched));

   TGSI_FOR_EACH_CHANNEL(chan) {
      /* Resolve which source component this channel actually reads. */
      const unsigned comp = tgsi_util_get_full_src_register_swizzle(src, chan);

      assert(comp < TGSI_NUM_CHANNELS);

      /* Skip components which were already fetched. */
      if (fetched[comp] == NULL) {
         /* TODO: change the comparison operator instead of setting the sign */
         fetched[comp] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan);
      }
   }

   TGSI_FOR_EACH_CHANNEL(chan) {
      if (fetched[chan] != NULL) {
         /* value < 0 gives 0 (killed), otherwise ~0 (kept) */
         LLVMValueRef non_negative =
            lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL,
                         fetched[chan], bld->bld_base.base.zero);

         alive = alive ? LLVMBuildAnd(builder, alive, non_negative, "")
                       : non_negative;
      }
   }

   /* Lanes which are not currently executing must never be killed. */
   if (bld->exec_mask.has_mask) {
      LLVMValueRef inactive =
         LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");

      alive = LLVMBuildOr(builder, alive, inactive, "");
   }

   lp_build_mask_update(bld->mask, alive);

   if (!near_end_of_shader(bld, pc)) {
      lp_build_mask_check(bld->mask);
   }
}
/**
 * Unconditional kill.
 *
 * The only thing restricting it is the execution mask, which is in effect
 * inside loops and conditionals: lanes outside that mask are preserved.
 */
static void
emit_kill(struct lp_build_tgsi_soa_context *bld,
          int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef keep;

   if (!bld->exec_mask.has_mask) {
      /* No execution mask: nothing is kept. */
      keep = LLVMConstNull(bld->bld_base.base.int_vec_type);
   }
   else {
      /* Keep exactly the lanes which are not executing right now. */
      keep = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   }

   lp_build_mask_update(bld->mask, keep);

   if (!near_end_of_shader(bld, pc)) {
      lp_build_mask_check(bld->mask);
   }
}
/**
 * Emit code which will dump the values of the registers of the given
 * register file.
 *
 * Constant, input, temporary and output files are handled; any other file
 * trips an assertion.  At most the registers with index 0..32 are dumped.
 *
 * @param bld   SoA translation context
 * @param file  TGSI register file to dump
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only has a bit for the first 8 * sizeof(unsigned) regs */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0) {
         /* This was not declared.*/
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Build an identity-swizzled source register for the fetch. */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
/**
 * Process a TGSI declaration.
 *
 * Temporaries and outputs get one alloca per channel unless the file is
 * accessed indirectly; address registers get integer-vector allocas;
 * sampler view declarations are recorded in bld->sv[]; for constants the
 * per-buffer pointer and size are looked up once and cached.  Declarations
 * of other files need no work.
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned reg, chan;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (reg = first; reg <= last; ++reg) {
            for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
               bld->temps[reg][chan] =
                  lp_build_alloca(gallivm, vec_type, "temp");
            }
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (reg = first; reg <= last; ++reg) {
            for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
               bld->outputs[reg][chan] =
                  lp_build_alloca(gallivm, vec_type, "output");
            }
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (reg = first; reg <= last; ++reg) {
         assert(reg < LP_MAX_TGSI_ADDRS);
         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
            bld->addr[reg][chan] =
               lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
         }
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (reg = first; reg <= last; ++reg) {
         bld->sv[reg] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned buffer = decl->Dim.Index2D;
      LLVMValueRef buffer_index = lp_build_const_int32(gallivm, buffer);

      assert(buffer < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[buffer] =
         lp_build_array_get(gallivm, bld->consts_ptr, buffer_index);
      bld->consts_sizes[buffer] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, buffer_index);
   }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
/**
 * Process a TGSI immediate.
 *
 * The (up to four) components are turned into constant vectors; integer
 * immediates are bitcast to the base vector type and unused components are
 * undef.  The result is either stored to the immediates array, or kept in
 * bld->immediates[] and additionally stored to the array when immediates
 * are accessed indirectly.  bld->num_immediates is incremented at the end.
 */
void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef values[4];
   unsigned chan;
   const uint count = imm->Immediate.NrTokens - 1;

   assert(count <= 4);

   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for (chan = 0; chan < count; ++chan) {
         values[chan] = lp_build_const_vec(gallivm, bld_base->base.type,
                                           imm->u[chan].Float);
      }
      break;

   case TGSI_IMM_FLOAT64:
   case TGSI_IMM_UINT64:
   case TGSI_IMM_INT64:
   case TGSI_IMM_UINT32:
      for (chan = 0; chan < count; ++chan) {
         LLVMValueRef as_uint = lp_build_const_vec(gallivm,
                                                   bld_base->uint_bld.type,
                                                   imm->u[chan].Uint);
         values[chan] = LLVMConstBitCast(as_uint, bld_base->base.vec_type);
      }
      break;

   case TGSI_IMM_INT32:
      for (chan = 0; chan < count; ++chan) {
         LLVMValueRef as_int = lp_build_const_vec(gallivm,
                                                  bld_base->int_bld.type,
                                                  imm->u[chan].Int);
         values[chan] = LLVMConstBitCast(as_int, bld_base->base.vec_type);
      }
      break;
   }

   /* components which were not supplied are undefined */
   for (chan = count; chan < 4; ++chan) {
      values[chan] = bld_base->base.undef;
   }

   if (bld->use_immediates_array) {
      const unsigned slot = bld->num_immediates;
      struct gallivm_state *soa_gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = soa_gallivm->builder;

      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));

      for (chan = 0; chan < 4; ++chan) {
         LLVMValueRef offset = lp_build_const_int32(
            bld->bld_base.base.gallivm, slot * 4 + chan);
         LLVMValueRef dst = LLVMBuildGEP(builder,
                                         bld->imms_array, &offset, 1, "");
         LLVMBuildStore(builder, values[chan], dst);
      }
   } else {
      /* simply copy the immediate values into the next immediates[] slot */
      assert(imm->Immediate.NrTokens - 1 <= 4);
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);

      for (chan = 0; chan < 4; ++chan) {
         bld->immediates[bld->num_immediates][chan] = values[chan];
      }

      /* indirectly addressed immediates also need a copy in memory */
      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
         const unsigned slot = bld->num_immediates;
         struct gallivm_state *soa_gallivm = bld->bld_base.base.gallivm;
         LLVMBuilderRef builder = soa_gallivm->builder;

         for (chan = 0; chan < 4; ++chan) {
            LLVMValueRef offset = lp_build_const_int32(
               bld->bld_base.base.gallivm, slot * 4 + chan);
            LLVMValueRef dst = LLVMBuildGEP(builder,
                                            bld->imms_array, &offset, 1, "");
            LLVMBuildStore(builder,
                           bld->immediates[slot][chan],
                           dst);
         }
      }
   }

   bld->num_immediates++;
}
static void
ddx_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_fetch_deriv(bld, emit_data->args[0], NULL,
&emit_data->output[emit_data->chan], NULL);
}
static void
ddy_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
&emit_data->output[emit_data->chan]);
}
/**
 * Emit callback: unconditional kill, passing pc - 1 as the instruction
 * position to emit_kill().
 */
static void
kill_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_kill(soa, bld_base->pc - 1);
}
/**
 * Emit callback: conditional kill, passing pc - 1 as the instruction
 * position to emit_kill_if().
 */
static void
kill_if_emit(const struct lp_build_tgsi_action *action,
             struct lp_build_tgsi_context *bld_base,
             struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_kill_if(soa, emit_data->inst, bld_base->pc - 1);
}
/**
 * Emit callback: forwards to emit_tex() with no lod modifier, 1 as the
 * fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
tex_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with no lod modifier, 2 as the
 * fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
tex2_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the lod bias modifier, 1 as
 * the fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
txb_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the lod bias modifier, 2 as
 * the fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
txb2_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the explicit derivatives
 * modifier, 3 as the fifth argument and the LP_SAMPLER_OP_TEXTURE
 * operation.
 */
static void
txd_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the explicit lod modifier,
 * 1 as the fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
txl_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the explicit lod modifier,
 * 2 as the fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
txl2_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with the projected modifier, 1 as
 * the fifth argument and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
txp_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
/**
 * Emit callback: forwards to emit_tex() with no lod modifier, 2 as the
 * fifth argument and the LP_SAMPLER_OP_GATHER operation.
 */
static void
tg4_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
}
/**
 * Emit callback: forwards to emit_tex() with no lod modifier, 1 as the
 * fifth argument and the LP_SAMPLER_OP_LODQ operation.
 */
static void
lodq_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_tex(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_LODQ);
}
/**
 * Emit callback: texture size query with the target taken from the
 * instruction (is_sviewinfo is FALSE).
 */
static void
txq_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_size_query(soa, emit_data->inst, emit_data->output, FALSE);
}
/**
 * Emit callback: texel fetch with the target taken from the instruction
 * (is_samplei is FALSE).
 */
static void
txf_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_fetch_texels(soa, emit_data->inst, emit_data->output, FALSE);
}
/**
 * Emit callback: texel fetch with the target taken from the declared
 * sampler view (is_samplei is TRUE).
 */
static void
sample_i_emit(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_fetch_texels(soa, emit_data->inst, emit_data->output, TRUE);
}
/**
 * Emit callback: forwards to emit_sample() with no lod modifier, no
 * shadow comparison and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_emit(const struct lp_build_tgsi_action *action,
            struct lp_build_tgsi_context *bld_base,
            struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with the lod bias modifier, no
 * shadow comparison and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_b_emit(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with no lod modifier, shadow
 * comparison enabled and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_c_emit(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with the lod zero modifier,
 * shadow comparison enabled and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_c_lz_emit(const struct lp_build_tgsi_action *action,
                 struct lp_build_tgsi_context *bld_base,
                 struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with the explicit derivatives
 * modifier, no shadow comparison and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_d_emit(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with the explicit lod
 * modifier, no shadow comparison and the LP_SAMPLER_OP_TEXTURE operation.
 */
static void
sample_l_emit(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
/**
 * Emit callback: forwards to emit_sample() with no lod modifier, no
 * shadow comparison and the LP_SAMPLER_OP_GATHER operation.
 */
static void
gather4_emit(const struct lp_build_tgsi_action *action,
             struct lp_build_tgsi_context *bld_base,
             struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
}
/**
 * Emit callback: texture size query with the target taken from the
 * declared sampler view (is_sviewinfo is TRUE).
 */
static void
sviewinfo_emit(const struct lp_build_tgsi_action *action,
               struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_size_query(soa, emit_data->inst, emit_data->output, TRUE);
}
/**
 * Emit callback: forwards to emit_sample() with no lod modifier, no
 * shadow comparison and the LP_SAMPLER_OP_LODQ operation.
 */
static void
lod_emit(const struct lp_build_tgsi_action *action,
         struct lp_build_tgsi_context *bld_base,
         struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   emit_sample(soa, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
}
/**
 * Return the current value of bld->mask, ANDed with the execution mask
 * when one is in effect.
 */
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec = &soa->exec_mask;
   LLVMValueRef live = lp_build_mask_value(soa->mask);

   if (exec->has_mask) {
      live = LLVMBuildAnd(builder, live, exec->exec_mask, "");
   }

   return live;
}
/**
 * Load the vector at ptr, subtract mask from it and store it back.
 */
static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                          LLVMValueRef ptr,
                          LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef before = LLVMBuildLoad(builder, ptr, "");
   LLVMValueRef after = LLVMBuildSub(builder, before, mask, "");

   LLVMBuildStore(builder, after, ptr);
}
/**
 * Load the vector at ptr, select zero where mask is set and the loaded
 * value elsewhere, and store the result back.
 */
static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef before = LLVMBuildLoad(builder, ptr, "");
   LLVMValueRef after = lp_build_select(uint_bld, mask,
                                        uint_bld->zero, before);

   LLVMBuildStore(builder, after, ptr);
}
/**
 * AND the given mask with the result of comparing the total emitted
 * vertex count against bld->max_output_vertices_vec (less-than).
 */
static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef below_limit;

   below_limit = lp_build_cmp(&bld->bld_base.int_bld, PIPE_FUNC_LESS,
                              total_emitted_vertices_vec,
                              bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, below_limit, "");
}
/**
 * Emit callback for emitting a vertex through the geometry shader
 * interface.
 *
 * Does nothing when the interface has no emit_vertex hook.  Otherwise the
 * outputs are gathered and handed to the hook, and both the per-primitive
 * and the total emitted-vertex counters are advanced using the current
 * mask clamped to the maximum number of output vertices.
 */
static void
emit_vertex(const struct lp_build_tgsi_action *action,
            struct lp_build_tgsi_context *bld_base,
            struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   LLVMValueRef mask;
   LLVMValueRef total_emitted;

   if (!soa->gs_iface->emit_vertex) {
      return;
   }

   mask = mask_vec(bld_base);
   total_emitted =
      LLVMBuildLoad(builder, soa->total_emitted_vertices_vec_ptr, "");
   mask = clamp_mask_to_max_output_vertices(soa, mask, total_emitted);

   gather_outputs(soa);
   soa->gs_iface->emit_vertex(soa->gs_iface, &soa->bld_base,
                              soa->outputs,
                              total_emitted);

   increment_vec_ptr_by_mask(bld_base, soa->emitted_vertices_vec_ptr, mask);
   increment_vec_ptr_by_mask(bld_base, soa->total_emitted_vertices_vec_ptr,
                             mask);
#if DUMP_GS_EMITS
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ emit vertex masked ones = ",
                        mask);
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ emit vertex emitted = ",
                        total_emitted);
#endif
}
/**
 * End the current primitive for the execution slots selected by mask.
 *
 * Does nothing when the geometry shader interface has no end_primitive
 * hook.  Otherwise the hook is called with the current vertex and
 * primitive counters; afterwards, for the slots in mask which have emitted
 * vertices, the primitive counter is advanced and the vertex counter is
 * reset to zero.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef verts, prims, has_verts;

   if (!soa->gs_iface->end_primitive) {
      return;
   }

   verts = LLVMBuildLoad(builder, soa->emitted_vertices_vec_ptr, "");
   prims = LLVMBuildLoad(builder, soa->emitted_prims_vec_ptr, "");
   has_verts = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                            verts, uint_bld->zero);

   /* We need to combine the current execution mask with the mask
      telling us which, if any, execution slots actually have
      unemitted primitives, this way we make sure that end_primitives
      executes only on the paths that have unflushed vertices */
   mask = LLVMBuildAnd(builder, mask, has_verts, "");

   soa->gs_iface->end_primitive(soa->gs_iface, &soa->bld_base,
                                verts,
                                prims);

#if DUMP_GS_EMITS
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ end prim masked ones = ",
                        mask);
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ end prim emitted verts1 = ",
                        verts);
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ end prim emitted prims1 = ",
                        LLVMBuildLoad(builder,
                                      soa->emitted_prims_vec_ptr, ""));
#endif
   increment_vec_ptr_by_mask(bld_base, soa->emitted_prims_vec_ptr, mask);
   clear_uint_vec_ptr_from_mask(bld_base, soa->emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
   lp_build_print_value(soa->bld_base.base.gallivm,
                        " +++ end prim emitted verts2 = ",
                        LLVMBuildLoad(builder,
                                      soa->emitted_vertices_vec_ptr, ""));
#endif
}
/**
 * Emit callback: end the current primitive using the current mask, if
 * the geometry shader interface provides an end_primitive hook.
 */
static void
end_primitive(const struct lp_build_tgsi_action *action,
              struct lp_build_tgsi_context *bld_base,
              struct lp_build_emit_data *emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   if (!soa->gs_iface->end_primitive) {
      return;
   }

   end_primitive_masked(bld_base, mask_vec(bld_base));
}
/**
 * Emit callback installed for TGSI_OPCODE_CAL.
 *
 * Forwards the instruction's label and the address of the build
 * context's program counter to lp_exec_mask_call().  \p action is unused.
 */
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_call(&soa->exec_mask,
                     emit_data->inst->Label.Label,
                     &bld_base->pc);
}
/**
 * Emit callback installed for TGSI_OPCODE_RET.
 *
 * Delegates to lp_exec_mask_ret() with the SoA execution mask and the
 * address of the program counter.  \p action and \p emit_data are unused.
 */
static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_ret(&soa->exec_mask, &bld_base->pc);
}
/**
 * Emit callback installed for TGSI_OPCODE_BRK.
 *
 * Delegates to lp_exec_break() with the SoA execution mask and the build
 * context.  \p action and \p emit_data are unused.
 */
static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_break(&soa->exec_mask, bld_base);
}
/**
 * Emit callback installed for TGSI_OPCODE_IF.
 *
 * Builds a NOTEQUAL comparison of the first argument against the base
 * build context's zero and hands the result to
 * lp_exec_mask_cond_push().  \p action is unused.
 */
static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   LLVMValueRef is_nonzero =
      lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                   emit_data->args[0], soa->bld_base.base.zero);

   lp_exec_mask_cond_push(&soa->exec_mask, is_nonzero);
}
/**
 * Emit callback installed for TGSI_OPCODE_UIF.
 *
 * Same shape as the IF handler, but the NOTEQUAL-against-zero comparison
 * is built with the unsigned integer build context (uint_bld) before the
 * result is handed to lp_exec_mask_cond_push().  \p action is unused.
 */
static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   struct lp_build_context *ubld = &bld_base->uint_bld;
   LLVMValueRef is_nonzero =
      lp_build_cmp(ubld, PIPE_FUNC_NOTEQUAL,
                   emit_data->args[0], ubld->zero);

   lp_exec_mask_cond_push(&soa->exec_mask, is_nonzero);
}
/**
 * Emit callback installed for TGSI_OPCODE_CASE.
 *
 * Passes the first argument of the instruction to lp_exec_case().
 * \p action is unused.
 */
static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_case(&soa->exec_mask, emit_data->args[0]);
}
/**
 * Emit callback installed for TGSI_OPCODE_DEFAULT.
 *
 * Delegates to lp_exec_default() with the SoA execution mask and the
 * build context.  \p action and \p emit_data are unused.
 */
static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_default(&soa->exec_mask, bld_base);
}
/**
 * Emit callback installed for TGSI_OPCODE_SWITCH.
 *
 * Passes the first argument of the instruction to lp_exec_switch().
 * \p action is unused.
 */
static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_switch(&soa->exec_mask, emit_data->args[0]);
}
/**
 * Emit callback installed for TGSI_OPCODE_ENDSWITCH.
 *
 * Delegates to lp_exec_endswitch() with the SoA execution mask and the
 * build context.  \p action and \p emit_data are unused.
 */
static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_endswitch(&soa->exec_mask, bld_base);
}
/**
 * Emit callback installed for TGSI_OPCODE_BGNLOOP.
 *
 * Delegates to lp_exec_bgnloop() on the SoA execution mask.
 * \p action and \p emit_data are unused.
 */
static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_bgnloop(&soa->exec_mask);
}
/**
 * Emit callback installed for TGSI_OPCODE_BGNSUB.
 *
 * Delegates to lp_exec_mask_bgnsub() on the SoA execution mask.
 * \p action and \p emit_data are unused.
 */
static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&soa->exec_mask);
}
/**
 * Emit callback installed for TGSI_OPCODE_ELSE.
 *
 * Delegates to lp_exec_mask_cond_invert() on the SoA execution mask.
 * \p action and \p emit_data are unused.
 */
static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&soa->exec_mask);
}
/**
 * Emit callback installed for TGSI_OPCODE_ENDIF.
 *
 * Delegates to lp_exec_mask_cond_pop() on the SoA execution mask.
 * \p action and \p emit_data are unused.
 */
static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&soa->exec_mask);
}
/**
 * Emit callback installed for TGSI_OPCODE_ENDLOOP.
 *
 * Delegates to lp_exec_endloop() with the gallivm state of the base
 * build context and the SoA execution mask.  \p action and \p emit_data
 * are unused.
 */
static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &soa->exec_mask);
}
/**
 * Emit callback installed for TGSI_OPCODE_ENDSUB.
 *
 * Delegates to lp_exec_mask_endsub() with the SoA execution mask and the
 * address of the program counter.  \p action and \p emit_data are unused.
 */
static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&soa->exec_mask, &bld_base->pc);
}
/**
 * Emit callback installed for TGSI_OPCODE_CONT.
 *
 * Delegates to lp_exec_continue() on the SoA execution mask.
 * \p action and \p emit_data are unused.
 */
static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);

   lp_exec_continue(&soa->exec_mask);
}
/**
 * Prologue hook of the SoA translator (installed as
 * bld_base->emit_prologue).
 *
 * In order, it:
 *  - allocates array storage for every register file flagged in
 *    indirect_files (temporaries, outputs, immediates), sized
 *    file_max * 4 + 4 vectors each;
 *  - for non-GS shaders with indirect inputs, allocates an input array
 *    and stores every non-NULL caller-provided input channel into it;
 *  - for geometry shaders, allocates the three emit counters and stores
 *    zero into each of them;
 *  - when DEBUG_EXECUTION is set, dumps the constant file (and the input
 *    file for non-GS shaders).
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   if (soa->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef num_vecs = lp_build_const_int32(
         gallivm, bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);

      soa->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type,
                                               num_vecs,
                                               "temp_array");
   }

   if (soa->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef num_vecs = lp_build_const_int32(
         gallivm, bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);

      soa->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type,
                                                 num_vecs,
                                                 "output_array");
   }

   if (soa->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef num_vecs = lp_build_const_int32(
         gallivm, bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);

      soa->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type,
                                              num_vecs,
                                              "imms_array");
   }

   /*
    * Indirectly addressed inputs have to live in memory so they can be
    * indexed at run time: copy them into our own alloca array.  Not done
    * when a GS interface is present.
    */
   if (!soa->gs_iface && (soa->indirect_files & (1 << TGSI_FILE_INPUT))) {
      unsigned reg, comp;
      LLVMTypeRef input_vec_type = bld_base->base.vec_type;
      LLVMValueRef num_vecs = lp_build_const_int32(
         gallivm, bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);

      soa->inputs_array = lp_build_array_alloca(gallivm,
                                                input_vec_type, num_vecs,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (reg = 0; reg < bld_base->info->num_inputs; ++reg) {
         for (comp = 0; comp < TGSI_NUM_CHANNELS; ++comp) {
            /* Flat slot of (register, channel) inside the array. */
            LLVMValueRef slot =
               lp_build_const_int32(gallivm, reg * 4 + comp);
            LLVMValueRef slot_ptr =
               LLVMBuildGEP(gallivm->builder, soa->inputs_array,
                            &slot, 1, "");
            LLVMValueRef src = soa->inputs[reg][comp];

            /* Channels the caller left NULL are simply not stored. */
            if (src) {
               LLVMBuildStore(gallivm->builder, src, slot_ptr);
            }
         }
      }
   }

   if (soa->gs_iface) {
      struct lp_build_context *ubld = &soa->bld_base.uint_bld;

      /* Allocate the three counters first ... */
      soa->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm, ubld->vec_type, "emitted_prims_ptr");
      soa->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm, ubld->vec_type, "emitted_vertices_ptr");
      soa->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm, ubld->vec_type,
                         "total_emitted_vertices_ptr");

      /* ... then start all of them at zero. */
      LLVMBuildStore(gallivm->builder, ubld->zero,
                     soa->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, ubld->zero,
                     soa->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, ubld->zero,
                     soa->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(soa, TGSI_FILE_CONSTANT);
      if (!soa->gs_iface) {
         emit_dump_file(soa, TGSI_FILE_INPUT);
      }
   }
}
/**
 * Epilogue hook of the SoA translator (installed as
 * bld_base->emit_epilogue).
 *
 * Optionally dumps the output file (DEBUG_EXECUTION).  Without a GS
 * interface it then just calls gather_outputs().  With a GS interface it
 * performs an implicit end-of-primitive using the mask value of
 * bld->mask, reloads the total-vertex and primitive counters and passes
 * them to the interface's gs_epilogue hook.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef total_verts;
   LLVMValueRef total_prims;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(soa, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(soa, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (!soa->gs_iface) {
      /* If we have indirect addressing in outputs we need to copy our
       * alloca array to the output slots specified by the caller. */
      gather_outputs(soa);
      return;
   }

   /*
    * Implicit end_primitive, needed in case there are still unflushed
    * vertices in the cache.  end_primitive() itself must not be used
    * here because the exec_mask is not valid at this point; use the
    * shader's overall mask instead.
    */
   end_primitive_masked(bld_base, lp_build_mask_value(soa->mask));

   total_verts =
      LLVMBuildLoad(builder, soa->total_emitted_vertices_vec_ptr, "");
   total_prims =
      LLVMBuildLoad(builder, soa->emitted_prims_vec_ptr, "");

   soa->gs_iface->gs_epilogue(soa->gs_iface,
                              &soa->bld_base,
                              total_verts,
                              total_prims);
}
/**
 * Translate a TGSI token stream to LLVM IR using the SoA layout.
 *
 * Sets up a stack-local lp_build_tgsi_soa_context, installs the fetch,
 * store, declaration, immediate, prologue/epilogue and per-opcode emit
 * callbacks, and then runs lp_build_tgsi_llvm() over \p tokens.
 *
 * \param gallivm          gallivm state the IR is built into
 * \param tokens           TGSI tokens to translate
 * \param type             base vector type; its length must not exceed
 *                         LP_MAX_VECTOR_LENGTH (asserted)
 * \param mask             build mask, stored in the context
 * \param consts_ptr       stored in the context as consts_ptr
 * \param const_sizes_ptr  stored in the context as const_sizes_ptr
 * \param system_values    copied by value into the context; must not be
 *                         NULL (it is dereferenced unconditionally)
 * \param inputs           caller-provided input values, per channel
 * \param outputs          caller-provided output slots, per channel
 * \param context_ptr      stored in the context as context_ptr
 * \param thread_data_ptr  stored in the context as thread_data_ptr
 * \param sampler          sampler code generator, stored in the context
 * \param info             shader info; supplies indirect_files, file_max
 *                         and properties
 * \param gs_iface         geometry shader interface, or NULL; when set,
 *                         inputs are treated as indirect and the
 *                         EMIT/ENDPRIM handlers are installed
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));

   /* The double-width contexts reuse the base types with twice the
    * element width. */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }

   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }

   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   /* SAMPLE_I_MS shares the SAMPLE_I handler. */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      /* Fall back to 32 when the shader did not set the property. */
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Disabled debugging aid: dump the TGSI and the generated function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Disabled debugging aid: dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }

   lp_exec_mask_fini(&bld.exec_mask);
}