/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
 */

/**
* \file brw_vec4_tes.cpp
*
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
*/
#include "brw_vec4_tes.h"
#include "brw_cfg.h"
namespace brw {
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_tes_prog_key *key,
struct brw_tes_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
int shader_time_index)
: vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
shader, mem_ctx, false, shader_time_index)
{
}
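
/* System values don't go through ordinary registers in the TES:
 * gl_TessCoord lives in the thread payload, gl_PrimitiveID has a dedicated
 * pseudo-opcode, and the tessellation levels are read from the patch URB
 * entry in nir_emit_intrinsic(), so there is nothing to create here.
 */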
dst_reg *
vec4_tes_visitor::make_reg_for_system_value(int location)
{
return NULL;
}
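
/* The tessellation levels are read as ordinary patch URB inputs in
 * nir_emit_intrinsic(), so they need no system value setup here;
 * everything else is deferred to the base class.
 */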
void
vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_tess_level_outer:
case nir_intrinsic_load_tess_level_inner:
break;
default:
vec4_visitor::nir_setup_system_value_intrinsic(instr);
}
}

void
vec4_tes_visitor::setup_payload()
{
int reg = 0;
   /* The payload always contains important data in r0 and r1, which hold
    * the URB handles that are passed on to the URB write at the end of
    * the thread.
    */
reg += 2;
reg = setup_uniforms(reg);
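
   /* Rewrite ATTR sources to point at the pushed inputs.  Two vec4 URB
    * slots are packed into each GRF, so slot / 2 selects the register and
    * 4 * (slot % 2) the subregister where the slot starts.
    */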
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != ATTR)
continue;
bool is_64bit = type_sz(inst->src[i].type) == 8;
unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
grf.swizzle = inst->src[i].swizzle;
grf.type = inst->src[i].type;
grf.abs = inst->src[i].abs;
grf.negate = inst->src[i].negate;
/* For 64-bit attributes we can end up with components XY in the
* second half of a register and components ZW in the first half
* of the next. Fix it up here.
*/
if (is_64bit && grf.subnr > 0) {
/* We can't do swizzles that mix XY and ZW channels in this case.
* Such cases should have been handled by the scalarization pass.
*/
assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
(brw_mask_for_swizzle(grf.swizzle) & 0xc));
if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
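            /* Step to the next register and shift the swizzle from the
             * ZW channels down to XY.
             */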
grf.subnr = 0;
grf.nr++;
grf.swizzle -= BRW_SWIZZLE_ZZZZ;
}
}
inst->src[i] = grf;
}
}
reg += 8 * prog_data->urb_read_length;
this->first_non_payload_grf = reg;
}

void
vec4_tes_visitor::emit_prolog()
{
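   /* Build the message header that VEC4_OPCODE_URB_READ uses in
    * nir_emit_intrinsic() to pull input values from the patch URB entry.
    */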
input_read_header = src_reg(this, glsl_type::uvec4_type);
emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
this->current_annotation = NULL;
}

void
vec4_tes_visitor::emit_urb_write_header(int mrf)
{
/* No need to do anything for DS; an implied write to this MRF will be
* performed by VS_OPCODE_URB_WRITE.
*/
(void) mrf;
}

vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
/* For DS, the URB writes end the thread. */
if (complete) {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
}
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
return inst;
}

void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
const struct brw_tes_prog_data *tes_prog_data =
(const struct brw_tes_prog_data *) prog_data;
switch (instr->intrinsic) {
case nir_intrinsic_load_tess_coord:
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
src_reg(brw_vec8_grf(1, 0))));
break;
case nir_intrinsic_load_tess_level_outer:
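      /* The tessellation levels are stored in reverse order in the patch
       * URB header, hence the reversing swizzle; isolines have only two
       * outer levels, which live in the .zw components of slot 1.
       */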
if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
BRW_SWIZZLE_ZWZW)));
} else {
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
BRW_SWIZZLE_WZYX)));
}
break;
case nir_intrinsic_load_tess_level_inner:
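      /* Quads have two inner levels, stored (again reversed) in slot 0;
       * triangles have a single inner level in the first component of
       * slot 1.
       */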
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
BRW_SWIZZLE_WZYX)));
} else {
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
src_reg(ATTR, 1, glsl_type::float_type)));
}
break;
case nir_intrinsic_load_primitive_id:
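      /* gl_PrimitiveID is fetched by a dedicated pseudo-opcode that is
       * expanded at code generation time.
       */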
emit(TES_OPCODE_GET_PRIMITIVE_ID,
get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
src_reg header = input_read_header;
bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
unsigned first_component = nir_intrinsic_component(instr);
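      /* The NIR component is counted in 32-bit units; convert it to 64-bit
       * components for 64-bit inputs.
       */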
if (is_64bit)
first_component /= 2;
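
      /* An indirect offset forces the pull model: fold the offset into a
       * copy of the read header so that the URB read below fetches the
       * right slot.
       */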
if (indirect_offset.file != BAD_FILE) {
header = src_reg(this, glsl_type::uvec4_type);
emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
input_read_header, indirect_offset);
} else {
/* Arbitrarily only push up to 24 vec4 slots worth of data,
* which is 12 registers (since each holds 2 vec4 slots).
*/
const unsigned max_push_slots = 24;
if (imm_offset < max_push_slots) {
const glsl_type *src_glsl_type =
is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
const brw_reg_type dst_reg_type =
is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));
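
            /* Account for this input in the pushed URB read length: each
             * register holds two vec4 slots, and 64-bit values occupy two
             * slots each.
             */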
prog_data->urb_read_length =
MAX2(prog_data->urb_read_length,
DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
break;
}
}
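
      /* Pull model: fetch the value from the URB with an explicit read
       * message.  This handles indirect offsets as well as inputs beyond
       * the push limit.
       */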
if (!is_64bit) {
dst_reg temp(this, glsl_type::ivec4_type);
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
read->offset = imm_offset;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
src_reg src = src_reg(temp);
src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
/* Copy to target. We might end up with some funky writemasks landing
* in here, but we really don't want them in the above pseudo-ops.
*/
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dst, src));
} else {
         /* For 64-bit inputs we need to load twice as many 32-bit
          * components, and for dvec3/dvec4 we need to emit two URB read
          * messages.
          */
dst_reg temp(this, glsl_type::dvec4_type);
dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
read->offset = imm_offset;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
if (instr->num_components > 2) {
read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
src_reg(header));
read->offset = imm_offset + 1;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
}
src_reg temp_as_src = src_reg(temp);
temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
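
         /* The URB read returns 64-bit data as pairs of 32-bit components;
          * shuffle it into the layout the rest of the backend expects for
          * 64-bit values.
          */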
dst_reg shuffled(this, glsl_type::dvec4_type);
shuffle_64bit_data(shuffled, temp_as_src, false);
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dst, src_reg(shuffled)));
}
break;
}
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
}

void
vec4_tes_visitor::emit_thread_end()
{
   /* For DS, we always end the thread by emitting a single vertex.
    * emit_urb_write_opcode() will take care of setting the EOT flag on
    * the SEND instruction.
    */
emit_vertex();
}

} /* namespace brw */