/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "main/glheader.h"
#include "main/context.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "t_context.h"
#include "t_pipeline.h"
#include "t_vp_build.h"
#include "t_vertex.h"
void _tnl_install_pipeline( struct gl_context *ctx,
const struct tnl_pipeline_stage **stages )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint i;
tnl->pipeline.new_state = ~0;
/* Create a writeable copy of each stage.
*/
for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
memcpy(s, stages[i], sizeof(*s));
if (s->create)
s->create(ctx, s);
}
tnl->pipeline.nr_stages = i;
}
void _tnl_destroy_pipeline( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint i;
for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
if (s->destroy)
s->destroy(s);
}
tnl->pipeline.nr_stages = 0;
}
static GLuint check_input_changes( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint i;
for (i = 0; i <= _TNL_LAST_MAT; i++) {
if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
tnl->pipeline.input_changes |= 1<<i;
}
}
return tnl->pipeline.input_changes;
}
static GLuint check_output_changes( struct gl_context *ctx )
{
#if 0
TNLcontext *tnl = TNL_CONTEXT(ctx);
for (i = 0; i < VARYING_SLOT_MAX; i++) {
if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
tnl->pipeline.output_changes |= 1<<i;
}
}
if (tnl->pipeline.output_changes)
tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );
return tnl->pipeline.output_changes;
#else
return ~0;
#endif
}
/**
* START/END_FAST_MATH macros:
*
* START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
* original mode to a temporary).
* END_FAST_MATH: Restore x86 FPU to original mode.
*/
#if defined(__GNUC__) && defined(__i386__)
/*
* Set the x86 FPU control word to guarentee only 32 bits of precision
* are stored in registers. Allowing the FPU to store more introduces
* differences between situations where numbers are pulled out of memory
* vs. situations where the compiler is able to optimize register usage.
*
* In the worst case, we force the compiler to use a memory access to
* truncate the float, by specifying the 'volatile' keyword.
*/
/* Hardware default: All exceptions masked, extended double precision,
* round to nearest (IEEE compliant):
*/
#define DEFAULT_X86_FPU 0x037f
/* All exceptions masked, single precision, round to nearest:
*/
#define FAST_X86_FPU 0x003f
/* The fldcw instruction will cause any pending FP exceptions to be
* raised prior to entering the block, and we clear any pending
* exceptions before exiting the block. Hence, asm code has free
* reign over the FPU while in the fast math block.
*/
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) \
do { \
static GLuint mask = DEFAULT_X86_FPU; \
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
__asm__ ( "fldcw %0" : : "m" (mask) ); \
} while (0)
#else
#define START_FAST_MATH(x) \
do { \
static GLuint mask = FAST_X86_FPU; \
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
__asm__ ( "fldcw %0" : : "m" (mask) ); \
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
* occurred in the FAST_MATH block.
*/
#define END_FAST_MATH(x) \
do { \
__asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \
} while (0)
#elif defined(_MSC_VER) && defined(_M_IX86)
#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */
#define FAST_X86_FPU 0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
static GLuint mask = DEFAULT_X86_FPU;\
__asm fnstcw word ptr [x]\
__asm fldcw word ptr [mask]\
} while(0)
#else
#define START_FAST_MATH(x) do {\
static GLuint mask = FAST_X86_FPU;\
__asm fnstcw word ptr [x]\
__asm fldcw word ptr [mask]\
} while(0)
#endif
#define END_FAST_MATH(x) do {\
__asm fnclex\
__asm fldcw word ptr [x]\
} while(0)
#else
#define START_FAST_MATH(x) x = 0
#define END_FAST_MATH(x) (void)(x)
#endif
void _tnl_run_pipeline( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
unsigned short __tmp;
GLuint i;
if (!tnl->vb.Count)
return;
/* Check for changed input sizes or change in stride to/from zero
* (ie const or non-const).
*/
if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
if (ctx->VertexProgram._MaintainTnlProgram)
_tnl_UpdateFixedFunctionProgram( ctx );
for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
if (s->validate)
s->validate( ctx, s );
}
tnl->pipeline.new_state = 0;
tnl->pipeline.input_changes = 0;
/* Pipeline can only change its output in response to either a
* statechange or an input size/stride change. No other changes
* are allowed.
*/
if (check_output_changes( ctx ))
_tnl_notify_pipeline_output_change( ctx );
}
#ifndef _OPENMP
/* Don't adjust FPU precision mode in case multiple threads are to be used.
* This would require that the additional threads also changed the FPU mode
* which is quite a mess as this had to be done in all parallelized sections;
* otherwise the master thread and all other threads are running in different
* modes, producing inconsistent results.
* Note that all x64 implementations don't define/use START_FAST_MATH, so
* this is "hack" is only used in i386 mode
*/
START_FAST_MATH(__tmp);
#endif
for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
if (!s->run( ctx, s ))
break;
}
#ifndef _OPENMP
END_FAST_MATH(__tmp);
#endif
}
/* The default pipeline. This is useful for software rasterizers, and
* simple hardware rasterizers. For customization, I don't recommend
* tampering with the internals of these stages in the way that
* drivers did in Mesa 3.4. These stages are basically black boxes,
* and should be left intact.
*
* To customize the pipeline, consider:
*
* - removing redundant stages (making sure that the software rasterizer
* can cope with this on fallback paths). An example is fog
* coordinate generation, which is not required in the FX driver.
*
* - replacing general-purpose machine-independent stages with
* general-purpose machine-specific stages. There is no example of
* this to date, though it must be borne in mind that all subsequent
* stages that reference the output of the new stage must cope with
* any machine-specific data introduced. This may not be easy
* unless there are no such stages (ie the new stage is the last in
* the pipe).
*
* - inserting optimized (but specialized) stages ahead of the
* general-purpose fallback implementation. For example, the old
* fastpath mechanism, which only works when the VB->Elts input is
* available, can be duplicated by placing the fastpath stage at the
* head of this pipeline. Such specialized stages are currently
* constrained to have no outputs (ie. they must either finish the *
* pipeline by returning GL_FALSE from run(), or do nothing).
*
* Some work can be done to lift some of the restrictions in the final
* case, if it becomes necessary to do so.
*/
const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
&_tnl_vertex_transform_stage,
&_tnl_normal_transform_stage,
&_tnl_lighting_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
&_tnl_point_attenuation_stage,
&_tnl_vertex_program_stage,
&_tnl_fog_coordinate_stage,
&_tnl_render_stage,
NULL
};
const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
&_tnl_vertex_program_stage,
&_tnl_render_stage,
NULL
};