/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <stdbool.h>
#include "main/state.h"
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv10_3d.xml.h"
#include "nv04_driver.h"
#include "nv10_driver.h"
static GLboolean
use_fast_zclear(struct gl_context *ctx, GLbitfield buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
if (buffers & BUFFER_BIT_STENCIL) {
/*
* The stencil test is bypassed when fast Z clears are
* enabled.
*/
nctx->hierz.clear_blocked = GL_TRUE;
context_dirty(ctx, ZCLEAR);
return GL_FALSE;
}
return !nctx->hierz.clear_blocked &&
fb->_Xmax == fb->Width && fb->_Xmin == 0 &&
fb->_Ymax == fb->Height && fb->_Ymin == 0;
}
GLboolean
nv10_use_viewport_zclear(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_renderbuffer *depthRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
return context_chipset(ctx) < 0x17 &&
!nctx->hierz.clear_blocked && depthRb &&
(_mesa_get_format_bits(depthRb->Format,
GL_DEPTH_BITS) >= 24);
}
float
nv10_transform_depth(struct gl_context *ctx, float z)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
if (nv10_use_viewport_zclear(ctx))
return 2097152.0 * (z + (nctx->hierz.clear_seq & 7));
else
return ctx->DrawBuffer->_DepthMaxF * z;
}
static void
nv10_zclear(struct gl_context *ctx, GLbitfield *buffers)
{
/*
* Pre-nv17 cards don't have native support for fast Z clears,
* but in some cases we can still "clear" the Z buffer without
* actually blitting to it if we're willing to sacrifice a few
* bits of depth precision.
*
* Each time a clear is requested we modify the viewport
* transform in such a way that the old contents of the depth
* buffer are clamped to the requested clear value when
* they're read by the GPU.
*/
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
struct nouveau_surface *s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
if (nv10_use_viewport_zclear(ctx)) {
int x, y, w, h;
float z = ctx->Depth.Clear;
uint32_t value = pack_zs_f(s->format, z, 0);
get_scissors(fb, &x, &y, &w, &h);
*buffers &= ~BUFFER_BIT_DEPTH;
if (use_fast_zclear(ctx, *buffers)) {
if (nfb->hierz.clear_value != value) {
/* Don't fast clear if we're changing
* the depth value. */
nfb->hierz.clear_value = value;
} else if (z == 0.0) {
nctx->hierz.clear_seq++;
context_dirty(ctx, ZCLEAR);
if ((nctx->hierz.clear_seq & 7) != 0 &&
nctx->hierz.clear_seq != 1)
/* We didn't wrap around -- no need to
* clear the depth buffer for real. */
return;
} else if (z == 1.0) {
nctx->hierz.clear_seq--;
context_dirty(ctx, ZCLEAR);
if ((nctx->hierz.clear_seq & 7) != 7)
/* No wrap around */
return;
}
}
value = pack_zs_f(s->format,
(z + (nctx->hierz.clear_seq & 7)) / 8, 0);
context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h);
}
}
static void
nv17_zclear(struct gl_context *ctx, GLbitfield *buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
ctx->DrawBuffer);
struct nouveau_surface *s = &to_nouveau_renderbuffer(
nfb->base.Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
/* Clear the hierarchical depth buffer */
BEGIN_NV04(push, NV17_3D(HIERZ_FILL_VALUE), 1);
PUSH_DATA (push, pack_zs_f(s->format, ctx->Depth.Clear, 0));
BEGIN_NV04(push, NV17_3D(HIERZ_BUFFER_CLEAR), 1);
PUSH_DATA (push, 1);
/* Mark the depth buffer as cleared */
if (use_fast_zclear(ctx, *buffers)) {
if (nctx->hierz.clear_seq)
*buffers &= ~BUFFER_BIT_DEPTH;
nfb->hierz.clear_value =
pack_zs_f(s->format, ctx->Depth.Clear, 0);
nctx->hierz.clear_seq++;
context_dirty(ctx, ZCLEAR);
}
}
static void
nv10_clear(struct gl_context *ctx, GLbitfield buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
nouveau_validate_framebuffer(ctx);
nouveau_pushbuf_bufctx(push, nctx->hw.bufctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
return;
}
if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
if (context_chipset(ctx) >= 0x17)
nv17_zclear(ctx, &buffers);
else
nv10_zclear(ctx, &buffers);
/* Emit the zclear state if it's dirty */
_mesa_update_state(ctx);
}
nouveau_pushbuf_bufctx(push, NULL);
nouveau_clear(ctx, buffers);
}
static void
nv10_hwctx_init(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nv04_fifo *fifo = hw->chan->data;
int i;
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, hw->eng3d->handle);
BEGIN_NV04(push, NV10_3D(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
BEGIN_NV04(push, NV10_3D(DMA_TEXTURE0), 3);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
PUSH_DATA (push, fifo->gart);
BEGIN_NV04(push, NV10_3D(DMA_COLOR), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(RT_HORIZ), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_HORIZ(0)), 1);
PUSH_DATA (push, 0x7ff << 16 | 0x800);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_VERT(0)), 1);
PUSH_DATA (push, 0x7ff << 16 | 0x800);
for (i = 1; i < 8; i++) {
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_HORIZ(i)), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_VERT(i)), 1);
PUSH_DATA (push, 0);
}
BEGIN_NV04(push, SUBC_3D(0x290), 1);
PUSH_DATA (push, 0x10 << 16 | 1);
BEGIN_NV04(push, SUBC_3D(0x3f4), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
if (context_chipset(ctx) >= 0x17) {
BEGIN_NV04(push, NV17_3D(UNK01AC), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
BEGIN_NV04(push, SUBC_3D(0xd84), 1);
PUSH_DATA (push, 0x3);
BEGIN_NV04(push, NV17_3D(COLOR_MASK_ENABLE), 1);
PUSH_DATA (push, 1);
}
if (context_chipset(ctx) >= 0x11) {
BEGIN_NV04(push, SUBC_3D(0x120), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 1);
PUSH_DATA (push, 2);
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
}
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
/* Set state */
BEGIN_NV04(push, NV10_3D(FOG_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_FUNC), 2);
PUSH_DATA (push, 0x207);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(TEX_ENABLE(0)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DITHER_ENABLE), 2);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VERTEX_WEIGHT_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_SRC), 4);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x8006);
BEGIN_NV04(push, NV10_3D(STENCIL_MASK), 8);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0x207);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1d01);
BEGIN_NV04(push, NV10_3D(NORMALIZE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(FOG_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LIGHT_MODEL), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(SEPARATE_SPECULAR_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ENABLED_LIGHTS), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_POINT_ENABLE), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DEPTH_FUNC), 1);
PUSH_DATA (push, 0x201);
BEGIN_NV04(push, NV10_3D(DEPTH_WRITE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DEPTH_TEST_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_FACTOR), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POINT_SIZE), 1);
PUSH_DATA (push, 8);
BEGIN_NV04(push, NV10_3D(POINT_PARAMETERS_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LINE_WIDTH), 1);
PUSH_DATA (push, 8);
BEGIN_NV04(push, NV10_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_MODE_FRONT), 2);
PUSH_DATA (push, 0x1b02);
PUSH_DATA (push, 0x1b02);
BEGIN_NV04(push, NV10_3D(CULL_FACE), 2);
PUSH_DATA (push, 0x405);
PUSH_DATA (push, 0x901);
BEGIN_NV04(push, NV10_3D(POLYGON_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(CULL_FACE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(TEX_GEN_MODE(0, 0)), 8);
for (i = 0; i < 8; i++)
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(TEX_MATRIX_ENABLE(0)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(FOG_COEFF(0)), 3);
PUSH_DATA (push, 0x3fc00000); /* -1.50 */
PUSH_DATA (push, 0xbdb8aa0a); /* -0.09 */
PUSH_DATA (push, 0); /* 0.00 */
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(FOG_MODE), 2);
PUSH_DATA (push, 0x802);
PUSH_DATA (push, 2);
/* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
* using texturing, except when using the texture matrix
*/
BEGIN_NV04(push, NV10_3D(VIEW_MATRIX_ENABLE), 1);
PUSH_DATA (push, 6);
BEGIN_NV04(push, NV10_3D(COLOR_MASK), 1);
PUSH_DATA (push, 0x01010101);
/* Set vertex component */
BEGIN_NV04(push, NV10_3D(VERTEX_COL_4F_R), 4);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_COL2_3F_R), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VERTEX_NOR_3F_X), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_TX0_4F_S), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_TX1_4F_S), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_FOG_1F), 1);
PUSH_DATAf(push, 0.0);
BEGIN_NV04(push, NV10_3D(EDGEFLAG_ENABLE), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV10_3D(DEPTH_RANGE_NEAR), 2);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 16777216.0);
PUSH_KICK (push);
}
static void
nv10_context_destroy(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
nv04_surface_takedown(ctx);
nv10_swtnl_destroy(ctx);
nv10_vbo_destroy(ctx);
nouveau_object_del(&nctx->hw.eng3d);
nouveau_context_deinit(ctx);
FREE(ctx);
}
static struct gl_context *
nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visual,
struct gl_context *share_ctx)
{
struct nouveau_context *nctx;
struct gl_context *ctx;
unsigned celsius_class;
int ret;
nctx = CALLOC_STRUCT(nouveau_context);
if (!nctx)
return NULL;
ctx = &nctx->base;
if (!nouveau_context_init(ctx, screen, visual, share_ctx))
goto fail;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.NV_fog_distance = true;
ctx->Extensions.NV_texture_rectangle = true;
if (ctx->Mesa_DXTn) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.S3_s3tc = true;
}
/* GL constants. */
ctx->Const.MaxTextureLevels = 12;
ctx->Const.MaxTextureCoordUnits = NV10_TEXTURE_UNITS;
ctx->Const.MaxTextureImageUnits = NV10_TEXTURE_UNITS;
ctx->Const.MaxTextureUnits = NV10_TEXTURE_UNITS;
ctx->Const.MaxTextureMaxAnisotropy = 2;
ctx->Const.MaxTextureLodBias = 15;
ctx->Driver.Clear = nv10_clear;
/* 2D engine. */
ret = nv04_surface_init(ctx);
if (!ret)
goto fail;
/* 3D engine. */
if (context_chipset(ctx) >= 0x17)
celsius_class = NV17_3D_CLASS;
else if (context_chipset(ctx) >= 0x11)
celsius_class = NV15_3D_CLASS;
else
celsius_class = NV10_3D_CLASS;
ret = nouveau_object_new(context_chan(ctx), 0xbeef0001, celsius_class,
NULL, 0, &nctx->hw.eng3d);
if (ret)
goto fail;
nv10_hwctx_init(ctx);
nv10_vbo_init(ctx);
nv10_swtnl_init(ctx);
return ctx;
fail:
nv10_context_destroy(ctx);
return NULL;
}
const struct nouveau_driver nv10_driver = {
.context_create = nv10_context_create,
.context_destroy = nv10_context_destroy,
.surface_copy = nv04_surface_copy,
.surface_fill = nv04_surface_fill,
.emit = (nouveau_state_func[]) {
nv10_emit_alpha_func,
nv10_emit_blend_color,
nv10_emit_blend_equation,
nv10_emit_blend_func,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_color_mask,
nv10_emit_color_material,
nv10_emit_cull_face,
nv10_emit_front_face,
nv10_emit_depth,
nv10_emit_dither,
nv10_emit_frag,
nv10_emit_framebuffer,
nv10_emit_fog,
nv10_emit_light_enable,
nv10_emit_light_model,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_line_stipple,
nv10_emit_line_mode,
nv10_emit_logic_opcode,
nv10_emit_material_ambient,
nouveau_emit_nothing,
nv10_emit_material_diffuse,
nouveau_emit_nothing,
nv10_emit_material_specular,
nouveau_emit_nothing,
nv10_emit_material_shininess,
nouveau_emit_nothing,
nv10_emit_modelview,
nv10_emit_point_mode,
nv10_emit_point_parameter,
nv10_emit_polygon_mode,
nv10_emit_polygon_offset,
nv10_emit_polygon_stipple,
nv10_emit_projection,
nv10_emit_render_mode,
nv10_emit_scissor,
nv10_emit_shade_model,
nv10_emit_stencil_func,
nv10_emit_stencil_mask,
nv10_emit_stencil_op,
nv10_emit_tex_env,
nv10_emit_tex_env,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_gen,
nv10_emit_tex_gen,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_mat,
nv10_emit_tex_mat,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_obj,
nv10_emit_tex_obj,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_viewport,
nv10_emit_zclear
},
.num_emit = NUM_NV10_STATE,
};