/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* _NEW_BUFFERS */
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL,
			    *stencil_mt = NULL,
			    *hiz_mt = NULL;

   /* Amount by which drawing should be offset in order to draw to the
    * appropriate miplevel/zoffset/cubeface.  We will extract these values
    * from depth_irb or stencil_irb once we determine which is present.
    */
   uint32_t draw_x = 0, draw_y = 0;

   /* Masks used to determine how much of the draw_x and draw_y offsets should
    * be performed using the fine adjustment of "depth coordinate offset X/Y"
    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
    * performed by changing the base addresses of the buffers.
    *
    * Since the HiZ, depth, and stencil buffers all use the same "depth
    * coordinate offset X/Y" values, we need to make sure that the coarse
    * adjustment will be possible to apply to all three buffers.  Since coarse
    * adjustment can only be applied in multiples of the tile size, we will OR
    * together the tile masks of all the buffers to determine which offsets to
    * perform as fine adjustments.
    */
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (drb)
      depth_mt = drb->mt;

   if (depth_mt) {
      hiz_mt = depth_mt->hiz_mt;

      intel_region_get_tile_masks(depth_mt->region,
                                  &tile_mask_x, &tile_mask_y, false);

      if (hiz_mt) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_region_get_tile_masks(hiz_mt->region,
                                     &hiz_tile_mask_x, &hiz_tile_mask_y,
                                     false);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (srb) {
      stencil_mt = srb->mt;
      if (stencil_mt->stencil_mt)
	 stencil_mt = stencil_mt->stencil_mt;

      assert(stencil_mt->format == MESA_FORMAT_S8);

      /* Stencil buffer uses 64x64 tiles. */
      tile_mask_x |= 63;
      tile_mask_y |= 63;
   }

   /* Gen7 doesn't support packed depth/stencil */
   assert(stencil_mt == NULL || depth_mt != stencil_mt);
   assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));

   intel_emit_depth_stall_flushes(intel);

   if (depth_mt == NULL) {
      uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
      uint32_t dw3 = 0;
      uint32_t tile_x = 0, tile_y = 0;

      if (stencil_mt == NULL) {
	 dw1 |= (BRW_SURFACE_NULL << 29);
      } else {
	 /* _NEW_STENCIL: enable stencil buffer writes */
	 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);

         draw_x = srb->draw_x;
         draw_y = srb->draw_y;
         tile_x = draw_x & tile_mask_x;
         tile_y = draw_y & tile_mask_y;

         /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
          * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
          * Coordinate Offset X/Y":
          *
          *   "The 3 LSBs of both offsets must be zero to ensure correct
          *   alignment"
          *
          * We have no guarantee that tile_x and tile_y are correctly aligned,
          * since they are determined by the mipmap layout, which is only
          * aligned to multiples of 4.
          *
          * So, to avoid hanging the GPU, just smash the low order 3 bits of
          * tile_x and tile_y to 0.  This is a temporary workaround until we
          * come up with a better solution.
          */
         tile_x &= ~7;
         tile_y &= ~7;

	 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
	 dw1 |= (BRW_SURFACE_2D << 29);
	 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
	       ((srb->Base.Base.Height + tile_y - 1) << 18);
      }

      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(dw1);
      OUT_BATCH(0);
      OUT_BATCH(dw3);
      OUT_BATCH(0);
      OUT_BATCH(tile_x | (tile_y << 16));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      struct intel_region *region = depth_mt->region;
      uint32_t tile_x, tile_y, offset;

      draw_x = drb->draw_x;
      draw_y = drb->draw_y;
      tile_x = draw_x & tile_mask_x;
      tile_y = draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      tile_x &= ~7;
      tile_y &= ~7;

      offset = intel_region_get_aligned_offset(region,
                                               draw_x & ~tile_mask_x,
                                               draw_y & ~tile_mask_y,
                                               false);

      assert(region->tiling == I915_TILING_Y);

      /* _NEW_DEPTH, _NEW_STENCIL */
      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
		(brw_depthbuffer_format(brw) << 18) |
		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
		((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
		((ctx->Depth.Mask != 0) << 28) |
		(BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		offset);
      OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
                (((drb->Base.Base.Height + tile_y) - 1) << 18));
      OUT_BATCH(0);
      OUT_BATCH(tile_x | (tile_y << 16));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (hiz_mt == NULL) {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz_mt->region,
                                         draw_x & ~tile_mask_x,
                                         (draw_y & ~tile_mask_y) / 2,
                                         false);
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
      OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
      OUT_RELOC(hiz_mt->region->bo,
                I915_GEM_DOMAIN_RENDER,
                I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   if (stencil_mt == NULL) {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;

      /* Note: We can't compute the stencil offset using
       * intel_region_get_aligned_offset(), because the stencil region claims
       * that the region is untiled; in fact it's W tiled.
       */
      uint32_t stencil_offset =
         (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
         (draw_x & ~tile_mask_x) * 64;

      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
      /* The stencil buffer has quirky pitch requirements.  From the Graphics
       * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
       * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
       * field "Surface Pitch":
       *
       *    The pitch must be set to 2x the value computed based on width, as
       *    the stencil buffer is stored with two rows interleaved.
       *
       * (Note that it is not 100% clear whether this intended to apply to
       * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
       * imply that it doesn't), however the comment appears on a "DevIVB+"
       * page (which would imply that it does).  Experiments with the hardware
       * indicate that it does.
       */
      OUT_BATCH(enabled |
	        (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
      OUT_RELOC(stencil_mt->region->bo,
	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		stencil_offset);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(3);
   OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
   OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
   OUT_BATCH(1);
   ADVANCE_BATCH();
}

/**
 * \see brw_context.state.depth_region
 */
const struct brw_tracked_state gen7_depthbuffer = {
   .dirty = {
      .mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL),
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};