/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
* ihevc_sao.c
*
* @brief
* Contains leaf level function definitions for sample adaptive offset process
*
* @author
* Srinivas T
*
* @par List of Functions:
* - ihevc_sao_band_offset_luma()
* - ihevc_sao_band_offset_chroma()
* - ihevc_sao_edge_offset_class0()
* - ihevc_sao_edge_offset_class0_chroma()
* - ihevc_sao_edge_offset_class1()
* - ihevc_sao_edge_offset_class1_chroma()
* - ihevc_sao_edge_offset_class2()
* - ihevc_sao_edge_offset_class2_chroma()
* - ihevc_sao_edge_offset_class3()
* - ihevc_sao_edge_offset_class3_chroma()
* @remarks
* None
*
*******************************************************************************
*/
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "ihevc_typedefs.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevc_func_selector.h"
#include "ihevc_defs.h"
#include "ihevc_structs.h"
#include "ihevc_sao.h"
/* Number of entries in the band lookup tables built by the band offset functions; */
/* the tables are indexed by (sample >> (bit depth - 5)) */
#define NUM_BAND_TABLE 32
/* Remaps the raw edge sum (2 + sign + sign, range 0..4) computed by the edge offset */
/* functions to the index used to look up the SAO offset. A raw sum of 2 maps to 0, */
/* which the edge offset functions treat as "leave the sample unchanged". */
const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
/**
* pu1_avail (a parameter of the edge offset functions in this file) is an array of
* flags - one for each neighboring block, specifying whether that block is available
* pu1_avail[0] - left
* pu1_avail[1] - right
* pu1_avail[2] - top
* pu1_avail[3] - bottom
* pu1_avail[4] - top-left
* pu1_avail[5] - top-right
* pu1_avail[6] - bottom-left
* pu1_avail[7] - bottom-right
*/
/**
 * Applies SAO band offset to a luma block in place.
 *
 * Before any sample is modified, the unfiltered last column of the block is
 * copied to pu1_src_left, the last entry of the incoming top row is copied to
 * pu1_src_top_left and the unfiltered last row is copied to pu1_src_top.
 *
 * @param pu1_src          first sample of the block; modified in place
 * @param src_strd         offset between vertically adjacent samples
 * @param pu1_src_left     [out] receives the unfiltered last column (ht entries)
 * @param pu1_src_top      [in/out] receives the unfiltered last row (wd entries)
 * @param pu1_src_top_left [out] receives pu1_src_top[wd - 1] as it was on entry
 * @param sao_band_pos     first of four consecutive bands (modulo 32) that map
 *                         to offset entries 1..4
 * @param pi1_sao_offset   offset table; entry 0 is added to samples that fall
 *                         outside the four selected bands
 * @param wd               block width in samples
 * @param ht               block height in samples
 */
void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
                                WORD32 src_strd,
                                UWORD8 *pu1_src_left,
                                UWORD8 *pu1_src_top,
                                UWORD8 *pu1_src_top_left,
                                WORD32 sao_band_pos,
                                WORD8 *pi1_sao_offset,
                                WORD32 wd,
                                WORD32 ht)
{
    WORD32 ai4_band_class[NUM_BAND_TABLE];
    WORD32 shift;
    WORD32 max_val;
    WORD32 band;
    WORD32 x, y;

    /* Back up the unfiltered right column for the block to the right */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + (wd - 1)];
    }

    /* The old top row's last entry becomes the next top-left sample */
    pu1_src_top_left[0] = pu1_src_top[wd - 1];

    /* Back up the unfiltered bottom row for the block below */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    shift = BIT_DEPTH_LUMA - 5;
    max_val = (1 << (shift + 5)) - 1;

    /* Class 0 everywhere, classes 1..4 for the four selected bands */
    memset(ai4_band_class, 0, sizeof(ai4_band_class));
    for(band = 0; band < 4; band++)
    {
        ai4_band_class[(band + sao_band_pos) & 31] = band + 1;
    }

    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            WORD32 pel = pu1_src[x];
            WORD32 band_class = ai4_band_class[pel >> shift];

            pu1_src[x] = CLIP3(pel + pi1_sao_offset[band_class], 0, max_val);
        }
        pu1_src += src_strd;
    }
}
/**
 * Applies SAO band offset to an interleaved chroma block in place.
 *
 * Even columns use the U band position and offsets, odd columns use the V
 * ones. 'wd' is the width of the interleaved block, not of a single colour
 * component.
 *
 * Before any sample is modified, the unfiltered last U/V pair of every row is
 * copied to pu1_src_left, the last pair of the incoming top row is copied to
 * pu1_src_top_left and the unfiltered last row is copied to pu1_src_top.
 *
 * @param pu1_src          first sample of the block; modified in place
 * @param src_strd         offset between vertically adjacent samples
 * @param pu1_src_left     [out] receives the unfiltered last two columns
 *                         (2 * ht entries, interleaved per row)
 * @param pu1_src_top      [in/out] receives the unfiltered last row (wd entries)
 * @param pu1_src_top_left [out] receives pu1_src_top[wd - 2] and
 *                         pu1_src_top[wd - 1] as they were on entry
 * @param sao_band_pos_u   first of the four offset bands (modulo 32) for U
 * @param sao_band_pos_v   first of the four offset bands (modulo 32) for V
 * @param pi1_sao_offset_u offset table for even columns
 * @param pi1_sao_offset_v offset table for odd columns
 * @param wd               width of the interleaved block in samples
 * @param ht               block height in rows
 */
void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  WORD32 sao_band_pos_u,
                                  WORD32 sao_band_pos_v,
                                  WORD8 *pi1_sao_offset_u,
                                  WORD8 *pi1_sao_offset_v,
                                  WORD32 wd,
                                  WORD32 ht)
{
    /* Index 0 holds the U (even column) data, index 1 the V (odd column) data */
    WORD32 ai4_band_class[2][NUM_BAND_TABLE];
    WORD8 *api1_offset[2];
    WORD32 shift;
    WORD32 max_val;
    WORD32 band;
    WORD32 x, y;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* Back up the unfiltered right-most U/V pair of each row */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + (wd - 2)];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + (wd - 1)];
    }

    /* The old top row's last pair becomes the next top-left pair */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    /* Back up the unfiltered bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    shift = BIT_DEPTH_CHROMA - 5;
    max_val = (1 << (shift + 5)) - 1;

    /* Class 0 everywhere, classes 1..4 for the four selected bands per plane */
    memset(ai4_band_class, 0, sizeof(ai4_band_class));
    for(band = 0; band < 4; band++)
    {
        ai4_band_class[0][(band + sao_band_pos_u) & 31] = band + 1;
        ai4_band_class[1][(band + sao_band_pos_v) & 31] = band + 1;
    }

    for(y = 0; y < ht; y++)
    {
        for(x = 0; x < wd; x++)
        {
            WORD32 plane = x & 1;
            WORD32 pel = pu1_src[x];
            WORD32 band_class = ai4_band_class[plane][pel >> shift];

            pu1_src[x] = CLIP3(pel + api1_offset[plane][band_class], 0, max_val);
        }
        pu1_src += src_strd;
    }
}
/**
 * SAO edge offset class 0 (horizontal) for a luma block, applied in place.
 *
 * Every sample is compared with its left and right neighbour; the two signs
 * are remapped through gi4_ihevc_table_edge_idx to pick an entry of
 * pi1_sao_offset. A remapped index of 0 leaves the sample untouched.
 *
 * @param pu1_src           first sample of the block; modified in place
 * @param src_strd          offset between vertically adjacent samples
 * @param pu1_src_left      [in] sample left of each row, [out] unfiltered
 *                          last column of this block (ht entries)
 * @param pu1_src_top       [in/out] receives the unfiltered last row
 * @param pu1_src_top_left  [out] receives pu1_src_top[wd - 1] as it was on entry
 * @param pu1_src_top_right unused
 * @param pu1_src_bot_left  unused
 * @param pu1_avail         availability flags; [0] (left) and [1] (right)
 *                          disable filtering of the first / last column
 * @param pi1_sao_offset    offsets indexed by the remapped edge index
 * @param wd                block width in samples
 * @param ht                block height in samples
 */
void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_last_col[MAX_CTB_SIZE];
    UWORD8 *pu1_row;
    WORD32 max_val;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    max_val = (1 << BIT_DEPTH_LUMA) - 1;

    /* 0xFF keeps the edge index of a column, 0 forces "no offset" */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Top-left and top can be written right away; the left column is only
     * staged because the filter below still reads the incoming values */
    *pu1_src_top_left = pu1_src_top[wd - 1];

    for(y = 0; y < ht; y++)
    {
        au1_last_col[y] = pu1_src[y * src_strd + wd - 1];
    }

    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Columns whose horizontal neighbour is unavailable are not filtered */
    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }

    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    pu1_row = pu1_src;
    for(y = 0; y < ht; y++)
    {
        WORD8 sign_left = SIGN(pu1_row[0] - pu1_src_left[y]);

        for(x = 0; x < wd; x++)
        {
            WORD32 pel = pu1_row[x];
            WORD8 sign_right = SIGN(pel - pu1_row[x + 1]);
            WORD32 edge_idx = gi4_ihevc_table_edge_idx[2 + sign_left + sign_right] & au1_col_mask[x];

            /* Taken from the unfiltered sample, so the next column sees the
             * original value of its left neighbour */
            sign_left = -sign_right;

            if(0 != edge_idx)
            {
                pu1_row[x] = CLIP3(pel + pi1_sao_offset[edge_idx], 0, max_val);
            }
        }
        pu1_row += src_strd;
    }

    /* Publish the staged unfiltered right column */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_last_col[y];
    }
}
/**
 * SAO edge offset class 0 (horizontal) for an interleaved chroma block,
 * applied in place.
 *
 * Even columns are filtered with the U offsets and odd columns with the V
 * offsets; the horizontal neighbours of a sample are two columns away.
 * 'wd' is the width of the interleaved block, not of a single colour
 * component.
 *
 * @param pu1_src           first sample of the block; modified in place
 * @param src_strd          offset between vertically adjacent samples
 * @param pu1_src_left      [in] U/V pair left of each row, [out] unfiltered
 *                          last pair of each row (2 * ht entries)
 * @param pu1_src_top       [in/out] receives the unfiltered last row
 * @param pu1_src_top_left  [out] receives the last pair of the incoming top row
 * @param pu1_src_top_right unused
 * @param pu1_src_bot_left  unused
 * @param pu1_avail         availability flags; [0] (left) and [1] (right)
 *                          disable filtering of the first / last U/V pair
 * @param pi1_sao_offset_u  offsets for even columns
 * @param pi1_sao_offset_v  offsets for odd columns
 * @param wd                width of the interleaved block in samples
 * @param ht                block height in rows
 */
void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    /* One mask entry per U/V pair */
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_last_cols[2 * MAX_CTB_SIZE];
    /* Index 0 is the U (even column) state, index 1 the V (odd column) state */
    WORD8 ai1_sign_left[2];
    WORD8 *api1_offset[2];
    UWORD8 *pu1_row;
    WORD32 max_val;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;
    max_val = (1 << BIT_DEPTH_CHROMA) - 1;

    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Top-left and top are written right away; the left columns are staged
     * because the filter below still reads the incoming values */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    for(y = 0; y < ht; y++)
    {
        au1_last_cols[2 * y] = pu1_src[y * src_strd + wd - 2];
        au1_last_cols[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }

    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Pairs whose horizontal neighbour is unavailable are not filtered */
    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }

    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    pu1_row = pu1_src;
    for(y = 0; y < ht; y++)
    {
        ai1_sign_left[0] = SIGN(pu1_row[0] - pu1_src_left[2 * y]);
        ai1_sign_left[1] = SIGN(pu1_row[1] - pu1_src_left[2 * y + 1]);

        for(x = 0; x < wd; x++)
        {
            WORD32 plane = x & 1;
            WORD32 pel = pu1_row[x];
            WORD8 sign_right = SIGN(pel - pu1_row[x + 2]);
            WORD32 edge_idx = 2 + ai1_sign_left[plane] + sign_right;

            /* Remember the sign against the unfiltered sample for the next
             * sample of the same plane */
            ai1_sign_left[plane] = -sign_right;

            edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_pair_mask[x >> 1];
            if(0 != edge_idx)
            {
                pu1_row[x] = CLIP3(pel + api1_offset[plane][edge_idx], 0, max_val);
            }
        }
        pu1_row += src_strd;
    }

    /* Publish the staged unfiltered right-most pairs */
    for(y = 0; y < 2 * ht; y++)
    {
        pu1_src_left[y] = au1_last_cols[y];
    }
}
/**
 * SAO edge offset class 1 (vertical) for a luma block, applied in place.
 *
 * Every sample is compared with the sample above and the sample below; the
 * two signs are remapped through gi4_ihevc_table_edge_idx to pick an entry of
 * pi1_sao_offset. A remapped index of 0 leaves the sample untouched.
 *
 * Only the top and bottom availability flags matter for this class: no
 * left or right neighbour is read, so no per-column mask is kept.
 *
 * @param pu1_src           first sample of the block; modified in place
 * @param src_strd          offset between vertically adjacent samples
 * @param pu1_src_left      [out] receives the unfiltered last column (ht entries)
 * @param pu1_src_top       [in] row above the block, [out] unfiltered last
 *                          row of this block (wd entries)
 * @param pu1_src_top_left  [out] receives pu1_src_top[wd - 1] as it was on entry
 * @param pu1_src_top_right unused
 * @param pu1_src_bot_left  unused
 * @param pu1_avail         availability flags; [2] (top) and [3] (bottom)
 *                          exclude the first / last row from filtering
 * @param pi1_sao_offset    offsets indexed by the remapped edge index
 * @param wd                block width in samples
 * @param ht                block height in samples
 */
void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    WORD32 max_val;
    WORD32 row, col;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    max_val = (1 << BIT_DEPTH_LUMA) - 1;

    /* Top-left and left are written right away; the new top row is staged
     * because the incoming pu1_src_top is still read below */
    *pu1_src_top_left = pu1_src_top[wd - 1];

    for(row = 0; row < ht; row++)
    {
        pu1_src_left[row] = pu1_src[row * src_strd + wd - 1];
    }

    for(col = 0; col < wd; col++)
    {
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    }

    if(0 == pu1_avail[2])
    {
        /* No row above: skip the first row and seed the "up" signs from it */
        pu1_src += src_strd;
        ht--;
        for(col = 0; col < wd; col++)
        {
            ai1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
        }
    }
    else
    {
        for(col = 0; col < wd; col++)
        {
            ai1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
        }
    }

    /* No row below: the last row is left unfiltered */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            WORD32 pel = pu1_src[col];
            WORD8 sign_down = SIGN(pel - pu1_src[col + src_strd]);
            WORD32 edge_idx = gi4_ihevc_table_edge_idx[2 + ai1_sign_up[col] + sign_down];

            /* Computed from the unfiltered sample, so the next row compares
             * against the original value of the sample above it */
            ai1_sign_up[col] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[col] = CLIP3(pel + pi1_sao_offset[edge_idx], 0, max_val);
            }
        }
        pu1_src += src_strd;
    }

    /* Publish the staged unfiltered bottom row */
    for(col = 0; col < wd; col++)
    {
        pu1_src_top[col] = au1_src_top_tmp[col];
    }
}
/**
 * SAO edge offset class 1 (vertical) for an interleaved chroma block, applied
 * in place.
 *
 * Even columns are filtered with the U offsets and odd columns with the V
 * offsets. 'wd' is the width of the interleaved block, not of a single colour
 * component.
 *
 * Only the top and bottom availability flags matter for this class: no
 * left or right neighbour is read, so no per-column mask is kept.
 *
 * @param pu1_src           first sample of the block; modified in place
 * @param src_strd          offset between vertically adjacent samples
 * @param pu1_src_left      [out] receives the unfiltered last U/V pair of
 *                          each row (2 * ht entries)
 * @param pu1_src_top       [in] row above the block, [out] unfiltered last
 *                          row of this block (wd entries)
 * @param pu1_src_top_left  [out] receives the last pair of the incoming top row
 * @param pu1_src_top_right unused
 * @param pu1_src_bot_left  unused
 * @param pu1_avail         availability flags; [2] (top) and [3] (bottom)
 *                          exclude the first / last row from filtering
 * @param pi1_sao_offset_u  offsets for even columns
 * @param pi1_sao_offset_v  offsets for odd columns
 * @param wd                width of the interleaved block in samples
 * @param ht                block height in rows
 */
void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    WORD8 ai1_sign_up[MAX_CTB_SIZE];
    /* Index 0 holds the U (even column) offsets, index 1 the V (odd column) ones */
    WORD8 *api1_offset[2];
    WORD32 max_val;
    WORD32 row, col;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;
    max_val = (1 << BIT_DEPTH_CHROMA) - 1;

    /* Top-left and left are written right away; the new top row is staged
     * because the incoming pu1_src_top is still read below */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];

    for(row = 0; row < ht; row++)
    {
        pu1_src_left[2 * row] = pu1_src[row * src_strd + wd - 2];
        pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
    }

    for(col = 0; col < wd; col++)
    {
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    }

    if(0 == pu1_avail[2])
    {
        /* No row above: skip the first row and seed the "up" signs from it */
        pu1_src += src_strd;
        ht--;
        for(col = 0; col < wd; col++)
        {
            ai1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
        }
    }
    else
    {
        for(col = 0; col < wd; col++)
        {
            ai1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
        }
    }

    /* No row below: the last row is left unfiltered */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            WORD32 pel = pu1_src[col];
            WORD8 sign_down = SIGN(pel - pu1_src[col + src_strd]);
            WORD32 edge_idx = gi4_ihevc_table_edge_idx[2 + ai1_sign_up[col] + sign_down];

            /* Computed from the unfiltered sample, so the next row compares
             * against the original value of the sample above it */
            ai1_sign_up[col] = -sign_down;

            if(0 != edge_idx)
            {
                pu1_src[col] = CLIP3(pel + api1_offset[col & 1][edge_idx], 0, max_val);
            }
        }
        pu1_src += src_strd;
    }

    /* Publish the staged unfiltered bottom row */
    for(col = 0; col < wd; col++)
    {
        pu1_src_top[col] = au1_src_top_tmp[col];
    }
}
/**
* 135 degree filtering: SAO edge offset class 2 for a luma block, applied in place.
*
* Each sample is compared with its top-left neighbour (col - 1, row - 1) and its
* bottom-right neighbour (col + 1, row + 1). The two signs are remapped through
* gi4_ihevc_table_edge_idx to select an entry of pi1_sao_offset; a remapped index
* of 0 leaves the sample unchanged.
*
* pu1_src           - first sample of the block; filtered in place
* src_strd          - offset between vertically adjacent samples of pu1_src
* pu1_src_left      - in: column left of the block; out: unfiltered last column of this block
* pu1_src_top       - in: row above the block; out: unfiltered last row of this block
* pu1_src_top_left  - in: sample above-left of the block; out: pu1_src_top[wd - 1] as it was on entry
* pu1_src_top_right - unused
* pu1_src_bot_left  - unused
* pu1_avail         - neighbour availability flags; entries 0, 1, 2, 3, 4 and 7 are read
* pi1_sao_offset    - offsets indexed by the remapped edge index
* wd, ht            - block width and height; the local buffers hold MAX_CTB_SIZE entries,
*                     so both are presumably <= MAX_CTB_SIZE (TODO confirm against caller)
*/
void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
WORD32 src_strd,
UWORD8 *pu1_src_left,
UWORD8 *pu1_src_top,
UWORD8 *pu1_src_top_left,
UWORD8 *pu1_src_top_right,
UWORD8 *pu1_src_bot_left,
UWORD8 *pu1_avail,
WORD8 *pi1_sao_offset,
WORD32 wd,
WORD32 ht)
{
WORD32 row, col;
UWORD8 au1_mask[MAX_CTB_SIZE];
UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
UWORD8 u1_src_top_left_tmp;
/* One extra entry: the inner loop stores the sign for column col + 1 of the next row */
WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
WORD8 u1_sign_down;
WORD8 *pu1_sign_up;
WORD8 *pu1_sign_up_tmp;
UWORD8 *pu1_src_left_cpy;
WORD32 bit_depth;
/* Outputs for the two corner samples, computed up front and stored after the main loop */
UWORD8 u1_pos_0_0_tmp;
UWORD8 u1_pos_wd_ht_tmp;
UNUSED(pu1_src_top_right);
UNUSED(pu1_src_bot_left);
bit_depth = BIT_DEPTH_LUMA;
pu1_sign_up = au1_sign_up;
pu1_sign_up_tmp = au1_sign_up_tmp;
pu1_src_left_cpy = pu1_src_left;
/* Initialize the mask values: 0xFF keeps a column's edge index, 0 forces "no offset" */
memset(au1_mask, 0xFF, MAX_CTB_SIZE);
/* Stage the new left, top and top-left values from unfiltered data. They are written */
/* to the output arrays only at the end, because the incoming arrays are still read below */
u1_src_top_left_tmp = pu1_src_top[wd - 1];
for(row = 0; row < ht; row++)
{
au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
}
for(col = 0; col < wd; col++)
{
au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
}
/* If top-left is available, process sample (0, 0) separately using pu1_src_top_left; */
/* otherwise it keeps its unfiltered value */
if(0 != pu1_avail[4])
{
WORD32 edge_idx;
edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
SIGN(pu1_src[0] - pu1_src[1 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_0_tmp = pu1_src[0];
}
}
else
{
u1_pos_0_0_tmp = pu1_src[0];
}
/* If bottom-right is available, process sample (wd - 1, ht - 1) separately; */
/* otherwise it keeps its unfiltered value */
if(0 != pu1_avail[7])
{
WORD32 edge_idx;
edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 1 - src_strd]) +
SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
}
}
else
{
u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
}
/* If Left is not available */
if(0 == pu1_avail[0])
{
au1_mask[0] = 0;
}
/* If Top is not available: skip the first row and seed the "up" signs for */
/* columns 1..wd-1 from it; otherwise seed them from the incoming top row */
if(0 == pu1_avail[2])
{
pu1_src += src_strd;
ht--;
pu1_src_left_cpy += 1;
for(col = 1; col < wd; col++)
{
pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
}
}
else
{
for(col = 1; col < wd; col++)
{
pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
}
}
/* If Right is not available */
if(0 == pu1_avail[1])
{
au1_mask[wd - 1] = 0;
}
/* If Bottom is not available: the last row is not filtered */
if(0 == pu1_avail[3])
{
ht--;
}
/* Filter the remaining rows in place, top to bottom */
{
for(row = 0; row < ht; row++)
{
/* Top-left neighbour of column 0 comes from the left column, one row up. */
/* NOTE(review): when top is available, row 0 reads pu1_src_left[-1]; the result only */
/* affects sample (0, 0), which is overwritten with u1_pos_0_0_tmp after the loop. */
/* Confirm the caller's buffer layout makes that read valid. */
pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[row - 1]);
for(col = 0; col < wd; col++)
{
WORD32 edge_idx;
u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
/* The negated sign is the "up" sign of the bottom-right neighbour in the next row; */
/* it is taken before pu1_src[col] is modified */
pu1_sign_up_tmp[col + 1] = -u1_sign_down;
edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
if(0 != edge_idx)
{
pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
}
/* Swapping pu1_sign_up_tmp and pu1_sign_up: the signs stored for the next row become current */
{
WORD8 *pu1_swap_tmp = pu1_sign_up;
pu1_sign_up = pu1_sign_up_tmp;
pu1_sign_up_tmp = pu1_swap_tmp;
}
pu1_src += src_strd;
}
/* pu1_src has advanced past the filtered rows; step back to store the two */
/* pre-computed corner samples at (0, 0) and (wd - 1, ht - 1) of the original block */
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp;
pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp;
}
/* Restore the full block height before copying out the staged left column */
if(0 == pu1_avail[2])
ht++;
if(0 == pu1_avail[3])
ht++;
/* Publish the staged unfiltered top-left, left and top values */
*pu1_src_top_left = u1_src_top_left_tmp;
for(row = 0; row < ht; row++)
{
pu1_src_left[row] = au1_src_left_tmp[row];
}
for(col = 0; col < wd; col++)
{
pu1_src_top[col] = au1_src_top_tmp[col];
}
}
/**
* 135 degree filtering: SAO edge offset class 2 for an interleaved chroma block,
* applied in place.
*
* Even columns are filtered with pi1_sao_offset_u and odd columns with
* pi1_sao_offset_v. Each sample is compared with the sample two columns left / one row
* up and the sample two columns right / one row down. 'wd' is the width of the
* interleaved block, not of a single colour component.
*
* pu1_src           - first sample of the block; filtered in place
* src_strd          - offset between vertically adjacent samples of pu1_src
* pu1_src_left      - in: interleaved pair left of each row; out: unfiltered last pair of each row
* pu1_src_top       - in: row above the block; out: unfiltered last row of this block
* pu1_src_top_left  - in: pair above-left of the block; out: last pair of the incoming top row
* pu1_src_top_right - unused
* pu1_src_bot_left  - unused
* pu1_avail         - neighbour availability flags; entries 0, 1, 2, 3, 4 and 7 are read
* pi1_sao_offset_u  - offsets for even columns, indexed by the remapped edge index
* pi1_sao_offset_v  - offsets for odd columns, indexed by the remapped edge index
* wd, ht            - interleaved width and height; local buffers are sized from MAX_CTB_SIZE,
*                     so wd is presumably <= MAX_CTB_SIZE (TODO confirm against caller)
*/
void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
WORD32 src_strd,
UWORD8 *pu1_src_left,
UWORD8 *pu1_src_top,
UWORD8 *pu1_src_top_left,
UWORD8 *pu1_src_top_right,
UWORD8 *pu1_src_bot_left,
UWORD8 *pu1_avail,
WORD8 *pi1_sao_offset_u,
WORD8 *pi1_sao_offset_v,
WORD32 wd,
WORD32 ht)
{
WORD32 row, col;
/* One mask entry per interleaved pair (indexed with col >> 1) */
UWORD8 au1_mask[MAX_CTB_SIZE];
UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
UWORD8 au1_src_top_left_tmp[2];
/* Two extra entries: the inner loop stores the sign for column col + 2 of the next row */
WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
WORD8 u1_sign_down;
WORD8 *pu1_sign_up;
WORD8 *pu1_sign_up_tmp;
UWORD8 *pu1_src_left_cpy;
WORD32 bit_depth;
/* Outputs for the two corner pairs, computed up front and stored after the main loop */
UWORD8 u1_pos_0_0_tmp_u;
UWORD8 u1_pos_0_0_tmp_v;
UWORD8 u1_pos_wd_ht_tmp_u;
UWORD8 u1_pos_wd_ht_tmp_v;
UNUSED(pu1_src_top_right);
UNUSED(pu1_src_bot_left);
bit_depth = BIT_DEPTH_CHROMA;
pu1_sign_up = au1_sign_up;
pu1_sign_up_tmp = au1_sign_up_tmp;
pu1_src_left_cpy = pu1_src_left;
/* Initialize the mask values: 0xFF keeps a pair's edge index, 0 forces "no offset" */
memset(au1_mask, 0xFF, MAX_CTB_SIZE);
/* Stage the new left, top and top-left values from unfiltered data. They are written */
/* to the output arrays only at the end, because the incoming arrays are still read below */
au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
for(row = 0; row < ht; row++)
{
au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
}
for(col = 0; col < wd; col++)
{
au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
}
/* If top-left is available, process the first pair of row 0 separately using */
/* pu1_src_top_left; otherwise it keeps its unfiltered values */
if(0 != pu1_avail[4])
{
WORD32 edge_idx;
/* U */
edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
SIGN(pu1_src[0] - pu1_src[2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_0_tmp_u = pu1_src[0];
}
/* V */
edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_0_tmp_v = pu1_src[1];
}
}
else
{
u1_pos_0_0_tmp_u = pu1_src[0];
u1_pos_0_0_tmp_v = pu1_src[1];
}
/* If bottom-right is available, process the last pair of the last row separately; */
/* otherwise it keeps its unfiltered values */
if(0 != pu1_avail[7])
{
WORD32 edge_idx;
/* U */
edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
}
/* V */
edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
}
}
else
{
u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
}
/* If Left is not available */
if(0 == pu1_avail[0])
{
au1_mask[0] = 0;
}
/* If Top is not available: skip the first row and seed the "up" signs for */
/* columns 2..wd-1 from it; otherwise seed them from the incoming top row */
if(0 == pu1_avail[2])
{
pu1_src += src_strd;
pu1_src_left_cpy += 2;
ht--;
for(col = 2; col < wd; col++)
{
pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
}
}
else
{
for(col = 2; col < wd; col++)
{
pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
}
}
/* If Right is not available */
if(0 == pu1_avail[1])
{
au1_mask[(wd - 1) >> 1] = 0;
}
/* If Bottom is not available: the last row is not filtered */
if(0 == pu1_avail[3])
{
ht--;
}
/* Filter the remaining rows in place, top to bottom */
{
for(row = 0; row < ht; row++)
{
/* Top-left neighbours of the first pair come from the left pair, one row up. */
/* NOTE(review): when top is available, row 0 reads pu1_src_left[-2] and [-1]; the */
/* result only affects the first pair of row 0, which is overwritten with */
/* u1_pos_0_0_tmp_u/v after the loop. Confirm the caller's buffer makes that read valid. */
pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
for(col = 0; col < wd; col++)
{
WORD32 edge_idx;
WORD8 *pi1_sao_offset;
/* Even columns are U, odd columns are V */
pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
/* The negated sign is the "up" sign of the bottom-right neighbour in the next row; */
/* it is taken before pu1_src[col] is modified */
pu1_sign_up_tmp[col + 2] = -u1_sign_down;
edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
if(0 != edge_idx)
{
pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
}
/* Swapping pu1_sign_up_tmp and pu1_sign_up: the signs stored for the next row become current */
{
WORD8 *pu1_swap_tmp = pu1_sign_up;
pu1_sign_up = pu1_sign_up_tmp;
pu1_sign_up_tmp = pu1_swap_tmp;
}
pu1_src += src_strd;
}
/* pu1_src has advanced past the filtered rows; step back to store the pre-computed */
/* corner pairs at the start of row 0 and the end of the last row of the original block */
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
}
/* Restore the full block height before copying out the staged left pairs */
if(0 == pu1_avail[2])
ht++;
if(0 == pu1_avail[3])
ht++;
/* Publish the staged unfiltered top-left, left and top values */
pu1_src_top_left[0] = au1_src_top_left_tmp[0];
pu1_src_top_left[1] = au1_src_top_left_tmp[1];
for(row = 0; row < 2 * ht; row++)
{
pu1_src_left[row] = au1_src_left_tmp[row];
}
for(col = 0; col < wd; col++)
{
pu1_src_top[col] = au1_src_top_tmp[col];
}
}
/**
* 45 degree filtering: SAO edge offset class 3 for a luma block, applied in place.
*
* Each sample is compared with its top-right neighbour (col + 1, row - 1) and its
* bottom-left neighbour (col - 1, row + 1). The two signs are remapped through
* gi4_ihevc_table_edge_idx to select an entry of pi1_sao_offset; a remapped index
* of 0 leaves the sample unchanged.
*
* pu1_src           - first sample of the block; filtered in place
* src_strd          - offset between vertically adjacent samples of pu1_src
* pu1_src_left      - in: column left of the block; out: unfiltered last column of this block
* pu1_src_top       - in: row above the block; out: unfiltered last row of this block
* pu1_src_top_left  - out: pu1_src_top[wd - 1] as it was on entry
* pu1_src_top_right - in: sample above-right of the block (read only if pu1_avail[5] is set)
* pu1_src_bot_left  - in: sample below-left of the block (read only if pu1_avail[6] is set)
* pu1_avail         - neighbour availability flags; entries 0, 1, 2, 3, 5 and 6 are read
* pi1_sao_offset    - offsets indexed by the remapped edge index
* wd, ht            - block width and height; the local buffers hold MAX_CTB_SIZE entries,
*                     so both are presumably <= MAX_CTB_SIZE (TODO confirm against caller)
*/
void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
WORD32 src_strd,
UWORD8 *pu1_src_left,
UWORD8 *pu1_src_top,
UWORD8 *pu1_src_top_left,
UWORD8 *pu1_src_top_right,
UWORD8 *pu1_src_bot_left,
UWORD8 *pu1_avail,
WORD8 *pi1_sao_offset,
WORD32 wd,
WORD32 ht)
{
WORD32 row, col;
UWORD8 au1_mask[MAX_CTB_SIZE];
UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
UWORD8 u1_src_top_left_tmp;
WORD8 au1_sign_up[MAX_CTB_SIZE];
UWORD8 *pu1_src_left_cpy;
WORD8 u1_sign_down;
WORD32 bit_depth;
/* Outputs for the two corner samples, computed up front and stored after the main loop */
UWORD8 u1_pos_0_ht_tmp;
UWORD8 u1_pos_wd_0_tmp;
bit_depth = BIT_DEPTH_LUMA;
pu1_src_left_cpy = pu1_src_left;
/* Initialize the mask values: 0xFF keeps a column's edge index, 0 forces "no offset" */
memset(au1_mask, 0xFF, MAX_CTB_SIZE);
/* Stage the new left, top and top-left values from unfiltered data. They are written */
/* to the output arrays only at the end, because the incoming arrays are still read below */
u1_src_top_left_tmp = pu1_src_top[wd - 1];
for(row = 0; row < ht; row++)
{
au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
}
for(col = 0; col < wd; col++)
{
au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
}
/* If top-right is available, process sample (wd - 1, 0) separately using */
/* pu1_src_top_right; otherwise it keeps its unfiltered value */
if(0 != pu1_avail[5])
{
WORD32 edge_idx;
edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_0_tmp = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_0_tmp = pu1_src[wd - 1];
}
}
else
{
u1_pos_wd_0_tmp = pu1_src[wd - 1];
}
/* If bottom-left is available, process sample (0, ht - 1) separately using */
/* pu1_src_bot_left; otherwise it keeps its unfiltered value */
if(0 != pu1_avail[6])
{
WORD32 edge_idx;
edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) +
SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
}
}
else
{
u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
}
/* If Left is not available */
if(0 == pu1_avail[0])
{
au1_mask[0] = 0;
}
/* If Top is not available: skip the first row and seed the "up" signs for */
/* columns 0..wd-2 from it; otherwise seed them from the incoming top row */
if(0 == pu1_avail[2])
{
pu1_src += src_strd;
ht--;
pu1_src_left_cpy += 1;
for(col = 0; col < wd - 1; col++)
{
au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 1 - src_strd]);
}
}
else
{
for(col = 0; col < wd - 1; col++)
{
au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 1]);
}
}
/* If Right is not available */
if(0 == pu1_avail[1])
{
au1_mask[wd - 1] = 0;
}
/* If Bottom is not available: the last row is not filtered */
if(0 == pu1_avail[3])
{
ht--;
}
/* Filter the remaining rows in place, top to bottom */
{
for(row = 0; row < ht; row++)
{
/* The top-right neighbour of the last column lies outside the block (column wd of */
/* the previous row), so its sign is recomputed from pu1_src for every row */
au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);
for(col = 0; col < wd; col++)
{
WORD32 edge_idx;
/* Bottom-left neighbour: taken from the left column for col 0, else from the next row. */
/* NOTE(review): when bottom is available, the last row reads pu1_src_left[ht]; the */
/* result only affects sample (0, ht - 1), which is overwritten with u1_pos_0_ht_tmp */
/* after the loop. Confirm the caller's buffer makes that read valid. */
u1_sign_down = SIGN(pu1_src[col] - ((col == 0) ? pu1_src_left_cpy[row + 1] :
pu1_src[col - 1 + src_strd]));
edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
/* The negated sign is the "up" sign of the bottom-left neighbour in the next row; */
/* entry col - 1 has already been consumed for the current row */
if(col > 0)
au1_sign_up[col - 1] = -u1_sign_down;
edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
if(0 != edge_idx)
{
pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
}
pu1_src += src_strd;
}
/* pu1_src has advanced past the filtered rows; step back to store the two */
/* pre-computed corner samples at (wd - 1, 0) and (0, ht - 1) of the original block */
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp;
pu1_src[(pu1_avail[3] ? (-src_strd) : 0)] = u1_pos_0_ht_tmp;
}
/* Restore the full block height before copying out the staged left column */
if(0 == pu1_avail[2])
ht++;
if(0 == pu1_avail[3])
ht++;
/* Publish the staged unfiltered top-left, left and top values */
*pu1_src_top_left = u1_src_top_left_tmp;
for(row = 0; row < ht; row++)
{
pu1_src_left[row] = au1_src_left_tmp[row];
}
for(col = 0; col < wd; col++)
{
pu1_src_top[col] = au1_src_top_tmp[col];
}
}
/**
* 45 degree filtering: SAO edge offset class 3 for an interleaved chroma block,
* applied in place.
*
* Even columns are filtered with pi1_sao_offset_u and odd columns with
* pi1_sao_offset_v. Each sample is compared with the sample two columns right / one
* row up and the sample two columns left / one row down. 'wd' is used as the width of
* the interleaved block (columns are addressed in pairs).
*
* pu1_src           - first sample of the block; filtered in place
* src_strd          - offset between vertically adjacent samples of pu1_src
* pu1_src_left      - in: interleaved pair left of each row; out: unfiltered last pair of each row
* pu1_src_top       - in: row above the block; out: unfiltered last row of this block
* pu1_src_top_left  - out: last pair of the incoming top row
* pu1_src_top_right - in: pair above-right of the block (read only if pu1_avail[5] is set)
* pu1_src_bot_left  - in: pair below-left of the block (read only if pu1_avail[6] is set)
* pu1_avail         - neighbour availability flags; entries 0, 1, 2, 3, 5 and 6 are read
* pi1_sao_offset_u  - offsets for even columns, indexed by the remapped edge index
* pi1_sao_offset_v  - offsets for odd columns, indexed by the remapped edge index
* wd, ht            - interleaved width and height; local buffers are sized from MAX_CTB_SIZE,
*                     so wd is presumably <= MAX_CTB_SIZE (TODO confirm against caller)
*/
void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
WORD32 src_strd,
UWORD8 *pu1_src_left,
UWORD8 *pu1_src_top,
UWORD8 *pu1_src_top_left,
UWORD8 *pu1_src_top_right,
UWORD8 *pu1_src_bot_left,
UWORD8 *pu1_avail,
WORD8 *pi1_sao_offset_u,
WORD8 *pi1_sao_offset_v,
WORD32 wd,
WORD32 ht)
{
WORD32 row, col;
/* One mask entry per interleaved pair (indexed with col >> 1) */
UWORD8 au1_mask[MAX_CTB_SIZE];
UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
UWORD8 au1_src_top_left_tmp[2];
WORD8 au1_sign_up[MAX_CTB_SIZE];
UWORD8 *pu1_src_left_cpy;
WORD8 u1_sign_down;
WORD32 bit_depth;
/* Outputs for the two corner pairs, computed up front and stored after the main loop */
UWORD8 u1_pos_wd_0_tmp_u;
UWORD8 u1_pos_wd_0_tmp_v;
UWORD8 u1_pos_0_ht_tmp_u;
UWORD8 u1_pos_0_ht_tmp_v;
bit_depth = BIT_DEPTH_CHROMA;
pu1_src_left_cpy = pu1_src_left;
/* Initialize the mask values: 0xFF keeps a pair's edge index, 0 forces "no offset" */
memset(au1_mask, 0xFF, MAX_CTB_SIZE);
/* Stage the new left, top and top-left values from unfiltered data. They are written */
/* to the output arrays only at the end, because the incoming arrays are still read below */
au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
for(row = 0; row < ht; row++)
{
au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
}
for(col = 0; col < wd; col++)
{
au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
}
/* If top-right is available, process the last pair of row 0 separately using */
/* pu1_src_top_right; otherwise it keeps its unfiltered values */
if(0 != pu1_avail[5])
{
WORD32 edge_idx;
/* U */
edge_idx = 2 + SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +
SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
}
/* V */
edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +
SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_wd_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
}
}
else
{
u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
}
/* If bottom-left is available, process the first pair of the last row separately */
/* using pu1_src_bot_left; otherwise it keeps its unfiltered values */
if(0 != pu1_avail[6])
{
WORD32 edge_idx;
/* U */
edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) +
SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_ht_tmp_u = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
}
/* V */
edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) +
SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]);
edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
if(0 != edge_idx)
{
u1_pos_0_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd + 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
}
else
{
u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
}
}
else
{
u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
}
/* If Left is not available */
if(0 == pu1_avail[0])
{
au1_mask[0] = 0;
}
/* If Top is not available: skip the first row and seed the "up" signs for */
/* columns 0..wd-3 from it; otherwise seed them from the incoming top row */
if(0 == pu1_avail[2])
{
pu1_src += src_strd;
ht--;
pu1_src_left_cpy += 2;
for(col = 0; col < wd - 2; col++)
{
au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 2 - src_strd]);
}
}
else
{
for(col = 0; col < wd - 2; col++)
{
au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 2]);
}
}
/* If Right is not available */
if(0 == pu1_avail[1])
{
au1_mask[(wd - 1) >> 1] = 0;
}
/* If Bottom is not available: the last row is not filtered */
if(0 == pu1_avail[3])
{
ht--;
}
/* Filter the remaining rows in place, top to bottom */
{
for(row = 0; row < ht; row++)
{
/* The top-right neighbours of the last pair lie outside the block (columns wd and */
/* wd + 1 of the previous row), so their signs are recomputed from pu1_src for every row */
au1_sign_up[wd - 2] = SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 + 2 - src_strd]);
au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 2 - src_strd]);
for(col = 0; col < wd; col++)
{
WORD32 edge_idx;
WORD8 *pi1_sao_offset;
/* Even columns are U, odd columns are V */
pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
/* Bottom-left neighbour: taken from the left pair for the first pair, else from the next row. */
/* NOTE(review): when bottom is available, the last row reads pu1_src_left[2 * ht] and */
/* [2 * ht + 1]; the result only affects the first pair of the last row, which is */
/* overwritten with u1_pos_0_ht_tmp_u/v after the loop. Confirm the caller's buffer */
/* makes that read valid. */
u1_sign_down = SIGN(pu1_src[col] - ((col < 2) ? pu1_src_left_cpy[2 * (row + 1) + col] :
pu1_src[col - 2 + src_strd]));
edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
/* The negated sign is the "up" sign of the bottom-left neighbour in the next row; */
/* entry col - 2 has already been consumed for the current row */
if(col > 1)
au1_sign_up[col - 2] = -u1_sign_down;
edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
if(0 != edge_idx)
{
pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
}
}
pu1_src += src_strd;
}
/* pu1_src has advanced past the filtered rows; step back to store the pre-computed */
/* corner pairs at the end of row 0 and the start of the last row of the original block */
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 2] = u1_pos_wd_0_tmp_u;
pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp_v;
pu1_src[(pu1_avail[3] ? (-src_strd) : 0)] = u1_pos_0_ht_tmp_u;
pu1_src[(pu1_avail[3] ? (-src_strd) : 0) + 1] = u1_pos_0_ht_tmp_v;
}
/* Restore the full block height before copying out the staged left pairs */
if(0 == pu1_avail[2])
ht++;
if(0 == pu1_avail[3])
ht++;
/* Publish the staged unfiltered top-left, left and top values */
pu1_src_top_left[0] = au1_src_top_left_tmp[0];
pu1_src_top_left[1] = au1_src_top_left_tmp[1];
for(row = 0; row < 2 * ht; row++)
{
pu1_src_left[row] = au1_src_left_tmp[row];
}
for(col = 0; col < wd; col++)
{
pu1_src_top[col] = au1_src_top_tmp[col];
}
}