/****************************************************************************** * * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ /** ******************************************************************************* * @file * ihevc_deblk.c * * @brief * Contains definition for the ctb level deblk function * * @author * Srinivas T * * @par List of Functions: * - ihevc_deblk() * * @remarks * None * ******************************************************************************* */ #include <stdio.h> #include <stddef.h> #include <stdlib.h> #include <string.h> #include <assert.h> #include "ihevc_typedefs.h" #include "iv.h" #include "ivd.h" #include "ihevcd_cxa.h" #include "ithread.h" #include "ihevc_defs.h" #include "ihevc_debug.h" #include "ihevc_defs.h" #include "ihevc_structs.h" #include "ihevc_macros.h" #include "ihevc_platform_macros.h" #include "ihevc_cabac_tables.h" #include "ihevc_error.h" #include "ihevc_common_tables.h" #include "ihevcd_trace.h" #include "ihevcd_defs.h" #include "ihevcd_function_selector.h" #include "ihevcd_structs.h" #include "ihevcd_error.h" #include "ihevcd_nal.h" #include "ihevcd_bitstream.h" #include "ihevcd_job_queue.h" #include "ihevcd_utils.h" #include "ihevcd_debug.h" #include "ihevc_deblk.h" #include "ihevc_deblk_tables.h" #include "ihevcd_profile.h" /** ******************************************************************************* * * @brief * Deblock CTB level function. * * @par Description: * For a given CTB, deblocking on both vertical and * horizontal edges is done. Both the luma and chroma * blocks are processed * * @param[in] ps_deblk * Pointer to the deblock context * * @returns * * @remarks * None * ******************************************************************************* */ void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk, WORD32 i4_is_last_ctb_x, WORD32 i4_is_last_ctb_y) { WORD32 ctb_size; WORD32 log2_ctb_size; UWORD32 u4_bs; WORD32 bs_tz; /*Leading zeros in boundary strength*/ WORD32 qp_p, qp_q; WORD32 filter_p, filter_q; UWORD8 *pu1_src; WORD32 qp_strd; UWORD32 *pu4_vert_bs, *pu4_horz_bs; UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs; WORD32 bs_strd; WORD32 src_strd; UWORD8 *pu1_qp; UWORD16 *pu2_ctb_no_loop_filter_flag; UWORD16 au2_ctb_no_loop_filter_flag[9]; WORD32 col, row; /* Flag to indicate if QP is constant in CTB * 0 - top_left, 1 - top, 2 - left, 3 - current */ UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 }; WORD32 ctb_indx; WORD32 chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu; sps_t *ps_sps; pps_t *ps_pps; codec_t *ps_codec; slice_header_t *ps_slice_hdr; PROFILE_DISABLE_DEBLK(); ps_sps = ps_deblk->ps_sps; ps_pps = ps_deblk->ps_pps; ps_codec = ps_deblk->ps_codec; ps_slice_hdr = ps_deblk->ps_slice_hdr; log2_ctb_size = ps_sps->i1_log2_ctb_size; ctb_size = (1 << ps_sps->i1_log2_ctb_size); /* strides are in units of number of bytes */ /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */ bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7); pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs + (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) + ps_deblk->i4_ctb_y * bs_strd); pu4_ctb_vert_bs = pu4_vert_bs; pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs + (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) + ps_deblk->i4_ctb_y * bs_strd); pu4_ctb_horz_bs = pu4_horz_bs; qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3); pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3)); pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag; ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y; if(i4_is_last_ctb_y) { pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd); pu4_ctb_vert_bs = pu4_vert_bs; /* ctb_size/8 is the number of edges per CTB * ctb_size/4 is the number of BS values needed per edge * divided by 8 for the number of bytes * 2 is the number of bits needed for each BS value */ memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7)); pu1_qp += (qp_strd << (log2_ctb_size - 3)); pu2_ctb_no_loop_filter_flag += (ctb_size >> 3); ctb_indx += ps_sps->i2_pic_wd_in_ctb; } if(i4_is_last_ctb_x) { pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7))); pu4_ctb_horz_bs = pu4_horz_bs; memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7)); pu1_qp += (ctb_size >> 3); for(row = 0; row < (ctb_size >> 3) + 1; row++) au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3); pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag; ctb_indx += 1; } u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7)); if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x) { u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7)); } if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y)) { u4_qp_const_in_ctb[0] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] & (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7)); } if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y) { u4_qp_const_in_ctb[1] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] & (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7)); } src_strd = ps_codec->i4_strd; /* Luma Vertical Edge */ if(0 == i4_is_last_ctb_x) { /* Top CTB's slice header */ slice_header_t *ps_slice_hdr_top; { WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; if(i4_is_last_ctb_y) cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb; ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb]; } pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size)); pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0; /** Deblocking is done on a shifted CTB - * Vertical edge processing is done by shifting the CTB up by four pixels */ pu1_src -= 4 * src_strd; for(col = 0; col < ctb_size / 8; col++) { WORD32 shift = 0; /* downshift vert_bs by ctb_size/2 for each column * shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1); * which will reduce to the following assuming ctb size is one of 16, 32 and 64 * and deblocking is done on 8x8 grid */ if(6 != log2_ctb_size) shift = (col & 1) << (log2_ctb_size - 1); /* BS for the column - Last row is excluded and the top row is included*/ u4_bs = (pu4_vert_bs[0] >> shift) << 2; if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y) { /* Picking the last BS of the previous CTB corresponding to the same column */ UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd); UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2); u4_bs |= u4_top_bs & 3; } for(row = 0; row < ctb_size / 4;) { WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2; WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; /* Trailing zeros are computed and the corresponding rows are not processed */ bs_tz = CTZ(u4_bs) >> 1; if(0 != bs_tz) { u4_bs = u4_bs >> (bs_tz << 1); if((row + bs_tz) >= (ctb_size / 4)) pu1_src += 4 * (ctb_size / 4 - row) * src_strd; else pu1_src += 4 * bs_tz * src_strd; row += bs_tz; continue; } if(0 == row) { i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2; i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2; if(0 == col) { qp_p = u4_qp_const_in_ctb[0] ? pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] : pu1_qp[-qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[col - 1 - qp_strd]; } qp_q = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[col - qp_strd]; } else { if(0 == col) { qp_p = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[((row - 1) >> 1) * qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[((row - 1) >> 1) * qp_strd + col - 1]; } qp_q = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[((row - 1) >> 1) * qp_strd + col]; } filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1; filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2; /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */ filter_p = !filter_p; filter_q = !filter_q; if(filter_p || filter_q) { DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, ps_slice_hdr->i1_beta_offset_div2, ps_slice_hdr->i1_tc_offset_div2, filter_p, filter_q); ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, i1_beta_offset_div2, i1_tc_offset_div2, filter_p, filter_q); } pu1_src += 4 * src_strd; u4_bs = u4_bs >> 2; row++; } if((64 == ctb_size) || ((32 == ctb_size) && (col & 1))) { pu4_vert_bs++; } pu1_src -= (src_strd << log2_ctb_size); pu1_src += 8; } pu4_vert_bs = pu4_ctb_vert_bs; } /* Luma Horizontal Edge */ if(0 == i4_is_last_ctb_y) { /* Left CTB's slice header */ slice_header_t *ps_slice_hdr_left; { WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; if(i4_is_last_ctb_x) cur_ctb_indx += 1; ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1]; } pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size); pu1_src += i4_is_last_ctb_x ? ctb_size : 0; /** Deblocking is done on a shifted CTB - * Horizontal edge processing is done by shifting the CTB left by four pixels */ pu1_src -= 4; for(row = 0; row < ctb_size / 8; row++) { WORD32 shift = 0; /* downshift vert_bs by ctb_size/2 for each column * shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2; * which will reduce to the following assuming ctb size is one of 16, 32 and 64 * and deblocking is done on 8x8 grid */ if(6 != log2_ctb_size) shift = (row & 1) << (log2_ctb_size - 1); /* BS for the row - Last column is excluded and the left column is included*/ u4_bs = (pu4_horz_bs[0] >> shift) << 2; if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x) { /** Picking the last BS of the previous CTB corresponding to the same row * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2); */ UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7))); UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2); u4_bs |= u4_left_bs & 3; } for(col = 0; col < ctb_size / 4;) { WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2; WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; bs_tz = CTZ(u4_bs) >> 1; if(0 != bs_tz) { u4_bs = u4_bs >> (bs_tz << 1); if((col + bs_tz) >= (ctb_size / 4)) pu1_src += 4 * (ctb_size / 4 - col); else pu1_src += 4 * bs_tz; col += bs_tz; continue; } if(0 == col) { i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2; i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2; if(0 == row) { qp_p = u4_qp_const_in_ctb[0] ? pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] : pu1_qp[-qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[(row - 1) * qp_strd - 1]; } qp_q = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[row * qp_strd - 1]; } else { if(0 == row) { qp_p = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[((col - 1) >> 1) - qp_strd]; } else { qp_p = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd]; } qp_q = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[((col - 1) >> 1) + row * qp_strd]; } filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1; filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1; /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */ filter_p = !filter_p; filter_q = !filter_q; if(filter_p || filter_q) { DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, ps_slice_hdr->i1_beta_offset_div2, ps_slice_hdr->i1_tc_offset_div2, filter_p, filter_q); ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, i1_beta_offset_div2, i1_tc_offset_div2, filter_p, filter_q); } pu1_src += 4; u4_bs = u4_bs >> 2; col++; } if((64 == ctb_size) || ((32 == ctb_size) && (row & 1))) { pu4_horz_bs++; } pu1_src -= ctb_size; pu1_src += (src_strd << 3); } pu4_horz_bs = pu4_ctb_horz_bs; } /* Chroma Veritcal Edge */ if(0 == i4_is_last_ctb_x) { /* Top CTB's slice header */ slice_header_t *ps_slice_hdr_top; { WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; if(i4_is_last_ctb_y) cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb; ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb]; } pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size); pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0; /** Deblocking is done on a shifted CTB - * Vertical edge processing is done by shifting the CTB up by four pixels */ pu1_src -= 4 * src_strd; for(col = 0; col < ctb_size / 16; col++) { /* BS for the column - Last row is excluded and the top row is included*/ u4_bs = pu4_vert_bs[0] << 2; if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y) { /* Picking the last BS of the previous CTB corresponding to the same column */ UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd); UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2); u4_bs |= u4_top_bs & 3; } /* Every alternate boundary strength value is used for chroma */ u4_bs &= 0x22222222; for(row = 0; row < ctb_size / 8;) { WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; bs_tz = CTZ(u4_bs) >> 2; if(0 != bs_tz) { if((row + bs_tz) >= (ctb_size / 8)) pu1_src += 4 * (ctb_size / 8 - row) * src_strd; else pu1_src += 4 * bs_tz * src_strd; row += bs_tz; u4_bs = u4_bs >> (bs_tz << 2); continue; } if(0 == row) { i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2; if(0 == col) { qp_p = u4_qp_const_in_ctb[0] ? pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] : pu1_qp[-qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[2 * col - 1 - qp_strd]; } qp_q = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[2 * col - qp_strd]; } else { if(0 == col) { qp_p = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[(row - 1) * qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[(row - 1) * qp_strd + 2 * col - 1]; } qp_q = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[(row - 1) * qp_strd + 2 * col]; } filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1; filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2; /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */ filter_p = !filter_p; filter_q = !filter_q; if(filter_p || filter_q) { ASSERT(1 == ((u4_bs & 3) >> 1)); DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, ps_pps->i1_pic_cb_qp_offset, ps_pps->i1_pic_cr_qp_offset, ps_slice_hdr->i1_tc_offset_div2, filter_p, filter_q); if(chroma_yuv420sp_vu) { ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src, src_strd, qp_q, qp_p, ps_pps->i1_pic_cr_qp_offset, ps_pps->i1_pic_cb_qp_offset, i1_tc_offset_div2, filter_q, filter_p); } else { ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src, src_strd, qp_p, qp_q, ps_pps->i1_pic_cb_qp_offset, ps_pps->i1_pic_cr_qp_offset, i1_tc_offset_div2, filter_p, filter_q); } } pu1_src += 4 * src_strd; u4_bs = u4_bs >> 4; row++; } pu4_vert_bs += (64 == ctb_size) ? 2 : 1; pu1_src -= ((src_strd / 2) << log2_ctb_size); pu1_src += 16; } } /* Chroma Horizontal Edge */ if(0 == i4_is_last_ctb_y) { /* Left CTB's slice header */ slice_header_t *ps_slice_hdr_left; { WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; if(i4_is_last_ctb_x) cur_ctb_indx += 1; ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1]; } pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size); pu1_src += i4_is_last_ctb_x ? ctb_size : 0; /** Deblocking is done on a shifted CTB - * Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */ pu1_src -= 8; for(row = 0; row < ctb_size / 16; row++) { /* BS for the row - Last column is excluded and the left column is included*/ u4_bs = pu4_horz_bs[0] << 2; if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x) { /** Picking the last BS of the previous CTB corresponding to the same row * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2); */ UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7))); UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2); u4_bs |= u4_left_bs & 3; } /* Every alternate boundary strength value is used for chroma */ u4_bs &= 0x22222222; for(col = 0; col < ctb_size / 8;) { WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; bs_tz = CTZ(u4_bs) >> 2; if(0 != bs_tz) { u4_bs = u4_bs >> (bs_tz << 2); if((col + bs_tz) >= (ctb_size / 8)) pu1_src += 8 * (ctb_size / 8 - col); else pu1_src += 8 * bs_tz; col += bs_tz; continue; } if(0 == col) { i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2; if(0 == row) { qp_p = u4_qp_const_in_ctb[0] ? pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] : pu1_qp[-qp_strd - 1]; } else { qp_p = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[(2 * row - 1) * qp_strd - 1]; } qp_q = u4_qp_const_in_ctb[2] ? pu1_qp[-ctb_size / 8] : pu1_qp[(2 * row) * qp_strd - 1]; } else { if(0 == row) { qp_p = u4_qp_const_in_ctb[1] ? pu1_qp[-ctb_size / 8 * qp_strd] : pu1_qp[col - 1 - qp_strd]; } else { qp_p = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[(col - 1) + (2 * row - 1) * qp_strd]; } qp_q = u4_qp_const_in_ctb[3] ? pu1_qp[0] : pu1_qp[(col - 1) + 2 * row * qp_strd]; } filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1; filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1; /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */ filter_p = !filter_p; filter_q = !filter_q; if(filter_p || filter_q) { ASSERT(1 == ((u4_bs & 3) >> 1)); DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd, u4_bs & 3, qp_p, qp_q, ps_pps->i1_pic_cb_qp_offset, ps_pps->i1_pic_cr_qp_offset, ps_slice_hdr->i1_tc_offset_div2, filter_p, filter_q); if(chroma_yuv420sp_vu) { ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src, src_strd, qp_q, qp_p, ps_pps->i1_pic_cr_qp_offset, ps_pps->i1_pic_cb_qp_offset, i1_tc_offset_div2, filter_q, filter_p); } else { ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src, src_strd, qp_p, qp_q, ps_pps->i1_pic_cb_qp_offset, ps_pps->i1_pic_cr_qp_offset, i1_tc_offset_div2, filter_p, filter_q); } } pu1_src += 8; u4_bs = u4_bs >> 4; col++; } pu4_horz_bs += (64 == ctb_size) ? 2 : 1; pu1_src -= ctb_size; pu1_src += 8 * src_strd; } } }